rubabel 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +13 -9
- data/Rakefile +10 -2
- data/VERSION +1 -1
- data/bin/rubabel +229 -0
- data/lib/rubabel.rb +46 -14
- data/lib/rubabel/atom.rb +78 -0
- data/lib/rubabel/bond.rb +18 -0
- data/lib/rubabel/molecule.rb +187 -59
- data/lib/rubabel/molecule/fragmentable.rb +209 -0
- data/lib/rubabel/molecule_data.rb +89 -0
- data/reference/arity_method_list.txt +1564 -1294
- data/rubabel.gemspec +100 -0
- data/spec/chemistry_toolkit_rosetta/README.txt +1 -0
- data/spec/chemistry_toolkit_rosetta/benzodiazepine.sdf.gz +0 -0
- data/spec/chemistry_toolkit_rosetta/benzodiazepine.smi.gz +0 -0
- data/spec/chemistry_toolkit_rosetta/chemistry_toolkit_rosetta_spec.rb +225 -0
- data/spec/chemistry_toolkit_rosetta/key/benzodiazepine_heavy_atom_counts.output.10.txt +10 -0
- data/spec/chemistry_toolkit_rosetta/key/benzodiazepine_ring_counts.output.10.txt +10 -0
- data/spec/chemistry_toolkit_rosetta/key/rule5.10.sdf +1924 -0
- data/spec/rubabel/atom_spec.rb +35 -0
- data/spec/rubabel/molecule/fragmentable_spec.rb +51 -0
- data/spec/rubabel/molecule_data_spec.rb +76 -0
- data/spec/rubabel/molecule_spec.rb +131 -1
- data/spec/rubabel_spec.rb +33 -3
- data/spec/spec_helper.rb +24 -0
- data/spec/testfiles/Samples.sdf.gz +0 -0
- metadata +65 -4
- data/lib/rubabel/fragmentation.rb +0 -23
data/spec/rubabel/atom_spec.rb
CHANGED
@@ -4,6 +4,21 @@ require 'rubabel/molecule'
|
|
4
4
|
require 'rubabel/atom'
|
5
5
|
|
6
6
|
describe Rubabel::Atom do
|
7
|
+
|
8
|
+
it 'can be created given an element symbol' do
|
9
|
+
hydrogen = Rubabel::Atom[:h]
|
10
|
+
hydrogen.el.should == :h
|
11
|
+
hydrogen.id.should == 0
|
12
|
+
|
13
|
+
carbon = Rubabel::Atom[:c]
|
14
|
+
carbon.el.should == :c
|
15
|
+
carbon.id.should == 0
|
16
|
+
|
17
|
+
chlorine = Rubabel::Atom[:cl, 3]
|
18
|
+
chlorine.el.should == :cl
|
19
|
+
chlorine.id.should == 3
|
20
|
+
end
|
21
|
+
|
7
22
|
describe 'working with a complex molecule' do
|
8
23
|
|
9
24
|
before do
|
@@ -24,6 +39,15 @@ describe Rubabel::Atom do
|
|
24
39
|
end
|
25
40
|
end
|
26
41
|
|
42
|
+
|
43
|
+
it '#mol retrieves the parent molecule' do
|
44
|
+
@atom.mol.should == @mol
|
45
|
+
|
46
|
+
# no parent molecule
|
47
|
+
h = Rubabel::Atom[:h]
|
48
|
+
h.mol.should be_nil
|
49
|
+
end
|
50
|
+
|
27
51
|
it 'can get the bonds' do
|
28
52
|
@atom.each_bond do |bond|
|
29
53
|
bond.should be_a(Rubabel::Bond)
|
@@ -31,6 +55,9 @@ describe Rubabel::Atom do
|
|
31
55
|
@atom.bonds.size.should == 4
|
32
56
|
end
|
33
57
|
|
58
|
+
it 'can add a bond' do
|
59
|
+
end
|
60
|
+
|
34
61
|
it 'can get the neighboring atoms' do
|
35
62
|
@atom.id.should == 0
|
36
63
|
@atom.atomic_num.should == 6
|
@@ -42,6 +69,14 @@ describe Rubabel::Atom do
|
|
42
69
|
@atom.atoms.size.should == 4
|
43
70
|
end
|
44
71
|
|
72
|
+
it '#get_bond can retrieve a particular bond based on the atom' do
|
73
|
+
other = @mol.atoms[3]
|
74
|
+
bond = @atom.get_bond(other)
|
75
|
+
bond.atoms.map(&:id).should == [0,3]
|
76
|
+
other = @mol.atoms[15] # these are not connected
|
77
|
+
@atom.get_bond(other).should be_nil
|
78
|
+
end
|
79
|
+
|
45
80
|
it '#coords gets the coordinates' do
|
46
81
|
@atom.coords
|
47
82
|
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'rubabel'
|
4
|
+
|
5
|
+
describe Rubabel::Molecule::Fragmentable do
|
6
|
+
describe 'the :co rule' do
|
7
|
+
|
8
|
+
describe 'water loss' do
|
9
|
+
|
10
|
+
it ':h2oloss' do
|
11
|
+
mol = Rubabel["NCCC(O)CC"]
|
12
|
+
fragments = mol.fragment( rules: [:h2oloss] )
|
13
|
+
fragments.flatten(1).map(&:csmiles).sort.should == ["CC=CCC[NH3+]", "CCC=CC[NH3+]", "O", "O"]
|
14
|
+
end
|
15
|
+
|
16
|
+
it ':h2oloss [does not allow bad chemistry]' do
|
17
|
+
# lone pair and double bond resonance ?
|
18
|
+
mol = Rubabel["NCC(O)CC"]
|
19
|
+
fragments = mol.fragment( rules: [:h2oloss] )
|
20
|
+
fragments.flatten(1).map(&:csmiles).sort.should == ["CC=CC[NH3+]", "O"]
|
21
|
+
|
22
|
+
mol = Rubabel["NC(O)CC"]
|
23
|
+
fragments = mol.fragment( rules: [:h2oloss] )
|
24
|
+
fragments.flatten(1).map(&:csmiles).sort.should == []
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe 'backbone cleavage' do
|
29
|
+
|
30
|
+
it 'cleaves beside alcohols yielding aldehydes' do
|
31
|
+
mol = Rubabel["NCCC(O)CC"]
|
32
|
+
mol.correct_for_ph!
|
33
|
+
total_mass = mol.add_h!.mass
|
34
|
+
|
35
|
+
pieces = mol.fragment(rules: [:co])
|
36
|
+
pieces.size.should == 2
|
37
|
+
pieces.map(&:size).should == [2,2]
|
38
|
+
pieces.flatten(1).map(&:csmiles).should == ["CC[NH3+]", "CCC=O", "C(C=O)C[NH3+]", "CC"]
|
39
|
+
pieces.each do |pair|
|
40
|
+
pair.map(&:mass).reduce(:+).should == total_mass
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
xit 'does not cleave esters' do
|
45
|
+
mol = Rubabel["NCCC(=O)OC"]
|
46
|
+
pieces = mol.fragment( rules: [:co] )
|
47
|
+
pieces.should be_empty
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'rubabel/molecule_data'
|
4
|
+
require 'rubabel'
|
5
|
+
|
6
|
+
describe Rubabel::MoleculeData do
|
7
|
+
|
8
|
+
before(:each) do
|
9
|
+
sdf = TESTFILES + "/Samples.sdf"
|
10
|
+
@mol = Rubabel::Molecule.from_file( sdf )
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'is initialized with an OpenBabel molecule' do
|
14
|
+
md = Rubabel::MoleculeData.new(@mol.ob)
|
15
|
+
md.should be_a(Rubabel::MoleculeData)
|
16
|
+
end
|
17
|
+
|
18
|
+
describe 'hash-like behavior' do
|
19
|
+
subject { Rubabel::MoleculeData.new(@mol.ob) }
|
20
|
+
|
21
|
+
it '#each' do
|
22
|
+
enum = subject.each
|
23
|
+
enum.should be_a(Enumerator)
|
24
|
+
pair = enum.next
|
25
|
+
pair.should == ["NAME", "2Ferrocene"]
|
26
|
+
end
|
27
|
+
|
28
|
+
it '#to_a' do
|
29
|
+
subject.to_a.size.should == 2
|
30
|
+
end
|
31
|
+
|
32
|
+
it '#size & #length' do
|
33
|
+
subject.size.should == 2
|
34
|
+
subject.length.should == 2
|
35
|
+
end
|
36
|
+
|
37
|
+
it '#[]' do
|
38
|
+
subject['NAME'].should == "2Ferrocene"
|
39
|
+
end
|
40
|
+
|
41
|
+
it '#[]=' do
|
42
|
+
# modify one:
|
43
|
+
subject['NAME'] = 'PEPPER'
|
44
|
+
subject.size.should == 2
|
45
|
+
# create a new one:
|
46
|
+
subject['jtp_special'] = 'sauce'
|
47
|
+
subject.size.should == 3
|
48
|
+
string = subject.obmol.upcast.write(:sdf)
|
49
|
+
string.should =~ /jtp_special/
|
50
|
+
string.should =~ /sauce/
|
51
|
+
string.should =~ /PEPPER/
|
52
|
+
end
|
53
|
+
|
54
|
+
it '#key?' do
|
55
|
+
subject.key?('NAME').should be_true
|
56
|
+
subject.key?('bananas').should be_false
|
57
|
+
end
|
58
|
+
|
59
|
+
it '#keys' do
|
60
|
+
subject.keys.should == ["NAME", "OpenBabel Symmetry Classes"]
|
61
|
+
end
|
62
|
+
|
63
|
+
it '#values' do
|
64
|
+
subject.values.should == ["2Ferrocene", "8 4 9 4 4 4 4 4 4 4 4 4 5 3 6 2 7 1 5 3 6 2 5 3 7 1 5 3 6 2 6 2"]
|
65
|
+
end
|
66
|
+
|
67
|
+
it '#delete' do
|
68
|
+
key = "OpenBabel Symmetry Classes"
|
69
|
+
subject.delete(key).should =~ /8 4 9/
|
70
|
+
subject.key?(key).should be_false
|
71
|
+
subject.size.should == 1
|
72
|
+
subject.delete("nonsense").should be_nil
|
73
|
+
subject.delete("nonsense") { 'wow' }.should == 'wow'
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -3,6 +3,13 @@ require 'spec_helper'
|
|
3
3
|
require 'rubabel/molecule'
|
4
4
|
|
5
5
|
describe Rubabel::Molecule do
|
6
|
+
describe 'creation' do
|
7
|
+
it 'can be made with Rubabel[]' do
|
8
|
+
mol = Rubabel["CC(O)O"]
|
9
|
+
mol.csmiles.should == "CC(O)O"
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
6
13
|
before(:each) do
|
7
14
|
@mol = Rubabel::Molecule.from_file( TESTFILES + '/cholesterol.sdf' )
|
8
15
|
end
|
@@ -18,6 +25,14 @@ describe Rubabel::Molecule do
|
|
18
25
|
end
|
19
26
|
end
|
20
27
|
|
28
|
+
it '#dup creates an entirely new molecule based on the first' do
|
29
|
+
another = @mol.dup
|
30
|
+
# this is a deep copy all the way. Even the atoms are duplicated so that
|
31
|
+
# they can be modified in one and do not affect the other at all.
|
32
|
+
@mol.atoms.first.charge = 1
|
33
|
+
@mol.charge.should_not == another.charge
|
34
|
+
end
|
35
|
+
|
21
36
|
it '#each iterates through each atom in id order' do
|
22
37
|
cnt = 0
|
23
38
|
@mol.each do |atom|
|
@@ -29,6 +44,14 @@ describe Rubabel::Molecule do
|
|
29
44
|
@mol.atoms.size.should == 74
|
30
45
|
end
|
31
46
|
|
47
|
+
it '#hydrogens_added?' do
|
48
|
+
@mol.hydrogens_added?.should be_false
|
49
|
+
@mol.atoms.size.should == 33
|
50
|
+
@mol.add_h!
|
51
|
+
@mol.atoms.size.should == 74
|
52
|
+
@mol.hydrogens_added?.should be_true
|
53
|
+
end
|
54
|
+
|
32
55
|
it 'calculates #ob_sssr (smallest set of smallest rings)' do
|
33
56
|
ar = @mol.ob_sssr
|
34
57
|
ar.should be_an(Array)
|
@@ -36,6 +59,106 @@ describe Rubabel::Molecule do
|
|
36
59
|
ar.first.should be_a(OpenBabel::OBRing)
|
37
60
|
end
|
38
61
|
|
62
|
+
describe 'masses' do
|
63
|
+
subject { Rubabel::Molecule.from_string("C(=O)COC(=O)C[NH3+]") }
|
64
|
+
it '#mol_wt (or #avg_mass)' do
|
65
|
+
subject.mol_wt.should be_within(0.000001).of(118.11121999999999)
|
66
|
+
end
|
67
|
+
|
68
|
+
it '#exact_mass' do
|
69
|
+
subject.exact_mass.should be_within(0.00000001).of(118.05041812003999)
|
70
|
+
end
|
71
|
+
|
72
|
+
it '#mass is the exact mass adjusted for electron gain/loss' do
|
73
|
+
subject.mass.should be_within(0.00000001).of(118.04986952003999)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
describe 'getting other descriptors' do
|
78
|
+
# can't figure this out yet
|
79
|
+
end
|
80
|
+
|
81
|
+
describe 'pH' do
|
82
|
+
|
83
|
+
subject { Rubabel::Molecule.from_string("NCC(=O)OCC(=O)O") }
|
84
|
+
|
85
|
+
it '#correct_for_ph! neutral' do
|
86
|
+
subject.correct_for_ph!.to_s.should == '[O-]C(=O)COC(=O)C[NH3+]'
|
87
|
+
end
|
88
|
+
|
89
|
+
it '#correct_for_ph!(1.4) [low]' do
|
90
|
+
subject.correct_for_ph!(1.4).to_s.should == 'OC(=O)COC(=O)C[NH3+]'
|
91
|
+
end
|
92
|
+
|
93
|
+
it '#correct_for_ph!(11.0) [high]' do
|
94
|
+
subject.correct_for_ph!(11.0).to_s.should == '[O-]C(=O)COC(=O)CN'
|
95
|
+
end
|
96
|
+
|
97
|
+
it '#correct_for_ph!(nil) [gives neutral molecule]' do
|
98
|
+
subject.correct_for_ph!.to_s.should == '[O-]C(=O)COC(=O)C[NH3+]'
|
99
|
+
subject.correct_for_ph!(nil).to_s.should == "NCC(=O)OCC(=O)O"
|
100
|
+
end
|
101
|
+
|
102
|
+
it '#neutral! [can be set neutral again]' do
|
103
|
+
subject.correct_for_ph!
|
104
|
+
subject.to_s.should == '[O-]C(=O)COC(=O)C[NH3+]'
|
105
|
+
subject.h_added?.should == false
|
106
|
+
subject.neutral!.to_s.should == "NCC(=O)OCC(=O)O"
|
107
|
+
subject.h_added?.should == false
|
108
|
+
end
|
109
|
+
|
110
|
+
it '#neutral! [preserves hydrogens added state]' do
|
111
|
+
subject.correct_for_ph!
|
112
|
+
subject.add_h!
|
113
|
+
subject.to_s.should == '[O-]C(=O)COC(=O)C[NH3+]'
|
114
|
+
subject.h_added?.should == true
|
115
|
+
subject.neutral!.to_s.should == "NCC(=O)OCC(=O)O"
|
116
|
+
subject.h_added?.should == true
|
117
|
+
end
|
118
|
+
|
119
|
+
it '#add_h!(11.0) [can correct for ph if given a ph]' do
|
120
|
+
subject.add_h!(11.0).to_s.should == '[O-]C(=O)COC(=O)CN'
|
121
|
+
end
|
122
|
+
|
123
|
+
end
|
124
|
+
|
125
|
+
it 'can calculate a molecular fingerprint (for similarity calcs)' do
|
126
|
+
# this just returns the std::vector object at the moment
|
127
|
+
fp = @mol.ob_fingerprint
|
128
|
+
# this is an array of unsigned ints that really need to be coerced into
|
129
|
+
# bits for further usefulness.
|
130
|
+
fp.to_a.should == [0, 604110848, 16777216, 0, 2147483648, 4210688, 0, 2097152, 16, 16809984, 0, 0, 1, 37756928, 32, 0, 524296, 1028, 8388612, 131072, 1073741824, 512, 1048584, 16384, 1026, 0, 0, 524288, 0, 2048, 16777248, 0]
|
131
|
+
lambda { @mol.ob_fingerprint("WACKY") }.should raise_error
|
132
|
+
end
|
133
|
+
|
134
|
+
it 'can calculate the tanimoto similarity' do
|
135
|
+
# oo way to call:
|
136
|
+
@mol.tanimoto(@mol).should == 1.0
|
137
|
+
mol2 = Rubabel::Molecule.from_string("CCC(O)OCCC")
|
138
|
+
# class way to call this
|
139
|
+
t = Rubabel::Molecule.tanimoto(@mol, mol2)
|
140
|
+
# actual: 0.11363636363636363
|
141
|
+
t.should be < 0.2
|
142
|
+
t.should be > 0.0
|
143
|
+
end
|
144
|
+
|
145
|
+
describe '3D' do
|
146
|
+
before(:each) do
|
147
|
+
# a hexopyranose sugar (C6H12O6), not cholesterol
|
148
|
+
@mol = Rubabel::Molecule.from_string("OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O")
|
149
|
+
end
|
150
|
+
|
151
|
+
it 'can be turned into a 3D molecule' do
|
152
|
+
# this is only as good as the Builder is good. For instance, it fails
|
153
|
+
# to get all the stereo centers of cholesterol (but it does warn on
|
154
|
+
# this, although I don't know how to capture the warnings (can't get
|
155
|
+
# with stdout or stderr??))
|
156
|
+
@mol.ob.has_3d.should be_false
|
157
|
+
@mol.make_3d!
|
158
|
+
@mol.ob.has_3d.should be_true
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
39
162
|
describe 'breaking a molecule' do
|
40
163
|
before(:each) do
|
41
164
|
@mol = Rubabel::Molecule.from_string("NC(=O)CO")
|
@@ -61,13 +184,20 @@ describe Rubabel::Molecule do
|
|
61
184
|
@mol.each_bond.map.to_a.size.should == 9
|
62
185
|
end
|
63
186
|
|
64
|
-
it 'can be split into multiple molecules' do
|
187
|
+
it 'can be split into multiple molecules [unaffecting self]' do
|
188
|
+
num_bonds_before = @mol.num_bonds
|
189
|
+
num_atoms_before = @mol.num_atoms
|
190
|
+
|
65
191
|
reply = @mol.split(@mol.bonds.first, @mol.bonds.last)
|
192
|
+
|
66
193
|
reply.should be_a(Array)
|
67
194
|
reply.size.should == 3
|
195
|
+
@mol.num_bonds.should == num_bonds_before
|
196
|
+
@mol.num_atoms.should == num_atoms_before
|
68
197
|
csmiles = reply.map(&:csmiles)
|
69
198
|
csmiles.sort.should == %w(N CC=O O).sort
|
70
199
|
end
|
200
|
+
|
71
201
|
end
|
72
202
|
|
73
203
|
describe 'matching patterns (SMARTS)' do
|
data/spec/rubabel_spec.rb
CHANGED
@@ -48,19 +48,49 @@ describe Rubabel do
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
-
describe '::
|
51
|
+
describe '::molecule_from_file and ::molecule_from_string' do
|
52
52
|
before(:each) do
|
53
53
|
@samples = TESTFILES + "/Samples.sdf"
|
54
54
|
end
|
55
55
|
|
56
56
|
it 'return a single molecule (the first one in the file)' do
|
57
|
-
mol_f = Rubabel.
|
58
|
-
mol_s = Rubabel.
|
57
|
+
mol_f = Rubabel.molecule_from_file(@samples)
|
58
|
+
mol_s = Rubabel.molecule_from_string(IO.read(@samples), :sdf)
|
59
59
|
[mol_f, mol_s].each {|mol| mol.should be_a(Rubabel::Molecule) }
|
60
60
|
mol_f.should == mol_s
|
61
61
|
end
|
62
62
|
end
|
63
63
|
|
64
|
+
describe 'can deal with .gz files properly' do
|
65
|
+
before(:each) do
|
66
|
+
@gz_file = TESTFILES + "/Samples.sdf.gz"
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'can get the file format' do
|
70
|
+
# non-existent file okay
|
71
|
+
Rubabel.format_from_ext("silly.sdf.gz").should == :sdf
|
72
|
+
Rubabel.format_from_ext(@gz_file).should == :sdf
|
73
|
+
Rubabel.foreach(@gz_file) do |mol|
|
74
|
+
mol.should be_a(Rubabel::Molecule)
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
end
|
79
|
+
|
80
|
+
describe 'format from extension' do
|
81
|
+
|
82
|
+
it 'determines format from extension' do
|
83
|
+
Rubabel.format_from_ext( TESTFILES + "/Samples.sdf" ).should == :sdf
|
84
|
+
Rubabel.format_from_ext( TESTFILES + "/Samples.non_existent" ).should be_nil
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'determines format from mime-type' do
|
88
|
+
Rubabel.format_from_mime( "chemical/x-mdl-sdfile" ).should == :sdf
|
89
|
+
Rubabel.format_from_mime( "chemical/wierdness" ).should be_nil
|
90
|
+
end
|
91
|
+
|
92
|
+
end
|
93
|
+
|
64
94
|
#describe 'an atom in it' do
|
65
95
|
#end
|
66
96
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'rspec'
|
2
|
+
require 'stringio'
|
2
3
|
|
3
4
|
# Requires supporting files with custom matchers and macros, etc,
|
4
5
|
# in ./support/ and its subdirectories.
|
@@ -10,3 +11,26 @@ RSpec.configure do |config|
|
|
10
11
|
end
|
11
12
|
|
12
13
|
TESTFILES = File.dirname(__FILE__) + "/testfiles"
|
14
|
+
|
15
|
+
module Kernel
|
16
|
+
# from: http://thinkingdigitally.com/archive/capturing-output-from-puts-in-ruby/
|
17
|
+
def capture_stdout
|
18
|
+
out = StringIO.new
|
19
|
+
$stdout = out
|
20
|
+
yield
|
21
|
+
return out.string
|
22
|
+
ensure
|
23
|
+
$stdout = STDOUT
|
24
|
+
end
|
25
|
+
|
26
|
+
def capture_stderr
|
27
|
+
out = StringIO.new
|
28
|
+
$stderr = out
|
29
|
+
yield
|
30
|
+
return out.string
|
31
|
+
ensure
|
32
|
+
$stderr = STDERR
|
33
|
+
end
|
34
|
+
|
35
|
+
|
36
|
+
end
|