rubabel 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +13 -9
- data/Rakefile +10 -2
- data/VERSION +1 -1
- data/bin/rubabel +229 -0
- data/lib/rubabel.rb +46 -14
- data/lib/rubabel/atom.rb +78 -0
- data/lib/rubabel/bond.rb +18 -0
- data/lib/rubabel/molecule.rb +187 -59
- data/lib/rubabel/molecule/fragmentable.rb +209 -0
- data/lib/rubabel/molecule_data.rb +89 -0
- data/reference/arity_method_list.txt +1564 -1294
- data/rubabel.gemspec +100 -0
- data/spec/chemistry_toolkit_rosetta/README.txt +1 -0
- data/spec/chemistry_toolkit_rosetta/benzodiazepine.sdf.gz +0 -0
- data/spec/chemistry_toolkit_rosetta/benzodiazepine.smi.gz +0 -0
- data/spec/chemistry_toolkit_rosetta/chemistry_toolkit_rosetta_spec.rb +225 -0
- data/spec/chemistry_toolkit_rosetta/key/benzodiazepine_heavy_atom_counts.output.10.txt +10 -0
- data/spec/chemistry_toolkit_rosetta/key/benzodiazepine_ring_counts.output.10.txt +10 -0
- data/spec/chemistry_toolkit_rosetta/key/rule5.10.sdf +1924 -0
- data/spec/rubabel/atom_spec.rb +35 -0
- data/spec/rubabel/molecule/fragmentable_spec.rb +51 -0
- data/spec/rubabel/molecule_data_spec.rb +76 -0
- data/spec/rubabel/molecule_spec.rb +131 -1
- data/spec/rubabel_spec.rb +33 -3
- data/spec/spec_helper.rb +24 -0
- data/spec/testfiles/Samples.sdf.gz +0 -0
- metadata +65 -4
- data/lib/rubabel/fragmentation.rb +0 -23
data/spec/rubabel/atom_spec.rb
CHANGED
@@ -4,6 +4,21 @@ require 'rubabel/molecule'
|
|
4
4
|
require 'rubabel/atom'
|
5
5
|
|
6
6
|
describe Rubabel::Atom do
|
7
|
+
|
8
|
+
it 'can be created given an element symbol' do
|
9
|
+
hydrogen = Rubabel::Atom[:h]
|
10
|
+
hydrogen.el.should == :h
|
11
|
+
hydrogen.id.should == 0
|
12
|
+
|
13
|
+
carbon = Rubabel::Atom[:c]
|
14
|
+
carbon.el.should == :c
|
15
|
+
carbon.id.should == 0
|
16
|
+
|
17
|
+
chlorine = Rubabel::Atom[:cl, 3]
|
18
|
+
chlorine.el.should == :cl
|
19
|
+
chlorine.id.should == 3
|
20
|
+
end
|
21
|
+
|
7
22
|
describe 'working with a complex molecule' do
|
8
23
|
|
9
24
|
before do
|
@@ -24,6 +39,15 @@ describe Rubabel::Atom do
|
|
24
39
|
end
|
25
40
|
end
|
26
41
|
|
42
|
+
|
43
|
+
it '#mol retrieves the parent molecule' do
|
44
|
+
@atom.mol.should == @mol
|
45
|
+
|
46
|
+
# no parent molecule
|
47
|
+
h = Rubabel::Atom[:h]
|
48
|
+
h.mol.should be_nil
|
49
|
+
end
|
50
|
+
|
27
51
|
it 'can get the bonds' do
|
28
52
|
@atom.each_bond do |bond|
|
29
53
|
bond.should be_a(Rubabel::Bond)
|
@@ -31,6 +55,9 @@ describe Rubabel::Atom do
|
|
31
55
|
@atom.bonds.size.should == 4
|
32
56
|
end
|
33
57
|
|
58
|
+
it 'can add a bond' do
|
59
|
+
end
|
60
|
+
|
34
61
|
it 'can get the neighboring atoms' do
|
35
62
|
@atom.id.should == 0
|
36
63
|
@atom.atomic_num.should == 6
|
@@ -42,6 +69,14 @@ describe Rubabel::Atom do
|
|
42
69
|
@atom.atoms.size.should == 4
|
43
70
|
end
|
44
71
|
|
72
|
+
it '#get_bond can retrieve a particular bond based on the atom' do
|
73
|
+
other = @mol.atoms[3]
|
74
|
+
bond = @atom.get_bond(other)
|
75
|
+
bond.atoms.map(&:id).should == [0,3]
|
76
|
+
other = @mol.atoms[15] # these are not connected
|
77
|
+
@atom.get_bond(other).should be_nil
|
78
|
+
end
|
79
|
+
|
45
80
|
it '#coords gets the coordinates' do
|
46
81
|
@atom.coords
|
47
82
|
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'rubabel'
|
4
|
+
|
5
|
+
describe Rubabel::Molecule::Fragmentable do
|
6
|
+
describe 'the :co rule' do
|
7
|
+
|
8
|
+
describe 'water loss' do
|
9
|
+
|
10
|
+
it ':h2oloss' do
|
11
|
+
mol = Rubabel["NCCC(O)CC"]
|
12
|
+
fragments = mol.fragment( rules: [:h2oloss] )
|
13
|
+
fragments.flatten(1).map(&:csmiles).sort.should == ["CC=CCC[NH3+]", "CCC=CC[NH3+]", "O", "O"]
|
14
|
+
end
|
15
|
+
|
16
|
+
it ':h2oloss [does not allow bad chemistry]' do
|
17
|
+
# lone pair and double bond resonance ?
|
18
|
+
mol = Rubabel["NCC(O)CC"]
|
19
|
+
fragments = mol.fragment( rules: [:h2oloss] )
|
20
|
+
fragments.flatten(1).map(&:csmiles).sort.should == ["CC=CC[NH3+]", "O"]
|
21
|
+
|
22
|
+
mol = Rubabel["NC(O)CC"]
|
23
|
+
fragments = mol.fragment( rules: [:h2oloss] )
|
24
|
+
fragments.flatten(1).map(&:csmiles).sort.should == []
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe 'backbone cleavage' do
|
29
|
+
|
30
|
+
it 'cleaves beside alcohols yielding aldehydes' do
|
31
|
+
mol = Rubabel["NCCC(O)CC"]
|
32
|
+
mol.correct_for_ph!
|
33
|
+
total_mass = mol.add_h!.mass
|
34
|
+
|
35
|
+
pieces = mol.fragment(rules: [:co])
|
36
|
+
pieces.size.should == 2
|
37
|
+
pieces.map(&:size).should == [2,2]
|
38
|
+
pieces.flatten(1).map(&:csmiles).should == ["CC[NH3+]", "CCC=O", "C(C=O)C[NH3+]", "CC"]
|
39
|
+
pieces.each do |pair|
|
40
|
+
pair.map(&:mass).reduce(:+).should == total_mass
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
xit 'does not cleave esters' do
|
45
|
+
mol = Rubabel["NCCC(=O)OC"]
|
46
|
+
pieces = mol.fragment( rules: [:co] )
|
47
|
+
pieces.should be_empty
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'rubabel/molecule_data'
|
4
|
+
require 'rubabel'
|
5
|
+
|
6
|
+
describe Rubabel::MoleculeData do
|
7
|
+
|
8
|
+
before(:each) do
|
9
|
+
sdf = TESTFILES + "/Samples.sdf"
|
10
|
+
@mol = Rubabel::Molecule.from_file( sdf )
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'is initialized with an OpenBabel molecule' do
|
14
|
+
md = Rubabel::MoleculeData.new(@mol.ob)
|
15
|
+
md.should be_a(Rubabel::MoleculeData)
|
16
|
+
end
|
17
|
+
|
18
|
+
describe 'hash-like behavior' do
|
19
|
+
subject { Rubabel::MoleculeData.new(@mol.ob) }
|
20
|
+
|
21
|
+
it '#each' do
|
22
|
+
enum = subject.each
|
23
|
+
enum.should be_a(Enumerator)
|
24
|
+
pair = enum.next
|
25
|
+
pair.should == ["NAME", "2Ferrocene"]
|
26
|
+
end
|
27
|
+
|
28
|
+
it '#to_a' do
|
29
|
+
subject.to_a.size.should == 2
|
30
|
+
end
|
31
|
+
|
32
|
+
it '#size & #length' do
|
33
|
+
subject.size.should == 2
|
34
|
+
subject.length.should == 2
|
35
|
+
end
|
36
|
+
|
37
|
+
it '#[]' do
|
38
|
+
subject['NAME'].should == "2Ferrocene"
|
39
|
+
end
|
40
|
+
|
41
|
+
it '#[]=' do
|
42
|
+
# modify one:
|
43
|
+
subject['NAME'] = 'PEPPER'
|
44
|
+
subject.size.should == 2
|
45
|
+
# create a new one:
|
46
|
+
subject['jtp_special'] = 'sauce'
|
47
|
+
subject.size.should == 3
|
48
|
+
string = subject.obmol.upcast.write(:sdf)
|
49
|
+
string.should =~ /jtp_special/
|
50
|
+
string.should =~ /sauce/
|
51
|
+
string.should =~ /PEPPER/
|
52
|
+
end
|
53
|
+
|
54
|
+
it '#key?' do
|
55
|
+
subject.key?('NAME').should be_true
|
56
|
+
subject.key?('bananas').should be_false
|
57
|
+
end
|
58
|
+
|
59
|
+
it '#keys' do
|
60
|
+
subject.keys.should == ["NAME", "OpenBabel Symmetry Classes"]
|
61
|
+
end
|
62
|
+
|
63
|
+
it '#values' do
|
64
|
+
subject.values.should == ["2Ferrocene", "8 4 9 4 4 4 4 4 4 4 4 4 5 3 6 2 7 1 5 3 6 2 5 3 7 1 5 3 6 2 6 2"]
|
65
|
+
end
|
66
|
+
|
67
|
+
it '#delete' do
|
68
|
+
key = "OpenBabel Symmetry Classes"
|
69
|
+
subject.delete(key).should =~ /8 4 9/
|
70
|
+
subject.key?(key).should be_false
|
71
|
+
subject.size.should == 1
|
72
|
+
subject.delete("nonsense").should be_nil
|
73
|
+
subject.delete("nonsense") { 'wow' }.should == 'wow'
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -3,6 +3,13 @@ require 'spec_helper'
|
|
3
3
|
require 'rubabel/molecule'
|
4
4
|
|
5
5
|
describe Rubabel::Molecule do
|
6
|
+
describe 'creation' do
|
7
|
+
it 'can be made with Rubabel[]' do
|
8
|
+
mol = Rubabel["CC(O)O"]
|
9
|
+
mol.csmiles.should == "CC(O)O"
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
6
13
|
before(:each) do
|
7
14
|
@mol = Rubabel::Molecule.from_file( TESTFILES + '/cholesterol.sdf' )
|
8
15
|
end
|
@@ -18,6 +25,14 @@ describe Rubabel::Molecule do
|
|
18
25
|
end
|
19
26
|
end
|
20
27
|
|
28
|
+
it '#dup creates an entirely new molecule based on the first' do
|
29
|
+
another = @mol.dup
|
30
|
+
# this is a deep copy all the way. Even the atoms are duplicated so that
|
31
|
+
# they can be modified in one and do not affect the other at all.
|
32
|
+
@mol.atoms.first.charge = 1
|
33
|
+
@mol.charge.should_not == another.charge
|
34
|
+
end
|
35
|
+
|
21
36
|
it '#each iterates through each atom in id order' do
|
22
37
|
cnt = 0
|
23
38
|
@mol.each do |atom|
|
@@ -29,6 +44,14 @@ describe Rubabel::Molecule do
|
|
29
44
|
@mol.atoms.size.should == 74
|
30
45
|
end
|
31
46
|
|
47
|
+
it '#hydrogens_added?' do
|
48
|
+
@mol.hydrogens_added?.should be_false
|
49
|
+
@mol.atoms.size.should == 33
|
50
|
+
@mol.add_h!
|
51
|
+
@mol.atoms.size.should == 74
|
52
|
+
@mol.hydrogens_added?.should be_true
|
53
|
+
end
|
54
|
+
|
32
55
|
it 'calculates #ob_sssr (smallest set of smallest rings)' do
|
33
56
|
ar = @mol.ob_sssr
|
34
57
|
ar.should be_an(Array)
|
@@ -36,6 +59,106 @@ describe Rubabel::Molecule do
|
|
36
59
|
ar.first.should be_a(OpenBabel::OBRing)
|
37
60
|
end
|
38
61
|
|
62
|
+
describe 'masses' do
|
63
|
+
subject { Rubabel::Molecule.from_string("C(=O)COC(=O)C[NH3+]") }
|
64
|
+
it '#mol_wt (or #avg_mass)' do
|
65
|
+
subject.mol_wt.should be_within(0.000001).of(118.11121999999999)
|
66
|
+
end
|
67
|
+
|
68
|
+
it '#exact_mass' do
|
69
|
+
subject.exact_mass.should be_within(0.00000001).of(118.05041812003999)
|
70
|
+
end
|
71
|
+
|
72
|
+
it '#mass is the exact mass adjusted for electron gain/loss' do
|
73
|
+
subject.mass.should be_within(0.00000001).of(118.04986952003999)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
describe 'getting other descriptors' do
|
78
|
+
# can't figure this out yet
|
79
|
+
end
|
80
|
+
|
81
|
+
describe 'pH' do
|
82
|
+
|
83
|
+
subject { Rubabel::Molecule.from_string("NCC(=O)OCC(=O)O") }
|
84
|
+
|
85
|
+
it '#correct_for_ph! neutral' do
|
86
|
+
subject.correct_for_ph!.to_s.should == '[O-]C(=O)COC(=O)C[NH3+]'
|
87
|
+
end
|
88
|
+
|
89
|
+
it '#correct_for_ph!(1.4) [low]' do
|
90
|
+
subject.correct_for_ph!(1.4).to_s.should == 'OC(=O)COC(=O)C[NH3+]'
|
91
|
+
end
|
92
|
+
|
93
|
+
it '#correct_for_ph!(11.0) [high]' do
|
94
|
+
subject.correct_for_ph!(11.0).to_s.should == '[O-]C(=O)COC(=O)CN'
|
95
|
+
end
|
96
|
+
|
97
|
+
it '#correct_for_ph!(nil) [gives neutral molecule]' do
|
98
|
+
subject.correct_for_ph!.to_s.should == '[O-]C(=O)COC(=O)C[NH3+]'
|
99
|
+
subject.correct_for_ph!(nil).to_s.should == "NCC(=O)OCC(=O)O"
|
100
|
+
end
|
101
|
+
|
102
|
+
it '#neutral! [can be set neutral again]' do
|
103
|
+
subject.correct_for_ph!
|
104
|
+
subject.to_s.should == '[O-]C(=O)COC(=O)C[NH3+]'
|
105
|
+
subject.h_added?.should == false
|
106
|
+
subject.neutral!.to_s.should == "NCC(=O)OCC(=O)O"
|
107
|
+
subject.h_added?.should == false
|
108
|
+
end
|
109
|
+
|
110
|
+
it '#neutral! [preserves hydrogens added state]' do
|
111
|
+
subject.correct_for_ph!
|
112
|
+
subject.add_h!
|
113
|
+
subject.to_s.should == '[O-]C(=O)COC(=O)C[NH3+]'
|
114
|
+
subject.h_added?.should == true
|
115
|
+
subject.neutral!.to_s.should == "NCC(=O)OCC(=O)O"
|
116
|
+
subject.h_added?.should == true
|
117
|
+
end
|
118
|
+
|
119
|
+
it '#add_h!(11.0) [can correct for ph if given a ph]' do
|
120
|
+
subject.add_h!(11.0).to_s.should == '[O-]C(=O)COC(=O)CN'
|
121
|
+
end
|
122
|
+
|
123
|
+
end
|
124
|
+
|
125
|
+
it 'can calculate a molecular fingerprint (for similarity calcs)' do
|
126
|
+
# this just returns the std::vector object at the moment
|
127
|
+
fp = @mol.ob_fingerprint
|
128
|
+
# this is an array of unsigned ints that really need to be coerced into
|
129
|
+
# bits for further usefulness.
|
130
|
+
fp.to_a.should == [0, 604110848, 16777216, 0, 2147483648, 4210688, 0, 2097152, 16, 16809984, 0, 0, 1, 37756928, 32, 0, 524296, 1028, 8388612, 131072, 1073741824, 512, 1048584, 16384, 1026, 0, 0, 524288, 0, 2048, 16777248, 0]
|
131
|
+
lambda { @mol.ob_fingerprint("WACKY") }.should raise_error
|
132
|
+
end
|
133
|
+
|
134
|
+
it 'can calculate the tanimoto similarity' do
|
135
|
+
# oo way to call:
|
136
|
+
@mol.tanimoto(@mol).should == 1.0
|
137
|
+
mol2 = Rubabel::Molecule.from_string("CCC(O)OCCC")
|
138
|
+
# class way to call this
|
139
|
+
t = Rubabel::Molecule.tanimoto(@mol, mol2)
|
140
|
+
# actual: 0.11363636363636363
|
141
|
+
t.should be < 0.2
|
142
|
+
t.should be > 0.0
|
143
|
+
end
|
144
|
+
|
145
|
+
describe '3D' do
|
146
|
+
before(:each) do
|
147
|
+
# a hexopyranose sugar (this SMILES appears to be D-glucose), not cholesterol
|
148
|
+
@mol = Rubabel::Molecule.from_string("OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O")
|
149
|
+
end
|
150
|
+
|
151
|
+
it 'can be turned into a 3D molecule' do
|
152
|
+
# this is only as good as the Builder is good. For instance, it fails
|
153
|
+
# to get all the stereo centers of cholesterol (but it does warn on
|
154
|
+
# this, although I don't know how to capture the warnings (can't get
|
155
|
+
# with stdout or stderr??))
|
156
|
+
@mol.ob.has_3d.should be_false
|
157
|
+
@mol.make_3d!
|
158
|
+
@mol.ob.has_3d.should be_true
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
39
162
|
describe 'breaking a molecule' do
|
40
163
|
before(:each) do
|
41
164
|
@mol = Rubabel::Molecule.from_string("NC(=O)CO")
|
@@ -61,13 +184,20 @@ describe Rubabel::Molecule do
|
|
61
184
|
@mol.each_bond.map.to_a.size.should == 9
|
62
185
|
end
|
63
186
|
|
64
|
-
it 'can be split into multiple molecules' do
|
187
|
+
it 'can be split into multiple molecules [unaffecting self]' do
|
188
|
+
num_bonds_before = @mol.num_bonds
|
189
|
+
num_atoms_before = @mol.num_atoms
|
190
|
+
|
65
191
|
reply = @mol.split(@mol.bonds.first, @mol.bonds.last)
|
192
|
+
|
66
193
|
reply.should be_a(Array)
|
67
194
|
reply.size.should == 3
|
195
|
+
@mol.num_bonds.should == num_bonds_before
|
196
|
+
@mol.num_atoms.should == num_atoms_before
|
68
197
|
csmiles = reply.map(&:csmiles)
|
69
198
|
csmiles.sort.should == %w(N CC=O O).sort
|
70
199
|
end
|
200
|
+
|
71
201
|
end
|
72
202
|
|
73
203
|
describe 'matching patterns (SMARTS)' do
|
data/spec/rubabel_spec.rb
CHANGED
@@ -48,19 +48,49 @@ describe Rubabel do
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
-
describe '::
|
51
|
+
describe '::molecule_from_file and ::molecule_from_string' do
|
52
52
|
before(:each) do
|
53
53
|
@samples = TESTFILES + "/Samples.sdf"
|
54
54
|
end
|
55
55
|
|
56
56
|
it 'return a single molecule (the first one in the file)' do
|
57
|
-
mol_f = Rubabel.
|
58
|
-
mol_s = Rubabel.
|
57
|
+
mol_f = Rubabel.molecule_from_file(@samples)
|
58
|
+
mol_s = Rubabel.molecule_from_string(IO.read(@samples), :sdf)
|
59
59
|
[mol_f, mol_s].each {|mol| mol.should be_a(Rubabel::Molecule) }
|
60
60
|
mol_f.should == mol_s
|
61
61
|
end
|
62
62
|
end
|
63
63
|
|
64
|
+
describe 'can deal with .gz files properly' do
|
65
|
+
before(:each) do
|
66
|
+
@gz_file = TESTFILES + "/Samples.sdf.gz"
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'can get the file format' do
|
70
|
+
# non-existent file okay
|
71
|
+
Rubabel.format_from_ext("silly.sdf.gz").should == :sdf
|
72
|
+
Rubabel.format_from_ext(@gz_file).should == :sdf
|
73
|
+
Rubabel.foreach(@gz_file) do |mol|
|
74
|
+
mol.should be_a(Rubabel::Molecule)
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
end
|
79
|
+
|
80
|
+
describe 'format from extension' do
|
81
|
+
|
82
|
+
it 'determines format from extension' do
|
83
|
+
Rubabel.format_from_ext( TESTFILES + "/Samples.sdf" ).should == :sdf
|
84
|
+
Rubabel.format_from_ext( TESTFILES + "/Samples.non_existent" ).should be_nil
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'determines format from mime-type' do
|
88
|
+
Rubabel.format_from_mime( "chemical/x-mdl-sdfile" ).should == :sdf
|
89
|
+
Rubabel.format_from_mime( "chemical/wierdness" ).should be_nil
|
90
|
+
end
|
91
|
+
|
92
|
+
end
|
93
|
+
|
64
94
|
#describe 'an atom in it' do
|
65
95
|
#end
|
66
96
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'rspec'
|
2
|
+
require 'stringio'
|
2
3
|
|
3
4
|
# Requires supporting files with custom matchers and macros, etc,
|
4
5
|
# in ./support/ and its subdirectories.
|
@@ -10,3 +11,26 @@ RSpec.configure do |config|
|
|
10
11
|
end
|
11
12
|
|
12
13
|
TESTFILES = File.dirname(__FILE__) + "/testfiles"
|
14
|
+
|
15
|
+
module Kernel
|
16
|
+
# from: http://thinkingdigitally.com/archive/capturing-output-from-puts-in-ruby/
|
17
|
+
def capture_stdout
|
18
|
+
out = StringIO.new
|
19
|
+
$stdout = out
|
20
|
+
yield
|
21
|
+
return out.string
|
22
|
+
ensure
|
23
|
+
$stdout = STDOUT
|
24
|
+
end
|
25
|
+
|
26
|
+
def capture_stderr
|
27
|
+
out = StringIO.new
|
28
|
+
$stderr = out
|
29
|
+
yield
|
30
|
+
return out.string
|
31
|
+
ensure
|
32
|
+
$stderr = STDERR
|
33
|
+
end
|
34
|
+
|
35
|
+
|
36
|
+
end
|