rubabel 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,21 @@ require 'rubabel/molecule'
4
4
  require 'rubabel/atom'
5
5
 
6
6
  describe Rubabel::Atom do
7
+
8
# Atoms can be built straight from an element symbol, optionally with an id.
it 'can be created given an element symbol' do
  # each row: [arguments for Rubabel::Atom[], expected element, expected id]
  [
    [[:h], :h, 0],
    [[:c], :c, 0],
    [[:cl, 3], :cl, 3]
  ].each do |args, element, id|
    atom = Rubabel::Atom[*args]
    atom.el.should == element
    atom.id.should == id
  end
end
21
+
7
22
  describe 'working with a complex molecule' do
8
23
 
9
24
  before do
@@ -24,6 +39,15 @@ describe Rubabel::Atom do
24
39
  end
25
40
  end
26
41
 
42
+
43
it '#mol retrieves the parent molecule' do
  # an atom that belongs to a molecule reports that molecule
  @atom.mol.should == @mol

  # a free-standing atom has no parent molecule
  Rubabel::Atom[:h].mol.should be_nil
end
50
+
27
51
  it 'can get the bonds' do
28
52
  @atom.each_bond do |bond|
29
53
  bond.should be_a(Rubabel::Bond)
@@ -31,6 +55,9 @@ describe Rubabel::Atom do
31
55
  @atom.bonds.size.should == 4
32
56
  end
33
57
 
58
# Not written yet. Declared without a block so that RSpec reports the example
# as pending instead of counting an empty body as a passing test.
it 'can add a bond'
60
+
34
61
  it 'can get the neighboring atoms' do
35
62
  @atom.id.should == 0
36
63
  @atom.atomic_num.should == 6
@@ -42,6 +69,14 @@ describe Rubabel::Atom do
42
69
  @atom.atoms.size.should == 4
43
70
  end
44
71
 
72
it '#get_bond can retrieve a particular bond based on the atom' do
  # atom 3 is expected to share a bond with @atom
  neighbor = @mol.atoms[3]
  @atom.get_bond(neighbor).atoms.map(&:id).should == [0,3]

  # atom 15 is not connected to @atom, so no bond comes back
  stranger = @mol.atoms[15]
  @atom.get_bond(stranger).should be_nil
end
79
+
45
80
  it '#coords gets the coordinates' do
46
81
  @atom.coords
47
82
  end
@@ -0,0 +1,51 @@
1
+ require 'spec_helper'
2
+
3
+ require 'rubabel'
4
+
5
# Specs for Rubabel::Molecule::Fragmentable, exercised through
# Molecule#fragment with the :h2oloss and :co rules.
describe Rubabel::Molecule::Fragmentable do
  describe 'the :co rule' do

    describe 'water loss' do

      it ':h2oloss' do
        mol = Rubabel["NCCC(O)CC"]
        fragments = mol.fragment( rules: [:h2oloss] )
        fragments.flatten(1).map(&:csmiles).sort.should == ["CC=CCC[NH3+]", "CCC=CC[NH3+]", "O", "O"]
      end

      it ':h2oloss [does not allow bad chemistry]' do
        # lone pair and double bond resonance ?
        mol = Rubabel["NCC(O)CC"]
        fragments = mol.fragment( rules: [:h2oloss] )
        fragments.flatten(1).map(&:csmiles).sort.should == ["CC=CC[NH3+]", "O"]

        # with the hydroxyl on the carbon next to the amine, no fragments are expected
        mol = Rubabel["NC(O)CC"]
        fragments = mol.fragment( rules: [:h2oloss] )
        fragments.flatten(1).map(&:csmiles).sort.should == []
      end
    end

    describe 'backbone cleavage' do

      it 'cleaves beside alcohols yielding aldehydes' do
        mol = Rubabel["NCCC(O)CC"]
        mol.correct_for_ph!
        total_mass = mol.add_h!.mass

        pieces = mol.fragment(rules: [:co])
        pieces.size.should == 2
        pieces.map(&:size).should == [2,2]
        pieces.flatten(1).map(&:csmiles).should == ["CC[NH3+]", "CCC=O", "C(C=O)C[NH3+]", "CC"]
        # Each pair of fragments must account for the whole molecule. The
        # masses are floats that get summed, so compare within a tolerance
        # rather than for exact equality.
        pieces.each do |pair|
          pair.map(&:mass).reduce(:+).should be_within(1e-6).of(total_mass)
        end
      end

      # disabled (xit): kept as a record of the intended behavior
      xit 'does not cleave esters' do
        mol = Rubabel["NCCC(=O)OC"]
        pieces = mol.fragment( rules: [:co] )
        pieces.should be_empty
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ require 'spec_helper'
2
+
3
+ require 'rubabel/molecule_data'
4
+ require 'rubabel'
5
+
6
# Specs for Rubabel::MoleculeData, built around the molecule loaded from the
# Samples.sdf test file.
describe Rubabel::MoleculeData do

  before do
    @mol = Rubabel::Molecule.from_file( "#{TESTFILES}/Samples.sdf" )
  end

  it 'is initialized with an OpenBabel molecule' do
    Rubabel::MoleculeData.new(@mol.ob).should be_a(Rubabel::MoleculeData)
  end

  describe 'hash-like behavior' do
    subject { Rubabel::MoleculeData.new(@mol.ob) }

    it '#each' do
      # without a block, #each hands back an Enumerator over [key, value] pairs
      pairs = subject.each
      pairs.should be_a(Enumerator)
      pairs.next.should == ["NAME", "2Ferrocene"]
    end

    it '#to_a' do
      subject.to_a.size.should == 2
    end

    it '#size & #length' do
      [:size, :length].each do |counter|
        subject.send(counter).should == 2
      end
    end

    it '#[]' do
      subject['NAME'].should == "2Ferrocene"
    end

    it '#[]=' do
      # overwriting an existing key leaves the number of entries unchanged
      subject['NAME'] = 'PEPPER'
      subject.size.should == 2
      # assigning to an unknown key creates a new entry
      subject['jtp_special'] = 'sauce'
      subject.size.should == 3
      # both changes must show up when the molecule is written out as SDF
      sdf_text = subject.obmol.upcast.write(:sdf)
      [/jtp_special/, /sauce/, /PEPPER/].each do |pattern|
        sdf_text.should =~ pattern
      end
    end

    it '#key?' do
      subject.key?('NAME').should be_true
      subject.key?('bananas').should be_false
    end

    it '#keys' do
      subject.keys.should == ["NAME", "OpenBabel Symmetry Classes"]
    end

    it '#values' do
      subject.values.should == ["2Ferrocene", "8 4 9 4 4 4 4 4 4 4 4 4 5 3 6 2 7 1 5 3 6 2 5 3 7 1 5 3 6 2 6 2"]
    end

    it '#delete' do
      symmetry_key = "OpenBabel Symmetry Classes"
      # deleting returns the value that was stored under the key
      removed = subject.delete(symmetry_key)
      removed.should =~ /8 4 9/
      subject.key?(symmetry_key).should be_false
      subject.size.should == 1
      # a missing key gives nil, or the block's value when a block is given
      subject.delete("nonsense").should be_nil
      subject.delete("nonsense") { 'wow' }.should == 'wow'
    end
  end
end
@@ -3,6 +3,13 @@ require 'spec_helper'
3
3
  require 'rubabel/molecule'
4
4
 
5
5
  describe Rubabel::Molecule do
6
describe 'creation' do
  # Rubabel[] builds a molecule from a SMILES string
  it 'can be made with Rubabel[]' do
    Rubabel["CC(O)O"].csmiles.should == "CC(O)O"
  end
end
12
+
6
13
  before(:each) do
7
14
  @mol = Rubabel::Molecule.from_file( TESTFILES + '/cholesterol.sdf' )
8
15
  end
@@ -18,6 +25,14 @@ describe Rubabel::Molecule do
18
25
  end
19
26
  end
20
27
 
28
it '#dup creates an entirely new molecule based on the first' do
  copy = @mol.dup
  # The copy is meant to be deep, atoms included: charging an atom of the
  # original must not change the overall charge of the copy.
  @mol.atoms.first.charge = 1
  @mol.charge.should_not == copy.charge
end
35
+
21
36
  it '#each iterates through each atom in id order' do
22
37
  cnt = 0
23
38
  @mol.each do |atom|
@@ -29,6 +44,14 @@ describe Rubabel::Molecule do
29
44
  @mol.atoms.size.should == 74
30
45
  end
31
46
 
47
it '#hydrogens_added?' do
  # as loaded: no explicit hydrogens yet
  @mol.hydrogens_added?.should be_false
  @mol.atoms.size.should == 33

  # add_h! modifies the molecule in place, growing the atom list
  @mol.add_h!

  @mol.atoms.size.should == 74
  @mol.hydrogens_added?.should be_true
end
54
+
32
55
  it 'calculates #ob_sssr (smallest set of smallest rings)' do
33
56
  ar = @mol.ob_sssr
34
57
  ar.should be_an(Array)
@@ -36,6 +59,106 @@ describe Rubabel::Molecule do
36
59
  ar.first.should be_a(OpenBabel::OBRing)
37
60
  end
38
61
 
62
describe 'masses' do
  # a positively charged molecule, so #mass and #exact_mass are expected to differ
  subject { Rubabel::Molecule.from_string("C(=O)COC(=O)C[NH3+]") }

  it '#mol_wt (or #avg_mass)' do
    average = subject.mol_wt
    average.should be_within(0.000001).of(118.11121999999999)
  end

  it '#exact_mass' do
    exact = subject.exact_mass
    exact.should be_within(0.00000001).of(118.05041812003999)
  end

  it '#mass is the exact mass adjusted for electron gain/loss' do
    adjusted = subject.mass
    adjusted.should be_within(0.00000001).of(118.04986952003999)
  end
end
76
+
77
describe 'getting other descriptors' do
  # placeholder: no examples yet (can't figure this out yet)
end
80
+
81
describe 'pH' do

  # SMILES strings the examples below compare against
  uncharged    = "NCC(=O)OCC(=O)O"
  zwitterion   = '[O-]C(=O)COC(=O)C[NH3+]'
  protonated   = 'OC(=O)COC(=O)C[NH3+]'
  deprotonated = '[O-]C(=O)COC(=O)CN'

  subject { Rubabel::Molecule.from_string("NCC(=O)OCC(=O)O") }

  it '#correct_for_ph! neutral' do
    subject.correct_for_ph!.to_s.should == zwitterion
  end

  it '#correct_for_ph!(1.4) [low]' do
    subject.correct_for_ph!(1.4).to_s.should == protonated
  end

  it '#correct_for_ph!(11.0) [high]' do
    subject.correct_for_ph!(11.0).to_s.should == deprotonated
  end

  it '#correct_for_ph!(nil) [gives neutral molecule]' do
    # charge the molecule first, then show that nil takes the charges away again
    subject.correct_for_ph!.to_s.should == zwitterion
    subject.correct_for_ph!(nil).to_s.should == uncharged
  end

  it '#neutral! [can be set neutral again]' do
    subject.correct_for_ph!
    subject.to_s.should == zwitterion
    subject.h_added?.should == false
    subject.neutral!.to_s.should == uncharged
    # neutralizing must not switch on explicit hydrogens
    subject.h_added?.should == false
  end

  it '#neutral! [preserves hydrogens added state]' do
    subject.correct_for_ph!
    subject.add_h!
    subject.to_s.should == zwitterion
    subject.h_added?.should == true
    subject.neutral!.to_s.should == uncharged
    # explicit hydrogens must still be flagged as added afterwards
    subject.h_added?.should == true
  end

  it '#add_h!(11.0) [can correct for ph if given a ph]' do
    subject.add_h!(11.0).to_s.should == deprotonated
  end

end
124
+
125
it 'can calculate a molecular fingerprint (for similarity calcs)' do
  # For now the return value is the raw std::vector object: unsigned ints
  # that would have to be unpacked into bits to be of further use.
  expected_words = [0, 604110848, 16777216, 0, 2147483648, 4210688, 0, 2097152, 16, 16809984, 0, 0, 1, 37756928, 32, 0, 524296, 1028, 8388612, 131072, 1073741824, 512, 1048584, 16384, 1026, 0, 0, 524288, 0, 2048, 16777248, 0]
  @mol.ob_fingerprint.to_a.should == expected_words

  # an unknown fingerprint type is rejected
  -> { @mol.ob_fingerprint("WACKY") }.should raise_error
end
133
+
134
it 'can calculate the tanimoto similarity' do
  # instance-method form: a molecule compared with itself scores 1.0
  @mol.tanimoto(@mol).should == 1.0

  # class-method form, against a different molecule
  other = Rubabel::Molecule.from_string("CCC(O)OCCC")
  similarity = Rubabel::Molecule.tanimoto(@mol, other)

  # actual: 0.11363636363636363
  similarity.should be < 0.2
  similarity.should be > 0.0
end
144
+
145
describe '3D' do
  before do
    # NOTE: this SMILES has six carbons and six oxygens (a hexose-like sugar
    # ring), so it is not cholesterol despite what the original comment said.
    @mol = Rubabel::Molecule.from_string("OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O")
  end

  it 'can be turned into a 3D molecule' do
    # The result is only as good as the underlying Builder. It reportedly
    # fails to get all the stereo centers of cholesterol; it warns about that,
    # but the warnings could not be captured through stdout or stderr.
    @mol.ob.has_3d.should be_false
    @mol.make_3d!
    @mol.ob.has_3d.should be_true
  end
end
161
+
39
162
  describe 'breaking a molecule' do
40
163
  before(:each) do
41
164
  @mol = Rubabel::Molecule.from_string("NC(=O)CO")
@@ -61,13 +184,20 @@ describe Rubabel::Molecule do
61
184
  @mol.each_bond.map.to_a.size.should == 9
62
185
  end
63
186
 
64
- it 'can be split into multiple molecules' do
187
it 'can be split into multiple molecules [unaffecting self]' do
  # remember the original's size so we can show that split leaves it intact
  counts_before = [@mol.num_bonds, @mol.num_atoms]

  parts = @mol.split(@mol.bonds.first, @mol.bonds.last)

  parts.should be_a(Array)
  parts.size.should == 3
  [@mol.num_bonds, @mol.num_atoms].should == counts_before
  parts.map(&:csmiles).sort.should == %w(N CC=O O).sort
end
200
+
71
201
  end
72
202
 
73
203
  describe 'matching patterns (SMARTS)' do
data/spec/rubabel_spec.rb CHANGED
@@ -48,19 +48,49 @@ describe Rubabel do
48
48
  end
49
49
  end
50
50
 
51
- describe '::read_file and ::read_string' do
51
describe '::molecule_from_file and ::molecule_from_string' do
  before do
    @samples = "#{TESTFILES}/Samples.sdf"
  end

  it 'return a single molecule (the first one in the file)' do
    from_file = Rubabel.molecule_from_file(@samples)
    from_string = Rubabel.molecule_from_string(IO.read(@samples), :sdf)

    # both entry points give a Molecule, and the two results compare equal
    from_file.should be_a(Rubabel::Molecule)
    from_string.should be_a(Rubabel::Molecule)
    from_file.should == from_string
  end
end
63
63
 
64
describe 'can deal with .gz files properly' do
  before do
    @gz_file = "#{TESTFILES}/Samples.sdf.gz"
  end

  it 'can get the file format' do
    # a non-existent file is okay here
    ["silly.sdf.gz", @gz_file].each do |path|
      Rubabel.format_from_ext(path).should == :sdf
    end

    # iterating over the gzipped file yields molecules
    Rubabel.foreach(@gz_file) { |mol| mol.should be_a(Rubabel::Molecule) }
  end

end
79
+
80
describe 'format from extension' do

  it 'determines format from extension' do
    known = "#{TESTFILES}/Samples.sdf"
    unknown = "#{TESTFILES}/Samples.non_existent"

    Rubabel.format_from_ext(known).should == :sdf
    # an unrecognized extension gives nil
    Rubabel.format_from_ext(unknown).should be_nil
  end

  it 'determines format from mime-type' do
    known = "chemical/x-mdl-sdfile"
    unknown = "chemical/wierdness"

    Rubabel.format_from_mime(known).should == :sdf
    # an unrecognized mime-type gives nil
    Rubabel.format_from_mime(unknown).should be_nil
  end

end
93
+
64
94
  #describe 'an atom in it' do
65
95
  #end
66
96
  end
data/spec/spec_helper.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'rspec'
2
+ require 'stringio'
2
3
 
3
4
  # Requires supporting files with custom matchers and macros, etc,
4
5
  # in ./support/ and its subdirectories.
@@ -10,3 +11,26 @@ RSpec.configure do |config|
10
11
  end
11
12
 
12
13
  TESTFILES = File.dirname(__FILE__) + "/testfiles"
14
+
15
# Output-capturing helpers for specs, mixed into Kernel so they can be called
# from anywhere.
module Kernel
  # from: http://thinkingdigitally.com/archive/capturing-output-from-puts-in-ruby/

  # Runs the given block with $stdout pointed at an in-memory buffer.
  #
  # Returns the String written to $stdout while the block ran. The stream that
  # was active before the call is put back afterwards, even if the block
  # raises, so an outer redirection (e.g. a nested capture) is not clobbered.
  def capture_stdout
    previous = $stdout
    buffer = StringIO.new
    $stdout = buffer
    yield
    buffer.string
  ensure
    # restore whatever was installed before, not unconditionally STDOUT
    $stdout = previous
  end

  # Runs the given block with $stderr pointed at an in-memory buffer.
  #
  # Returns the String written to $stderr while the block ran. The stream that
  # was active before the call is put back afterwards, even if the block
  # raises.
  def capture_stderr
    previous = $stderr
    buffer = StringIO.new
    $stderr = buffer
    yield
    buffer.string
  ensure
    # restore whatever was installed before, not unconditionally STDERR
    $stderr = previous
  end
end