rubabel 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rubabel/bond.rb CHANGED
@@ -13,6 +13,15 @@ module Rubabel
13
13
  class Bond
14
14
  include Enumerable
15
15
 
16
+ class << self
17
+ def [](atom1, atom2)
18
+ obbond = OpenBabel::OBBond.new
19
+ obbond.set_begin(atom1.ob)
20
+ obbond.set_end(atom2.ob)
21
+ self.new(obbond)
22
+ end
23
+ end
24
+
16
25
  attr_accessor :ob
17
26
 
18
27
  def initialize(obbond)
@@ -34,6 +43,15 @@ module Rubabel
34
43
 
35
44
  alias_method :each, :each_atom
36
45
 
46
+ def bond_order
47
+ @ob.get_bond_order
48
+ end
49
+
50
+ # 1 = single, 2 = double, 5 = aromatic
51
+ def bond_order=(val=1)
52
+ @ob.set_bond_order(val)
53
+ end
54
+
37
55
  # returns an array of Rubabel::Atoms
38
56
  def atoms
39
57
  [@ob.get_begin_atom.upcast, @ob.get_end_atom.upcast]
@@ -2,6 +2,7 @@ require 'openbabel'
2
2
  require 'rubabel'
3
3
  require 'rubabel/atom'
4
4
  require 'rubabel/bond'
5
+ require 'rubabel/molecule/fragmentable'
5
6
 
6
7
  class OpenBabel::OBMol
7
8
  def upcast
@@ -9,25 +10,63 @@ class OpenBabel::OBMol
9
10
  end
10
11
  end
11
12
 
13
+ class OpenBabelUnableToSetupForceFieldError < RuntimeError
14
+ end
15
+
12
16
  module Rubabel
13
- # yet to implement:
17
+ # yet to implement:
14
18
  class Molecule
15
19
  include Enumerable
16
20
 
21
+ DEFAULT_FINGERPRINT = "FP2"
22
+ DEFAULT_OUT_TYPE = :can
23
+ DEFAULT_IN_TYPE = :smi
24
+
17
25
  # the OpenBabel::OBMol object
18
26
  attr_accessor :ob
19
27
 
28
+ # the OpenBabel::OBConversion object
29
+ attr_accessor :obconv
30
+
20
31
  class << self
32
+
33
+ def tanimoto(mol1, mol2, type=DEFAULT_FINGERPRINT)
34
+ OpenBabel::OBFingerprint.tanimoto(mol1.ob_fingerprint(type), mol2.ob_fingerprint(type))
35
+ end
36
+
21
37
  def from_file(file, type=nil)
22
- Rubabel.read_file(file, type)
38
+ Rubabel.molecule_from_file(file, type)
23
39
  end
24
40
 
25
- def from_string(string, type=:smi)
26
- Rubabel.read_string(string, type)
41
+ def from_string(string, type=DEFAULT_IN_TYPE)
42
+ Rubabel.molecule_from_string(string, type)
27
43
  end
28
44
  end
29
45
 
30
- DEFAULT_OUT_TYPE = :can
46
+
47
+ # attributes
48
+ def title() @ob.get_title end
49
+ def title=(val) @ob.set_title(val) end
50
+
51
+ def charge() @ob.get_total_charge end
52
+ def charge=(v) @ob.set_total_charge(v) end
53
+
54
+ def spin() @ob.get_total_spin_multiplicity end
55
+
56
+ def mol_wt() @ob.get_mol_wt end
57
+ alias_method :avg_mass, :mol_wt
58
+
59
+ def exact_mass() @ob.get_exact_mass end
60
+
61
+ # returns the exact_mass corrected for charge gain/loss
62
+ def mass
63
+ @ob.get_exact_mass - (@ob.get_total_charge * Rubabel::MASS_E)
64
+ end
65
+
66
+ # returns a string representation of the molecular formula. Not sensitive
67
+ # to add_h!
68
+ def formula() @ob.get_formula end
69
+
31
70
 
32
71
  def initialize(obmol, obconv=nil)
33
72
  @obconv = obconv
@@ -64,73 +103,78 @@ module Rubabel
64
103
  smarts_indices(smarts_or_string).size > 0
65
104
  end
66
105
 
67
- def charge
68
- @ob.get_total_charge
69
- end
70
-
71
- def charge=(v)
72
- @ob.set_total_charge(v)
73
- end
74
-
75
- def spin
76
- @ob.get_total_spin_multiplicity
77
- end
78
-
79
106
  # returns an array of OpenBabel::OBRing objects.
80
107
  def ob_sssr
81
108
  @ob.get_sssr.to_a
82
109
  end
83
110
 
84
- def exact_mass
85
- @ob.get_exact_mass
86
- end
87
-
88
- def mol_wt
89
- @ob.get_mol_wt
90
- end
91
-
92
- alias_method :avg_mass, :mol_wt
93
-
94
- def exact_mass
95
- @ob.get_exact_mass
96
- end
97
-
98
111
  #def conformers
99
112
  # Currently returns an object of type
100
113
  # SWIG::TYPE_p_std__vectorT_double_p_std__allocatorT_double_p_t_t
101
114
  #vec = @ob.get_conformers
102
115
  #end
116
+
117
+ # have hydrogens been added yet?
118
+ def hydrogens_added?
119
+ @ob.has_hydrogens_added
120
+ end
121
+ alias_method :h_added?, :hydrogens_added?
122
+
123
+ # returns self. Corrects for ph if ph is not nil. NOTE: the arguments
124
+ # are in the reverse order of the OpenBabel api.
125
+ def add_h!(ph=nil, polaronly=false)
126
+ if ph.nil?
127
+ @ob.add_hydrogens(polaronly)
128
+ else
129
+ @ob.add_hydrogens(polaronly, true, ph)
130
+ end
131
+ self
132
+ end
103
133
 
104
- def add_h!
105
- @ob.add_hydrogens
134
+ # returns self. If ph is nil, then #neutral! is called
135
+ def correct_for_ph!(ph=7.4)
136
+ ph.nil? ? neutral! : @ob.correct_for_ph(ph)
137
+ self
106
138
  end
107
- #alias_method :add_h!, :add_hydrogens!
108
139
 
109
- # creates a new molecule (currently writes to smiles and uses babel
110
- # commandline to get hydrogens at a given pH; this is because no pH model
111
- # in ruby bindings currently).
112
- def add_h_at_ph(ph=7.4)
113
- # write the file with the molecule
114
- self.write_file("tmp.smi")
115
- system "#{Rubabel::CMD[:babel]} -i smi tmp.smi -p #{ph} -o can tmp.can"
116
- Molecule.from_file("tmp.can")
140
+ # simple method to coerce the molecule into a neutral charge state.
141
+ # It does this by removing any charge from each atom and then removing the
142
+ # hydrogens (which can then be added back by the user and will be
143
+ # added back with proper valence). If the molecule had hydrogens added it
144
+ # will return the molecule with hydrogens added
145
+ # returns self.
146
+ def neutral!
147
+ had_hydrogens = h_added?
148
+ atoms.each {|atom| atom.charge = 0 if (atom.charge != 0) }
149
+ remove_h!
150
+ add_h! if had_hydrogens
151
+ self
117
152
  end
118
- #alias_method :add_h!, :add_hydrogens!
119
153
 
154
+ # adds hydrogens
155
+ #def add_h_at_ph!(ph=7.4)
156
+ # # creates a new molecule (currently writes to smiles and uses babel
157
+ # # commandline to get hydrogens at a given pH; this is because no pH model
158
+ # # in ruby bindings currently).
159
+ #
160
+ # ## write the file with the molecule
161
+ # #self.write_file("tmp.smi")
162
+ # #system "#{Rubabel::CMD[:babel]} -i smi tmp.smi -p #{ph} -o can tmp.can"
163
+ # #Molecule.from_file("tmp.can")
164
+ # end
165
+ # #alias_method :add_h!, :add_hydrogens!
166
+
167
+ # returns self
120
168
  def remove_h!
121
169
  @ob.delete_hydrogens
170
+ self
122
171
  end
123
172
 
173
+ # calls separate on the OBMol object
124
174
  def separate!
125
175
  @ob.separate
126
176
  end
127
177
 
128
- # returns a string representation of the molecular formula. Not sensitive
129
- # to add_h!
130
- def formula
131
- @ob.get_formula
132
- end
133
-
134
178
  # returns just the smiles string :smi (not the id)
135
179
  def smiles
136
180
  to_s(:smi)
@@ -173,6 +217,14 @@ module Rubabel
173
217
  self
174
218
  end
175
219
 
220
+ # creates a deep copy of the molecule (even the atoms are duplicated)
221
+ def initialize_copy(source)
222
+ super
223
+ @ob = OpenBabel::OBMol.new(source.ob)
224
+ @obconv = OpenBabel::OBConversion.new(source.obconv)
225
+ self
226
+ end
227
+
176
228
  # returns the array of bonds. Consider using #each_bond
177
229
  def bonds
178
230
  each_bond.map.to_a
@@ -198,20 +250,57 @@ module Rubabel
198
250
  #def descs
199
251
  #end
200
252
 
201
- # TODO: implement
202
- #def fingerprint(type='FP2')
203
- #end
204
- #alias_method :calc_fp, :fingerprint
253
+ def tanimoto(other, type=DEFAULT_FINGERPRINT)
254
+ Rubabel::Molecule.tanimoto(self, other, type)
255
+ end
205
256
 
206
- def split(*bonds)
257
+ # returns a std::vector<unsigned int> that can be passed directly into
258
+ # the OBFingerprint.tanimoto method
259
+ def ob_fingerprint(type=DEFAULT_FINGERPRINT)
260
+ fprinter = OpenBabel::OBFingerprint.find_fingerprint(type) || raise(ArgumentError, "fingerprint type not found")
261
+ fp = OpenBabel::VectorUnsignedInt.new
262
+ fprinter.get_fingerprint(@ob, fp) || raise("failed to get fingerprint for #{mol}")
263
+ fp
264
+ end
265
+
266
+ # obj is an atom or bond
267
+ def delete(obj)
268
+ case obj
269
+ when Rubabel::Bond
270
+ delete_bond(obj)
271
+ when Rubabel::Atom
272
+ delete_atom(obj)
273
+ else
274
+ raise(ArgumentError, "don't know how to delete objects of type: #{obj.class}")
275
+ end
276
+ end
277
+
278
+ def delete_bond(bond)
279
+ @ob.delete_bond(bond.ob)
280
+ end
281
+
282
+ def delete_atom(atom)
283
+ @ob.delete_atom(atom.ob)
284
+ end
285
+
286
+ # yields self after deleting the specified bonds. When the block is
287
+ # closed the bonds are restored. Returns whatever is returned from the
288
+ # block.
289
+ def delete_and_restore_bonds(*bonds, &block)
207
290
  bonds.each do |bond|
208
291
  unless @ob.delete_bond(bond.ob, false)
209
292
  raise "#{bond.inspect} not deleted!"
210
293
  end
211
294
  end
212
- frags = @ob.separate.map(&:upcast)
295
+ reply = block.call(self)
213
296
  bonds.each {|bond| @ob.add_bond(bond.ob) }
214
- frags
297
+ reply
298
+ end
299
+
300
+ def split(*bonds)
301
+ delete_and_restore_bonds(*bonds) do |mol|
302
+ mol.ob.separate.map(&:upcast)
303
+ end
215
304
  end
216
305
 
217
306
  alias_method :separate, :split
@@ -229,6 +318,11 @@ module Rubabel
229
318
  end
230
319
  end
231
320
 
321
+ # returns a Rubabel::MoleculeData hash
322
+ def data
323
+ Rubabel::MoleculeData.new(@ob)
324
+ end
325
+
232
326
  # sensitive to add_h!
233
327
  def num_atoms() @ob.num_atoms end
234
328
  def num_bonds() @ob.num_bonds end
@@ -247,15 +341,49 @@ module Rubabel
247
341
  end
248
342
  end
249
343
 
344
+ # adds hydrogens if necessary. Performs only steepest descent
345
+ # optimization (no rotors optimized)
346
+ # returns self
347
+ def local_optimize!(forcefield=DEFAULT_FORCEFIELD, steps=500)
348
+ add_h! unless hydrogens_added?
349
+ if dim == 3
350
+ ff = Rubabel.force_field(forcefield.to_s)
351
+ ff.setup(@ob) || raise(OpenBabelUnableToSetupForceFieldError)
352
+ ff.steepest_descent(steps) # is the default termination count 1.0e-4 (used in obgen?)
353
+ ff.update_coordinates(@ob)
354
+ else
355
+ make_3d!(forcefield, steps)
356
+ end
357
+ self
358
+ end
359
+
360
+ #def global_optimize!(forcefield=DEFAULT_FORCEFIELD, steps=1000)
361
+ # if dim != 3
362
+ # # don't bother optimizing yet (steps=nil)
363
+ # make_3d!(DEFAULT_FORCEFIELD, nil)
364
+ # end
365
+ #end
366
+
367
+ # does a bit of basic local optimization unless steps is set to nil
368
+ # returns self
369
+ def make_3d!(forcefield=DEFAULT_FORCEFIELD, steps=50)
370
+ BUILDER.build(@ob)
371
+ @ob.add_hydrogens(false, true) unless hydrogens_added?
372
+ local_optimize!(forcefield, steps) if steps
373
+ self
374
+ end
375
+ alias_method :make3d!, :make_3d!
376
+
250
377
  def write_string(type=DEFAULT_OUT_TYPE)
251
378
  @obconv ||= OpenBabel::OBConversion.new
252
379
  @obconv.set_out_format(type.to_s)
253
380
  @obconv.write_string(@ob)
254
381
  end
255
382
 
256
- # writes to the file based on the extension
257
- def write_file(filename)
258
- type = Rubabel.filetype(filename)
383
+ # writes to the file based on the extension given. If type is given
384
+ # explicitly, then it is used.
385
+ def write_file(filename, type=nil)
386
+ type ||= Rubabel.filetype(filename)
259
387
  File.write(filename, write_string(type))
260
388
  end
261
389
 
@@ -0,0 +1,209 @@
1
+
2
+ module Rubabel
3
+ class Molecule
4
+ module Fragmentable
5
+ RULES = [:co]
6
+ #ADDUCTS = [:lioh, :nh4cl, :nh4oh]
7
+
8
+ DEFAULT_OPTIONS = {
9
+ rules: RULES,
10
+ #adduct: nil,
11
+ ph: 7.4,
12
+ # return only the set of unique fragments
13
+ uniq: false,
14
+ }
15
+
16
+ # molecules and fragments should all have hydrogens added (add_h!)
17
+ # before calling this method
18
+ #
19
+ # For instance, water loss with double bond formation is not allowable
20
+ # for NCC(O)CC => CCC=C[NH2+], presumably because of the lone pair and
21
+ # double bond resonance.
22
+ #
23
+ def allowable_fragmentation?(frags)
24
+ self.num_atoms == frags.map(&:num_atoms).reduce(:+)
25
+ end
26
+
27
+ # will turn bond into a double bond, yield the changed molecule, then
28
+ # return the bond to the original state when the block is closed
29
+ # returns whatever the block returned
30
+ def feint_double_bond(bond, &block)
31
+ orig = bond.bond_order
32
+ bond.bond_order = 2
33
+ reply = block.call(self)
34
+ bond.bond_order = orig
35
+ reply
36
+ end
37
+
38
+ # to ensure proper fragmentation, will add_h!(ph) first at the given ph
39
+ # an empty array is returned if there are no fragments generated.
40
+ def fragment(opts={})
41
+ opts = DEFAULT_OPTIONS.merge(opts)
42
+
43
+ had_hydrogens = self.h_added?
44
+
45
+ self.correct_for_ph!(opts[:ph])
46
+ self.remove_h!
47
+
48
+ rules = opts[:rules]
49
+ fragments = []
50
+ self.each_match("CO").each do |_atoms|
51
+ (carbon, oxygen) = _atoms
52
+ carbon_nbrs = carbon.atoms.reject {|atom| atom == oxygen }
53
+ c3_nbrs = carbon_nbrs.select {|atm| atm.type == 'C3' }
54
+ c2_nbrs = carbon_nbrs.select {|atm| atm.type == 'C2' }
55
+ num_oxygen_bonds = oxygen.bonds.size
56
+ # pulling this out here causes it to work incorrectly internally
57
+ # (not sure why)
58
+ #co_bond = carbon.get_bond(oxygen)
59
+
60
+ case num_oxygen_bonds
61
+ when 1 # an alcohol
62
+ # water loss
63
+ if (c3_nbrs.size > 0 || c2_nbrs.size > 0) && !carbon.carboxyl_carbon?
64
+ if rules.include?(:h2oloss)
65
+ frag_sets = (c2_nbrs + c3_nbrs).map do |dbl_bondable_atom|
66
+ frags = feint_double_bond(dbl_bondable_atom.get_bond(carbon)) do |_mol|
67
+ # TODO: check accuracy before completely splitting for efficiency
68
+ frags = _mol.split(carbon.get_bond(oxygen))
69
+ frags.map(&:add_h!)
70
+ end
71
+ end
72
+
73
+ self.add_h!
74
+ frag_sets.select! do |_frags|
75
+ self.allowable_fragmentation?(_frags)
76
+ end
77
+ fragments.push *frag_sets
78
+ end
79
+ if rules.include?(:co)
80
+ # alcohol becomes a ketone and one R group is released
81
+ frag_sets = c3_nbrs.map do |neighbor_atom|
82
+ frags = feint_double_bond(carbon.get_bond(oxygen)) do |_mol|
83
+ frags = _mol.split(carbon.get_bond(neighbor_atom))
84
+ frags.map(&:add_h!)
85
+ end
86
+ end
87
+
88
+ self.add_h!
89
+ frag_sets.select! do |_frags|
90
+ self.allowable_fragmentation?(_frags)
91
+ end
92
+ fragments.push *frag_sets
93
+ end
94
+
95
+ if rules.include?(:co) && (num_oxygen_bonds == 2)
96
+ if oxygen
97
+
98
+
99
+ # alcohol becomes a ketone and one R group is released
100
+ frag_sets = c3_nbrs.map do |neighbor_atom|
101
+ frags = feint_double_bond(carbon.get_bond(oxygen)) do |_mol|
102
+ frags = _mol.split(carbon.get_bond(neighbor_atom))
103
+ frags.map(&:add_h!)
104
+ end
105
+ end
106
+
107
+ self.add_h!
108
+ frag_sets.select! do |_frags|
109
+ self.allowable_fragmentation?(_frags)
110
+ end
111
+ fragments.push *frag_sets
112
+ end
113
+
114
+ end
115
+ end
116
+ # oxygen bonded to something else (per-oxide??)
117
+ # also could be ether situation...
118
+ when 2
119
+ raise NotImplementedError
120
+ end
121
+ end
122
+ unless had_hydrogens
123
+ fragments.each {|set| set.each(&:remove_h!) }
124
+ self.remove_h!
125
+ end
126
+ fragments
127
+ end
128
+
129
+ end
130
+ include Fragmentable
131
+ end
132
+ end
133
+
134
+
135
+ # co_bond = carbon.get_bond(oxygen)
136
+ # left_to_c_bond = carbon.get_bond(left)
137
+ # right_to_c_bond = carbon.get_bond(right)
138
+ #
139
+ # co_bond.bond_order = 2
140
+ #
141
+ # [left_to_c_bond, right_to_c_bond].flat_map do |other_to_c_bond|
142
+ # mol.ob.delete_bond(other_to_c_bond.ob, false)
143
+ # pieces = mol.ob.separate.map(&:upcast)
144
+ # mol.ob.add_bond(other_to_c_bond.ob)
145
+ # pieces
146
+ # end
147
+
148
+
149
+ =begin
150
+
151
+ # duplicate the molecule so we can do what we like with it
152
+ mol = self.dup
153
+
154
+ has_hydrogens_added = h_added?
155
+ mol.remove_h! if has_hydrogens_added
156
+
157
+ mol.correct_for_ph!(opts[:ph])
158
+
159
+ rules = opts[:rules]
160
+ fragments = []
161
+ if rules.include?(:co)
162
+ mol.each_match("C(O)").flat_map do |_atoms|
163
+ carbon = _atoms.first
164
+ non_oxygen = carbon.each_bond.reject {|bond| bond.include?(_atoms.last) }
165
+ non_oxygen.each {|bond| p mol.split(bond) }
166
+
167
+ fragments.push *non_oxygen.flat_map {|bond| mol.split(bond) }
168
+ end
169
+ end
170
+ p fragments
171
+ abort 'here'
172
+ fragments.each(&:add_h!) if has_hydrogens_added
173
+ fragments
174
+ end
175
+ end
176
+
177
+ =end
178
+
179
+
180
+ # [[left_to_c_bond, left], [right_to_c_bond, right]].flat_map do |other_to_carbony_c, other|
181
+ # puts "INSIDE!!!"
182
+ # pieces = mol.split(other_to_carbony_c)
183
+ # c_in_pieces = nil
184
+ # oxy_in_pieces = nil
185
+ # other_in_pieces = nil
186
+ # pieces.each do |piece|
187
+ # piece.each_atom do |atom|
188
+ # p piece
189
+ # p [atom.id, other.id]
190
+ # other_in_pieces = atom if atom.id == other.id
191
+ # c_in_pieces = atom if atom.id == carbon.id
192
+ # oxy_in_pieces = atom if atom.id == oxygen.id
193
+ # break if c_in_pieces && oxy_in_pieces
194
+ # end
195
+ # break if c_in_pieces && oxy_in_pieces
196
+ # end
197
+ # oxygen_bond = c_in_pieces.get_bond(oxy_in_pieces)
198
+ # oxygen_bond.bond_order = 2
199
+ #
200
+ # puts "EXAMINE:other"
201
+ # p other_in_pieces.ob
202
+ # p other_in_pieces.mol.csmiles
203
+ # other_mol = other_in_pieces.mol
204
+ # ob_atom = other_mol.ob.new_atom
205
+ # ob_atom.set_atomic_num 1
206
+ # newbond = OpenBabel::OBBond.new
207
+ # ob_atom
208
+ #
209
+