rubabel 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -120,7 +120,7 @@ Have some bonds to break?, split makes new molecules split from that bond(s)
120
120
  bonds = mol.matches("CO").map {|c, o| c.get_bond(o) }
121
121
  mol.split(*bonds) # splits between every carbon single bonded to oxygen
122
122
 
123
- ### Add & Delete atoms/bonds
123
+ ### Add, delete, modify atoms/bonds
124
124
 
125
125
  #### Adding
126
126
 
@@ -147,6 +147,16 @@ Have some bonds to break?, split makes new molecules split from that bond(s)
147
147
  bond = mol[0].get_bond(mol[1])
148
148
  mol.delete(bond) # -> #<Mol C.O>
149
149
 
150
+ #### Modifying
151
+
152
+ You can add to or subtract from a bond to change its bond order:
153
+
154
+ mol = Rubabel["CC"]
155
+ mol[0].get_bond(mol[1]) + 1 # now it is a double bond
156
+ bond = mol[0].bonds.first
157
+ bond - 1
158
+ bond.bond_order # => 1
159
+
150
160
  ## Installing
151
161
 
152
162
  First, many thanks to Andreas Maunz for packaging openbabel as a gem which makes this install quite painless.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.2.2
@@ -4,40 +4,64 @@ require 'trollop'
4
4
  require 'rubabel'
5
5
  require 'rubabel/molecule/fragmentable'
6
6
 
7
+ default_ph = 2.5
8
+
9
+ Fragment = Struct.new(:frag, :id, :title, :mz, :mass, :charge, :smiles, :pairing)
7
10
 
8
11
  parser = Trollop::Parser.new do
9
12
  banner "usage: #{File.basename($0)} [OPTIONS|RULES] <SMARTS> ..."
10
13
  text "\noptions:"
11
- opt :ph, "the pH to use (experimental option)", :default => Rubabel::Molecule::Fragmentable::DEFAULT_OPTIONS[:ph]
14
+ opt :ph, "the pH to use (experimental option)", :default => default_ph
15
+ opt :images, "print out svg images of fragments"
16
+ opt :format, "format of the molecules", :default => 'smiles'
12
17
  #opt :uniq, "no repeated fragments", :default => false
13
18
  text "\nrules:"
14
19
  Rubabel::Molecule::Fragmentable::RULES.each do |rule|
15
20
  opt rule, rule.to_s.gsub("_",' ')
16
21
  end
17
22
  text "\nexample:"
18
- text "fragmenter.rb -aecsoxn 'CCC(=O)OCCC' 'CCC(=O)OCCC(=O)O'"
23
+ text "fragmenter.rb -xeh 'CCC(=O)OCCC' 'CCC(=O)OCCC(=O)O'"
19
24
  end
20
25
 
21
- rules = parser.parse(ARGV)
22
- options = {rules: []}
23
- options[:ph] = rules.delete(:ph)
24
- options[:uniq] = rules.delete(:uniq)
25
- rules.each do |k,v|
26
- options[:rules] << k if v && k.to_s !~ /_given/
27
- end
26
+ options = parser.parse(ARGV)
27
+ opts = {rules: []}
28
+ opts[:uniq] = options.delete(:uniq)
29
+ ph = options.delete(:ph)
30
+ opts[:rules] = Rubabel::Molecule::Fragmentable::RULES.map do |rule|
31
+ rule if options["#{rule}_given".to_sym]
32
+ end.compact
28
33
 
29
34
  if ARGV.size == 0
30
35
  parser.educate && exit
31
36
  end
32
37
 
33
- ARGV.each do |mol|
34
- mol = Rubabel[mol]
38
+ ARGV.each do |smiles|
39
+ mol = Rubabel[smiles, options[:format].to_sym]
35
40
  puts "\nmolecule: #{mol.csmiles}"
36
- fragment_sets = mol.fragment(options)
37
- fragment_sets.each do |frag_set|
38
- puts ""
39
- frag_set.each do |frag|
40
- puts "#{frag.mass.round(5)} #{frag.csmiles}"
41
+ mol.correct_for_ph!(ph)
42
+ puts "at ph #{ph}: #{mol.csmiles}"
43
+ fragment_sets = mol.fragment(opts)
44
+ puts %w(mz mass charge title smiles pairing).join("\t")
45
+ frags = []
46
+ fragment_sets.each_with_index do |frag_set,i|
47
+ frag_set.each_with_index do |frag,j|
48
+ unless frag.charge == 0
49
+ mz = (frag.mass / frag.charge).round(5)
50
+ end
51
+
52
+ frag.title = "#{i}-#{j}pair_" + (mz ? "#{mz}_mz" : "#{frag.mass.round(3)}_Mass")
53
+ frag_obj = Fragment.new(frag, frag.title, frag.title, mz, frag.exact_mass, frag.charge, frag.csmiles, i)
54
+ frags << frag_obj
41
55
  end
42
56
  end
57
+ frags = frags.sort_by {|frag| [-frag.charge, frag.mz] }
58
+ if options[:images]
59
+ frags.each do |frag|
60
+ fn = "#{frag.title}.png"
61
+ frag.frag.write(fn)
62
+ end
63
+ end
64
+ frags.each do |frag|
65
+ puts [:mz, :mass, :charge, :title, :smiles, :pairing].map {|cat| frag.send(cat) }.join("\t")
66
+ end
43
67
  end
@@ -1,3 +1,5 @@
1
+ #encoding: utf-8
2
+
1
3
  require 'matrix'
2
4
  require 'andand'
3
5
 
@@ -184,7 +186,7 @@ module Rubabel
184
186
  end
185
187
  @ob.set_spin_multiplicity(new_spin)
186
188
  atoms.each do |atom|
187
- if atom.atomic_num == 1
189
+ if atom.hydrogen?
188
190
  self.mol.delete_atom(atom)
189
191
  break
190
192
  end
@@ -246,6 +248,10 @@ module Rubabel
246
248
  @ob.get_spin_multiplicity
247
249
  end
248
250
 
251
+ def spin=(val)
252
+ @ob.set_spin_multiplicity(val)
253
+ end
254
+
249
255
  def type
250
256
  @ob.get_type
251
257
  end
@@ -292,6 +298,13 @@ module Rubabel
292
298
  def hbond_donor?() @ob.is_hbond_donor end
293
299
  def hbond_donor_h?() @ob.is_hbond_donor_h end
294
300
 
301
+ # the total number of hydrogens bonded to the atom (implicit + explicit)
302
+ def hydrogen_count
303
+ @ob.implicit_hydrogen_count + @ob.explicit_hydrogen_count
304
+ end
305
+
306
+ alias_method :num_h, :hydrogen_count
307
+
295
308
  def double_bond?
296
309
  each_bond.any? {|bond| bond.bond_order == 2 }
297
310
  end
@@ -1,3 +1,5 @@
1
+ #encoding: utf-8
2
+
1
3
  require 'rubabel/atom'
2
4
 
3
5
  class OpenBabel::OBBond
@@ -88,7 +90,29 @@ module Rubabel
88
90
  end
89
91
 
90
92
  def inspect
91
- "[#{atoms.map(&:inspect).join('-')}]"
93
+ bond_symbol = case bond_order
94
+ when 2 then '='
95
+ when 3 then '≡'
96
+ else
97
+ '-'
98
+ end
99
+ "#{atoms.map(&:inspect).join(bond_symbol)}"
100
+ end
101
+
102
+ # returns self
103
+ def +(val)
104
+ # do we need to check the bounds here?
105
+ newval = @ob.get_bond_order + val
106
+ @ob.set_bond_order(newval)
107
+ self
108
+ end
109
+
110
+ # won't decrease below zero. returns self
111
+ def -(val)
112
+ newval = @ob.get_bond_order - val
113
+ newval = 0 if newval < 0
114
+ @ob.set_bond_order(newval)
115
+ self
92
116
  end
93
117
 
94
118
  end
@@ -503,7 +503,13 @@ module Rubabel
503
503
  end
504
504
 
505
505
  # sensitive to add_h!
506
- def num_atoms() @ob.num_atoms end
506
+ def num_atoms(count_implied_hydrogens=false)
507
+ if !count_implied_hydrogens
508
+ @ob.num_atoms
509
+ else
510
+ @ob.num_atoms + reduce(0) {|cnt, atom| cnt + atom.ob.implicit_hydrogen_count }
511
+ end
512
+ end
507
513
  def num_bonds() @ob.num_bonds end
508
514
  def num_hvy_atoms() @ob.num_hvy_atoms end
509
515
  def num_residues() @ob.num_residues end
@@ -6,21 +6,12 @@ module Rubabel
6
6
  class Molecule
7
7
  module Fragmentable
8
8
 
9
- #:sp3c_oxygen_asymmetric_far_sp3, :sp3c_nitrogen_asymmetric_far_sp3,
10
- #RULES = Set[ :alcohol_to_aldehyde, :peroxy_to_carboxy, :co2_loss,
11
- # :sp3c_oxygen_double_bond_far_side_sp3, :sp3c_oxygen_double_bond_far_side_sp2, :sp3c_oxygen_double_bond_water_loss, :sp3c_nitrogen_double_bond,
12
- #]
13
- #ADDUCTS = [:lioh, :nh4cl, :nh4oh]
14
- #CO_RULES = Set[:alcohol_to_aldehyde, :peroxy_to_carboxy, :co2_loss,
15
- # :sp3c_oxygen_double_bond_water_loss, :sp3c_oxygen_double_bond_far_side_sp2, :sp3c_oxygen_double_bond_far_side_sp3, :sp3c_oxygen_asymmetric_far_sp3
16
- #]
17
-
18
- RULES = Set[:cad_o, :cad_oo, :oxed_ether]
9
+ #RULES = Set[:cod, :codoo, :oxe, :oxepd, :oxh]
10
+ RULES = Set[:cod, :codoo, :oxe, :oxepd, :oxh, :oxhpd]
19
11
 
20
12
  DEFAULT_OPTIONS = {
21
13
  rules: RULES,
22
- #adduct: nil,
23
- #ph: 7.4,
14
+ errors: :remove,
24
15
  # return only the set of unique fragments
25
16
  uniq: false,
26
17
  }
@@ -31,117 +22,8 @@ module Rubabel
31
22
  # For instance, water loss with double bond formation is not allowable
32
23
  # for NCC(O)CC => CCC=C[NH2+], presumably because of the lone pair and
33
24
  # double bond resonance.
34
- #
35
25
  def allowable_fragmentation?(frags)
36
- self.num_atoms == frags.map(&:num_atoms).reduce(:+)
37
- end
38
-
39
- # add_h! to self, then selects allowable fragments
40
- def allowable_fragment_sets!(fragment_sets)
41
- self.add_h!
42
- fragment_sets.select do |_frags|
43
- putsv "ExMAIN:"
44
- putsv _frags.inspect
45
- putsv self.allowable_fragmentation?(_frags)
46
- self.allowable_fragmentation?(_frags)
47
- end
48
- end
49
-
50
- # will turn bond into a double bond, yield the changed molecule, then
51
- # return the bond to the original state when the block is closed
52
- # returns whatever the block returned
53
- def feint_double_bond(bond, give_e_pair=nil, get_e_pair=nil, &block)
54
- orig = bond.bond_order
55
- bond.bond_order = 2
56
- reply =
57
- if give_e_pair || get_e_pair
58
- feint_e_transfer(give_e_pair, get_e_pair, &block)
59
- else
60
- block.call(self)
61
- end
62
- bond.bond_order = orig
63
- reply
64
- end
65
-
66
- # warning, this method adds_h! to the calling molecule
67
- def electrophile_snatches_electrons(carbon, electrophile)
68
- self.add_h!
69
- frags = self.split(carbon.get_bond(electrophile))
70
- raise NotImplementedError
71
- # don't check for allowable fragments because it
72
- #allowable_fragment_sets!([frag_set])
73
- end
74
-
75
- def feint_e_transfer(give_e_pair=nil, get_e_pair=nil, &block)
76
- if give_e_pair
77
- gc_orig = give_e_pair.charge
78
- give_e_pair.charge = gc_orig + 1
79
- end
80
- if get_e_pair
81
- rc_orig = get_e_pair.charge
82
- get_e_pair.charge = rc_orig - 1
83
- end
84
-
85
- reply = block.call(self)
86
-
87
- give_e_pair.charge = gc_orig if give_e_pair
88
- get_e_pair.charge = rc_orig if get_e_pair
89
-
90
- reply
91
- end
92
-
93
- def near_side_double_bond_break(carbon, electrophile)
94
- frag_sets = carbon.atoms.select {|atom| atom.type == "C3" }.map do |near_c3|
95
- frags = feint_double_bond(carbon.get_bond(near_c3)) do |_mol|
96
- frags = _mol.split(electrophile.get_bond(carbon))
97
- frags.map(&:add_h!)
98
- end
99
- end
100
- allowable_fragment_sets!(frag_sets)
101
- end
102
-
103
- def alcohol_to_aldehyde(carbon, oxygen, carbon_nbrs)
104
- # alcohol becomes a ketone and one R group is released
105
- frag_sets = carbon_nbrs.select {|atom| atom.type == 'C3' }.map do |_atom|
106
- frags = feint_double_bond(carbon.get_bond(oxygen)) do |_mol|
107
- frags = _mol.split(carbon.get_bond(_atom))
108
- frags.map(&:add_h!)
109
- end
110
- end
111
- allowable_fragment_sets!(frag_sets)
112
- end
113
-
114
- def co2_loss(carbon, oxygen, c3_nbr)
115
- # carboxyl rules ...
116
- # neutral carbon dioxide loss with anion gain on attaching group
117
- # (if carbon)
118
- frags = feint_double_bond(carbon.get_bond(oxygen), oxygen, c3_nbr) do |_mol|
119
- frags = _mol.split(c3_nbr.get_bond(carbon))
120
- frags.map(&:add_h!)
121
- end
122
- allowable_fragment_sets!([frags])
123
- end
124
-
125
- def peroxy_to_carboxy(carbon, oxygen, carbon_nbrs, oxygen_nbr)
126
- if oxygen_nbr.el == :o # has a neighbor oxygen
127
- distal_o = oxygen_nbr
128
- if distal_o.bonds.size == 1 # this is a peroxy
129
- frag_sets = carbon_nbrs.select {|atom| atom.type == 'C3' }.map do |_atom|
130
- self.swap!(carbon, _atom, oxygen, distal_o)
131
- frags = feint_double_bond(carbon.get_bond(oxygen)) do |_mol|
132
-
133
- # we swapped the atoms so the bond to split off is now
134
- # attached to the oxygen
135
- frags = _mol.split(oxygen.get_bond(_atom))
136
- frags.map(&:add_h!)
137
- end
138
- self.swap!(carbon, distal_o, oxygen, _atom)
139
- frags
140
- end
141
- allowable_fragment_sets!(frag_sets)
142
- end
143
- end
144
-
26
+ self.num_atoms(true) == frags.reduce(0) {|cnt,fr| cnt + fr.num_atoms(true) }
145
27
  end
146
28
 
147
29
  # splits the molecule between the carbon and carbon_nbr, adds a double
@@ -174,29 +56,30 @@ module Rubabel
174
56
  # breaks the bond and gives the electrons to the oxygen
175
57
  def carbon_oxygen_esteal(carbon, oxygen)
176
58
  nmol = self.dup
177
- nmol.ob.add_hydrogens
178
59
  ncarbon = nmol.atom(carbon.id)
179
60
  noxygen = nmol.atom(oxygen.id)
180
61
  nmol.delete_bond(ncarbon, noxygen)
181
- ncarbon.charge += 1
182
- noxygen.charge -= 1
183
62
  ncarbon.remove_an_h!
184
- #p ncarbon.ob.implicit_hydrogen_count
185
- #p ncarbon
186
- #ncarbon.ob.decrement_implicit_valence
187
- #p ncarbon.ob.implicit_hydrogen_count
188
- #p ncarbon
189
- #ncarbon.ob.increment_implicit_valence
63
+ #noxygen.ob.set_spin_multiplicity 1
64
+ noxygen.spin = 1
65
+ noxygen.charge = -1
66
+ nmol.split
67
+ end
68
+
69
+ # returns the duplicated molecule and the equivalent atoms
70
+ def dup_molecule(atoms=[])
71
+ nmol = self.dup
72
+ [nmol, atoms.map {|old_atom| nmol.atom(old_atom.id) }]
73
+ end
190
74
 
191
- nmol.title = nmol.to_s
192
- p nmol.write("tmp.svg")
193
- parts = nmol.split
194
- p z=parts.first
195
- p z.formula
196
- p z.mass
197
- p z.exact_mass
198
-
199
- puts "HIAY"
75
+ # returns molecules created from splitting between the electrophile and
76
+ # the center and where the bond order is increased between the center
77
+ # and center_nbr
78
+ def break_with_double_bond(electrophile, center, center_nbr)
79
+ (nmol, (nele, ncarb, ncarb_nbr)) = self.dup_molecule([electrophile, center, center_nbr])
80
+ nmol.delete_bond(nele, ncarb)
81
+ ncarb_nbr.get_bond(ncarb) + 1
82
+ nmol.split
200
83
  end
201
84
 
202
85
  # an empty array is returned if there are no fragments generated.
@@ -205,6 +88,7 @@ module Rubabel
205
88
  #
206
89
  # :rules => queryable by :include? set of rules
207
90
  # :uniq => false
91
+ # :errors => :remove | :fix | :ignore (default is :remove)
208
92
  def fragment(opts={})
209
93
  only_uniqs = true
210
94
  opts = DEFAULT_OPTIONS.merge(opts)
@@ -218,131 +102,60 @@ module Rubabel
218
102
 
219
103
  fragment_sets = []
220
104
 
221
- if opts[:rules].any? {|r| [:cad_o, :cad_oo].include?(r) }
105
+ if opts[:rules].any? {|r| [:cod, :codoo].include?(r) }
222
106
  self.each_match("C[O;h1,O]", only_uniqs) do |carbon, oxygen|
223
107
  carbon.atoms.select {|a| a.el == :c }.each do |carbon_nbr|
224
108
  fragment_sets << carbonyl_oxygen_dump(carbon, oxygen, carbon_nbr)
225
109
  end
226
110
  end
227
111
  end
228
- if opts[:rules].any? {|r| [:oxed_ether].include?(r) }
229
- self.each_match("C[O&X2]", only_uniqs) do |carbon, oxygen|
112
+ if opts[:rules].any? {|r| [:oxe].include?(r) }
113
+ self.each_match("C-O", only_uniqs) do |carbon, oxygen|
230
114
  fragment_sets << carbon_oxygen_esteal(carbon, oxygen)
231
115
  end
232
116
  end
117
+ # right now implemented so that a beta hydrogen has to be available for
118
+ # extraction
119
+ if opts[:rules].any? {|r| [:oxh].include?(r) }
120
+ self.each_match("C[C,O]-O", only_uniqs) do |beta_c, center, oxygen|
121
+ next unless beta_c.hydrogen_count > 0
122
+ fragment_sets << break_with_double_bond(oxygen, center, beta_c)
123
+ end
124
+ end
125
+ if opts[:rules].any? {|r| [:oxhpd].include?(r) }
126
+ self.each_match("C-O-P-O", only_uniqs) do |carbon, alc_oxy, phosphate, beta_carb_oxy|
127
+ next unless beta_carb_oxy.hydrogen_count > 0
128
+ frag_set = break_with_double_bond(alc_oxy, phosphate, beta_carb_oxy)
129
+ frag_set.map! &:convert_dative_bonds!
130
+ fragment_sets << frag_set
131
+ end
132
+ end
133
+ if opts[:rules].any? {|r| [:oxepd].include?(r) }
134
+ self.each_match("P-O-C", only_uniqs) do |phosphate, oxygen, carbon|
135
+ frag_set = carbon_oxygen_esteal(phosphate, oxygen)
136
+ frag_set.map! &:convert_dative_bonds!
137
+ fragment_sets << frag_set
138
+ end
139
+ end
233
140
 
234
- unless had_hydrogens
235
- fragment_sets.each {|set| set.each(&:remove_h!) }
236
- self.remove_h!
141
+ case opts[:errors]
142
+ when :remove
143
+ fragment_sets.select! {|set| allowable_fragmentation?(set) }
144
+ when :fix
145
+ raise NotImplementedError
146
+ when :ignore # do nothing
237
147
  end
148
+
149
+ self.remove_h!
238
150
  if opts[:uniq]
239
151
  # TODO: implement properly
240
152
  raise NotImplementedError
241
- #fragment_sets = fragment_sets.uniq_by(&:csmiles)
153
+ #fragment_sets = fragment_sets.uniq_by(&:csmiles)
242
154
  end
243
155
 
244
156
  fragment_sets
245
157
  end
246
-
247
-
248
- # had_hydrogens = self.h_added?
249
-
250
- #self.correct_for_ph!(opts[:ph])
251
- #self.remove_h!
252
-
253
- #rules = opts[:rules]
254
- #fragment_sets = []
255
- #if rules.any? {|rule| CO_RULES.include?(rule) }
256
- #putsv "matching C-O"
257
- #self.each_match("CO").each do |_atoms|
258
- ## note: this will *not* match C=O
259
- #(carbon, oxygen) = _atoms
260
- #carbon_nbrs = carbon.atoms.reject {|atom| atom == oxygen }
261
- #c3_nbrs = carbon_nbrs.select {|atm| atm.type == 'C3' }
262
- ## pulling this out here causes it to work incorrectly internally
263
- ## (not sure why)
264
- ##co_bond = carbon.get_bond(oxygen)
265
-
266
- #case oxygen.bonds.size # non-hydrogen bonds
267
- #when 1 # *must* be an alcohol or a carboxylic acid
268
- #putsv "#{csmiles} oxygen has no other bonds besides C-O (alcohol or carboxylic acid)"
269
- #if carbon.type == 'C3'
270
- #if rules.include?(:sp3c_oxygen_double_bond_water_loss)
271
- #putsv "rule :sp3c_oxygen_double_bond_water_loss"
272
- #fragment_sets.push *near_side_double_bond_break(carbon, oxygen)
273
- #end
274
- #if rules.include?(:alcohol_to_aldehyde)
275
- #putsv "rule :alcohol_to_aldehyde"
276
- #fragment_sets.push *alcohol_to_aldehyde(carbon, oxygen, carbon_nbrs)
277
- #end
278
- #elsif carbon.carboxyl_carbon?
279
- #if rules.include?(:co2_loss)
280
- #putsv "rule :co2_loss"
281
- #if c3_nbr = c3_nbrs.first
282
- #fragment_sets.push *co2_loss(carbon, oxygen, c3_nbr)
283
- #end
284
- #end
285
- #end
286
- #when 2
287
- #putsv "#{csmiles} c-o & oxygen has 2 non-hydrogen bonds"
288
- #oxygen_nbr = oxygen.atoms.reject {|atom| atom.idx == carbon.idx }.first
289
- #if carbon.type == 'C3'
290
- #if rules.include?(:peroxy_to_carboxy)
291
- #fragment_sets.push *peroxy_to_carboxy(carbon, oxygen, carbon_nbrs, oxygen_nbr)
292
- #end
293
- ## ester and ethers (look *only* on close side for places to make
294
- ## double bond)
295
-
296
- #if oxygen_nbr.type == 'C3'
297
- #putsv "oxygen nbr is C3"
298
- #if rules.include?(:sp3c_oxygen_double_bond_far_side_sp3)
299
- #putsv "rule :sp3c_oxygen_double_bond_far_side_sp3"
300
- #fragment_sets.push *near_side_double_bond_break(carbon, oxygen)
301
- #end
302
- #if rules.include?(:sp3c_oxygen_asymmetric_far_sp3)
303
- #putsv "rule :sp3c_oxygen_asymmetric_far_sp3"
304
- ## only returns a single frag set
305
- #fragment_sets.push electrophile_snatches_electrons(carbon, oxygen)
306
- #end
307
- #end
308
- #if oxygen_nbr.type == 'C2'
309
- #if rules.include?(:sp3c_oxygen_double_bond_far_side_sp2)
310
- #putsv "rule :sp3c_oxygen_double_bond_far_side_sp2"
311
- #fragment_sets.push *near_side_double_bond_break(carbon, oxygen)
312
- #end
313
- #end
314
- ## note: the case of a carboxy is found with CO search
315
- #end
316
- #end
317
- #end
318
- #end
319
- #if rules.include?(:sp3c_nitrogen_double_bond)
320
- #self.each_match("CN") do |_atoms|
321
- #(carbon, nitrogen) = _atoms
322
- #num_nitrogen_bonds = nitrogen.bonds.size
323
- #case num_nitrogen_bonds
324
- #when 2
325
- #if carbon.type == 'C3'
326
- #fragment_sets.push *near_side_double_bond_break(carbon, nitrogen)
327
- #end
328
- #end
329
- #end
330
- #end
331
-
332
- #unless had_hydrogens
333
- #fragment_sets.each {|set| set.each(&:remove_h!) }
334
- #self.remove_h!
335
- #end
336
- #if opts[:uniq]
337
- ## TODO: impelent properly
338
- ##fragment_sets = fragment_sets.uniq_by(&:csmiles)
339
- #raise NotImplementedError
340
- #end
341
- #fragment_sets
342
- #end
343
-
344
158
  end
345
159
  include Fragmentable
346
160
  end
347
- end
348
-
161
+ end # Rubabel
@@ -4,226 +4,184 @@ require 'rubabel'
4
4
 
5
5
  $VERBOSE = nil
6
6
 
7
- #describe Rubabel::Molecule::Fragmentable do
7
+ describe Rubabel::Molecule::Fragmentable do
8
8
 
9
- ## :peroxy_to_carboxy
10
- ## :oxygen_asymmetric_sp3, :nitrogen_asymmetric_sp3,
11
- ## :internal_phosphoester
9
+ # :peroxy_to_carboxy
10
+ # :oxygen_asymmetric_sp3, :nitrogen_asymmetric_sp3,
11
+ # :internal_phosphoester
12
12
 
13
- #describe 'fragmentation rules' do
14
- ## coenzyme: CC1=CC(=O)C=CC1=O
15
- ## 2-methylcyclohexa-2,5-diene-1,4-dione
13
+ describe 'fragmentation rules' do
14
+ # coenzyme: CC1=CC(=O)C=CC1=O
15
+ # 2-methylcyclohexa-2,5-diene-1,4-dione
16
16
 
17
17
  #let(:test_mol) { "COP(=O)(O)OCNCOCC(OO)C(=O)O" }
18
18
 
19
- #it 'raises an error for a bad rule' do
20
- #mol = Rubabel["CCNC"]
21
- #expect { mol.fragment(rules: [:wackiness]) }.to raise_error
22
- #end
19
+ it 'raises an error for a bad rule' do
20
+ mol = Rubabel["CCNC"]
21
+ expect { mol.fragment(rules: [:wackiness]) }.to raise_error
22
+ end
23
23
 
24
- #describe 'cad_o: carbonyl appendage dump ' do
25
- ## a primary oxygen or peroxide => C=O appendage dump
24
+ describe 'cod: carbonyl appendage dump' do
25
+ # a primary oxygen or peroxide => C=O appendage dump
26
26
 
27
- #describe 'cad_o: primary alcohol' do
28
- #mol = Rubabel["NCC(O)CC"]
29
- #frags = mol.fragment(rules: [:cad_o])
30
- #frags.flatten(1).map(&:csmiles).should == ["C[NH3+]", "CCC=O", "C([NH3+])C=O", "CC"]
31
- #end
32
-
33
- #describe 'peroxide' do
34
- #mol = Rubabel["NCC(OO)CC"]
35
- #frags = mol.fragment(rules: [:cad_oo])
36
- #frags.flatten(1).each_with_index do |f,i|
37
- #f.write("mol#{i}.svg")
38
- #end
39
- #frags.flatten(1).map(&:csmiles).should == ["OC[NH3+]", "CCC=O", "C([NH3+])C=O", "CCO"]
40
- #end
41
-
42
- #describe 'cad_o: carboxylate' do
43
- #mol = Rubabel["CCC(=O)O"]
44
- #pieces = mol.fragment(rules: [:cad_o])
45
- #pieces.flatten(1).map(&:csmiles).should == ["[CH2-]C", "O=C=O"]
46
- #end
47
-
48
- #describe 'cad_o: carboxylic acid' do
49
- #mol = Rubabel["CCC(=O)O"]
50
- #mol.add_h!(1.5)
51
- #pieces = mol.fragment(rules: [:cad_o])
52
- #pieces.flatten(1).map(&:csmiles).should == ["CC", "O=C=O"]
53
- #end
54
- #end
55
-
56
- #describe 'oxe: oxygen electron stealing' do
57
- ## oxygen just steals the electron pair it is attached to. This
58
- ## typically results in a negatively charged oxygen and a positively
59
- ## charged carbo-cation.
60
- #describe 'ether to ions' do
61
- #mol = Rubabel["NCCOCC"]
62
- ##mol.add_h!
63
- #pieces = mol.fragment(rules: [:oxed_ether])
64
- #m = pieces.first.first
65
- ##[CH2+]CH2NH3+
66
- ## C2H7N
67
-
68
- #p m
69
- #m.add_h!
70
- #h = m.atoms[4].atoms.find {|a| a.el == :h }
71
- #m.delete_atom(h)
72
- #p m
73
- #p m.atoms
74
- ##mol = Rubabel["NCO"]
75
- ##pieces = mol.fragment(rules: [:oxed_ether])
76
- ##pieces.size.should == 0
77
- #end
78
-
79
- #describe 'ester to ions' do
80
- #end
81
-
82
- #describe 'carboxyl group' do
83
- #end
84
-
85
- #describe 'phosphodiester' do
86
- #end
87
- #end
88
-
89
- ## this is really a subset of oxygen bond stealing: if the negatively
90
- ## charged oxygen can rip off a nearby proton, it will.
91
- #describe 'oxygen alpha/beta/gamma hydrogen stealing' do
92
- #describe 'primary alcohol giving water loss' do
93
- #end
94
-
95
- #describe 'peroxide carbonyl formation' do
96
- #end
97
-
98
- #describe 'ether to alcohol' do
99
- #end
100
-
101
- #describe 'ester to alcohol' do
102
- #end
103
-
104
- #describe 'phosphodiester' do
105
- #end
106
- #end
107
-
108
- #end
109
- #end
110
-
111
-
112
-
113
-
114
- ##describe ':sp3c_nitrogen_double_bond' do
115
-
116
- ##it 'cleaves like an ether a secondary NH group if possible' do
117
- ##mol = Rubabel["CCNC"]
118
- ##frag_sets = mol.fragment(rules: [:sp3c_nitrogen_double_bond])
119
- ##frag_sets.size.should == 1
120
- ##csmiles = frag_sets.first.map(&:csmiles)
121
- ##csmiles.should include("C=C")
122
- ##csmiles.should include("C[NH3+]")
123
- ##end
124
-
125
- ##it 'will not cleave if not possible' do
126
- ##mol = Rubabel["CNC"]
127
- ##frag_sets = mol.fragment(rules: [:sp3c_nitrogen_double_bond])
128
- ##frag_sets.should be_empty
129
- ##end
130
-
131
- ##end
132
-
133
- ##describe ':co2_loss' do
134
- ##it 'loss of CO2 from carboxy group with charge transfer' do
135
- ##mol = Rubabel["NCC(=O)O"]
136
- ##frag_sets = mol.fragment( rules: [:co2_loss] )
137
- ##frag_sets.size.should == 1
138
- ##csmiles = frag_sets.first.map(&:csmiles)
139
-
140
- ##csmiles.should include("[CH2-][NH3+]")
141
- ##csmiles.should include("O=C=O")
142
- ##end
143
-
144
- ##it "doesn't remove CO2 if adjacent is not c3" do
145
- ##mol = Rubabel["C=CC(=O)O"]
146
- ##fragments = mol.fragment( rules: [:co2_loss] )
147
- ##fragments.should be_empty
148
- ##end
149
-
150
- ##end
151
-
152
- ##describe ':peroxy_to_carboxy' do
153
- ##it 'works' do
154
- ##mol = Rubabel["NCCC(OO)CC"]
155
- ##frag_sets = mol.fragment( rules: [:peroxy_to_carboxy] )
156
- ##frag_sets.size.should == 2
157
- ##frag_sets.flatten(1).map(&:csmiles).sort.should == ["CC", "CCC(=O)O", "CC[NH3+]", "OC(=O)CC[NH3+]"]
158
- ##end
159
- ##end
160
-
161
- ##describe ':sp3c_oxygen_asymmetric_far_sp3', :pending do
162
- ##it 'splits like sp3c_oxygen_double_bond except oxygen takes the electrons' do
163
- ##$VERBOSE = 3
164
- ##mol = Rubabel["NCCCOCC"]
165
- ##frag_sets = mol.fragment( rules: [:sp3c_oxygen_asymmetric_far_sp3] )
166
- ##$VERBOSE = nil
167
- ##frag_sets.size.should == 2
168
- ###mol = Rubabel["NCCOCC"]
169
- ###p mol.fragment( rules: [:sp3c_oxygen_asymmetric_far_sp3] )
170
- ###mol = Rubabel["NCOC"]
171
- ###p mol.fragment( rules: [:sp3c_oxygen_asymmetric_far_sp3] )
172
- ##end
173
- ##end
174
-
175
- ##describe ':sp3c_oxygen_double_bond_water_loss' do
176
-
177
- ##it 'does h2o loss of alcohol' do
178
- ##mol = Rubabel["NCCC(O)CC"]
179
- ##fragments = mol.fragment( rules: [:sp3c_oxygen_double_bond_water_loss] )
180
- ##fragments.flatten(1).map(&:csmiles).sort.should == ["CC=CCC[NH3+]", "CCC=CC[NH3+]", "O", "O"]
181
- ##end
182
-
183
- ##it 'h2o loss does not allow bad chemistry' do
184
- ### lone pair and double bond resonance ?
185
- ##mol = Rubabel["NCC(O)CC"]
186
- ##fragments = mol.fragment( rules: [:sp3c_oxygen_double_bond_water_loss] )
187
- ##fragments.flatten(1).map(&:csmiles).sort.should == ["CC=CC[NH3+]", "O"]
188
-
189
- ##mol = Rubabel["NC(O)CC"]
190
- ##fragments = mol.fragment( rules: [:sp3c_oxygen_double_bond_water_loss] )
191
- ##fragments.flatten(1).map(&:csmiles).sort.should == []
192
- ##end
193
- ##end
194
-
195
- ##describe 'sp3c_oxygen_double_bond_far_side_sp2' do
196
-
197
- ##it 'does not cleave esters without sp3 carbons available for double bond' do
198
- ##mol = Rubabel["NCCC(=O)OC"]
199
- ##pieces = mol.fragment( rules: [:sp3c_oxygen_double_bond_far_side_sp2] )
200
- ##pieces.should be_empty
201
- ##end
202
-
203
- ##it 'cleaves esters on far side of singly bonded oxygen' do
204
- ##mol = Rubabel["NCCC(=O)OCC"]
205
- ##pieces = mol.fragment( rules: [:sp3c_oxygen_double_bond_far_side_sp2] )
206
- ##pieces.size.should == 1 # one set
207
- ##the_pair = pieces.first
208
- ##csmiles = the_pair.map(&:csmiles)
209
- ##csmiles.should include("OC(=O)CC[NH3+]")
210
- ##csmiles.should include("C=C")
211
- ##end
212
-
213
- ##end
214
-
215
- ##describe ':alcohol_to_aldehyde' do
216
- ##it 'cleaves beside alcohols to generate an aldehyde' do
217
- ##mol = Rubabel["NCCC(O)CC"]
218
- ##mol.correct_for_ph!
219
- ##total_mass = mol.add_h!.mass
220
-
221
- ##pieces = mol.fragment(rules: [:alcohol_to_aldehyde])
222
- ##pieces.size.should == 2
223
- ##pieces.map(&:size).should == [2,2]
224
- ##pieces.flatten(1).map(&:csmiles).should == ["CC[NH3+]", "CCC=O", "C(C=O)C[NH3+]", "CC"]
225
- ##pieces.each do |pair|
226
- ##pair.map(&:mass).reduce(:+).should == total_mass
227
- ##end
228
- ##end
229
- ##end
27
+ specify 'cod: primary alcohol' do
28
+ mol = Rubabel["NCC(O)CC"]
29
+ frags = mol.fragment(rules: [:cod])
30
+ frags.flatten(1).map(&:csmiles).should == ["C[NH3+]", "CCC=O", "C([NH3+])C=O", "CC"]
31
+ end
32
+
33
+ specify 'peroxide' do
34
+ mol = Rubabel["NCC(OO)CC"]
35
+ frags = mol.fragment(rules: [:codoo])
36
+ frags.flatten(1).map(&:csmiles).should == ["OC[NH3+]", "CCC=O", "C([NH3+])C=O", "CCO"]
37
+ end
38
+
39
+ specify 'cod: carboxylate' do
40
+ mol = Rubabel["CCC(=O)O"]
41
+ pieces = mol.fragment(rules: [:cod])
42
+ pieces.flatten(1).map(&:csmiles).should == ["[CH2-]C", "O=C=O"]
43
+ end
44
+
45
+ specify 'cod: carboxylic acid' do
46
+ mol = Rubabel["CCC(=O)O"]
47
+ mol.add_h!(1.5)
48
+ pieces = mol.fragment(rules: [:cod])
49
+ pieces.flatten(1).map(&:csmiles).should == ["CC", "O=C=O"]
50
+ end
51
+ end
52
+
53
+ describe 'oxe: oxygen electron stealing' do
54
+ # oxygen just steals the electron pair it is attached to. This
55
+ # typically results in a negatively charged oxygen and a positively
56
+ # charged carbo-cation.
57
+ specify 'ether to ions (same for esters)' do
58
+ mol = Rubabel["CCOCCN"]
59
+ frag_set = mol.fragment(rules: [:oxe])
60
+ frags = frag_set.first
61
+ frags.first.csmiles.should == "C[CH2+]"
62
+ frags.last.csmiles.should == '[O-]CC[NH3+]'
63
+ frags.first.formula.should == 'C2H5'
64
+ frags.last.formula.should == 'C2H7NO'
65
+ frags.first.exact_mass.should be_within(1e-6).of(29.03912516)
66
+ frags.last.exact_mass.should be_within(1e-6).of(61.052763849)
67
+
68
+ mol = Rubabel["CCOC(=O)CCN"]
69
+ frag_set = mol.fragment(rules: [:oxe])
70
+ ff = frag_set.first
71
+ ff.first.csmiles.should == 'C[CH2+]'
72
+ ff.last.csmiles.should == '[O-]C(=O)CC[NH3+]'
73
+ ff.first.formula.should == "C2H5"
74
+ ff.last.formula.should == "C3H7NO2"
75
+ ff.first.exact_mass.should be_within(1e-6).of(29.03912516035)
76
+ ff.last.exact_mass.should be_within(1e-6).of(89.04767846841)
77
+ end
78
+
79
+ specify 'carboxyl group' do
80
+ mol = Rubabel["CCC(=O)O"]
81
+ frag_set = mol.fragment(rules: [:oxe])
82
+ ff = frag_set.first
83
+ ff.first.csmiles.should == 'CC[C+]=O'
84
+ ff.last.csmiles.should == '[O-]'
85
+ ff.first.formula.should == "C3H5O"
86
+ ff.first.exact_mass.should be_within(1e-6).of(57.034039779909996)
87
+ ff.last.formula.should == "O"
88
+ end
89
+
90
+ specify 'phosphodiester' do
91
+ mol = Rubabel["CC(COP(=O)([O-])OCCN"]
92
+ frag_set = mol.fragment(rules: [:oxepd])
93
+ ff = frag_set.first
94
+ ff.first.csmiles.should == '[O-]CCC'
95
+ ff.last.csmiles.should == '[NH3+]CCO[P](=O)=O'
96
+ ff.first.formula.should == 'C3H7O'
97
+ ff.first.exact_mass.should be_within(1e-6).of(59.049689844)
98
+ ff.last.formula.should == 'C2H7NO3P'
99
+ ff.last.exact_mass.should be_within(1e-6).of(124.016354719)
100
+
101
+ mol = Rubabel["CCCOP(=O)(OCC[N+](C)(C)C)[O-]"]
102
+ frag_set = mol.fragment(rules: [:oxepd, :oxe])
103
+ # some of these don't look right on first inspection, but that is
104
+ # because we 'converted dative bonds' meaning + and - next to each
105
+ # other are allowed to cancel one another out!
106
+ frag_set.size.should == 4
107
+ mols = frag_set.flatten
108
+ mols.map(&:csmiles).should == ["CC[CH2+]", "[O-]P(=O)(OCC[N+](C)(C)C)[O-]", "CCCOP(=O)([O-])[O-]", "[CH2+]C[N+](C)(C)C", "[O-]CCC", "O=[P](=O)OCC[N+](C)(C)C", "CCCO[P](=O)=O", "[O-]CC[N+](C)(C)C"]
109
+ mols.map(&:formula).should == ["C3H7", "C5H13NO4P", "C3H7O4P", "C5H13N", "C3H7O", "C5H13NO3P", "C3H7O3P", "C5H13NO"]
110
+ mols.map(&:exact_mass).zip([43.05477522449, 182.05821952995, 138.00819533273, 87.10479942171, 59.04968984405, 166.06330491039, 122.01328071317, 103.09971404127]) do |act, exp|
111
+ act.should be_within(1e-6).of(exp)
112
+ end
113
+
114
+ end
115
+ end
116
+
117
+ # this is really a subset of oxygen bond stealing: if the negatively
118
+ # charged oxygen can rip off a nearby proton, it will.
119
+ describe 'oxh: oxygen alpha/beta/gamma hydrogen stealing' do
120
+ specify 'primary alcohol giving water loss' do
121
+ mol = Rubabel["CC(O)CCN"]
122
+ frags = mol.fragment(rules: [:oxh])
123
+ ff = frags.first
124
+ ff.first.csmiles.should == 'C=CCC[NH3+]'
125
+ ff.last.csmiles.should == 'O'
126
+ ll = frags.last
127
+ ll.first.csmiles.should == 'CC=CC[NH3+]'
128
+ ll.last.csmiles.should == 'O'
129
+ ff.first.formula.should == 'C4H10N'
130
+ ff.first.exact_mass.should be_within(1e-6).of(72.0813243255)
131
+ end
132
+
133
+ specify 'peroxide carbonyl formation (or peroxide formation [that what we want??])' do
134
+ # do we really see peroxide formation? Tamil didn't include this in
135
+ # the rules but it follows from the broad way for creating these
136
+ # rules. Can prohibit peroxide formation in future if necessary...
137
+ mol = Rubabel["CC(OO)CCN"]
138
+ frags = mol.fragment(rules: [:oxh])
139
+ mols = frags.flatten
140
+ mols.map(&:csmiles).should == ["C=CCC[NH3+]", "OO", "CC(=O)CC[NH3+]", "O", "CC=CC[NH3+]", "OO"]
141
+ mols.map(&:formula).should == ["C4H10N", "H2O2", "C4H10NO", "H2O", "C4H10N", "H2O2"]
142
+ mols.map(&:exact_mass).zip([72.081324325, 34.005479304, 88.076238945, 18.010564684, 72.081324325, 34.005479304]) do |act, exp|
143
+ act.should be_within(1e-6).of(exp)
144
+ end
145
+ end
146
+
147
+ specify 'ether to alcohol, ignoring errors' do
148
+ # this is a good example of a 'disallowed structure' where the
149
+ # formulas do not match up to the original formulas
150
+ mol = Rubabel["CCOCCN"]
151
+ frags = mol.fragment(rules: [:oxh], errors: :ignore)
152
+ mols = frags.flatten
153
+ mols.map(&:csmiles).should == ["C=C", "OCC[NH3+]", "CCO", "C=C[NH2+]"]
154
+ end
155
+
156
+ specify 'ether to alcohol, removing errors' do
157
+ mol = Rubabel["CCOCCN"]
158
+ frags = mol.fragment(rules: [:oxh])
159
+ mols = frags.flatten
160
+ mols.map(&:csmiles).should == ["C=C", "OCC[NH3+]"]
161
+ end
162
+
163
+ specify 'ester to alcohol' do
164
+ mol = Rubabel["CC(=O)OCCCN"]
165
+ frags = mol.fragment(rules: [:oxh])
166
+ mols = frags.flatten
167
+ mols.map(&:csmiles).should == ["C=C=O", "OCCC[NH3+]", "CC(=O)O", "C=CC[NH3+]"]
168
+ mols.map(&:formula).should == ["C2H2O", "C3H10NO", "C2H4O2", "C3H8N"]
169
+ mols.map(&:exact_mass).zip([42.010564684, 76.076238945, 60.021129368000004, 58.065674261]) do |act,exp|
170
+ act.should be_within(1e-6).of(exp)
171
+ end
172
+ end
173
+
174
+ specify 'phosphodiester (right now needs very low pH and NOT SURE WORKING PROPERLY)' do
175
+ mol = Rubabel["CC(COP(=O)(O)OCCCN"]
176
+ mol.add_h!(1.0)
177
+ frags = mol.fragment(rules: [:oxhpd])
178
+ frags = frags.flatten
179
+ frags.map(&:csmiles).should == ["CCCO", "[NH3+]CCCOP(=O)=O", "CCCOP(=O)=O", "OCCC[NH3+]"]
180
+ end
181
+ end
182
+
183
+ end
184
+ end
185
+
186
+
187
+
@@ -88,6 +88,18 @@ describe Rubabel::Molecule do
88
88
  ar.size.should == 2
89
89
  end
90
90
 
91
+ specify 'num_atoms gives the number of atoms which can vary with hydrogens added' do
92
+ mol = Rubabel["CCC"]
93
+ mol.num_atoms.should == 3
94
+ mol.add_h!
95
+ mol.num_atoms.should == 11
96
+ end
97
+
98
+ specify 'num_atoms(true) gives the number including implied hydrogens' do
99
+ mol = Rubabel["CCC"]
100
+ mol.num_atoms(true).should == 11
101
+ end
102
+
91
103
  describe 'adding an atom' do
92
104
  it 'can be added but not attached' do
93
105
  mol = Rubabel["CCO"]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubabel
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-07 00:00:00.000000000 Z
12
+ date: 2012-10-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: openbabel