rubabel 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -120,7 +120,7 @@ Have some bonds to break?, split makes new molecules split from that bond(s)
120
120
  bonds = mol.matches("CO").map {|c, o| c.get_bond(o) }
121
121
  mol.split(*bonds) # splits between every carbon single bonded to oxygen
122
122
 
123
- ### Add & Delete atoms/bonds
123
+ ### Add, delete, modify atoms/bonds
124
124
 
125
125
  #### Adding
126
126
 
@@ -147,6 +147,16 @@ Have some bonds to break?, split makes new molecules split from that bond(s)
147
147
  bond = mol[0].get_bond(mol[1])
148
148
  mol.delete(bond) # -> #<Mol C.O>
149
149
 
150
+ #### Modifying
151
+
152
+ You can add to or subtract from a bond to change its bond order:
153
+
154
+ mol = Rubabel["CC"]
155
+ mol[0].get_bond(mol[1]) + 1 # now it is a double bond
156
+ bond = mol[0].bonds.first
157
+ bond - 1
158
+ bond.bond_order # => 1
159
+
150
160
  ## Installing
151
161
 
152
162
  First, many thanks to Andreas Maunz for packaging openbabel as a gem which makes this install quite painless.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.2.2
@@ -4,40 +4,64 @@ require 'trollop'
4
4
  require 'rubabel'
5
5
  require 'rubabel/molecule/fragmentable'
6
6
 
7
+ default_ph = 2.5
8
+
9
+ Fragment = Struct.new(:frag, :id, :title, :mz, :mass, :charge, :smiles, :pairing)
7
10
 
8
11
  parser = Trollop::Parser.new do
9
12
  banner "usage: #{File.basename($0)} [OPTIONS|RULES] <SMARTS> ..."
10
13
  text "\noptions:"
11
- opt :ph, "the pH to use (experimental option)", :default => Rubabel::Molecule::Fragmentable::DEFAULT_OPTIONS[:ph]
14
+ opt :ph, "the pH to use (experimental option)", :default => default_ph
15
+ opt :images, "print out svg images of fragments"
16
+ opt :format, "format of the molecules", :default => 'smiles'
12
17
  #opt :uniq, "no repeated fragments", :default => false
13
18
  text "\nrules:"
14
19
  Rubabel::Molecule::Fragmentable::RULES.each do |rule|
15
20
  opt rule, rule.to_s.gsub("_",' ')
16
21
  end
17
22
  text "\nexample:"
18
- text "fragmenter.rb -aecsoxn 'CCC(=O)OCCC' 'CCC(=O)OCCC(=O)O'"
23
+ text "fragmenter.rb -xeh 'CCC(=O)OCCC' 'CCC(=O)OCCC(=O)O'"
19
24
  end
20
25
 
21
- rules = parser.parse(ARGV)
22
- options = {rules: []}
23
- options[:ph] = rules.delete(:ph)
24
- options[:uniq] = rules.delete(:uniq)
25
- rules.each do |k,v|
26
- options[:rules] << k if v && k.to_s !~ /_given/
27
- end
26
+ options = parser.parse(ARGV)
27
+ opts = {rules: []}
28
+ opts[:uniq] = options.delete(:uniq)
29
+ ph = options.delete(:ph)
30
+ opts[:rules] = Rubabel::Molecule::Fragmentable::RULES.map do |rule|
31
+ rule if options["#{rule}_given".to_sym]
32
+ end.compact
28
33
 
29
34
  if ARGV.size == 0
30
35
  parser.educate && exit
31
36
  end
32
37
 
33
- ARGV.each do |mol|
34
- mol = Rubabel[mol]
38
+ ARGV.each do |smiles|
39
+ mol = Rubabel[smiles, options[:format].to_sym]
35
40
  puts "\nmolecule: #{mol.csmiles}"
36
- fragment_sets = mol.fragment(options)
37
- fragment_sets.each do |frag_set|
38
- puts ""
39
- frag_set.each do |frag|
40
- puts "#{frag.mass.round(5)} #{frag.csmiles}"
41
+ mol.correct_for_ph!(ph)
42
+ puts "at ph #{ph}: #{mol.csmiles}"
43
+ fragment_sets = mol.fragment(opts)
44
+ puts %w(mz mass charge title smiles pairing).join("\t")
45
+ frags = []
46
+ fragment_sets.each_with_index do |frag_set,i|
47
+ frag_set.each_with_index do |frag,j|
48
+ unless frag.charge == 0
49
+ mz = (frag.mass / frag.charge).round(5)
50
+ end
51
+
52
+ frag.title = "#{i}-#{j}pair_" + (mz ? "#{mz}_mz" : "#{frag.mass.round(3)}_Mass")
53
+ frag_obj = Fragment.new(frag, frag.title, frag.title, mz, frag.exact_mass, frag.charge, frag.csmiles, i)
54
+ frags << frag_obj
41
55
  end
42
56
  end
57
+ frags = frags.sort_by {|frag| [-frag.charge, frag.mz] }
58
+ if options[:images]
59
+ frags.each do |frag|
60
+ fn = "#{frag.title}.png"
61
+ frag.frag.write(fn)
62
+ end
63
+ end
64
+ frags.each do |frag|
65
+ puts [:mz, :mass, :charge, :title, :smiles, :pairing].map {|cat| frag.send(cat) }.join("\t")
66
+ end
43
67
  end
@@ -1,3 +1,5 @@
1
+ #encoding: utf-8
2
+
1
3
  require 'matrix'
2
4
  require 'andand'
3
5
 
@@ -184,7 +186,7 @@ module Rubabel
184
186
  end
185
187
  @ob.set_spin_multiplicity(new_spin)
186
188
  atoms.each do |atom|
187
- if atom.atomic_num == 1
189
+ if atom.hydrogen?
188
190
  self.mol.delete_atom(atom)
189
191
  break
190
192
  end
@@ -246,6 +248,10 @@ module Rubabel
246
248
  @ob.get_spin_multiplicity
247
249
  end
248
250
 
251
+ def spin=(val)
252
+ @ob.set_spin_multiplicity(val)
253
+ end
254
+
249
255
  def type
250
256
  @ob.get_type
251
257
  end
@@ -292,6 +298,13 @@ module Rubabel
292
298
  def hbond_donor?() @ob.is_hbond_donor end
293
299
  def hbond_donor_h?() @ob.is_hbond_donor_h end
294
300
 
301
+ # the total number of hydrogens bonded to the atom (implicit + explicit)
302
+ def hydrogen_count
303
+ @ob.implicit_hydrogen_count + @ob.explicit_hydrogen_count
304
+ end
305
+
306
+ alias_method :num_h, :hydrogen_count
307
+
295
308
  def double_bond?
296
309
  each_bond.any? {|bond| bond.bond_order == 2 }
297
310
  end
@@ -1,3 +1,5 @@
1
+ #encoding: utf-8
2
+
1
3
  require 'rubabel/atom'
2
4
 
3
5
  class OpenBabel::OBBond
@@ -88,7 +90,29 @@ module Rubabel
88
90
  end
89
91
 
90
92
  def inspect
91
- "[#{atoms.map(&:inspect).join('-')}]"
93
+ bond_symbol = case bond_order
94
+ when 2 then '='
95
+ when 3 then '≡'
96
+ else
97
+ '-'
98
+ end
99
+ "#{atoms.map(&:inspect).join(bond_symbol)}"
100
+ end
101
+
102
+ # returns self
103
+ def +(val)
104
+ # do we need to check the bounds here?
105
+ newval = @ob.get_bond_order + val
106
+ @ob.set_bond_order(newval)
107
+ self
108
+ end
109
+
110
+ # won't decrease below zero. returns self
111
+ def -(val)
112
+ newval = @ob.get_bond_order - val
113
+ newval = 0 if newval < 0
114
+ @ob.set_bond_order(newval)
115
+ self
92
116
  end
93
117
 
94
118
  end
@@ -503,7 +503,13 @@ module Rubabel
503
503
  end
504
504
 
505
505
  # sensitive to add_h!
506
- def num_atoms() @ob.num_atoms end
506
+ def num_atoms(count_implied_hydrogens=false)
507
+ if !count_implied_hydrogens
508
+ @ob.num_atoms
509
+ else
510
+ @ob.num_atoms + reduce(0) {|cnt, atom| cnt + atom.ob.implicit_hydrogen_count }
511
+ end
512
+ end
507
513
  def num_bonds() @ob.num_bonds end
508
514
  def num_hvy_atoms() @ob.num_hvy_atoms end
509
515
  def num_residues() @ob.num_residues end
@@ -6,21 +6,12 @@ module Rubabel
6
6
  class Molecule
7
7
  module Fragmentable
8
8
 
9
- #:sp3c_oxygen_asymmetric_far_sp3, :sp3c_nitrogen_asymmetric_far_sp3,
10
- #RULES = Set[ :alcohol_to_aldehyde, :peroxy_to_carboxy, :co2_loss,
11
- # :sp3c_oxygen_double_bond_far_side_sp3, :sp3c_oxygen_double_bond_far_side_sp2, :sp3c_oxygen_double_bond_water_loss, :sp3c_nitrogen_double_bond,
12
- #]
13
- #ADDUCTS = [:lioh, :nh4cl, :nh4oh]
14
- #CO_RULES = Set[:alcohol_to_aldehyde, :peroxy_to_carboxy, :co2_loss,
15
- # :sp3c_oxygen_double_bond_water_loss, :sp3c_oxygen_double_bond_far_side_sp2, :sp3c_oxygen_double_bond_far_side_sp3, :sp3c_oxygen_asymmetric_far_sp3
16
- #]
17
-
18
- RULES = Set[:cad_o, :cad_oo, :oxed_ether]
9
+ #RULES = Set[:cod, :codoo, :oxe, :oxepd, :oxh]
10
+ RULES = Set[:cod, :codoo, :oxe, :oxepd, :oxh, :oxhpd]
19
11
 
20
12
  DEFAULT_OPTIONS = {
21
13
  rules: RULES,
22
- #adduct: nil,
23
- #ph: 7.4,
14
+ errors: :remove,
24
15
  # return only the set of unique fragments
25
16
  uniq: false,
26
17
  }
@@ -31,117 +22,8 @@ module Rubabel
31
22
  # For instance, water loss with double bond formation is not allowable
32
23
  # for NCC(O)CC => CCC=C[NH2+], presumably because of the lone pair and
33
24
  # double bond resonance.
34
- #
35
25
  def allowable_fragmentation?(frags)
36
- self.num_atoms == frags.map(&:num_atoms).reduce(:+)
37
- end
38
-
39
- # add_h! to self, then selects allowable fragments
40
- def allowable_fragment_sets!(fragment_sets)
41
- self.add_h!
42
- fragment_sets.select do |_frags|
43
- putsv "ExMAIN:"
44
- putsv _frags.inspect
45
- putsv self.allowable_fragmentation?(_frags)
46
- self.allowable_fragmentation?(_frags)
47
- end
48
- end
49
-
50
- # will turn bond into a double bond, yield the changed molecule, then
51
- # return the bond to the original state when the block is closed
52
- # returns whatever the block returned
53
- def feint_double_bond(bond, give_e_pair=nil, get_e_pair=nil, &block)
54
- orig = bond.bond_order
55
- bond.bond_order = 2
56
- reply =
57
- if give_e_pair || get_e_pair
58
- feint_e_transfer(give_e_pair, get_e_pair, &block)
59
- else
60
- block.call(self)
61
- end
62
- bond.bond_order = orig
63
- reply
64
- end
65
-
66
- # warning, this method adds_h! to the calling molecule
67
- def electrophile_snatches_electrons(carbon, electrophile)
68
- self.add_h!
69
- frags = self.split(carbon.get_bond(electrophile))
70
- raise NotImplementedError
71
- # don't check for allowable fragments because it
72
- #allowable_fragment_sets!([frag_set])
73
- end
74
-
75
- def feint_e_transfer(give_e_pair=nil, get_e_pair=nil, &block)
76
- if give_e_pair
77
- gc_orig = give_e_pair.charge
78
- give_e_pair.charge = gc_orig + 1
79
- end
80
- if get_e_pair
81
- rc_orig = get_e_pair.charge
82
- get_e_pair.charge = rc_orig - 1
83
- end
84
-
85
- reply = block.call(self)
86
-
87
- give_e_pair.charge = gc_orig if give_e_pair
88
- get_e_pair.charge = rc_orig if get_e_pair
89
-
90
- reply
91
- end
92
-
93
- def near_side_double_bond_break(carbon, electrophile)
94
- frag_sets = carbon.atoms.select {|atom| atom.type == "C3" }.map do |near_c3|
95
- frags = feint_double_bond(carbon.get_bond(near_c3)) do |_mol|
96
- frags = _mol.split(electrophile.get_bond(carbon))
97
- frags.map(&:add_h!)
98
- end
99
- end
100
- allowable_fragment_sets!(frag_sets)
101
- end
102
-
103
- def alcohol_to_aldehyde(carbon, oxygen, carbon_nbrs)
104
- # alcohol becomes a ketone and one R group is released
105
- frag_sets = carbon_nbrs.select {|atom| atom.type == 'C3' }.map do |_atom|
106
- frags = feint_double_bond(carbon.get_bond(oxygen)) do |_mol|
107
- frags = _mol.split(carbon.get_bond(_atom))
108
- frags.map(&:add_h!)
109
- end
110
- end
111
- allowable_fragment_sets!(frag_sets)
112
- end
113
-
114
- def co2_loss(carbon, oxygen, c3_nbr)
115
- # carboxyl rules ...
116
- # neutral carbon dioxide loss with anion gain on attaching group
117
- # (if carbon)
118
- frags = feint_double_bond(carbon.get_bond(oxygen), oxygen, c3_nbr) do |_mol|
119
- frags = _mol.split(c3_nbr.get_bond(carbon))
120
- frags.map(&:add_h!)
121
- end
122
- allowable_fragment_sets!([frags])
123
- end
124
-
125
- def peroxy_to_carboxy(carbon, oxygen, carbon_nbrs, oxygen_nbr)
126
- if oxygen_nbr.el == :o # has a neighbor oxygen
127
- distal_o = oxygen_nbr
128
- if distal_o.bonds.size == 1 # this is a peroxy
129
- frag_sets = carbon_nbrs.select {|atom| atom.type == 'C3' }.map do |_atom|
130
- self.swap!(carbon, _atom, oxygen, distal_o)
131
- frags = feint_double_bond(carbon.get_bond(oxygen)) do |_mol|
132
-
133
- # we swapped the atoms so the bond to split off is now
134
- # attached to the oxygen
135
- frags = _mol.split(oxygen.get_bond(_atom))
136
- frags.map(&:add_h!)
137
- end
138
- self.swap!(carbon, distal_o, oxygen, _atom)
139
- frags
140
- end
141
- allowable_fragment_sets!(frag_sets)
142
- end
143
- end
144
-
26
+ self.num_atoms(true) == frags.reduce(0) {|cnt,fr| cnt + fr.num_atoms(true) }
145
27
  end
146
28
 
147
29
  # splits the molecule between the carbon and carbon_nbr, adds a double
@@ -174,29 +56,30 @@ module Rubabel
174
56
  # breaks the bond and gives the electrons to the oxygen
175
57
  def carbon_oxygen_esteal(carbon, oxygen)
176
58
  nmol = self.dup
177
- nmol.ob.add_hydrogens
178
59
  ncarbon = nmol.atom(carbon.id)
179
60
  noxygen = nmol.atom(oxygen.id)
180
61
  nmol.delete_bond(ncarbon, noxygen)
181
- ncarbon.charge += 1
182
- noxygen.charge -= 1
183
62
  ncarbon.remove_an_h!
184
- #p ncarbon.ob.implicit_hydrogen_count
185
- #p ncarbon
186
- #ncarbon.ob.decrement_implicit_valence
187
- #p ncarbon.ob.implicit_hydrogen_count
188
- #p ncarbon
189
- #ncarbon.ob.increment_implicit_valence
63
+ #noxygen.ob.set_spin_multiplicity 1
64
+ noxygen.spin = 1
65
+ noxygen.charge = -1
66
+ nmol.split
67
+ end
68
+
69
+ # returns the duplicated molecule and the equivalent atoms
70
+ def dup_molecule(atoms=[])
71
+ nmol = self.dup
72
+ [nmol, atoms.map {|old_atom| nmol.atom(old_atom.id) }]
73
+ end
190
74
 
191
- nmol.title = nmol.to_s
192
- p nmol.write("tmp.svg")
193
- parts = nmol.split
194
- p z=parts.first
195
- p z.formula
196
- p z.mass
197
- p z.exact_mass
198
-
199
- puts "HIAY"
75
+ # returns molecules created from splitting between the electrophile and
76
+ # the center and where the bond order is increased between the center
77
+ # and center_nbr
78
+ def break_with_double_bond(electrophile, center, center_nbr)
79
+ (nmol, (nele, ncarb, ncarb_nbr)) = self.dup_molecule([electrophile, center, center_nbr])
80
+ nmol.delete_bond(nele, ncarb)
81
+ ncarb_nbr.get_bond(ncarb) + 1
82
+ nmol.split
200
83
  end
201
84
 
202
85
  # an empty array is returned if there are no fragments generated.
@@ -205,6 +88,7 @@ module Rubabel
205
88
  #
206
89
  # :rules => queryable by :include? set of rules
207
90
  # :uniq => false
91
+ # :errors => :remove | :fix | :ignore (default is :remove)
208
92
  def fragment(opts={})
209
93
  only_uniqs = true
210
94
  opts = DEFAULT_OPTIONS.merge(opts)
@@ -218,131 +102,60 @@ module Rubabel
218
102
 
219
103
  fragment_sets = []
220
104
 
221
- if opts[:rules].any? {|r| [:cad_o, :cad_oo].include?(r) }
105
+ if opts[:rules].any? {|r| [:cod, :codoo].include?(r) }
222
106
  self.each_match("C[O;h1,O]", only_uniqs) do |carbon, oxygen|
223
107
  carbon.atoms.select {|a| a.el == :c }.each do |carbon_nbr|
224
108
  fragment_sets << carbonyl_oxygen_dump(carbon, oxygen, carbon_nbr)
225
109
  end
226
110
  end
227
111
  end
228
- if opts[:rules].any? {|r| [:oxed_ether].include?(r) }
229
- self.each_match("C[O&X2]", only_uniqs) do |carbon, oxygen|
112
+ if opts[:rules].any? {|r| [:oxe].include?(r) }
113
+ self.each_match("C-O", only_uniqs) do |carbon, oxygen|
230
114
  fragment_sets << carbon_oxygen_esteal(carbon, oxygen)
231
115
  end
232
116
  end
117
+ # right now implemented so that a beta hydrogen has to be available for
118
+ # extraction
119
+ if opts[:rules].any? {|r| [:oxh].include?(r) }
120
+ self.each_match("C[C,O]-O", only_uniqs) do |beta_c, center, oxygen|
121
+ next unless beta_c.hydrogen_count > 0
122
+ fragment_sets << break_with_double_bond(oxygen, center, beta_c)
123
+ end
124
+ end
125
+ if opts[:rules].any? {|r| [:oxhpd].include?(r) }
126
+ self.each_match("C-O-P-O", only_uniqs) do |carbon, alc_oxy, phosphate, beta_carb_oxy|
127
+ next unless beta_carb_oxy.hydrogen_count > 0
128
+ frag_set = break_with_double_bond(alc_oxy, phosphate, beta_carb_oxy)
129
+ frag_set.map! &:convert_dative_bonds!
130
+ fragment_sets << frag_set
131
+ end
132
+ end
133
+ if opts[:rules].any? {|r| [:oxepd].include?(r) }
134
+ self.each_match("P-O-C", only_uniqs) do |phosphate, oxygen, carbon|
135
+ frag_set = carbon_oxygen_esteal(phosphate, oxygen)
136
+ frag_set.map! &:convert_dative_bonds!
137
+ fragment_sets << frag_set
138
+ end
139
+ end
233
140
 
234
- unless had_hydrogens
235
- fragment_sets.each {|set| set.each(&:remove_h!) }
236
- self.remove_h!
141
+ case opts[:errors]
142
+ when :remove
143
+ fragment_sets.select! {|set| allowable_fragmentation?(set) }
144
+ when :fix
145
+ raise NotImplementedError
146
+ when :ignore # do nothing
237
147
  end
148
+
149
+ self.remove_h!
238
150
  if opts[:uniq]
239
151
  # TODO: implement properly
240
152
  raise NotImplementedError
241
- #fragment_sets = fragment_sets.uniq_by(&:csmiles)
153
+ #fragment_sets = fragment_sets.uniq_by(&:csmiles)
242
154
  end
243
155
 
244
156
  fragment_sets
245
157
  end
246
-
247
-
248
- # had_hydrogens = self.h_added?
249
-
250
- #self.correct_for_ph!(opts[:ph])
251
- #self.remove_h!
252
-
253
- #rules = opts[:rules]
254
- #fragment_sets = []
255
- #if rules.any? {|rule| CO_RULES.include?(rule) }
256
- #putsv "matching C-O"
257
- #self.each_match("CO").each do |_atoms|
258
- ## note: this will *not* match C=O
259
- #(carbon, oxygen) = _atoms
260
- #carbon_nbrs = carbon.atoms.reject {|atom| atom == oxygen }
261
- #c3_nbrs = carbon_nbrs.select {|atm| atm.type == 'C3' }
262
- ## pulling this out here causes it to work incorrectly internally
263
- ## (not sure why)
264
- ##co_bond = carbon.get_bond(oxygen)
265
-
266
- #case oxygen.bonds.size # non-hydrogen bonds
267
- #when 1 # *must* be an alcohol or a carboxylic acid
268
- #putsv "#{csmiles} oxygen has no other bonds besides C-O (alcohol or carboxylic acid)"
269
- #if carbon.type == 'C3'
270
- #if rules.include?(:sp3c_oxygen_double_bond_water_loss)
271
- #putsv "rule :sp3c_oxygen_double_bond_water_loss"
272
- #fragment_sets.push *near_side_double_bond_break(carbon, oxygen)
273
- #end
274
- #if rules.include?(:alcohol_to_aldehyde)
275
- #putsv "rule :alcohol_to_aldehyde"
276
- #fragment_sets.push *alcohol_to_aldehyde(carbon, oxygen, carbon_nbrs)
277
- #end
278
- #elsif carbon.carboxyl_carbon?
279
- #if rules.include?(:co2_loss)
280
- #putsv "rule :co2_loss"
281
- #if c3_nbr = c3_nbrs.first
282
- #fragment_sets.push *co2_loss(carbon, oxygen, c3_nbr)
283
- #end
284
- #end
285
- #end
286
- #when 2
287
- #putsv "#{csmiles} c-o & oxygen has 2 non-hydrogen bonds"
288
- #oxygen_nbr = oxygen.atoms.reject {|atom| atom.idx == carbon.idx }.first
289
- #if carbon.type == 'C3'
290
- #if rules.include?(:peroxy_to_carboxy)
291
- #fragment_sets.push *peroxy_to_carboxy(carbon, oxygen, carbon_nbrs, oxygen_nbr)
292
- #end
293
- ## ester and ethers (look *only* on close side for places to make
294
- ## double bond)
295
-
296
- #if oxygen_nbr.type == 'C3'
297
- #putsv "oxygen nbr is C3"
298
- #if rules.include?(:sp3c_oxygen_double_bond_far_side_sp3)
299
- #putsv "rule :sp3c_oxygen_double_bond_far_side_sp3"
300
- #fragment_sets.push *near_side_double_bond_break(carbon, oxygen)
301
- #end
302
- #if rules.include?(:sp3c_oxygen_asymmetric_far_sp3)
303
- #putsv "rule :sp3c_oxygen_asymmetric_far_sp3"
304
- ## only returns a single frag set
305
- #fragment_sets.push electrophile_snatches_electrons(carbon, oxygen)
306
- #end
307
- #end
308
- #if oxygen_nbr.type == 'C2'
309
- #if rules.include?(:sp3c_oxygen_double_bond_far_side_sp2)
310
- #putsv "rule :sp3c_oxygen_double_bond_far_side_sp2"
311
- #fragment_sets.push *near_side_double_bond_break(carbon, oxygen)
312
- #end
313
- #end
314
- ## note: the case of a carboxy is found with CO search
315
- #end
316
- #end
317
- #end
318
- #end
319
- #if rules.include?(:sp3c_nitrogen_double_bond)
320
- #self.each_match("CN") do |_atoms|
321
- #(carbon, nitrogen) = _atoms
322
- #num_nitrogen_bonds = nitrogen.bonds.size
323
- #case num_nitrogen_bonds
324
- #when 2
325
- #if carbon.type == 'C3'
326
- #fragment_sets.push *near_side_double_bond_break(carbon, nitrogen)
327
- #end
328
- #end
329
- #end
330
- #end
331
-
332
- #unless had_hydrogens
333
- #fragment_sets.each {|set| set.each(&:remove_h!) }
334
- #self.remove_h!
335
- #end
336
- #if opts[:uniq]
337
- ## TODO: impelent properly
338
- ##fragment_sets = fragment_sets.uniq_by(&:csmiles)
339
- #raise NotImplementedError
340
- #end
341
- #fragment_sets
342
- #end
343
-
344
158
  end
345
159
  include Fragmentable
346
160
  end
347
- end
348
-
161
+ end # Rubabel
@@ -4,226 +4,184 @@ require 'rubabel'
4
4
 
5
5
  $VERBOSE = nil
6
6
 
7
- #describe Rubabel::Molecule::Fragmentable do
7
+ describe Rubabel::Molecule::Fragmentable do
8
8
 
9
- ## :peroxy_to_carboxy
10
- ## :oxygen_asymmetric_sp3, :nitrogen_asymmetric_sp3,
11
- ## :internal_phosphoester
9
+ # :peroxy_to_carboxy
10
+ # :oxygen_asymmetric_sp3, :nitrogen_asymmetric_sp3,
11
+ # :internal_phosphoester
12
12
 
13
- #describe 'fragmentation rules' do
14
- ## coenzyme: CC1=CC(=O)C=CC1=O
15
- ## 2-methylcyclohexa-2,5-diene-1,4-dione
13
+ describe 'fragmentation rules' do
14
+ # coenzyme: CC1=CC(=O)C=CC1=O
15
+ # 2-methylcyclohexa-2,5-diene-1,4-dione
16
16
 
17
17
  #let(:test_mol) { "COP(=O)(O)OCNCOCC(OO)C(=O)O" }
18
18
 
19
- #it 'raises an error for a bad rule' do
20
- #mol = Rubabel["CCNC"]
21
- #expect { mol.fragment(rules: [:wackiness]) }.to raise_error
22
- #end
19
+ it 'raises an error for a bad rule' do
20
+ mol = Rubabel["CCNC"]
21
+ expect { mol.fragment(rules: [:wackiness]) }.to raise_error
22
+ end
23
23
 
24
- #describe 'cad_o: carbonyl appendage dump ' do
25
- ## a primary oxygen or peroxide => C=O appendage dump
24
+ describe 'cod: carbonyl appendage dump' do
25
+ # a primary oxygen or peroxide => C=O appendage dump
26
26
 
27
- #describe 'cad_o: primary alcohol' do
28
- #mol = Rubabel["NCC(O)CC"]
29
- #frags = mol.fragment(rules: [:cad_o])
30
- #frags.flatten(1).map(&:csmiles).should == ["C[NH3+]", "CCC=O", "C([NH3+])C=O", "CC"]
31
- #end
32
-
33
- #describe 'peroxide' do
34
- #mol = Rubabel["NCC(OO)CC"]
35
- #frags = mol.fragment(rules: [:cad_oo])
36
- #frags.flatten(1).each_with_index do |f,i|
37
- #f.write("mol#{i}.svg")
38
- #end
39
- #frags.flatten(1).map(&:csmiles).should == ["OC[NH3+]", "CCC=O", "C([NH3+])C=O", "CCO"]
40
- #end
41
-
42
- #describe 'cad_o: carboxylate' do
43
- #mol = Rubabel["CCC(=O)O"]
44
- #pieces = mol.fragment(rules: [:cad_o])
45
- #pieces.flatten(1).map(&:csmiles).should == ["[CH2-]C", "O=C=O"]
46
- #end
47
-
48
- #describe 'cad_o: carboxylic acid' do
49
- #mol = Rubabel["CCC(=O)O"]
50
- #mol.add_h!(1.5)
51
- #pieces = mol.fragment(rules: [:cad_o])
52
- #pieces.flatten(1).map(&:csmiles).should == ["CC", "O=C=O"]
53
- #end
54
- #end
55
-
56
- #describe 'oxe: oxygen electron stealing' do
57
- ## oxygen just steals the electron pair it is attached to. This
58
- ## typically results in a negatively charged oxygen and a positively
59
- ## charged carbo-cation.
60
- #describe 'ether to ions' do
61
- #mol = Rubabel["NCCOCC"]
62
- ##mol.add_h!
63
- #pieces = mol.fragment(rules: [:oxed_ether])
64
- #m = pieces.first.first
65
- ##[CH2+]CH2NH3+
66
- ## C2H7N
67
-
68
- #p m
69
- #m.add_h!
70
- #h = m.atoms[4].atoms.find {|a| a.el == :h }
71
- #m.delete_atom(h)
72
- #p m
73
- #p m.atoms
74
- ##mol = Rubabel["NCO"]
75
- ##pieces = mol.fragment(rules: [:oxed_ether])
76
- ##pieces.size.should == 0
77
- #end
78
-
79
- #describe 'ester to ions' do
80
- #end
81
-
82
- #describe 'carboxyl group' do
83
- #end
84
-
85
- #describe 'phosphodiester' do
86
- #end
87
- #end
88
-
89
- ## this is really a subset of oxygen bond stealing: if the negatively
90
- ## charged oxygen can rip off a nearby proton, it will.
91
- #describe 'oxygen alpha/beta/gamma hydrogen stealing' do
92
- #describe 'primary alcohol giving water loss' do
93
- #end
94
-
95
- #describe 'peroxide carbonyl formation' do
96
- #end
97
-
98
- #describe 'ether to alcohol' do
99
- #end
100
-
101
- #describe 'ester to alcohol' do
102
- #end
103
-
104
- #describe 'phosphodiester' do
105
- #end
106
- #end
107
-
108
- #end
109
- #end
110
-
111
-
112
-
113
-
114
- ##describe ':sp3c_nitrogen_double_bond' do
115
-
116
- ##it 'cleaves like an ether a secondary NH group if possible' do
117
- ##mol = Rubabel["CCNC"]
118
- ##frag_sets = mol.fragment(rules: [:sp3c_nitrogen_double_bond])
119
- ##frag_sets.size.should == 1
120
- ##csmiles = frag_sets.first.map(&:csmiles)
121
- ##csmiles.should include("C=C")
122
- ##csmiles.should include("C[NH3+]")
123
- ##end
124
-
125
- ##it 'will not cleave if not possible' do
126
- ##mol = Rubabel["CNC"]
127
- ##frag_sets = mol.fragment(rules: [:sp3c_nitrogen_double_bond])
128
- ##frag_sets.should be_empty
129
- ##end
130
-
131
- ##end
132
-
133
- ##describe ':co2_loss' do
134
- ##it 'loss of CO2 from carboxy group with charge transfer' do
135
- ##mol = Rubabel["NCC(=O)O"]
136
- ##frag_sets = mol.fragment( rules: [:co2_loss] )
137
- ##frag_sets.size.should == 1
138
- ##csmiles = frag_sets.first.map(&:csmiles)
139
-
140
- ##csmiles.should include("[CH2-][NH3+]")
141
- ##csmiles.should include("O=C=O")
142
- ##end
143
-
144
- ##it "doesn't remove CO2 if adjacent is not c3" do
145
- ##mol = Rubabel["C=CC(=O)O"]
146
- ##fragments = mol.fragment( rules: [:co2_loss] )
147
- ##fragments.should be_empty
148
- ##end
149
-
150
- ##end
151
-
152
- ##describe ':peroxy_to_carboxy' do
153
- ##it 'works' do
154
- ##mol = Rubabel["NCCC(OO)CC"]
155
- ##frag_sets = mol.fragment( rules: [:peroxy_to_carboxy] )
156
- ##frag_sets.size.should == 2
157
- ##frag_sets.flatten(1).map(&:csmiles).sort.should == ["CC", "CCC(=O)O", "CC[NH3+]", "OC(=O)CC[NH3+]"]
158
- ##end
159
- ##end
160
-
161
- ##describe ':sp3c_oxygen_asymmetric_far_sp3', :pending do
162
- ##it 'splits like sp3c_oxygen_double_bond except oxygen takes the electrons' do
163
- ##$VERBOSE = 3
164
- ##mol = Rubabel["NCCCOCC"]
165
- ##frag_sets = mol.fragment( rules: [:sp3c_oxygen_asymmetric_far_sp3] )
166
- ##$VERBOSE = nil
167
- ##frag_sets.size.should == 2
168
- ###mol = Rubabel["NCCOCC"]
169
- ###p mol.fragment( rules: [:sp3c_oxygen_asymmetric_far_sp3] )
170
- ###mol = Rubabel["NCOC"]
171
- ###p mol.fragment( rules: [:sp3c_oxygen_asymmetric_far_sp3] )
172
- ##end
173
- ##end
174
-
175
- ##describe ':sp3c_oxygen_double_bond_water_loss' do
176
-
177
- ##it 'does h2o loss of alcohol' do
178
- ##mol = Rubabel["NCCC(O)CC"]
179
- ##fragments = mol.fragment( rules: [:sp3c_oxygen_double_bond_water_loss] )
180
- ##fragments.flatten(1).map(&:csmiles).sort.should == ["CC=CCC[NH3+]", "CCC=CC[NH3+]", "O", "O"]
181
- ##end
182
-
183
- ##it 'h2o loss does not allow bad chemistry' do
184
- ### lone pair and double bond resonance ?
185
- ##mol = Rubabel["NCC(O)CC"]
186
- ##fragments = mol.fragment( rules: [:sp3c_oxygen_double_bond_water_loss] )
187
- ##fragments.flatten(1).map(&:csmiles).sort.should == ["CC=CC[NH3+]", "O"]
188
-
189
- ##mol = Rubabel["NC(O)CC"]
190
- ##fragments = mol.fragment( rules: [:sp3c_oxygen_double_bond_water_loss] )
191
- ##fragments.flatten(1).map(&:csmiles).sort.should == []
192
- ##end
193
- ##end
194
-
195
- ##describe 'sp3c_oxygen_double_bond_far_side_sp2' do
196
-
197
- ##it 'does not cleave esters without sp3 carbons available for double bond' do
198
- ##mol = Rubabel["NCCC(=O)OC"]
199
- ##pieces = mol.fragment( rules: [:sp3c_oxygen_double_bond_far_side_sp2] )
200
- ##pieces.should be_empty
201
- ##end
202
-
203
- ##it 'cleaves esters on far side of singly bonded oxygen' do
204
- ##mol = Rubabel["NCCC(=O)OCC"]
205
- ##pieces = mol.fragment( rules: [:sp3c_oxygen_double_bond_far_side_sp2] )
206
- ##pieces.size.should == 1 # one set
207
- ##the_pair = pieces.first
208
- ##csmiles = the_pair.map(&:csmiles)
209
- ##csmiles.should include("OC(=O)CC[NH3+]")
210
- ##csmiles.should include("C=C")
211
- ##end
212
-
213
- ##end
214
-
215
- ##describe ':alcohol_to_aldehyde' do
216
- ##it 'cleaves beside alcohols to generate an aldehyde' do
217
- ##mol = Rubabel["NCCC(O)CC"]
218
- ##mol.correct_for_ph!
219
- ##total_mass = mol.add_h!.mass
220
-
221
- ##pieces = mol.fragment(rules: [:alcohol_to_aldehyde])
222
- ##pieces.size.should == 2
223
- ##pieces.map(&:size).should == [2,2]
224
- ##pieces.flatten(1).map(&:csmiles).should == ["CC[NH3+]", "CCC=O", "C(C=O)C[NH3+]", "CC"]
225
- ##pieces.each do |pair|
226
- ##pair.map(&:mass).reduce(:+).should == total_mass
227
- ##end
228
- ##end
229
- ##end
27
+ specify 'cod: primary alcohol' do
28
+ mol = Rubabel["NCC(O)CC"]
29
+ frags = mol.fragment(rules: [:cod])
30
+ frags.flatten(1).map(&:csmiles).should == ["C[NH3+]", "CCC=O", "C([NH3+])C=O", "CC"]
31
+ end
32
+
33
+ specify 'peroxide' do
34
+ mol = Rubabel["NCC(OO)CC"]
35
+ frags = mol.fragment(rules: [:codoo])
36
+ frags.flatten(1).map(&:csmiles).should == ["OC[NH3+]", "CCC=O", "C([NH3+])C=O", "CCO"]
37
+ end
38
+
39
+ specify 'cod: carboxylate' do
40
+ mol = Rubabel["CCC(=O)O"]
41
+ pieces = mol.fragment(rules: [:cod])
42
+ pieces.flatten(1).map(&:csmiles).should == ["[CH2-]C", "O=C=O"]
43
+ end
44
+
45
+ specify 'cod: carboxylic acid' do
46
+ mol = Rubabel["CCC(=O)O"]
47
+ mol.add_h!(1.5)
48
+ pieces = mol.fragment(rules: [:cod])
49
+ pieces.flatten(1).map(&:csmiles).should == ["CC", "O=C=O"]
50
+ end
51
+ end
52
+
53
+ describe 'oxe: oxygen electron stealing' do
54
+ # oxygen just steals the electron pair it is attached to. This
55
+ # typically results in a negatively charged oxygen and a positively
56
+ # charged carbo-cation.
57
+ specify 'ether to ions (same for esters)' do
58
+ mol = Rubabel["CCOCCN"]
59
+ frag_set = mol.fragment(rules: [:oxe])
60
+ frags = frag_set.first
61
+ frags.first.csmiles.should == "C[CH2+]"
62
+ frags.last.csmiles.should == '[O-]CC[NH3+]'
63
+ frags.first.formula.should == 'C2H5'
64
+ frags.last.formula.should == 'C2H7NO'
65
+ frags.first.exact_mass.should be_within(1e-6).of(29.03912516)
66
+ frags.last.exact_mass.should be_within(1e-6).of(61.052763849)
67
+
68
+ mol = Rubabel["CCOC(=O)CCN"]
69
+ frag_set = mol.fragment(rules: [:oxe])
70
+ ff = frag_set.first
71
+ ff.first.csmiles.should == 'C[CH2+]'
72
+ ff.last.csmiles.should == '[O-]C(=O)CC[NH3+]'
73
+ ff.first.formula.should == "C2H5"
74
+ ff.last.formula.should == "C3H7NO2"
75
+ ff.first.exact_mass.should be_within(1e-6).of(29.03912516035)
76
+ ff.last.exact_mass.should be_within(1e-6).of(89.04767846841)
77
+ end
78
+
79
+ specify 'carboxyl group' do
80
+ mol = Rubabel["CCC(=O)O"]
81
+ frag_set = mol.fragment(rules: [:oxe])
82
+ ff = frag_set.first
83
+ ff.first.csmiles.should == 'CC[C+]=O'
84
+ ff.last.csmiles.should == '[O-]'
85
+ ff.first.formula.should == "C3H5O"
86
+ ff.first.exact_mass.should be_within(1e-6).of(57.034039779909996)
87
+ ff.last.formula.should == "O"
88
+ end
89
+
90
+ specify 'phosphodiester' do
91
+ mol = Rubabel["CC(COP(=O)([O-])OCCN"]
92
+ frag_set = mol.fragment(rules: [:oxepd])
93
+ ff = frag_set.first
94
+ ff.first.csmiles.should == '[O-]CCC'
95
+ ff.last.csmiles.should == '[NH3+]CCO[P](=O)=O'
96
+ ff.first.formula.should == 'C3H7O'
97
+ ff.first.exact_mass.should be_within(1e-6).of(59.049689844)
98
+ ff.last.formula.should == 'C2H7NO3P'
99
+ ff.last.exact_mass.should be_within(1e-6).of(124.016354719)
100
+
101
+ mol = Rubabel["CCCOP(=O)(OCC[N+](C)(C)C)[O-]"]
102
+ frag_set = mol.fragment(rules: [:oxepd, :oxe])
103
+ # some of these don't look right on first inspection, but that is
104
+ # because we 'converted dative bonds' meaning + and - next to each
105
+ # other are allowed to cancel one another out!
106
+ frag_set.size.should == 4
107
+ mols = frag_set.flatten
108
+ mols.map(&:csmiles).should == ["CC[CH2+]", "[O-]P(=O)(OCC[N+](C)(C)C)[O-]", "CCCOP(=O)([O-])[O-]", "[CH2+]C[N+](C)(C)C", "[O-]CCC", "O=[P](=O)OCC[N+](C)(C)C", "CCCO[P](=O)=O", "[O-]CC[N+](C)(C)C"]
109
+ mols.map(&:formula).should == ["C3H7", "C5H13NO4P", "C3H7O4P", "C5H13N", "C3H7O", "C5H13NO3P", "C3H7O3P", "C5H13NO"]
110
+ mols.map(&:exact_mass).zip([43.05477522449, 182.05821952995, 138.00819533273, 87.10479942171, 59.04968984405, 166.06330491039, 122.01328071317, 103.09971404127]) do |act, exp|
111
+ act.should be_within(1e-6).of(exp)
112
+ end
113
+
114
+ end
115
+ end
116
+
117
+ # this is really a subset of oxygen bond stealing: if the negatively
118
+ # charged oxygen can rip off a nearby proton, it will.
119
+ describe 'oxh: oxygen alpha/beta/gamma hydrogen stealing' do
120
+ specify 'primary alcohol giving water loss' do
121
+ mol = Rubabel["CC(O)CCN"]
122
+ frags = mol.fragment(rules: [:oxh])
123
+ ff = frags.first
124
+ ff.first.csmiles.should == 'C=CCC[NH3+]'
125
+ ff.last.csmiles.should == 'O'
126
+ ll = frags.last
127
+ ll.first.csmiles.should == 'CC=CC[NH3+]'
128
+ ll.last.csmiles.should == 'O'
129
+ ff.first.formula.should == 'C4H10N'
130
+ ff.first.exact_mass.should be_within(1e-6).of(72.0813243255)
131
+ end
132
+
133
+ specify 'peroxide carbonyl formation (or peroxide formation [that what we want??])' do
134
+ # do we really see peroxide formation? Tamil didn't include this in
135
+ # the rules but it follows from the broad way for creating these
136
+ # rules. Can prohibit peroxide formation in future if necessary...
137
+ mol = Rubabel["CC(OO)CCN"]
138
+ frags = mol.fragment(rules: [:oxh])
139
+ mols = frags.flatten
140
+ mols.map(&:csmiles).should == ["C=CCC[NH3+]", "OO", "CC(=O)CC[NH3+]", "O", "CC=CC[NH3+]", "OO"]
141
+ mols.map(&:formula).should == ["C4H10N", "H2O2", "C4H10NO", "H2O", "C4H10N", "H2O2"]
142
+ mols.map(&:exact_mass).zip([72.081324325, 34.005479304, 88.076238945, 18.010564684, 72.081324325, 34.005479304]) do |act, exp|
143
+ act.should be_within(1e-6).of(exp)
144
+ end
145
+ end
146
+
147
+ specify 'ether to alcohol, ignoring errors' do
148
+ # this is a good example of a 'disallowed structure' where the
149
+ # formula's do not match up to the original formulas
150
+ mol = Rubabel["CCOCCN"]
151
+ frags = mol.fragment(rules: [:oxh], errors: :ignore)
152
+ mols = frags.flatten
153
+ mols.map(&:csmiles).should == ["C=C", "OCC[NH3+]", "CCO", "C=C[NH2+]"]
154
+ end
155
+
156
+ specify 'ether to alcohol, removing errors' do
157
+ mol = Rubabel["CCOCCN"]
158
+ frags = mol.fragment(rules: [:oxh])
159
+ mols = frags.flatten
160
+ mols.map(&:csmiles).should == ["C=C", "OCC[NH3+]"]
161
+ end
162
+
163
+ specify 'ester to alcohol' do
164
+ mol = Rubabel["CC(=O)OCCCN"]
165
+ frags = mol.fragment(rules: [:oxh])
166
+ mols = frags.flatten
167
+ mols.map(&:csmiles).should == ["C=C=O", "OCCC[NH3+]", "CC(=O)O", "C=CC[NH3+]"]
168
+ mols.map(&:formula).should == ["C2H2O", "C3H10NO", "C2H4O2", "C3H8N"]
169
+ mols.map(&:exact_mass).zip([42.010564684, 76.076238945, 60.021129368000004, 58.065674261]) do |act,exp|
170
+ act.should be_within(1e-6).of(exp)
171
+ end
172
+ end
173
+
174
+ specify 'phosphodiester (right now needs very low pH and NOT SURE WORKING PROPERLY)' do
175
+ mol = Rubabel["CC(COP(=O)(O)OCCCN"]
176
+ mol.add_h!(1.0)
177
+ frags = mol.fragment(rules: [:oxhpd])
178
+ frags = frags.flatten
179
+ frags.map(&:csmiles).should == ["CCCO", "[NH3+]CCCOP(=O)=O", "CCCOP(=O)=O", "OCCC[NH3+]"]
180
+ end
181
+ end
182
+
183
+ end
184
+ end
185
+
186
+
187
+
@@ -88,6 +88,18 @@ describe Rubabel::Molecule do
88
88
  ar.size.should == 2
89
89
  end
90
90
 
91
+ specify 'num_atoms gives the number of atoms which can vary with hydrogens added' do
92
+ mol = Rubabel["CCC"]
93
+ mol.num_atoms.should == 3
94
+ mol.add_h!
95
+ mol.num_atoms.should == 11
96
+ end
97
+
98
+ specify 'num_atoms(true) gives the number including implied hydrogens' do
99
+ mol = Rubabel["CCC"]
100
+ mol.num_atoms(true).should == 11
101
+ end
102
+
91
103
  describe 'adding an atom' do
92
104
  it 'can be added but not attached' do
93
105
  mol = Rubabel["CCO"]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubabel
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-07 00:00:00.000000000 Z
12
+ date: 2012-10-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: openbabel