rubabel 0.1.6 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/rubabel/atom.rb +104 -2
- data/lib/rubabel/bond.rb +19 -0
- data/lib/rubabel/molecule.rb +92 -33
- data/lib/rubabel/molecule/fragmentable.rb +177 -85
- data/spec/rubabel/atom_spec.rb +39 -0
- data/spec/rubabel/bond_spec.rb +11 -8
- data/spec/rubabel/molecule/fragmentable_spec.rb +222 -135
- data/spec/rubabel/molecule_spec.rb +74 -21
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/lib/rubabel/atom.rb
CHANGED
@@ -79,7 +79,6 @@ module Rubabel
|
|
79
79
|
end
|
80
80
|
end
|
81
81
|
|
82
|
-
alias_method :each, :each_bond
|
83
82
|
|
84
83
|
# retrieves the bond
|
85
84
|
def get_bond(atom)
|
@@ -101,6 +100,7 @@ module Rubabel
|
|
101
100
|
_atom = @ob.next_nbr_atom(iter)
|
102
101
|
end
|
103
102
|
end
|
103
|
+
alias_method :each, :each_atom
|
104
104
|
|
105
105
|
# returns the neighboring atoms. Consider using each_atom.
|
106
106
|
def atoms
|
@@ -153,6 +153,79 @@ module Rubabel
|
|
153
153
|
@ob.get_partial_charge
|
154
154
|
end
|
155
155
|
|
156
|
+
# permanently removes a hydrogen by properly incrementing the
|
157
|
+
# spin_multiplicity (and deleting a hydrogen if one is explicitly attached
|
158
|
+
# to the atom). If called twice, a carbene or nitrene can be made
|
159
|
+
# (giving a spin_multiplicity of 3). Makes no effort to ensure that the
|
160
|
+
# proper number of hydrogens already exist to be deleted, just alters the
|
161
|
+
# spin_multiplicity and deletes the right number of hydrogens if they are
|
162
|
+
# available to be deleted. Adds one charge to the atom unless add_charge is false.
|
163
|
+
# Advances the spin multiplicity (0 -> 2, 2 -> 3), deletes the first
# neighbor whose atomic number is 1 (if any), and, unless add_charge is
# false, raises this atom's charge by one. Returns self.
def remove_an_h!(add_charge=true)
  current_spin = @ob.get_spin_multiplicity
  # any multiplicity other than 0 or 2 has no mapping here and yields nil
  next_spin =
    if 0 == current_spin then 2
    elsif 2 == current_spin then 3
    end
  @ob.set_spin_multiplicity(next_spin)

  # only one explicit hydrogen is removed per call
  hydrogen = atoms.find {|nbr| nbr.atomic_num == 1 }
  self.mol.delete_atom(hydrogen) if hydrogen

  self.charge = charge + 1 if add_charge
  self
end
|
180
|
+
|
181
|
+
# philosophy on equality: there are *so* many ways for two atoms to be
|
182
|
+
# different that we can never really ensure that "equivalence" is met
|
183
|
+
# without calling ~20 methods. We narrowly define equivalence so it is
|
184
|
+
# useful for that case and let the user make more complicated
|
185
|
+
# equivalency/equality definitions themselves.
|
186
|
+
|
187
|
+
# the exact same atom in the same molecule. The equivalency test for
|
188
|
+
# molecules is a little pricey, so better to use something like atom.id ==
|
189
|
+
# other.id if you know you are working within the same molecule.
|
190
|
+
# Identity test: holds only when other responds to #mol, that molecule is
# equal? to this atom's molecule, and both atoms report the same id.
def equal?(other)
  same_molecule = other.respond_to?(:mol) && mol.equal?(other.mol)
  same_molecule && id == other.id
end
|
193
|
+
|
194
|
+
alias_method :==, :equal?
|
195
|
+
alias_method :eql?, :equal?
|
196
|
+
|
197
|
+
## opposite of remove_an_h!
|
198
|
+
## THIS IS STILL BROKEN!!!
|
199
|
+
# maybe need to change the type?? C+ -> C2 or C3, but this gets really
|
200
|
+
# invasive... Why is this so flippin hard to do!!
|
201
|
+
#def add_an_h!(remove_charge=true)
|
202
|
+
#new_spin =
|
203
|
+
#case @ob.get_spin_multiplicity
|
204
|
+
#when 2 then 0
|
205
|
+
#when [1,3] then 2
|
206
|
+
#end
|
207
|
+
#@ob.set_spin_multiplicity(new_spin)
|
208
|
+
|
209
|
+
#puts self.inspect_internals
|
210
|
+
#puts "EXAMIN B:"
|
211
|
+
#p self
|
212
|
+
#p self.charge
|
213
|
+
#(self.charge = self.charge - 1) if remove_charge
|
214
|
+
#puts "EXAMIN A:"
|
215
|
+
#puts self.inspect_internals
|
216
|
+
#p self
|
217
|
+
#p self.charge
|
218
|
+
#puts "BEFORE:"
|
219
|
+
#p mol.formula
|
220
|
+
#p mol.atoms
|
221
|
+
#mol.add_bond!(self, mol.add_atom!(1))
|
222
|
+
#puts "AFTER:"
|
223
|
+
#p mol.formula
|
224
|
+
#p mol.atoms
|
225
|
+
#abort 'here'
|
226
|
+
#self
|
227
|
+
#end
|
228
|
+
|
156
229
|
def spin
|
157
230
|
@ob.get_spin_multiplicity
|
158
231
|
end
|
@@ -203,8 +276,25 @@ module Rubabel
|
|
203
276
|
def hbond_donor?() @ob.is_hbond_donor end
|
204
277
|
def hbond_donor_h?() @ob.is_hbond_donor_h end
|
205
278
|
|
279
|
+
# true when at least one of this atom's bonds has a bond order of 2
def double_bond?
  each_bond.map(&:bond_order).include?(2)
end
|
282
|
+
|
283
|
+
# true when at least one of this atom's bonds has a bond order of 1
def single_bond?
  each_bond.map(&:bond_order).include?(1)
end
|
286
|
+
|
206
287
|
def carboxyl_carbon?
|
207
|
-
|
288
|
+
each_atom.any?(&:carboxyl_oxygen?)
|
289
|
+
end
|
290
|
+
|
291
|
+
# Holds when this atom has exactly one neighbor, that neighbor's element is
# :c, and this atom takes part in a double bond.
def carbonyl_oxygen?
  neighbors = atoms
  return false unless neighbors.size == 1
  return false unless neighbors.first.el == :c
  double_bond?
end
|
295
|
+
|
296
|
+
def carbonyl_carbon?
|
297
|
+
each_atom.any?(&:carbonyl_oxygen?)
|
208
298
|
end
|
209
299
|
|
210
300
|
# # does this carbon hold a primary alcohol
|
@@ -218,5 +308,17 @@ module Rubabel
|
|
218
308
|
def inspect
|
219
309
|
"<#{type} id:#{id}>"
|
220
310
|
end
|
311
|
+
|
312
|
+
# Builds a "<name=value ...>" string from every method on @ob whose name
# matches /get_/. Getters that raise ArgumentError (e.g. because they need
# arguments) are left out.
def inspect_internals
  pairs = []
  @ob.methods.grep(/get_/).each do |getter|
    begin
      pairs << "#{getter.to_s.sub(/get_/,'')}=#{@ob.send(getter)}"
    rescue ArgumentError
      next
    end
  end
  "<#{pairs.join(" ")}>"
end
|
321
|
+
|
322
|
+
|
221
323
|
end
|
222
324
|
end
|
data/lib/rubabel/bond.rb
CHANGED
@@ -63,6 +63,25 @@ module Rubabel
|
|
63
63
|
@ob.set_bond_order(val)
|
64
64
|
end
|
65
65
|
|
66
|
+
# returns self
|
67
|
+
# Points the underlying bond at beg_atom (begin) and end_atom (end), in
# that order, using each atom's #ob. Returns self.
def set_atoms!(beg_atom, end_atom)
  tap do
    @ob.set_begin(beg_atom.ob)
    @ob.set_end(end_atom.ob)
  end
end
|
72
|
+
|
73
|
+
# Sets the beginning atom of the bond to atom. returns self
|
74
|
+
# Passes atom.ob to the underlying bond's set_begin. Returns self.
def set_begin!(atom)
  tap { @ob.set_begin(atom.ob) }
end
|
78
|
+
|
79
|
+
# Sets the end atom of the bond to the given atom. returns self
|
80
|
+
# Passes atom.ob to the underlying bond's set_end. Returns self.
def set_end!(atom)
  tap { @ob.set_end(atom.ob) }
end
|
84
|
+
|
66
85
|
# returns an array of Rubabel::Atoms
|
67
86
|
def atoms
|
68
87
|
[@ob.get_begin_atom.upcast, @ob.get_end_atom.upcast]
|
data/lib/rubabel/molecule.rb
CHANGED
@@ -112,9 +112,10 @@ module Rubabel
|
|
112
112
|
def add_atom!(atomic_num=1)
|
113
113
|
# jtp implementation:
|
114
114
|
# @ob.add_atom(atom.ob)
|
115
|
-
|
116
|
-
|
117
|
-
|
115
|
+
new_obatom = @ob.new_atom
|
116
|
+
new_obatom.set_atomic_num(atomic_num)
|
117
|
+
#@ob.add_atom(new_obatom)
|
118
|
+
Rubabel::Atom.new(new_obatom)
|
118
119
|
end
|
119
120
|
|
120
121
|
def delete_atom(atom)
|
@@ -144,7 +145,6 @@ module Rubabel
|
|
144
145
|
# to add_h!
|
145
146
|
def formula() @ob.get_formula end
|
146
147
|
|
147
|
-
|
148
148
|
def initialize(obmol)
|
149
149
|
@ob = obmol
|
150
150
|
end
|
@@ -254,11 +254,6 @@ module Rubabel
|
|
254
254
|
self
|
255
255
|
end
|
256
256
|
|
257
|
-
# calls separate on the OBMol object
|
258
|
-
def separate!
|
259
|
-
@ob.separate
|
260
|
-
end
|
261
|
-
|
262
257
|
# returns just the smiles string :smi (not the id)
|
263
258
|
def smiles
|
264
259
|
to_s(:smi)
|
@@ -269,10 +264,38 @@ module Rubabel
|
|
269
264
|
to_s(:can)
|
270
265
|
end
|
271
266
|
|
272
|
-
#
|
273
|
-
#
|
267
|
+
# checks to see if the molecules are the same OBMol object underneath by
|
268
|
+
# modifying one and seeing if the other changes. This is because
|
269
|
+
# openbabel routinely creates new objects that point to the same
|
270
|
+
# underlying data store, so even checking for OBMol equivalency is not
|
271
|
+
# enough.
|
272
|
+
# Identity test for molecules. Two wrappers count as the same molecule when
# a change made through one is visible through the other: the title of self
# is temporarily extended with this object's object_id and other's title is
# re-read. The original title is always written back afterwards, even if
# the comparison raises.
#
# Returns false when other is not an instance of self's class or when the
# titles differ to begin with; otherwise true/false from the probe.
def equal?(other)
  return false unless other.is_a?(self.class)
  return false unless self.title == other.title

  original_title = self.title
  begin
    # the probe: mutate through self, observe through other
    self.title = original_title + self.object_id.to_s
    self.title == other.title
  ensure
    # String#sub does not modify its receiver, so the title has to be
    # assigned back explicitly or the suffix would stay on the molecule
    self.title = original_title
  end
end
|
288
|
+
|
289
|
+
alias_method :eql?, :equal?
|
290
|
+
|
291
|
+
# defined as whether the csmiles strings are identical. This incorporates
|
292
|
+
# more information than the FP2 fingerprint, for instance (try changing
|
293
|
+
# the charge and see how it does not influence the fingerprint).
|
294
|
+
# Obviously, things like title or data will not be evaluated with ==. See
|
295
|
+
# equal? if you are looking for identity. More stringent comparisons will
|
296
|
+
# have to be done by hand!
|
274
297
|
def ==(other)
|
275
|
-
|
298
|
+
other.respond_to?(:csmiles) && (csmiles == other.csmiles)
|
276
299
|
end
|
277
300
|
|
278
301
|
# iterates over the molecule's Rubabel::Atom objects
|
@@ -308,11 +331,21 @@ module Rubabel
|
|
308
331
|
self
|
309
332
|
end
|
310
333
|
|
334
|
+
# gets the bond by id
|
335
|
+
# Fetches the underlying bond via get_bond_by_id(id) and returns the result
# of calling #upcast on it.
def bond(id)
  ob_bond = @ob.get_bond_by_id(id)
  ob_bond.upcast
end
|
338
|
+
|
311
339
|
# returns the array of bonds. Consider using #each_bond
|
312
340
|
def bonds
|
313
341
|
each_bond.map.to_a
|
314
342
|
end
|
315
343
|
|
344
|
+
# gets the atom by id
|
345
|
+
# Fetches the underlying atom via get_atom_by_id(id) and returns the result
# of calling #upcast on it.
def atom(id)
  ob_atom = @ob.get_atom_by_id(id)
  ob_atom.upcast
end
|
348
|
+
|
316
349
|
# returns the array of atoms. Consider using #each
|
317
350
|
def atoms
|
318
351
|
each_atom.map.to_a
|
@@ -358,8 +391,15 @@ module Rubabel
|
|
358
391
|
end
|
359
392
|
end
|
360
393
|
|
361
|
-
|
362
|
-
|
394
|
+
# if given a bond, deletes it (doesn't garbage collect). If given two
|
395
|
+
# atoms, deletes the bond between them.
|
396
|
+
def delete_bond(*args)
|
397
|
+
case args.size
|
398
|
+
when 1
|
399
|
+
@ob.delete_bond(args[0].ob, false)
|
400
|
+
when 2
|
401
|
+
@ob.delete_bond(args[0].get_bond(args[1]).ob, false)
|
402
|
+
end
|
363
403
|
end
|
364
404
|
|
365
405
|
def delete_atom(atom)
|
@@ -373,21 +413,15 @@ module Rubabel
|
|
373
413
|
self
|
374
414
|
end
|
375
415
|
|
376
|
-
#
|
377
|
-
def
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
@ob.add_bond(args[0].idx, args[1].idx, args[2] || 1)
|
386
|
-
#ob_bond = Rubabel::Bond[ *args ].ob
|
387
|
-
#ob_bond.get_begin_atom.add_bond(ob_bond)
|
388
|
-
#ob_bond.get_end_atom.add_bond(ob_bond)
|
389
|
-
#@ob.add_bond(ob_bond)
|
390
|
-
end
|
416
|
+
# creates a new (as yet unspecified) bond associated with the molecule and gives it a unique id
|
417
|
+
# Asks the underlying molecule for a new bond and returns the result of
# calling #upcast on it.
def new_bond
  fresh = @ob.new_bond
  fresh.upcast
end
|
420
|
+
|
421
|
+
# takes a pair of Rubabel::Atom objects and adds a bond to the molecule
|
422
|
+
# returns whether the bond creation was successful.
|
423
|
+
def add_bond!(atom1, atom2, order=1)
|
424
|
+
@ob.add_bond(atom1.idx, atom2.idx, order)
|
391
425
|
end
|
392
426
|
|
393
427
|
# yields self after deleting the specified bonds. When the block is
|
@@ -405,14 +439,24 @@ module Rubabel
|
|
405
439
|
end
|
406
440
|
|
407
441
|
# splits the molecules at the given bonds and returns the fragments. Does
|
408
|
-
# not alter the caller.
|
442
|
+
# not alter the caller. If the molecule is already fragmented, then
|
443
|
+
# returns the separate fragments.
|
409
444
|
def split(*bonds)
|
410
|
-
|
411
|
-
|
445
|
+
if bonds.size > 0
|
446
|
+
delete_and_restore_bonds(*bonds) do |mol|
|
447
|
+
mol.ob.separate.map(&:upcast)
|
448
|
+
end
|
449
|
+
else
|
450
|
+
self.ob.separate.map(&:upcast)
|
412
451
|
end
|
413
452
|
end
|
414
453
|
|
415
|
-
|
454
|
+
# Calls the block with the #upcast of every element returned by
# @ob.separate. Without a block, returns an enumerator for this method.
def each_fragment(&block)
  return enum_for(__method__) unless block
  @ob.separate.each {|piece| block.call( piece.upcast ) }
end
|
416
460
|
|
417
461
|
# emits smiles without the trailing tab, newline, or id. Use write_string
|
418
462
|
# to get the default OpenBabel behavior (ie., tabs and newlines).
|
@@ -593,6 +637,21 @@ module Rubabel
|
|
593
637
|
distance_matrix.max
|
594
638
|
end
|
595
639
|
|
640
|
+
# adds 1 hydrogen to the formula and returns self
|
641
|
+
# Computes the molecular formula with one more hydrogen than @ob currently
# reports and returns self.
#
# NOTE(review): writing the new formula back is still disabled (the
# set_formula call below was already commented out), so for now this method
# has no effect on the molecule. Confirm the intended write-back before
# enabling it.
def add_hydrogen_to_formula!
  formula = @ob.get_formula
  incremented = false
  # (?![a-z]) keeps two-letter symbols that start with H (Hg, He, ...) from
  # being mistaken for hydrogen
  new_formula = formula.sub(/H(?![a-z])(\d*)/) do
    incremented = true
    # a bare "H" stands for a count of one
    count = $1.empty? ? 1 : $1.to_i
    "H#{count + 1}"
  end
  unless incremented
    # no hydrogen term yet: place "H" right after a leading carbon term if
    # there is one, otherwise at the very front. A Regexp is required here;
    # a String pattern would be matched literally.
    new_formula = formula.sub(/\A(C(?![a-z])\d*)?/) { "#{$1}H" }
  end
  #@ob.set_formula(new_formula)
  self
end
|
654
|
+
|
596
655
|
end
|
597
656
|
end
|
598
657
|
|
@@ -7,18 +7,20 @@ module Rubabel
|
|
7
7
|
module Fragmentable
|
8
8
|
|
9
9
|
#:sp3c_oxygen_asymmetric_far_sp3, :sp3c_nitrogen_asymmetric_far_sp3,
|
10
|
-
RULES = Set[ :alcohol_to_aldehyde, :peroxy_to_carboxy, :co2_loss,
|
11
|
-
|
12
|
-
]
|
10
|
+
#RULES = Set[ :alcohol_to_aldehyde, :peroxy_to_carboxy, :co2_loss,
|
11
|
+
# :sp3c_oxygen_double_bond_far_side_sp3, :sp3c_oxygen_double_bond_far_side_sp2, :sp3c_oxygen_double_bond_water_loss, :sp3c_nitrogen_double_bond,
|
12
|
+
#]
|
13
13
|
#ADDUCTS = [:lioh, :nh4cl, :nh4oh]
|
14
|
-
CO_RULES = Set[:alcohol_to_aldehyde, :peroxy_to_carboxy, :co2_loss,
|
15
|
-
|
16
|
-
]
|
14
|
+
#CO_RULES = Set[:alcohol_to_aldehyde, :peroxy_to_carboxy, :co2_loss,
|
15
|
+
# :sp3c_oxygen_double_bond_water_loss, :sp3c_oxygen_double_bond_far_side_sp2, :sp3c_oxygen_double_bond_far_side_sp3, :sp3c_oxygen_asymmetric_far_sp3
|
16
|
+
#]
|
17
|
+
|
18
|
+
RULES = Set[:cad_o, :cad_oo, :oxed_ether]
|
17
19
|
|
18
20
|
DEFAULT_OPTIONS = {
|
19
21
|
rules: RULES,
|
20
22
|
#adduct: nil,
|
21
|
-
ph: 7.4,
|
23
|
+
#ph: 7.4,
|
22
24
|
# return only the set of unique fragments
|
23
25
|
uniq: false,
|
24
26
|
}
|
@@ -142,98 +144,90 @@ module Rubabel
|
|
142
144
|
|
143
145
|
end
|
144
146
|
|
145
|
-
#
|
147
|
+
# splits the molecule between the carbon and carbon_nbr, adds a double
|
148
|
+
# bond between the carbon and oxygen, and moves whatever was on the
|
149
|
+
# oxygen (e.g., an OH or a charge) to the carbon_nbr. Returns two new
|
150
|
+
# molecules.
|
151
|
+
# Works on a dup of self: deletes the carbon/carbon_nbr bond, moves the
# oxygen's non-carbon neighbor (if any) onto carbon_nbr, transfers a
# non-zero oxygen charge to carbon_nbr, sets the carbon-oxygen bond order to
# 2, and returns the result of #split on the dup.
def carbonyl_oxygen_dump(carbon, oxygen, carbon_nbr)
  appendage = oxygen.atoms.find {|nbr| nbr.el != :c }
  # only a non-zero charge is transferred
  moved_charge = oxygen.charge
  moved_charge = nil unless moved_charge != 0

  # atoms are looked up again by id so that all edits hit the copy
  copy = self.dup
  o_copy = copy.atom(oxygen.id)
  c_copy = copy.atom(carbon.id)
  nbr_copy = copy.atom(carbon_nbr.id)
  appendage_copy = appendage ? copy.atom(appendage.id) : nil

  copy.delete_bond(c_copy.get_bond(nbr_copy))
  if appendage_copy
    copy.delete_bond(o_copy.get_bond(appendage_copy))
    copy.add_bond!(nbr_copy, appendage_copy)
  end
  if moved_charge
    nbr_copy.charge += moved_charge
    o_copy.charge -= moved_charge
  end
  c_copy.get_bond(o_copy).bond_order = 2
  copy.split
end
|
173
|
+
|
174
|
+
# breaks the bond and gives the electrons to the oxygen
|
175
|
+
# Breaks the carbon-oxygen bond on a dup of self, shifting one unit of
# charge from the oxygen to the carbon, and returns the result of #split on
# the dup so the caller can collect the pieces.
#
# Debug leftovers (writing "tmp.svg", printing the first piece) were
# removed; they also made the method return nil.
def carbon_oxygen_esteal(carbon, oxygen)
  nmol = self.dup
  nmol.ob.add_hydrogens
  # look the atoms up again by id so that all edits hit the copy
  ncarbon = nmol.atom(carbon.id)
  noxygen = nmol.atom(oxygen.id)
  nmol.delete_bond(ncarbon, noxygen)
  ncarbon.charge += 1
  noxygen.charge -= 1
  # NOTE(review): if remove_an_h! also adds a charge by default, the carbon
  # ends up at +2 together with the increment above -- confirm intent.
  ncarbon.remove_an_h!
  # NOTE(review): presumably only a labelling aid for the removed svg dump;
  # kept so fragment titles do not change -- confirm whether it is needed.
  nmol.title = nmol.to_s
  nmol.split
end
|
201
|
+
|
146
202
|
# an empty array is returned if there are no fragments generated.
|
203
|
+
# Hydrogens are added at a pH of 7.4, unless they have already been
|
204
|
+
# added.
|
147
205
|
#
|
148
|
-
# :
|
149
|
-
# :uniq => false
|
206
|
+
# :rules => queryable by :include? set of rules
|
207
|
+
# :uniq => false
|
150
208
|
def fragment(opts={})
|
209
|
+
only_uniqs = true
|
151
210
|
opts = DEFAULT_OPTIONS.merge(opts)
|
152
211
|
opts[:rules].each do |rule|
|
153
212
|
raise ArgumentError, "bad rule: #{rule}" unless RULES.include?(rule)
|
154
213
|
end
|
155
214
|
|
156
215
|
had_hydrogens = self.h_added?
|
157
|
-
|
158
|
-
self.correct_for_ph!(opts[:ph])
|
216
|
+
self.correct_for_ph!(7.4) unless had_hydrogens
|
159
217
|
self.remove_h!
|
160
218
|
|
161
|
-
rules = opts[:rules]
|
162
219
|
fragment_sets = []
|
163
|
-
|
164
|
-
|
165
|
-
self.each_match("
|
166
|
-
|
167
|
-
|
168
|
-
carbon_nbrs = carbon.atoms.reject {|atom| atom == oxygen }
|
169
|
-
c3_nbrs = carbon_nbrs.select {|atm| atm.type == 'C3' }
|
170
|
-
# pulling this out here causes it to work incorrectly internally
|
171
|
-
# (not sure why)
|
172
|
-
#co_bond = carbon.get_bond(oxygen)
|
173
|
-
|
174
|
-
case oxygen.bonds.size # non-hydrogen bonds
|
175
|
-
when 1 # *must* be an alcohol or a carboxylic acid
|
176
|
-
putsv "#{csmiles} oxygen has no other bonds besides C-O (alcohol or carboxylic acid)"
|
177
|
-
if carbon.type == 'C3'
|
178
|
-
if rules.include?(:sp3c_oxygen_double_bond_water_loss)
|
179
|
-
putsv "rule :sp3c_oxygen_double_bond_water_loss"
|
180
|
-
fragment_sets.push *near_side_double_bond_break(carbon, oxygen)
|
181
|
-
end
|
182
|
-
if rules.include?(:alcohol_to_aldehyde)
|
183
|
-
putsv "rule :alcohol_to_aldehyde"
|
184
|
-
fragment_sets.push *alcohol_to_aldehyde(carbon, oxygen, carbon_nbrs)
|
185
|
-
end
|
186
|
-
elsif carbon.carboxyl_carbon?
|
187
|
-
if rules.include?(:co2_loss)
|
188
|
-
putsv "rule :co2_loss"
|
189
|
-
if c3_nbr = c3_nbrs.first
|
190
|
-
fragment_sets.push *co2_loss(carbon, oxygen, c3_nbr)
|
191
|
-
end
|
192
|
-
end
|
193
|
-
end
|
194
|
-
when 2
|
195
|
-
putsv "#{csmiles} c-o & oxygen has 2 non-hydrogen bonds"
|
196
|
-
oxygen_nbr = oxygen.atoms.reject {|atom| atom.idx == carbon.idx }.first
|
197
|
-
if carbon.type == 'C3'
|
198
|
-
if rules.include?(:peroxy_to_carboxy)
|
199
|
-
fragment_sets.push *peroxy_to_carboxy(carbon, oxygen, carbon_nbrs, oxygen_nbr)
|
200
|
-
end
|
201
|
-
# ester and ethers (look *only* on close side for places to make
|
202
|
-
# double bond)
|
203
|
-
|
204
|
-
if oxygen_nbr.type == 'C3'
|
205
|
-
putsv "oxygen nbr is C3"
|
206
|
-
if rules.include?(:sp3c_oxygen_double_bond_far_side_sp3)
|
207
|
-
putsv "rule :sp3c_oxygen_double_bond_far_side_sp3"
|
208
|
-
fragment_sets.push *near_side_double_bond_break(carbon, oxygen)
|
209
|
-
end
|
210
|
-
if rules.include?(:sp3c_oxygen_asymmetric_far_sp3)
|
211
|
-
putsv "rule :sp3c_oxygen_asymmetric_far_sp3"
|
212
|
-
# only returns a single frag set
|
213
|
-
fragment_sets.push electrophile_snatches_electrons(carbon, oxygen)
|
214
|
-
end
|
215
|
-
end
|
216
|
-
if oxygen_nbr.type == 'C2'
|
217
|
-
if rules.include?(:sp3c_oxygen_double_bond_far_side_sp2)
|
218
|
-
putsv "rule :sp3c_oxygen_double_bond_far_side_sp2"
|
219
|
-
fragment_sets.push *near_side_double_bond_break(carbon, oxygen)
|
220
|
-
end
|
221
|
-
end
|
222
|
-
# note: the case of a carboxy is found with CO search
|
223
|
-
end
|
220
|
+
|
221
|
+
if opts[:rules].any? {|r| [:cad_o, :cad_oo].include?(r) }
|
222
|
+
self.each_match("C[O;h1,O]", only_uniqs) do |carbon, oxygen|
|
223
|
+
carbon.atoms.select {|a| a.el == :c }.each do |carbon_nbr|
|
224
|
+
fragment_sets << carbonyl_oxygen_dump(carbon, oxygen, carbon_nbr)
|
224
225
|
end
|
225
226
|
end
|
226
227
|
end
|
227
|
-
if rules.include?(
|
228
|
-
self.each_match("
|
229
|
-
(carbon,
|
230
|
-
num_nitrogen_bonds = nitrogen.bonds.size
|
231
|
-
case num_nitrogen_bonds
|
232
|
-
when 2
|
233
|
-
if carbon.type == 'C3'
|
234
|
-
fragment_sets.push *near_side_double_bond_break(carbon, nitrogen)
|
235
|
-
end
|
236
|
-
end
|
228
|
+
if opts[:rules].any? {|r| [:oxed_ether].include?(r) }
|
229
|
+
self.each_match("C[O&X2]", only_uniqs) do |carbon, oxygen|
|
230
|
+
fragment_sets << carbon_oxygen_esteal(carbon, oxygen)
|
237
231
|
end
|
238
232
|
end
|
239
233
|
|
@@ -243,12 +237,110 @@ module Rubabel
|
|
243
237
|
end
|
244
238
|
if opts[:uniq]
|
245
239
|
# TODO: implement properly
|
246
|
-
#fragment_sets = fragment_sets.uniq_by(&:csmiles)
|
247
240
|
raise NotImplementedError
|
241
|
+
#fragment_sets = fragment_sets.uniq_by(&:csmiles)
|
248
242
|
end
|
243
|
+
|
249
244
|
fragment_sets
|
250
245
|
end
|
251
246
|
|
247
|
+
|
248
|
+
# had_hydrogens = self.h_added?
|
249
|
+
|
250
|
+
#self.correct_for_ph!(opts[:ph])
|
251
|
+
#self.remove_h!
|
252
|
+
|
253
|
+
#rules = opts[:rules]
|
254
|
+
#fragment_sets = []
|
255
|
+
#if rules.any? {|rule| CO_RULES.include?(rule) }
|
256
|
+
#putsv "matching C-O"
|
257
|
+
#self.each_match("CO").each do |_atoms|
|
258
|
+
## note: this will *not* match C=O
|
259
|
+
#(carbon, oxygen) = _atoms
|
260
|
+
#carbon_nbrs = carbon.atoms.reject {|atom| atom == oxygen }
|
261
|
+
#c3_nbrs = carbon_nbrs.select {|atm| atm.type == 'C3' }
|
262
|
+
## pulling this out here causes it to work incorrectly internally
|
263
|
+
## (not sure why)
|
264
|
+
##co_bond = carbon.get_bond(oxygen)
|
265
|
+
|
266
|
+
#case oxygen.bonds.size # non-hydrogen bonds
|
267
|
+
#when 1 # *must* be an alcohol or a carboxylic acid
|
268
|
+
#putsv "#{csmiles} oxygen has no other bonds besides C-O (alcohol or carboxylic acid)"
|
269
|
+
#if carbon.type == 'C3'
|
270
|
+
#if rules.include?(:sp3c_oxygen_double_bond_water_loss)
|
271
|
+
#putsv "rule :sp3c_oxygen_double_bond_water_loss"
|
272
|
+
#fragment_sets.push *near_side_double_bond_break(carbon, oxygen)
|
273
|
+
#end
|
274
|
+
#if rules.include?(:alcohol_to_aldehyde)
|
275
|
+
#putsv "rule :alcohol_to_aldehyde"
|
276
|
+
#fragment_sets.push *alcohol_to_aldehyde(carbon, oxygen, carbon_nbrs)
|
277
|
+
#end
|
278
|
+
#elsif carbon.carboxyl_carbon?
|
279
|
+
#if rules.include?(:co2_loss)
|
280
|
+
#putsv "rule :co2_loss"
|
281
|
+
#if c3_nbr = c3_nbrs.first
|
282
|
+
#fragment_sets.push *co2_loss(carbon, oxygen, c3_nbr)
|
283
|
+
#end
|
284
|
+
#end
|
285
|
+
#end
|
286
|
+
#when 2
|
287
|
+
#putsv "#{csmiles} c-o & oxygen has 2 non-hydrogen bonds"
|
288
|
+
#oxygen_nbr = oxygen.atoms.reject {|atom| atom.idx == carbon.idx }.first
|
289
|
+
#if carbon.type == 'C3'
|
290
|
+
#if rules.include?(:peroxy_to_carboxy)
|
291
|
+
#fragment_sets.push *peroxy_to_carboxy(carbon, oxygen, carbon_nbrs, oxygen_nbr)
|
292
|
+
#end
|
293
|
+
## ester and ethers (look *only* on close side for places to make
|
294
|
+
## double bond)
|
295
|
+
|
296
|
+
#if oxygen_nbr.type == 'C3'
|
297
|
+
#putsv "oxygen nbr is C3"
|
298
|
+
#if rules.include?(:sp3c_oxygen_double_bond_far_side_sp3)
|
299
|
+
#putsv "rule :sp3c_oxygen_double_bond_far_side_sp3"
|
300
|
+
#fragment_sets.push *near_side_double_bond_break(carbon, oxygen)
|
301
|
+
#end
|
302
|
+
#if rules.include?(:sp3c_oxygen_asymmetric_far_sp3)
|
303
|
+
#putsv "rule :sp3c_oxygen_asymmetric_far_sp3"
|
304
|
+
## only returns a single frag set
|
305
|
+
#fragment_sets.push electrophile_snatches_electrons(carbon, oxygen)
|
306
|
+
#end
|
307
|
+
#end
|
308
|
+
#if oxygen_nbr.type == 'C2'
|
309
|
+
#if rules.include?(:sp3c_oxygen_double_bond_far_side_sp2)
|
310
|
+
#putsv "rule :sp3c_oxygen_double_bond_far_side_sp2"
|
311
|
+
#fragment_sets.push *near_side_double_bond_break(carbon, oxygen)
|
312
|
+
#end
|
313
|
+
#end
|
314
|
+
## note: the case of a carboxy is found with CO search
|
315
|
+
#end
|
316
|
+
#end
|
317
|
+
#end
|
318
|
+
#end
|
319
|
+
#if rules.include?(:sp3c_nitrogen_double_bond)
|
320
|
+
#self.each_match("CN") do |_atoms|
|
321
|
+
#(carbon, nitrogen) = _atoms
|
322
|
+
#num_nitrogen_bonds = nitrogen.bonds.size
|
323
|
+
#case num_nitrogen_bonds
|
324
|
+
#when 2
|
325
|
+
#if carbon.type == 'C3'
|
326
|
+
#fragment_sets.push *near_side_double_bond_break(carbon, nitrogen)
|
327
|
+
#end
|
328
|
+
#end
|
329
|
+
#end
|
330
|
+
#end
|
331
|
+
|
332
|
+
#unless had_hydrogens
|
333
|
+
#fragment_sets.each {|set| set.each(&:remove_h!) }
|
334
|
+
#self.remove_h!
|
335
|
+
#end
|
336
|
+
#if opts[:uniq]
|
337
|
+
## TODO: implement properly
|
338
|
+
##fragment_sets = fragment_sets.uniq_by(&:csmiles)
|
339
|
+
#raise NotImplementedError
|
340
|
+
#end
|
341
|
+
#fragment_sets
|
342
|
+
#end
|
343
|
+
|
252
344
|
end
|
253
345
|
include Fragmentable
|
254
346
|
end
|
data/spec/rubabel/atom_spec.rb
CHANGED
@@ -17,6 +17,45 @@ describe Rubabel::Atom do
|
|
17
17
|
chlorine.id.should == 3
|
18
18
|
end
|
19
19
|
|
20
|
+
specify 'equality' do
|
21
|
+
mol = Rubabel["CCO"]
|
22
|
+
oxygen = mol.atoms[2]
|
23
|
+
oxygen_from_match = mol.matches("CO").first.last
|
24
|
+
(oxygen == oxygen_from_match).should be_true
|
25
|
+
(oxygen.equal?(oxygen_from_match)).should be_true
|
26
|
+
(oxygen.eql?(oxygen_from_match)).should be_true
|
27
|
+
|
28
|
+
mol2 = Rubabel["CCO"]
|
29
|
+
(mol.atoms[0] == mol2.atoms[0]).should_not be_true
|
30
|
+
(mol.atoms[0].equal?(mol2.atoms[0])).should_not be_true
|
31
|
+
(mol.atoms[0].eql?(mol2.atoms[0])).should_not be_true
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'removes hydrogens with proper charge accounting' do
|
35
|
+
mol = Rubabel["CC"]
|
36
|
+
mol.add_h!
|
37
|
+
mol.atoms[0].remove_an_h!
|
38
|
+
mol.formula.should == "C2H5"
|
39
|
+
mol.csmiles.should == 'C[CH2+]'
|
40
|
+
mol.exact_mass.round(5).should == 29.03913
|
41
|
+
mol.charge.should == 1
|
42
|
+
|
43
|
+
# can't seem to get working properly!!!
|
44
|
+
#mol.atoms[0].add_an_h!
|
45
|
+
#mol.formula.should == 'C2H6'
|
46
|
+
#mol.csmiles.should == 'CC'
|
47
|
+
#mol.charge.should == 0
|
48
|
+
##fmol.atoms[0].charge -= 1
|
49
|
+
#mol.exact_mass.should == 323
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'can find atom identities with simple questions' do
|
53
|
+
mol = Rubabel["NCC(O)CC(=O)"]
|
54
|
+
(c_exp, o_exp) = mol.matches("C=O").first
|
55
|
+
mol.find(&:carbonyl_carbon?).id.should == c_exp.id
|
56
|
+
mol.find(&:carbonyl_oxygen?).id.should == o_exp.id
|
57
|
+
end
|
58
|
+
|
20
59
|
describe 'working with a complex molecule' do
|
21
60
|
|
22
61
|
before do
|
data/spec/rubabel/bond_spec.rb
CHANGED
@@ -4,16 +4,19 @@ require 'rubabel/molecule'
|
|
4
4
|
require 'rubabel/bond'
|
5
5
|
|
6
6
|
describe Rubabel::Bond do
|
7
|
-
|
7
|
+
describe 'cholesterol from sdf' do
|
8
|
+
subject { Rubabel::Molecule.from_file( TESTFILES + '/cholesterol.sdf' ).bonds.first }
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
|
10
|
+
it 'is a Rubabel::Bond' do
|
11
|
+
subject.should be_a(Rubabel::Bond)
|
12
|
+
end
|
12
13
|
|
13
|
-
|
14
|
-
|
15
|
-
|
14
|
+
it 'knows what atoms it includes' do
|
15
|
+
subject.each_atom do |atom|
|
16
|
+
atom.should be_a(Rubabel::Atom)
|
17
|
+
end
|
18
|
+
subject.atoms.size.should == 2
|
16
19
|
end
|
17
|
-
subject.atoms.size.should == 2
|
18
20
|
end
|
21
|
+
|
19
22
|
end
|
@@ -4,139 +4,226 @@ require 'rubabel'
|
|
4
4
|
|
5
5
|
$VERBOSE = nil
|
6
6
|
|
7
|
-
describe Rubabel::Molecule::Fragmentable do
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
describe 'fragmentation rules' do
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
let(:test_mol) { "COP(=O)(O)OCNCOCC(OO)C(=O)O" }
|
18
|
-
|
19
|
-
it 'raises an error for a bad rule' do
|
20
|
-
mol = Rubabel["CCNC"]
|
21
|
-
expect { mol.fragment(rules: [:wackiness]) }.to raise_error
|
22
|
-
end
|
23
|
-
|
24
|
-
describe ':
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
describe ':peroxy_to_carboxy' do
|
63
|
-
it 'works' do
|
64
|
-
mol = Rubabel["NCCC(OO)CC"]
|
65
|
-
frag_sets = mol.fragment( rules: [:peroxy_to_carboxy] )
|
66
|
-
frag_sets.size.should == 2
|
67
|
-
frag_sets.flatten(1).map(&:csmiles).sort.should == ["CC", "CCC(=O)O", "CC[NH3+]", "OC(=O)CC[NH3+]"]
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
describe ':sp3c_oxygen_asymmetric_far_sp3', :pending do
|
72
|
-
it 'splits like sp3c_oxygen_double_bond except oxygen takes the electrons' do
|
73
|
-
$VERBOSE = 3
|
74
|
-
mol = Rubabel["NCCCOCC"]
|
75
|
-
frag_sets = mol.fragment( rules: [:sp3c_oxygen_asymmetric_far_sp3] )
|
76
|
-
$VERBOSE = nil
|
77
|
-
frag_sets.size.should == 2
|
7
|
+
#describe Rubabel::Molecule::Fragmentable do
|
8
|
+
|
9
|
+
## :peroxy_to_carboxy
|
10
|
+
## :oxygen_asymmetric_sp3, :nitrogen_asymmetric_sp3,
|
11
|
+
## :internal_phosphoester
|
12
|
+
|
13
|
+
#describe 'fragmentation rules' do
|
14
|
+
## coenzyme: CC1=CC(=O)C=CC1=O
|
15
|
+
## 2-methylcyclohexa-2,5-diene-1,4-dione
|
16
|
+
|
17
|
+
#let(:test_mol) { "COP(=O)(O)OCNCOCC(OO)C(=O)O" }
|
18
|
+
|
19
|
+
#it 'raises an error for a bad rule' do
|
20
|
+
#mol = Rubabel["CCNC"]
|
21
|
+
#expect { mol.fragment(rules: [:wackiness]) }.to raise_error
|
22
|
+
#end
|
23
|
+
|
24
|
+
#describe 'cad_o: carbonyl appendage dump ' do
|
25
|
+
## a primary oxygen or peroxide => C=O appendage dump
|
26
|
+
|
27
|
+
#describe 'cad_o: primary alcohol' do
|
28
|
+
#mol = Rubabel["NCC(O)CC"]
|
29
|
+
#frags = mol.fragment(rules: [:cad_o])
|
30
|
+
#frags.flatten(1).map(&:csmiles).should == ["C[NH3+]", "CCC=O", "C([NH3+])C=O", "CC"]
|
31
|
+
#end
|
32
|
+
|
33
|
+
#describe 'peroxide' do
|
34
|
+
#mol = Rubabel["NCC(OO)CC"]
|
35
|
+
#frags = mol.fragment(rules: [:cad_oo])
|
36
|
+
#frags.flatten(1).each_with_index do |f,i|
|
37
|
+
#f.write("mol#{i}.svg")
|
38
|
+
#end
|
39
|
+
#frags.flatten(1).map(&:csmiles).should == ["OC[NH3+]", "CCC=O", "C([NH3+])C=O", "CCO"]
|
40
|
+
#end
|
41
|
+
|
42
|
+
#describe 'cad_o: carboxylate' do
|
43
|
+
#mol = Rubabel["CCC(=O)O"]
|
44
|
+
#pieces = mol.fragment(rules: [:cad_o])
|
45
|
+
#pieces.flatten(1).map(&:csmiles).should == ["[CH2-]C", "O=C=O"]
|
46
|
+
#end
|
47
|
+
|
48
|
+
#describe 'cad_o: carboxylic acid' do
|
49
|
+
#mol = Rubabel["CCC(=O)O"]
|
50
|
+
#mol.add_h!(1.5)
|
51
|
+
#pieces = mol.fragment(rules: [:cad_o])
|
52
|
+
#pieces.flatten(1).map(&:csmiles).should == ["CC", "O=C=O"]
|
53
|
+
#end
|
54
|
+
#end
|
55
|
+
|
56
|
+
#describe 'oxe: oxygen electron stealing' do
|
57
|
+
## oxygen just steals the electron pair it is attached to. This
|
58
|
+
## typically results in a negatively charged oxygen and a positively
|
59
|
+
## charged carbo-cation.
|
60
|
+
#describe 'ether to ions' do
|
78
61
|
#mol = Rubabel["NCCOCC"]
|
79
|
-
|
80
|
-
#
|
81
|
-
#
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
end
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
62
|
+
##mol.add_h!
|
63
|
+
#pieces = mol.fragment(rules: [:oxed_ether])
|
64
|
+
#m = pieces.first.first
|
65
|
+
##[CH2+]CH2NH3+
|
66
|
+
## C2H7N
|
67
|
+
|
68
|
+
#p m
|
69
|
+
#m.add_h!
|
70
|
+
#h = m.atoms[4].atoms.find {|a| a.el == :h }
|
71
|
+
#m.delete_atom(h)
|
72
|
+
#p m
|
73
|
+
#p m.atoms
|
74
|
+
##mol = Rubabel["NCO"]
|
75
|
+
##pieces = mol.fragment(rules: [:oxed_ether])
|
76
|
+
##pieces.size.should == 0
|
77
|
+
#end
|
78
|
+
|
79
|
+
#describe 'ester to ions' do
|
80
|
+
#end
|
81
|
+
|
82
|
+
#describe 'carboxyl group' do
|
83
|
+
#end
|
84
|
+
|
85
|
+
#describe 'phosphodiester' do
|
86
|
+
#end
|
87
|
+
#end
|
88
|
+
|
89
|
+
## this is really a subset of oxygen bond stealing: if the negatively
|
90
|
+
## charged oxygen can rip off a nearby proton, it will.
|
91
|
+
#describe 'oxygen alpha/beta/gamma hydrogen stealing' do
|
92
|
+
#describe 'primary alcohol giving water loss' do
|
93
|
+
#end
|
94
|
+
|
95
|
+
#describe 'peroxide carbonyl formation' do
|
96
|
+
#end
|
97
|
+
|
98
|
+
#describe 'ether to alcohol' do
|
99
|
+
#end
|
100
|
+
|
101
|
+
#describe 'ester to alcohol' do
|
102
|
+
#end
|
103
|
+
|
104
|
+
#describe 'phosphodiester' do
|
105
|
+
#end
|
106
|
+
#end
|
107
|
+
|
108
|
+
#end
|
109
|
+
#end
|
110
|
+
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
##describe ':sp3c_nitrogen_double_bond' do
|
115
|
+
|
116
|
+
##it 'cleaves like an ether a secondary NH group if possible' do
|
117
|
+
##mol = Rubabel["CCNC"]
|
118
|
+
##frag_sets = mol.fragment(rules: [:sp3c_nitrogen_double_bond])
|
119
|
+
##frag_sets.size.should == 1
|
120
|
+
##csmiles = frag_sets.first.map(&:csmiles)
|
121
|
+
##csmiles.should include("C=C")
|
122
|
+
##csmiles.should include("C[NH3+]")
|
123
|
+
##end
|
124
|
+
|
125
|
+
##it 'will not cleave if not possible' do
|
126
|
+
##mol = Rubabel["CNC"]
|
127
|
+
##frag_sets = mol.fragment(rules: [:sp3c_nitrogen_double_bond])
|
128
|
+
##frag_sets.should be_empty
|
129
|
+
##end
|
130
|
+
|
131
|
+
##end
|
132
|
+
|
133
|
+
##describe ':co2_loss' do
|
134
|
+
##it 'loss of CO2 from carboxy group with charge transfer' do
|
135
|
+
##mol = Rubabel["NCC(=O)O"]
|
136
|
+
##frag_sets = mol.fragment( rules: [:co2_loss] )
|
137
|
+
##frag_sets.size.should == 1
|
138
|
+
##csmiles = frag_sets.first.map(&:csmiles)
|
139
|
+
|
140
|
+
##csmiles.should include("[CH2-][NH3+]")
|
141
|
+
##csmiles.should include("O=C=O")
|
142
|
+
##end
|
143
|
+
|
144
|
+
##it "doesn't remove CO2 if adjacent is not c3" do
|
145
|
+
##mol = Rubabel["C=CC(=O)O"]
|
146
|
+
##fragments = mol.fragment( rules: [:co2_loss] )
|
147
|
+
##fragments.should be_empty
|
148
|
+
##end
|
149
|
+
|
150
|
+
##end
|
151
|
+
|
152
|
+
##describe ':peroxy_to_carboxy' do
|
153
|
+
##it 'works' do
|
154
|
+
##mol = Rubabel["NCCC(OO)CC"]
|
155
|
+
##frag_sets = mol.fragment( rules: [:peroxy_to_carboxy] )
|
156
|
+
##frag_sets.size.should == 2
|
157
|
+
##frag_sets.flatten(1).map(&:csmiles).sort.should == ["CC", "CCC(=O)O", "CC[NH3+]", "OC(=O)CC[NH3+]"]
|
158
|
+
##end
|
159
|
+
##end
|
160
|
+
|
161
|
+
##describe ':sp3c_oxygen_asymmetric_far_sp3', :pending do
|
162
|
+
##it 'splits like sp3c_oxygen_double_bond except oxygen takes the electrons' do
|
163
|
+
##$VERBOSE = 3
|
164
|
+
##mol = Rubabel["NCCCOCC"]
|
165
|
+
##frag_sets = mol.fragment( rules: [:sp3c_oxygen_asymmetric_far_sp3] )
|
166
|
+
##$VERBOSE = nil
|
167
|
+
##frag_sets.size.should == 2
|
168
|
+
###mol = Rubabel["NCCOCC"]
|
169
|
+
###p mol.fragment( rules: [:sp3c_oxygen_asymmetric_far_sp3] )
|
170
|
+
###mol = Rubabel["NCOC"]
|
171
|
+
###p mol.fragment( rules: [:sp3c_oxygen_asymmetric_far_sp3] )
|
172
|
+
##end
|
173
|
+
##end
|
174
|
+
|
175
|
+
##describe ':sp3c_oxygen_double_bond_water_loss' do
|
176
|
+
|
177
|
+
##it 'does h2o loss of alcohol' do
|
178
|
+
##mol = Rubabel["NCCC(O)CC"]
|
179
|
+
##fragments = mol.fragment( rules: [:sp3c_oxygen_double_bond_water_loss] )
|
180
|
+
##fragments.flatten(1).map(&:csmiles).sort.should == ["CC=CCC[NH3+]", "CCC=CC[NH3+]", "O", "O"]
|
181
|
+
##end
|
182
|
+
|
183
|
+
##it 'h2o loss does not allow bad chemistry' do
|
184
|
+
### lone pair and double bond resonance ?
|
185
|
+
##mol = Rubabel["NCC(O)CC"]
|
186
|
+
##fragments = mol.fragment( rules: [:sp3c_oxygen_double_bond_water_loss] )
|
187
|
+
##fragments.flatten(1).map(&:csmiles).sort.should == ["CC=CC[NH3+]", "O"]
|
188
|
+
|
189
|
+
##mol = Rubabel["NC(O)CC"]
|
190
|
+
##fragments = mol.fragment( rules: [:sp3c_oxygen_double_bond_water_loss] )
|
191
|
+
##fragments.flatten(1).map(&:csmiles).sort.should == []
|
192
|
+
##end
|
193
|
+
##end
|
194
|
+
|
195
|
+
##describe 'sp3c_oxygen_double_bond_far_side_sp2' do
|
196
|
+
|
197
|
+
##it 'does not cleave esters without sp3 carbons available for double bond' do
|
198
|
+
##mol = Rubabel["NCCC(=O)OC"]
|
199
|
+
##pieces = mol.fragment( rules: [:sp3c_oxygen_double_bond_far_side_sp2] )
|
200
|
+
##pieces.should be_empty
|
201
|
+
##end
|
202
|
+
|
203
|
+
##it 'cleaves esters on far side of singly bonded oxygen' do
|
204
|
+
##mol = Rubabel["NCCC(=O)OCC"]
|
205
|
+
##pieces = mol.fragment( rules: [:sp3c_oxygen_double_bond_far_side_sp2] )
|
206
|
+
##pieces.size.should == 1 # one set
|
207
|
+
##the_pair = pieces.first
|
208
|
+
##csmiles = the_pair.map(&:csmiles)
|
209
|
+
##csmiles.should include("OC(=O)CC[NH3+]")
|
210
|
+
##csmiles.should include("C=C")
|
211
|
+
##end
|
212
|
+
|
213
|
+
##end
|
214
|
+
|
215
|
+
##describe ':alcohol_to_aldehyde' do
|
216
|
+
##it 'cleaves beside alcohols to generate an aldehyde' do
|
217
|
+
##mol = Rubabel["NCCC(O)CC"]
|
218
|
+
##mol.correct_for_ph!
|
219
|
+
##total_mass = mol.add_h!.mass
|
220
|
+
|
221
|
+
##pieces = mol.fragment(rules: [:alcohol_to_aldehyde])
|
222
|
+
##pieces.size.should == 2
|
223
|
+
##pieces.map(&:size).should == [2,2]
|
224
|
+
##pieces.flatten(1).map(&:csmiles).should == ["CC[NH3+]", "CCC=O", "C(C=O)C[NH3+]", "CC"]
|
225
|
+
##pieces.each do |pair|
|
226
|
+
##pair.map(&:mass).reduce(:+).should == total_mass
|
227
|
+
##end
|
228
|
+
##end
|
229
|
+
##end
|
@@ -10,6 +10,13 @@ describe Rubabel::Molecule do
|
|
10
10
|
end
|
11
11
|
end
|
12
12
|
|
13
|
+
#xit 'can add a hydrogen to the formula' do
|
14
|
+
#mol = Rubabel["CCC"]
|
15
|
+
#p mol.formula
|
16
|
+
#mol.add_hydrogen_to_formula!
|
17
|
+
#p mol.formula
|
18
|
+
#end
|
19
|
+
|
13
20
|
describe 'png output' do
|
14
21
|
it 'creates a png image (corresponds to the svg)' do
|
15
22
|
mol = Rubabel["NCC(=O)O"]
|
@@ -39,6 +46,24 @@ describe Rubabel::Molecule do
|
|
39
46
|
end
|
40
47
|
end
|
41
48
|
|
49
|
+
specify 'eql? and equal? mean the objects modify the same underlying openbabel molecule data' do
|
50
|
+
mol = Rubabel["C"]
|
51
|
+
eq_mol = mol.atoms.first.mol
|
52
|
+
mol.equal?(eq_mol).should be_true
|
53
|
+
another = Rubabel["C"]
|
54
|
+
mol.equal?(another).should be_false
|
55
|
+
end
|
56
|
+
|
57
|
+
specify '== means the canonical smiles strings (:csmiles) are equal' do
|
58
|
+
mol1 = Rubabel["CCO"]
|
59
|
+
mol2 = Rubabel["OCC"]
|
60
|
+
(mol1 == mol2).should be_true
|
61
|
+
mol2.atoms[0].charge += 1
|
62
|
+
(mol1 == mol2).should be_false
|
63
|
+
mol3 = Rubabel["CCCO"]
|
64
|
+
(mol1 == mol3).should be_false
|
65
|
+
end
|
66
|
+
|
42
67
|
specify '#add_atom! adds an atom given an atomic number and returns it' do
|
43
68
|
mol = Rubabel["CCO"]
|
44
69
|
before_size = mol.atoms.size
|
@@ -48,29 +73,30 @@ describe Rubabel::Molecule do
|
|
48
73
|
mol.csmiles.should == "CCO.C"
|
49
74
|
end
|
50
75
|
|
76
|
+
specify '#dup duplicates the molecule' do
|
77
|
+
mol = Rubabel["CCO"]
|
78
|
+
dup_mol = mol.dup
|
79
|
+
mol.atoms[0].ob.set_atomic_num(9)
|
80
|
+
mol.csmiles.should == "OCF"
|
81
|
+
dup_mol.csmiles.should == "CCO"
|
82
|
+
end
|
83
|
+
|
51
84
|
specify '#add_atom! with a Rubabel::Atom'
|
52
85
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
86
|
+
describe '#add_bond! adds a bond (and updates atoms)' do
|
87
|
+
specify 'given two atoms' do
|
88
|
+
mol = Rubabel["CCO"]
|
89
|
+
atom = mol.add_atom!(0)
|
90
|
+
mol.add_bond!(mol.atoms[1], atom)
|
91
|
+
mol.csmiles.should == '*C(O)C'
|
92
|
+
end
|
58
93
|
end
|
59
94
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
# puts "BONDS:"
|
66
|
-
# p mol.bonds
|
67
|
-
# puts "ATOMS:"
|
68
|
-
# p mol.atoms
|
69
|
-
# puts "C ATOMS:"
|
70
|
-
# p c.atoms
|
71
|
-
# puts "O ATOMS:"
|
72
|
-
# p o.atoms
|
73
|
-
# end
|
95
|
+
specify '#atom(id) retrieves atom by id num' do
|
96
|
+
mol = Rubabel["CCO"]
|
97
|
+
o = mol.find {|a| a.el == :o }
|
98
|
+
mol.atom(o.id).id.should == o.id
|
99
|
+
end
|
74
100
|
|
75
101
|
specify '#swap! can swap atoms around' do
|
76
102
|
mol = Rubabel["NCC(=O)O"]
|
@@ -117,8 +143,20 @@ describe Rubabel::Molecule do
|
|
117
143
|
ar.first.should be_a(OpenBabel::OBRing)
|
118
144
|
end
|
119
145
|
|
146
|
+
describe 'making carbo-cations: spin_multiplicity and charges' do
|
147
|
+
# http://openbabel.org/docs/2.3.1/Features/Radicals.html
|
148
|
+
subject { mol = Rubabel["CC"] }
|
149
|
+
it 'can be turned into a carbocation' do
|
150
|
+
mol = subject
|
151
|
+
c = mol.atoms[0]
|
152
|
+
c.ob.set_spin_multiplicity 2
|
153
|
+
c.charge += 1
|
154
|
+
mol.csmiles.should == "C[CH2+]"
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
120
158
|
describe 'masses' do
|
121
|
-
subject { Rubabel
|
159
|
+
subject { Rubabel["C(=O)COC(=O)C[NH3+]"] }
|
122
160
|
it '#mol_wt (or #avg_mass)' do
|
123
161
|
subject.mol_wt.should be_within(0.000001).of(118.11121999999999)
|
124
162
|
end
|
@@ -138,7 +176,7 @@ describe Rubabel::Molecule do
|
|
138
176
|
|
139
177
|
describe 'pH' do
|
140
178
|
|
141
|
-
subject { Rubabel
|
179
|
+
subject { Rubabel["NCC(=O)OCC(=O)O"] }
|
142
180
|
|
143
181
|
it '#correct_for_ph! neutral' do
|
144
182
|
subject.correct_for_ph!.to_s.should == '[O-]C(=O)COC(=O)C[NH3+]'
|
@@ -220,6 +258,7 @@ describe Rubabel::Molecule do
|
|
220
258
|
describe 'breaking a molecule' do
|
221
259
|
before(:each) do
|
222
260
|
@mol = Rubabel::Molecule.from_string("NC(=O)CO")
|
261
|
+
@n = @mol.find {|a| a.el == :n }
|
223
262
|
end
|
224
263
|
|
225
264
|
it 'num_atoms, atoms and each_atom are sensitive to #add_h!' do
|
@@ -256,6 +295,20 @@ describe Rubabel::Molecule do
|
|
256
295
|
csmiles.sort.should == %w(N CC=O O).sort
|
257
296
|
end
|
258
297
|
|
298
|
+
it 'can split fragments (akin to separate)' do
|
299
|
+
@mol.delete_bond(@n, @n.atoms.first)
|
300
|
+
pieces = @mol.split
|
301
|
+
pieces.map(&:csmiles).sort.should == ["N", "OCC=O"]
|
302
|
+
end
|
303
|
+
|
304
|
+
it 'can iterate through fragments' do
|
305
|
+
expected = %w(N OCC=O)
|
306
|
+
@mol.delete_bond(@n, @n.atoms.first)
|
307
|
+
@mol.each_fragment do |frag|
|
308
|
+
frag.csmiles.should == expected.shift
|
309
|
+
end
|
310
|
+
end
|
311
|
+
|
259
312
|
end
|
260
313
|
|
261
314
|
describe 'matching patterns (SMARTS)' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rubabel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-10-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: openbabel
|