chemruby 0.9.3 → 1.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +2 -2
- data/Rakefile +67 -63
- data/ext/extconf.rb +2 -0
- data/ext/subcomp.c +461 -320
- data/ext/utils.c +56 -0
- data/ext/utils.h +13 -0
- data/lib/chem.rb +34 -8
- data/lib/chem/db.rb +8 -0
- data/lib/chem/db/cansmi.rb +1 -1
- data/lib/chem/db/cdx.rb +1 -1
- data/lib/chem/db/cml.rb +52 -0
- data/lib/chem/db/gd.rb +64 -0
- data/lib/chem/db/gspan.rb +2 -2
- data/lib/chem/db/kcf_rpair.rb +34 -0
- data/lib/chem/db/kegg.rb +35 -1
- data/lib/chem/db/mdl.rb +75 -34
- data/lib/chem/db/opsin.rb +24 -0
- data/lib/chem/db/pdb.rb +105 -0
- data/lib/chem/db/pdf.rb +2 -0
- data/lib/chem/db/pubchem.rb +1071 -88
- data/lib/chem/db/rmagick.rb +5 -3
- data/lib/chem/db/sdf.rb +28 -2
- data/lib/chem/db/smiles/smiles.ry +27 -25
- data/lib/chem/db/smiles/smiparser.rb +29 -27
- data/lib/chem/db/types/type_gd.rb +35 -0
- data/lib/chem/db/types/type_gspan.rb +2 -2
- data/lib/chem/db/types/type_kcf.rb +19 -0
- data/lib/chem/db/types/type_kegg.rb +2 -0
- data/lib/chem/db/types/type_mdl.rb +1 -1
- data/lib/chem/db/types/type_png.rb +5 -1
- data/lib/chem/db/types/type_rdf.rb +22 -0
- data/lib/chem/db/types/type_xyz.rb +1 -1
- data/lib/chem/db/vector.rb +19 -3
- data/lib/chem/model.rb +5 -2
- data/lib/chem/utils.rb +17 -1
- data/lib/chem/utils/bitdb.rb +49 -0
- data/lib/chem/utils/cas.rb +28 -0
- data/lib/chem/utils/cdk.rb +403 -0
- data/lib/chem/utils/fingerprint.rb +98 -0
- data/lib/chem/utils/geometry.rb +8 -0
- data/lib/chem/utils/net.rb +303 -0
- data/lib/chem/utils/once.rb +28 -0
- data/lib/chem/utils/openbabel.rb +204 -0
- data/lib/chem/utils/sssr.rb +33 -25
- data/lib/chem/utils/sub.rb +6 -0
- data/lib/chem/utils/transform.rb +9 -8
- data/lib/chem/utils/ullmann.rb +138 -95
- data/lib/graph.rb +5 -6
- data/lib/graph/utils.rb +8 -0
- data/sample/calc_maximum_common_subgraph.rb +27 -0
- data/sample/calc_properties.rb +9 -0
- data/sample/data/atp.mol +69 -0
- data/sample/data/pioglitazone.mol +58 -0
- data/sample/data/rosiglitazone.mol +55 -0
- data/sample/data/troglitazone.mol +70 -0
- data/sample/find_compound_by_keggapi.rb +19 -0
- data/sample/generate_inchi.rb +7 -0
- data/sample/generate_substructurekey.rb +11 -0
- data/sample/images/ex6.rb +17 -0
- data/sample/images/ex7.rb +18 -0
- data/sample/iupac2mol.rb +8 -0
- data/sample/kekule.rb +13 -0
- data/sample/logp.rb +4 -0
- data/sample/mcs.rb +13 -0
- data/sample/mol2pdf.rb +8 -0
- data/sample/pubchem_fetch.rb +8 -0
- data/sample/pubchem_search.rb +12 -0
- data/sample/rosiglitazone.mol +57 -0
- data/sample/smarts.rb +10 -0
- data/sample/structure_match.rb +8 -0
- data/sample/structure_match_color.rb +22 -0
- data/sample/thiazolidinedione.mol +19 -0
- data/sample/troglitazone.mol +232 -0
- data/sample/vicinity.rb +8 -0
- data/test/data/CID_704.sdf +236 -0
- data/test/data/CID_994.sdf +146 -0
- data/test/data/db_EXPT03276.txt +321 -0
- data/test/data/pioglitazone.mol +58 -0
- data/test/data/rosiglitazone.mol +55 -0
- data/test/data/thiazolidinedione.mol +19 -0
- data/test/data/troglitazone.mol +70 -0
- data/test/{test_adj.rb → tc_adj.rb} +0 -0
- data/test/{test_canonical_smiles.rb → tc_canonical_smiles.rb} +0 -0
- data/test/tc_casrn.rb +17 -0
- data/test/tc_cdk.rb +89 -0
- data/test/{test_cdx.rb → tc_cdx.rb} +0 -0
- data/test/{test_chem.rb → tc_chem.rb} +0 -0
- data/test/{test_cluster.rb → tc_cluster.rb} +0 -0
- data/test/{test_db.rb → tc_db.rb} +0 -0
- data/test/tc_develop.rb +38 -0
- data/test/tc_drugbank.rb +13 -0
- data/test/{test_eps.rb → tc_eps.rb} +0 -0
- data/test/tc_gd.rb +8 -0
- data/test/{test_geometry.rb → tc_geometry.rb} +0 -0
- data/test/tc_graph.rb +15 -0
- data/test/{test_gspan.rb → tc_gspan.rb} +0 -0
- data/test/{test_iupac.rb → tc_iupac.rb} +0 -0
- data/test/{test_kcf.rb → tc_kcf.rb} +0 -0
- data/test/{test_kcf_glycan.rb → tc_kcf_glycan.rb} +0 -0
- data/test/{test_kegg.rb → tc_kegg.rb} +13 -0
- data/test/{test_linucs.rb → tc_linucs.rb} +0 -0
- data/test/{test_mdl.rb → tc_mdl.rb} +20 -0
- data/test/{test_mol2.rb → tc_mol2.rb} +1 -1
- data/test/{test_morgan.rb → tc_morgan.rb} +0 -0
- data/test/tc_net.rb +5 -0
- data/test/tc_once.rb +29 -0
- data/test/tc_openbabel.rb +57 -0
- data/test/{test_pdf.rb → tc_pdf.rb} +0 -0
- data/test/{test_prop.rb → tc_prop.rb} +1 -1
- data/test/tc_pubchem.rb +32 -0
- data/test/{test_rmagick.rb → tc_rmagick.rb} +0 -0
- data/test/{test_sbdb.rb → tc_sbdb.rb} +0 -0
- data/test/{test_sdf.rb → tc_sdf.rb} +2 -0
- data/test/{test_smiles.rb → tc_smiles.rb} +46 -30
- data/test/tc_sssr.rb +1 -0
- data/test/{test_sub.rb → tc_sub.rb} +0 -0
- data/test/tc_subcomp.rb +59 -0
- data/test/{test_traverse.rb → tc_traverse.rb} +0 -0
- data/test/{test_writer.rb → tc_writer.rb} +0 -0
- data/test/{test_xyz.rb → tc_xyz.rb} +0 -0
- data/test/ts_current.rb +11 -0
- data/test/ts_image.rb +6 -0
- data/test/ts_main.rb +12 -0
- metadata +259 -194
- data/lib/chem/utils/graph_db.rb +0 -146
- data/test/test_sssr.rb +0 -18
- data/test/test_subcomp.rb +0 -37
data/lib/chem/db/vector.rb
CHANGED
@@ -8,20 +8,33 @@ module Chem
|
|
8
8
|
# mol.save("benzene.pdf", :type => :pdf)
|
9
9
|
# mol.save("benzene.pdf") # File type will automatically detected from file extensions
|
10
10
|
#
|
11
|
-
def save_as_pdf
|
11
|
+
def save_as_pdf(out, params = {})
|
12
12
|
v = PDFWriter.new(self, params)
|
13
13
|
v.save(out)
|
14
14
|
end
|
15
15
|
|
16
|
+
def hilight(atoms, color = [1, 0, 0])
|
17
|
+
edges.each do |bond, atom1, atom2|
|
18
|
+
bond.color = [1, 0, 0] if atoms.include?(atom1) and atoms.include?(atom2)
|
19
|
+
end
|
20
|
+
nodes.each{|atom| atom.color = [1, 0, 0] if atoms.include?(atom)}
|
21
|
+
end
|
22
|
+
|
16
23
|
end
|
17
24
|
|
18
25
|
module Atom
|
19
26
|
# position vector for visualization
|
20
27
|
attr_accessor :v_pos
|
28
|
+
# supplementary information
|
29
|
+
attr_accessor :label
|
21
30
|
end
|
22
31
|
|
23
32
|
module Writer
|
24
33
|
|
34
|
+
def to_256(color)
|
35
|
+
color.collect{|c| (c * 255).to_i}
|
36
|
+
end
|
37
|
+
|
25
38
|
def fbox # :nodoc:
|
26
39
|
n = @params[:orig_point]
|
27
40
|
m = [@params[:size][0] + n[0], @params[:size][1] + n[1]]
|
@@ -74,7 +87,7 @@ module Chem
|
|
74
87
|
params = {}
|
75
88
|
params[:color] = atom.color if atom.color
|
76
89
|
if atom.visible
|
77
|
-
text(atom.element.to_s, atom.v_pos[0], atom.v_pos[1], params)
|
90
|
+
text(atom.label.nil? ? atom.element.to_s : atom.label, atom.v_pos[0], atom.v_pos[1], params)
|
78
91
|
end
|
79
92
|
end
|
80
93
|
end
|
@@ -85,13 +98,16 @@ module Chem
|
|
85
98
|
# :upside_down:: true # turns images upside down
|
86
99
|
# :size:: [10, 20] # set box size
|
87
100
|
# :pointsize:: 18 # set font size
|
88
|
-
def initialize
|
101
|
+
def initialize(mol, params)
|
89
102
|
@mol = mol
|
90
103
|
@params = params
|
91
104
|
|
92
105
|
unless params[:manual] == false
|
93
106
|
mol.nodes.each do |node|
|
94
107
|
node.visible = true unless node.element == :C
|
108
|
+
node.x = node.x.to_f
|
109
|
+
node.y = node.y.to_f
|
110
|
+
node.z = node.z.to_f
|
95
111
|
end
|
96
112
|
end
|
97
113
|
|
data/lib/chem/model.rb
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
#
|
7
7
|
|
8
8
|
require 'graph'
|
9
|
+
require 'chem/utils/transform'
|
9
10
|
|
10
11
|
module Chem
|
11
12
|
|
@@ -13,6 +14,8 @@ module Chem
|
|
13
14
|
# It will be mixed-in to other concrete class.
|
14
15
|
|
15
16
|
module Atom
|
17
|
+
include Chem::Transform::ThreeDimension
|
18
|
+
|
16
19
|
# true if visible (for visualization)
|
17
20
|
attr_accessor :visible
|
18
21
|
# [r, g, b]
|
@@ -94,7 +97,7 @@ module Chem
|
|
94
97
|
|
95
98
|
module Molecule
|
96
99
|
|
97
|
-
include Graph
|
100
|
+
include ::Graph
|
98
101
|
|
99
102
|
attr_writer :source # source of molecule
|
100
103
|
attr_writer :name # name of molecule
|
@@ -108,7 +111,7 @@ module Chem
|
|
108
111
|
# Returns name of molecule.
|
109
112
|
# default value is self.source
|
110
113
|
def name
|
111
|
-
@name ? @name : self.
|
114
|
+
@name ? @name : self.source
|
112
115
|
end
|
113
116
|
|
114
117
|
end
|
data/lib/chem/utils.rb
CHANGED
@@ -1,11 +1,27 @@
|
|
1
1
|
require 'chem/utils/math'
|
2
|
+
require 'chem/utils/fingerprint'
|
2
3
|
require 'chem/utils/transform'
|
3
4
|
require 'chem/utils/sssr'
|
4
5
|
require 'chem/utils/traverse'
|
5
6
|
require 'chem/utils/sub'
|
7
|
+
require 'chem/utils/bitdb'
|
6
8
|
|
7
9
|
require 'chem/utils/prop'
|
8
10
|
require 'chem/utils/geometry'
|
11
|
+
require 'chem/utils/cas'
|
12
|
+
require 'chem/utils/once'
|
13
|
+
|
14
|
+
require 'chem/utils/net'
|
9
15
|
|
10
16
|
require 'chem/utils/ullmann'
|
11
|
-
|
17
|
+
|
18
|
+
|
19
|
+
module Chem
|
20
|
+
module Molecule
|
21
|
+
def remove_hydrogens!
|
22
|
+
hyd = nodes.select{|atom| atom.element == :H}
|
23
|
+
@edges = @edges.reject{|b, f, t| hyd.include?(f) or hyd.include?(t)}
|
24
|
+
@nodes = @nodes - hyd
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
#
|
2
|
+
# = chem/utils/bitdb.rb - Bit Database
|
3
|
+
#
|
4
|
+
# Author:: Toshiaki Katayama <k@bioruby.org>
|
5
|
+
# Nobuya Tanaka <t@chemruby.org>
|
6
|
+
#
|
7
|
+
# Copyright:: Copyright (c) 2005, 2006 ChemRuby project
|
8
|
+
#
|
9
|
+
# $Id: bitdb.rb 180 2006-04-19 08:52:15Z tanaka $
|
10
|
+
#
|
11
|
+
|
12
|
+
|
13
|
+
# Writer for fixed-width bit vectors.
#
# Two files are produced from the base name given to the constructor:
# "<filename>.dat":: one packed record per call to #push
# "<filename>.inf":: bit length and words-per-record, followed on #close
#                    by a trailer derived from the record count
class BitDatabase

  # Number of bits held by one packed word (pack directive "l").
  ARCH = 32

  attr_reader :bit_length

  # filename::   base path; ".dat" and ".inf" are appended to it
  # bit_length:: number of addressable bits in every record
  def initialize(filename, bit_length)
    # Binary mode: the payload is packed integers, so no newline
    # translation may be applied on any platform.
    @out = File.open(filename + ".dat", "wb")
    begin
      @idx = File.open(filename + ".inf", "wb")
    rescue
      # Do not leak the first handle when the second file cannot be opened.
      @out.close
      raise
    end
    @bit_length = bit_length
    # Despite its name this is the number of ARCH-bit words per record.
    @n_bytes = (bit_length - 1) / ARCH + 1
    @idx.write [@bit_length, @n_bytes].pack("l*")
    @current = 0
  end

  # Appends one record in which exactly the bit positions listed in +ary+
  # are set. Positions are zero-based; listing a position more than once
  # is harmless.
  #
  # Raises ArgumentError when a position lies outside 0...bit_length.
  # Nothing is written and the record counter is left unchanged in that case.
  #
  # Returns the value returned by the underlying write.
  def push(ary)
    words = Array.new(@n_bytes, 0)
    ary.each do |num|
      if num < 0 || num >= @bit_length
        raise ArgumentError, "bit position #{num} out of range (0...#{@bit_length})"
      end
      # OR, not +: adding would carry into the neighbouring bit when the
      # same position appears twice.
      words[num / ARCH] |= (1 << (num % ARCH))
    end
    written = @out.write(words.pack('l*'))
    @current += 1
    written
  end

  # Writes the trailer to the ".inf" file and closes both files.
  def close
    # NOTE(review): the trailer is the record count multiplied by 1000;
    # the reason for the factor is not visible here - confirm with the reader side.
    @idx.write [@current * 1000].pack("l*")
    @idx.close
    @out.close
  end

  # Creates a database, yields it to the block and closes it afterwards,
  # even when the block raises. Returns the value of the block.
  def self.open(filename, bit_length)
    db = new(filename, bit_length)
    begin
      yield db
    ensure
      db.close
    end
  end

end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
|
2
|
+
class String

  # Returns true when the string is a well-formed CAS Registry Number
  # whose check digit is correct, false otherwise.
  #
  # The expected layout is "NNNNNNN-NN-N": two to seven digits, a hyphen,
  # exactly two digits, a hyphen and a single check digit, with nothing
  # before or after. The check digit is the sum of the other digits, each
  # multiplied by its position counted from the right (starting at 1),
  # modulo 10.
  def is_valid_casrn?
    # Anchored match instead of split("-"): split silently accepts an
    # empty first segment ("-00-0") and drops a trailing hyphen ("50-00-0-").
    parts = /\A(\d{2,7})-(\d{2})-(\d)\z/.match(self)
    return false if parts.nil?

    total = 0
    (parts[1] + parts[2]).scan(/\d/).reverse.each_with_index do |digit, idx|
      total += digit.to_i * (idx + 1)
    end
    (total % 10) == parts[3].to_i
  end

end
|
28
|
+
|
@@ -0,0 +1,403 @@
|
|
1
|
+
# Copyright (C) 2006 Richard L. Apodaca
|
2
|
+
# Nobuya Tanaka
|
3
|
+
|
4
|
+
module Chem
|
5
|
+
|
6
|
+
module CDK
|
7
|
+
|
8
|
+
def self.parse_mdl(str)
|
9
|
+
require 'rcdk'
|
10
|
+
reader = Rjb::import('java.io.StringReader').new(str)
|
11
|
+
mdlreader = Rjb::import('org.openscience.cdk.io.MDLReader').new(reader)
|
12
|
+
molcls = Rjb::import('org.openscience.cdk.Molecule')
|
13
|
+
|
14
|
+
CDKMolecule.new(mdlreader.read(molcls.new))
|
15
|
+
end
|
16
|
+
|
17
|
+
class CDKAtom
|
18
|
+
|
19
|
+
include ::Chem::Transform::TwoDimension# fix me!
|
20
|
+
include Atom
|
21
|
+
attr_reader :cdk_atom
|
22
|
+
|
23
|
+
def initialize(cdk_atom)
|
24
|
+
@cdk_atom = cdk_atom
|
25
|
+
end
|
26
|
+
|
27
|
+
def element ; @cdk_atom.getSymbol.intern ; end
|
28
|
+
def x ; @cdk_atom.getX2d ; end
|
29
|
+
def y ; @cdk_atom.getY2d ; end
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
class CDKBond
|
34
|
+
|
35
|
+
include Bond
|
36
|
+
|
37
|
+
def initialize(cdk_bond)
|
38
|
+
@cdk_bond = cdk_bond
|
39
|
+
end
|
40
|
+
|
41
|
+
def v ; @cdk_bond.getOrder ; end
|
42
|
+
|
43
|
+
end
|
44
|
+
|
45
|
+
class CDKMolecule
|
46
|
+
include Molecule
|
47
|
+
include ::Graph
|
48
|
+
|
49
|
+
attr_reader :cdk_mol, :nodes, :edges
|
50
|
+
def initialize(cdk_mol)
|
51
|
+
@cdk_mol = cdk_mol
|
52
|
+
setup_nodes_and_edges
|
53
|
+
end
|
54
|
+
|
55
|
+
def setup_nodes_and_edges
|
56
|
+
@nodes = []
|
57
|
+
@edges = []
|
58
|
+
@cdk2atom = {}
|
59
|
+
|
60
|
+
enum = @cdk_mol.atoms
|
61
|
+
while(enum.hasMoreElements)
|
62
|
+
cdkatom = enum.nextElement
|
63
|
+
atom = CDKAtom.new(cdkatom)
|
64
|
+
@cdk2atom[cdkatom.hashCode] = atom
|
65
|
+
@nodes << atom
|
66
|
+
end
|
67
|
+
|
68
|
+
tmp = {}
|
69
|
+
@nodes.each do |from|
|
70
|
+
tmp[from.cdk_atom.hashCode] ||= {}
|
71
|
+
@cdk_mol.getConnectedAtoms(from.cdk_atom).each do |to|
|
72
|
+
|
73
|
+
if tmp[from.cdk_atom.hashCode][to.hashCode].nil?
|
74
|
+
bond = @cdk_mol.getBond(from.cdk_atom, to)
|
75
|
+
|
76
|
+
tmp[from.cdk_atom.hashCode][to.hashCode] = bond
|
77
|
+
tmp[to.hashCode] ||= {}
|
78
|
+
tmp[to.hashCode][from.cdk_atom.hashCode] = bond
|
79
|
+
|
80
|
+
@edges << [CDKBond.new(bond), from, @cdk2atom[to.hashCode]]
|
81
|
+
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
end
|
87
|
+
|
88
|
+
end
|
89
|
+
|
90
|
+
def self.parse_smiles(smiles)
|
91
|
+
require 'rcdk'
|
92
|
+
smiles_parser = Rjb::import('org.openscience.cdk.smiles.SmilesParser').new
|
93
|
+
CDKMolecule.new(smiles_parser.parseSmiles(smiles))
|
94
|
+
end
|
95
|
+
|
96
|
+
# def self.load(path)
|
97
|
+
# factory = Rjb::import('org.openscience.cdk.templates.MoleculeFactory').new
|
98
|
+
# factory.loadMolecule(path)
|
99
|
+
# end
|
100
|
+
|
101
|
+
end # CDK module
|
102
|
+
|
103
|
+
module Atom
|
104
|
+
attr_accessor :cdk_atom
|
105
|
+
end
|
106
|
+
|
107
|
+
module Molecule
|
108
|
+
|
109
|
+
attr_reader :cdk_mol, :cdk2atom
|
110
|
+
|
111
|
+
def cdk_generate_2D
|
112
|
+
cdk_setup
|
113
|
+
@@gen_cls ||= Rjb::import('org.openscience.cdk.layout.StructureDiagramGenerator')
|
114
|
+
generator = @@gen_cls.new#(self.cdk_mol)
|
115
|
+
generator.setMolecule(self.cdk_mol)
|
116
|
+
generator.generateCoordinates
|
117
|
+
Chem::CDK::CDKMolecule.new(generator.getMolecule)
|
118
|
+
end
|
119
|
+
alias cdk_calc_2d cdk_generate_2D
|
120
|
+
|
121
|
+
# Runs CDK's AllRingsFinder on this molecule and partitions the result
# with RingPartitioner.
#
# Returns an array with one entry per partition; each entry is an array
# of this molecule's own atoms, looked up through cdk2atom by the
# hashCode of the CDK atom.
def cdk_find_all_rings
  cdk_setup

  # The finder instance is created once and reused across calls.
  @@ring_finder ||= Rjb::import('org.openscience.cdk.ringsearch.AllRingsFinder').new
  partitioner = Rjb::import('org.openscience.cdk.ringsearch.RingPartitioner')
  ringset = @@ring_finder.findAllRings(self.cdk_mol)
  partitions = partitioner.partitionRings(ringset).elements

  rings = []
  while partitions.hasMoreElements
    ring = []
    members = partitioner.convertToAtomContainer(partitions.nextElement).atoms
    while members.hasMoreElements
      ring << cdk2atom[members.nextElement.hashCode]
    end
    rings << ring
  end
  rings
end
|
141
|
+
|
142
|
+
# Runs CDK's SSSRFinder on this molecule and partitions the result with
# RingPartitioner.
#
# Returns an array with one entry per partition; each entry is an array
# of this molecule's own atoms, looked up through cdk2atom by the
# hashCode of the CDK atom.
def cdk_sssr
  cdk_setup

  # Only the imported class is cached; a finder is built per call because
  # its constructor takes the molecule.
  @@sssr_finder ||= Rjb::import('org.openscience.cdk.ringsearch.SSSRFinder')
  partitioner = Rjb::import('org.openscience.cdk.ringsearch.RingPartitioner')
  finder = @@sssr_finder.new(self.cdk_mol)
  partitions = partitioner.partitionRings(finder.findSSSR).elements

  rings = []
  while partitions.hasMoreElements
    ring = []
    members = partitioner.convertToAtomContainer(partitions.nextElement).atoms
    while members.hasMoreElements
      ring << cdk2atom[members.nextElement.hashCode]
    end
    rings << ring
  end
  rings
end
|
163
|
+
|
164
|
+
def cdk_generate_randomly
|
165
|
+
cdk_setup
|
166
|
+
gen = Rjb::import('org.openscience.cdk.structgen.RandomGenerator').new(self.cdk_mol)
|
167
|
+
CDK::CDKMolecule.new(gen.proposeStructure)
|
168
|
+
end
|
169
|
+
|
170
|
+
def cdk_generate_vicinity
|
171
|
+
cdk_setup
|
172
|
+
gen = Rjb::import('org.openscience.cdk.structgen.VicinitySampler').new(self.cdk_mol)
|
173
|
+
ary = gen.sample(self.cdk_mol)
|
174
|
+
enum = ary.elements
|
175
|
+
ret = []
|
176
|
+
while enum.hasMoreElements
|
177
|
+
ret << CDK::CDKMolecule.new(enum.nextElement)
|
178
|
+
end
|
179
|
+
ret
|
180
|
+
end
|
181
|
+
|
182
|
+
#HueckelAromaticityDetector
|
183
|
+
def cdk_hueckel
|
184
|
+
cdk_setup
|
185
|
+
huckel = Rjb::import('org.openscience.cdk.aromaticity.HueckelAromaticityDetector')
|
186
|
+
huckel.detectAromaticity(self.cdk_mol)
|
187
|
+
end
|
188
|
+
|
189
|
+
# Fix me !
|
190
|
+
# Fail: unknown method name `assignGasteigerMarsiliFactors
|
191
|
+
def cdk_gasteiger_marsili_partial_charges(params = {})
|
192
|
+
cdk_setup
|
193
|
+
gm = Rjb::import('org.openscience.cdk.charges.GasteigerMarsiliPartialCharges').new
|
194
|
+
gm.setChiCatHydrogen = params[:deoc_hydrogen] if params[:deoc_hydrogen]
|
195
|
+
p gm.getStepSize
|
196
|
+
p gm.assignGasteigerMarsiliFactors(self.cdk_mol)
|
197
|
+
# gm.assignGasteigerMarsiliFactors(self.cdk_mol)
|
198
|
+
# gm.assignGasteigerMarsiliPartialCharges(self.cdk_mol, false)
|
199
|
+
# gm.assignGasteigerMarsiliPartialCharges(self.cdk_mol, true)
|
200
|
+
end
|
201
|
+
|
202
|
+
# Return HOSE code
|
203
|
+
# Anal. Chim. Acta. (1978) 103:355-365
|
204
|
+
# Asks CDK's HOSECodeGenerator for the HOSE code of +atom+.
#
# atom::  an atom of this molecule; its cdk_atom is handed to CDK
# depth:: third argument passed to getHOSECode (default 3)
#
# Returns whatever getHOSECode returns.
def cdk_hose_code(atom, depth = 3)
  # Make sure the CDK counterpart of the molecule and the per-atom
  # cdk_atom references exist before they are used.
  cdk_setup
  hose_gen = Rjb::import('org.openscience.cdk.tools.HOSECodeGenerator').new
  hose_gen.getHOSECode(self.cdk_mol, atom.cdk_atom, depth)
end
|
208
|
+
|
209
|
+
def cdk_BCUT
|
210
|
+
end
|
211
|
+
|
212
|
+
def cdk_fingerprint
|
213
|
+
'org.openscience.cdk.fingerprint.Fingerprinter'
|
214
|
+
end
|
215
|
+
|
216
|
+
# Builds the CDK counterpart of this molecule.
#
# For every node a CDK Atom carrying the node's element symbol is created,
# stored on the node as cdk_atom and registered in @cdk2atom under the
# CDK atom's hashCode. For every edge a CDK Bond with order edge.v.to_f is
# added. The resulting CDK Molecule is kept in @cdk_mol.
# Coordinates are not transferred.
#
# Does nothing and returns nil when cdk_mol is already set; otherwise
# returns self.
def cdk_setup
  return unless self.cdk_mol.nil?
  require 'rcdk'
  atom_class = Rjb::import('org.openscience.cdk.Atom')
  bond_class = Rjb::import('org.openscience.cdk.Bond')
  ac = Rjb::import('org.openscience.cdk.AtomContainer').new

  @cdk2atom = {}
  atoms = nodes.collect do |node|
    atom = atom_class.new(node.element.to_s)
    atom.setSymbol(node.element.to_s)
    node.cdk_atom = atom
    @cdk2atom[atom.hashCode] = node
    atom
  end
  ac.setAtoms(atoms)

  edges.each do |edge, node1, node2|
    atom1 = ac.getAtomAt(nodes.index(node1))
    atom2 = ac.getAtomAt(nodes.index(node2))
    ac.addBond(bond_class.new(atom1, atom2, edge.v.to_f))
  end

  # Built once, after all bonds are in place and outside the loop, so that
  # a molecule without any bond still gets its CDK counterpart.
  @cdk_mol = Rjb::import('org.openscience.cdk.Molecule').new(ac)
  self
end
|
248
|
+
|
249
|
+
def cdk_xlogp
|
250
|
+
self.cdk_setup
|
251
|
+
add_hydrogen = Rjb::import('org.openscience.cdk.tools.HydrogenAdder').new
|
252
|
+
add_hydrogen.addExplicitHydrogensToSatisfyValency(self.cdk_mol)
|
253
|
+
xlogp = Rjb::import('org.openscience.cdk.qsar.descriptors.molecular.XLogPDescriptor').new
|
254
|
+
xlogp.setParameters([true, true])
|
255
|
+
xlogp.calculate(self.cdk_mol).getValue().doubleValue
|
256
|
+
end
|
257
|
+
|
258
|
+
def cdk_mcs(other)
|
259
|
+
self.cdk_setup
|
260
|
+
other.cdk_setup
|
261
|
+
|
262
|
+
mcsClass = Rjb::import('org.openscience.cdk.isomorphism.UniversalIsomorphismTester')
|
263
|
+
iso = mcsClass.getOverlaps(self.cdk_mol, other.cdk_mol)
|
264
|
+
maps = []
|
265
|
+
itr = iso.iterator
|
266
|
+
while(itr.hasNext)
|
267
|
+
maps << CDK::CDKMolecule.new(itr.next)
|
268
|
+
end
|
269
|
+
maps
|
270
|
+
end
|
271
|
+
|
272
|
+
DESCRIPTORNAME = 'org.openscience.cdk.qsar.descriptors.molecular.'
|
273
|
+
def cdk_calc_descriptor(name, args = [])
|
274
|
+
self.cdk_setup
|
275
|
+
calc = Rjb::import(DESCRIPTORNAME + name).new
|
276
|
+
calc.setParameters(args)
|
277
|
+
res = calc.calculate(self.cdk_mol).getValue
|
278
|
+
case res._classname
|
279
|
+
when "org.openscience.cdk.qsar.result.IntegerResult"
|
280
|
+
res.intValue
|
281
|
+
when "org.openscience.cdk.qsar.result.DoubleResult"
|
282
|
+
res.doubleValue
|
283
|
+
when "org.openscience.cdk.qsar.result.IntegerArrayResult"
|
284
|
+
(0..(res.size - 1)).to_a.collect{|n| res.get(n)}
|
285
|
+
when "org.openscience.cdk.qsar.result.DoubleArrayResult"
|
286
|
+
(0..(res.size - 1)).to_a.collect{|n| res.get(n)}
|
287
|
+
end
|
288
|
+
end
|
289
|
+
|
290
|
+
# Wiener path number
|
291
|
+
# Wiener polarity number
|
292
|
+
def cdk_wiener_numbers
|
293
|
+
cdk_calc_descriptor('WienerNumbersDescriptor')
|
294
|
+
end
|
295
|
+
|
296
|
+
# CPSA
|
297
|
+
def cdk_CPSA
|
298
|
+
cdk_calc_descriptor('CPSADescriptor')
|
299
|
+
end
|
300
|
+
|
301
|
+
#BCUT Descriptors ....
|
302
|
+
# Fix me!
|
303
|
+
def cdk_BCUT(params)
|
304
|
+
cdk_calc_descriptor('BCUTDescriptor', params)
|
305
|
+
end
|
306
|
+
|
307
|
+
# Lipinski's Rule of Five
|
308
|
+
def cdk_rule_of_file(params = true)
|
309
|
+
cdk_calc_descriptor('RuleOfFiveDescriptor', [params])
|
310
|
+
end
|
311
|
+
|
312
|
+
#
|
313
|
+
|
314
|
+
# args : terminal atoms must be included in the count
|
315
|
+
def cdk_RotatableBondsCount(rot = [true])
|
316
|
+
cdk_calc_descriptor('RotatableBondsCountDescriptor', rot)
|
317
|
+
end
|
318
|
+
|
319
|
+
# dump CDK properties...
|
320
|
+
# useless...
|
321
|
+
def cdk_properties
|
322
|
+
self.cdk_setup
|
323
|
+
hash = self.cdk_mol.getProperties
|
324
|
+
keys = hash.keys
|
325
|
+
while(keys.hasMoreElements)
|
326
|
+
k = keys.nextElement
|
327
|
+
p k.toString
|
328
|
+
if /org.openscience.cdk.qsar.DescriptorSpecification/.match(k.toString)
|
329
|
+
p [
|
330
|
+
k.getImplementationIdentifier,
|
331
|
+
k.getImplementationTitle,
|
332
|
+
k.getImplementationVendor,
|
333
|
+
k.getSpecificationReference
|
334
|
+
]
|
335
|
+
end
|
336
|
+
p hash.get(k).toString
|
337
|
+
end
|
338
|
+
end
|
339
|
+
|
340
|
+
# fixme
|
341
|
+
# this method does not work very well
|
342
|
+
def cdk_calc_descriptors
|
343
|
+
self.cdk_setup
|
344
|
+
engineClass = Rjb::import('org.openscience.cdk.qsar.DescriptorEngine')
|
345
|
+
# 1: atom
|
346
|
+
# 2: bond?
|
347
|
+
# 3: molecule?
|
348
|
+
engine = engineClass.new(2)
|
349
|
+
engine.process(self.cdk_mol)
|
350
|
+
end
|
351
|
+
|
352
|
+
def cdk_save_as(path, params = {})
|
353
|
+
self.cdk_setup
|
354
|
+
|
355
|
+
params[:type] ||= :png
|
356
|
+
params[:width] ||= 100
|
357
|
+
params[:height] ||= 100
|
358
|
+
|
359
|
+
image_kit = Rjb::import('net.sf.structure.cdk.util.ImageKit')
|
360
|
+
case params[:type]
|
361
|
+
when :png
|
362
|
+
image_kit.writePNG(self.cdk_mol, params[:width], params[:height], path)
|
363
|
+
when :svg
|
364
|
+
image_kit.writeSVG(self.cdk_mol, params[:width], params[:height], path)
|
365
|
+
when :jpg
|
366
|
+
image_kit.writeJPG(self.cdk_mol, params[:width], params[:height], path)
|
367
|
+
end
|
368
|
+
end
|
369
|
+
|
370
|
+
end# Molecule module
|
371
|
+
|
372
|
+
end
|
373
|
+
|
374
|
+
if __FILE__ == $0
|
375
|
+
|
376
|
+
mol = Chem::CDK::parse_smiles("C1CCC=N1")
|
377
|
+
|
378
|
+
elsif false
|
379
|
+
|
380
|
+
mol1 = Chem::CDK::parse_smiles("C1CCC=N1")
|
381
|
+
mol2 = mol1.cdk_generate_2D
|
382
|
+
|
383
|
+
mol1.cdk_save_as_image("sample1.svg", :type => :svg)
|
384
|
+
mol2.cdk_save_as_image("sample2.svg", :type => :svg)
|
385
|
+
|
386
|
+
DIR = "/Users/tanaka/data/kegg/ligand/mol/C%05d.mol"
|
387
|
+
mols = [8434, 8435].collect{|filename|
|
388
|
+
Chem::CDK::parse_mdl(File.open(DIR % filename).read)
|
389
|
+
}
|
390
|
+
|
391
|
+
# mcs = mols[0].cdk_mcs(mols[1]).each do |map|
|
392
|
+
# p map.nodes.length
|
393
|
+
# end
|
394
|
+
|
395
|
+
p mols[0].cdk_wiener_numbers
|
396
|
+
p mols[0].cdk_generate_2d_coordinates._classname
|
397
|
+
|
398
|
+
# p mols[0].cdk_calc_descriptors
|
399
|
+
# p mols[0].cdk_properties
|
400
|
+
|
401
|
+
end
|
402
|
+
|
403
|
+
|