chemruby 0.9.3 → 1.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +2 -2
- data/Rakefile +67 -63
- data/ext/extconf.rb +2 -0
- data/ext/subcomp.c +461 -320
- data/ext/utils.c +56 -0
- data/ext/utils.h +13 -0
- data/lib/chem.rb +34 -8
- data/lib/chem/db.rb +8 -0
- data/lib/chem/db/cansmi.rb +1 -1
- data/lib/chem/db/cdx.rb +1 -1
- data/lib/chem/db/cml.rb +52 -0
- data/lib/chem/db/gd.rb +64 -0
- data/lib/chem/db/gspan.rb +2 -2
- data/lib/chem/db/kcf_rpair.rb +34 -0
- data/lib/chem/db/kegg.rb +35 -1
- data/lib/chem/db/mdl.rb +75 -34
- data/lib/chem/db/opsin.rb +24 -0
- data/lib/chem/db/pdb.rb +105 -0
- data/lib/chem/db/pdf.rb +2 -0
- data/lib/chem/db/pubchem.rb +1071 -88
- data/lib/chem/db/rmagick.rb +5 -3
- data/lib/chem/db/sdf.rb +28 -2
- data/lib/chem/db/smiles/smiles.ry +27 -25
- data/lib/chem/db/smiles/smiparser.rb +29 -27
- data/lib/chem/db/types/type_gd.rb +35 -0
- data/lib/chem/db/types/type_gspan.rb +2 -2
- data/lib/chem/db/types/type_kcf.rb +19 -0
- data/lib/chem/db/types/type_kegg.rb +2 -0
- data/lib/chem/db/types/type_mdl.rb +1 -1
- data/lib/chem/db/types/type_png.rb +5 -1
- data/lib/chem/db/types/type_rdf.rb +22 -0
- data/lib/chem/db/types/type_xyz.rb +1 -1
- data/lib/chem/db/vector.rb +19 -3
- data/lib/chem/model.rb +5 -2
- data/lib/chem/utils.rb +17 -1
- data/lib/chem/utils/bitdb.rb +49 -0
- data/lib/chem/utils/cas.rb +28 -0
- data/lib/chem/utils/cdk.rb +403 -0
- data/lib/chem/utils/fingerprint.rb +98 -0
- data/lib/chem/utils/geometry.rb +8 -0
- data/lib/chem/utils/net.rb +303 -0
- data/lib/chem/utils/once.rb +28 -0
- data/lib/chem/utils/openbabel.rb +204 -0
- data/lib/chem/utils/sssr.rb +33 -25
- data/lib/chem/utils/sub.rb +6 -0
- data/lib/chem/utils/transform.rb +9 -8
- data/lib/chem/utils/ullmann.rb +138 -95
- data/lib/graph.rb +5 -6
- data/lib/graph/utils.rb +8 -0
- data/sample/calc_maximum_common_subgraph.rb +27 -0
- data/sample/calc_properties.rb +9 -0
- data/sample/data/atp.mol +69 -0
- data/sample/data/pioglitazone.mol +58 -0
- data/sample/data/rosiglitazone.mol +55 -0
- data/sample/data/troglitazone.mol +70 -0
- data/sample/find_compound_by_keggapi.rb +19 -0
- data/sample/generate_inchi.rb +7 -0
- data/sample/generate_substructurekey.rb +11 -0
- data/sample/images/ex6.rb +17 -0
- data/sample/images/ex7.rb +18 -0
- data/sample/iupac2mol.rb +8 -0
- data/sample/kekule.rb +13 -0
- data/sample/logp.rb +4 -0
- data/sample/mcs.rb +13 -0
- data/sample/mol2pdf.rb +8 -0
- data/sample/pubchem_fetch.rb +8 -0
- data/sample/pubchem_search.rb +12 -0
- data/sample/rosiglitazone.mol +57 -0
- data/sample/smarts.rb +10 -0
- data/sample/structure_match.rb +8 -0
- data/sample/structure_match_color.rb +22 -0
- data/sample/thiazolidinedione.mol +19 -0
- data/sample/troglitazone.mol +232 -0
- data/sample/vicinity.rb +8 -0
- data/test/data/CID_704.sdf +236 -0
- data/test/data/CID_994.sdf +146 -0
- data/test/data/db_EXPT03276.txt +321 -0
- data/test/data/pioglitazone.mol +58 -0
- data/test/data/rosiglitazone.mol +55 -0
- data/test/data/thiazolidinedione.mol +19 -0
- data/test/data/troglitazone.mol +70 -0
- data/test/{test_adj.rb → tc_adj.rb} +0 -0
- data/test/{test_canonical_smiles.rb → tc_canonical_smiles.rb} +0 -0
- data/test/tc_casrn.rb +17 -0
- data/test/tc_cdk.rb +89 -0
- data/test/{test_cdx.rb → tc_cdx.rb} +0 -0
- data/test/{test_chem.rb → tc_chem.rb} +0 -0
- data/test/{test_cluster.rb → tc_cluster.rb} +0 -0
- data/test/{test_db.rb → tc_db.rb} +0 -0
- data/test/tc_develop.rb +38 -0
- data/test/tc_drugbank.rb +13 -0
- data/test/{test_eps.rb → tc_eps.rb} +0 -0
- data/test/tc_gd.rb +8 -0
- data/test/{test_geometry.rb → tc_geometry.rb} +0 -0
- data/test/tc_graph.rb +15 -0
- data/test/{test_gspan.rb → tc_gspan.rb} +0 -0
- data/test/{test_iupac.rb → tc_iupac.rb} +0 -0
- data/test/{test_kcf.rb → tc_kcf.rb} +0 -0
- data/test/{test_kcf_glycan.rb → tc_kcf_glycan.rb} +0 -0
- data/test/{test_kegg.rb → tc_kegg.rb} +13 -0
- data/test/{test_linucs.rb → tc_linucs.rb} +0 -0
- data/test/{test_mdl.rb → tc_mdl.rb} +20 -0
- data/test/{test_mol2.rb → tc_mol2.rb} +1 -1
- data/test/{test_morgan.rb → tc_morgan.rb} +0 -0
- data/test/tc_net.rb +5 -0
- data/test/tc_once.rb +29 -0
- data/test/tc_openbabel.rb +57 -0
- data/test/{test_pdf.rb → tc_pdf.rb} +0 -0
- data/test/{test_prop.rb → tc_prop.rb} +1 -1
- data/test/tc_pubchem.rb +32 -0
- data/test/{test_rmagick.rb → tc_rmagick.rb} +0 -0
- data/test/{test_sbdb.rb → tc_sbdb.rb} +0 -0
- data/test/{test_sdf.rb → tc_sdf.rb} +2 -0
- data/test/{test_smiles.rb → tc_smiles.rb} +46 -30
- data/test/tc_sssr.rb +1 -0
- data/test/{test_sub.rb → tc_sub.rb} +0 -0
- data/test/tc_subcomp.rb +59 -0
- data/test/{test_traverse.rb → tc_traverse.rb} +0 -0
- data/test/{test_writer.rb → tc_writer.rb} +0 -0
- data/test/{test_xyz.rb → tc_xyz.rb} +0 -0
- data/test/ts_current.rb +11 -0
- data/test/ts_image.rb +6 -0
- data/test/ts_main.rb +12 -0
- metadata +259 -194
- data/lib/chem/utils/graph_db.rb +0 -146
- data/test/test_sssr.rb +0 -18
- data/test/test_subcomp.rb +0 -37
data/lib/chem/utils/sssr.rb
CHANGED
@@ -31,31 +31,35 @@ module Chem
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
+
# Fix me! This is not sufficient
|
34
35
|
def canonical_ring ring
|
35
|
-
|
36
|
-
ring.sort{|a, b| @atoms.index(a) <=> @atoms.index(b)}
|
36
|
+
ring.sort{|a, b| nodes.index(a) <=> nodes.index(b)}
|
37
37
|
end
|
38
38
|
|
39
|
+
# Returns Smallest Set of Smallest Ring
|
39
40
|
def find_sssr
|
40
|
-
|
41
|
-
fullSet =
|
41
|
+
|
42
|
+
fullSet = nodes.dup
|
42
43
|
trimSet = []
|
43
44
|
rings = []
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
45
|
+
mol = {}
|
46
|
+
|
47
|
+
nodes.each do |node|
|
48
|
+
mol[node] = []
|
49
|
+
adjacent_to(node).each do |bond, atom|
|
50
|
+
mol[node] << atom
|
51
|
+
end
|
52
|
+
end
|
50
53
|
|
51
54
|
loop do
|
52
55
|
nodesN2 = []
|
53
56
|
smallest_degree = 10
|
54
57
|
smallest = nil
|
55
|
-
|
58
|
+
|
59
|
+
mol.each do |k, a|
|
56
60
|
case a.length
|
57
61
|
when 0
|
58
|
-
|
62
|
+
mol.delete(k)# Is this OK?
|
59
63
|
trimSet.push(k)
|
60
64
|
when 2
|
61
65
|
nodesN2.push(k)
|
@@ -65,36 +69,40 @@ module Chem
|
|
65
69
|
smallest_degree = a.length
|
66
70
|
end
|
67
71
|
end
|
72
|
+
|
68
73
|
case smallest_degree
|
69
74
|
when 1
|
70
|
-
trim(smallest)
|
75
|
+
trim(mol, smallest)
|
71
76
|
when 2
|
72
77
|
nodesN2.each do |k|
|
73
78
|
ring = find_smallest_ring(k)
|
74
|
-
|
75
|
-
|
79
|
+
if ring && !rings.include?(canonical_ring(ring))
|
80
|
+
rings.push(canonical_ring(ring))
|
81
|
+
end
|
76
82
|
end
|
77
83
|
nodesN2.each do |k|
|
78
|
-
trim(k)
|
84
|
+
trim(mol, k)
|
79
85
|
end
|
80
86
|
when 3
|
81
87
|
ring = find_smallest_ring(smallest)
|
82
|
-
trim(smallest)
|
88
|
+
trim(mol, smallest)
|
83
89
|
end
|
84
|
-
|
90
|
+
|
91
|
+
break if mol.length == 0
|
85
92
|
end
|
86
|
-
|
93
|
+
rings
|
87
94
|
end
|
88
95
|
|
89
|
-
def trim smallest
|
90
|
-
if
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
96
|
+
def trim mol, smallest
|
97
|
+
if mol.length > 0 && mol.include?(smallest)
|
98
|
+
mol[smallest].each do |n|
|
99
|
+
mol[n] = mol[n] - [smallest]
|
100
|
+
mol.delete(smallest)
|
101
|
+
mol.delete(n) if mol[n].length == 0
|
95
102
|
end
|
96
103
|
end
|
97
104
|
end
|
105
|
+
private :trim
|
98
106
|
|
99
107
|
end
|
100
108
|
end
|
data/lib/chem/utils/sub.rb
CHANGED
data/lib/chem/utils/transform.rb
CHANGED
@@ -8,10 +8,11 @@ module Chem
|
|
8
8
|
|
9
9
|
module TwoDimension
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
11
|
+
# attr_reader :pos
|
12
|
+
# def initialize
|
13
|
+
# super
|
14
|
+
# @pos = Vector[0.0, 0.0]
|
15
|
+
# end
|
15
16
|
|
16
17
|
def pos ; @pos ||= Vector[@x, @y, @z] ; end
|
17
18
|
def x ; pos[0] ; end
|
@@ -23,10 +24,10 @@ module Chem
|
|
23
24
|
module ThreeDimension
|
24
25
|
include TwoDimension
|
25
26
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
27
|
+
# def initialize
|
28
|
+
# super
|
29
|
+
# @pos = Vector[0.0, 0.0]
|
30
|
+
# end
|
30
31
|
|
31
32
|
def z ; pos[2] ; end
|
32
33
|
def z=(z_val) ; pos[2] = z_val ; end
|
data/lib/chem/utils/ullmann.rb
CHANGED
@@ -1,134 +1,177 @@
|
|
1
1
|
#
|
2
|
-
# = chem/utils/
|
2
|
+
# = chem/utils/ullmann.rb - Subgraph isomorphism
|
3
3
|
#
|
4
|
-
# Author:: Nobuya Tanaka <
|
4
|
+
# Author:: Nobuya Tanaka <t@chemruby.org>
|
5
5
|
#
|
6
|
-
# Copyright:: Copyright (c)
|
6
|
+
# Copyright:: Copyright (c) 2005, 2006 ChemRuby project
|
7
7
|
#
|
8
|
-
# $Id: ullmann.rb
|
8
|
+
# $Id: ullmann.rb 180 2006-04-19 08:52:15Z tanaka $
|
9
9
|
#
|
10
10
|
|
11
|
-
require 'subcomp'
|
12
|
-
|
13
11
|
$ARC = 4 # for 32-bit computer
|
14
12
|
|
15
|
-
|
13
|
+
ARCH = 32
|
16
14
|
|
17
|
-
|
18
|
-
n_long = (nodes.length - 1) / 32 + 1
|
19
|
-
mat = Array.new(n_long * @nodes.length, 0)
|
20
|
-
nodes.each_with_index do |node, idx|
|
21
|
-
adjacent_to(node).each do |bond, node|
|
22
|
-
keta = nodes.index(node) / 32
|
23
|
-
mat[idx * n_long + keta] += 1 << (nodes.index(node) - keta * 32)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
mat.pack("L*")
|
27
|
-
end
|
15
|
+
module Chem
|
28
16
|
|
29
|
-
|
30
|
-
m = Array.new("0xff", 100).pack("c*")
|
31
|
-
subcomp_match_by_ullmann(mat, len, self.adjacency_list, self.nodes.length, m)
|
32
|
-
end
|
17
|
+
module Molecule
|
33
18
|
|
34
|
-
|
35
|
-
|
36
|
-
self
|
19
|
+
def match_by_ullmann(target, &block)
|
20
|
+
require 'subcomp'
|
21
|
+
Chem.match_by_ullmann(self, target, &block)
|
37
22
|
end
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
a.element == b.element and not (correspond[a] and correspond[a].include? b)
|
23
|
+
|
24
|
+
def match(target, &block)
|
25
|
+
ary = nil
|
26
|
+
|
27
|
+
if block_given?
|
28
|
+
ary = match_by_ullmann(target){ |i, j|
|
29
|
+
yield(self.nodes[i], target.nodes[j])
|
30
|
+
}
|
31
|
+
else
|
32
|
+
ary = match_by_ullmann(target)
|
49
33
|
end
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
34
|
+
|
35
|
+
ret = []
|
36
|
+
ary.each do |a|
|
37
|
+
hash = {}
|
38
|
+
a.each_with_index do |i, j|
|
39
|
+
hash[nodes[j]] = target.nodes[i]
|
40
|
+
end
|
41
|
+
hash
|
42
|
+
ret << hash
|
54
43
|
end
|
44
|
+
ret
|
45
|
+
end
|
46
|
+
|
47
|
+
def typ_str
|
48
|
+
nodes.collect{|atom| atom.atomic_number}.pack("l*")
|
55
49
|
end
|
56
|
-
result
|
57
|
-
end
|
58
50
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
if node.element == n.element
|
65
|
-
keta = idx / 32
|
66
|
-
mat[index * n_long + keta] += 1 << (idx - keta * 32)
|
51
|
+
def adjacent_index
|
52
|
+
nodes.inject([]) do |ret, node|
|
53
|
+
ary = ret[nodes.index(node)] = []
|
54
|
+
adjacent_to(node).each do |bond, ad_node|
|
55
|
+
ary << nodes.index(ad_node)
|
67
56
|
end
|
57
|
+
ret
|
68
58
|
end
|
69
59
|
end
|
70
|
-
mat.pack("L*")
|
71
|
-
end
|
72
60
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
col_bit = idx - col_byte * 8
|
84
|
-
r[index * row_unit + col_byte] += (1 << col_bit)
|
61
|
+
def bit_mat
|
62
|
+
bm = BitMatrix.new(nodes.length, nodes.length)
|
63
|
+
if edges.length == 0
|
64
|
+
bm.has_matrix = false
|
65
|
+
else
|
66
|
+
adj = {}
|
67
|
+
nodes.each do |node|
|
68
|
+
adj[node] = []
|
69
|
+
adjacent_to(node).each do |bond, to|
|
70
|
+
adj[node] << to
|
85
71
|
end
|
86
72
|
end
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
r[index * row_unit + col_byte] += (1 << col_bit)
|
73
|
+
|
74
|
+
nodes.each_with_index do |atom1, idx1|
|
75
|
+
ary = []
|
76
|
+
nodes.each_with_index do |atom2, idx2|
|
77
|
+
if adj[atom1].include?(atom2)
|
78
|
+
bm.set(idx1, idx2)
|
79
|
+
end
|
95
80
|
end
|
96
81
|
end
|
97
82
|
end
|
83
|
+
bm
|
98
84
|
end
|
99
|
-
|
85
|
+
|
100
86
|
end
|
101
87
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
88
|
+
class BitMatrix
|
89
|
+
|
90
|
+
attr_reader :height, :widht, :n_bytes
|
91
|
+
attr_accessor :has_matrix
|
92
|
+
|
93
|
+
def initialize(height, width)
|
94
|
+
@height = height
|
95
|
+
@width = width
|
96
|
+
@n_bytes = (width - 1) / ARCH + 1
|
97
|
+
@bits = []
|
98
|
+
height.times do |n|
|
99
|
+
@bits[n] = []
|
100
|
+
@n_bytes.times do |m|
|
101
|
+
@bits[n][m] = 0
|
102
|
+
end
|
108
103
|
end
|
109
|
-
|
104
|
+
@has_matrix = true
|
110
105
|
end
|
111
|
-
ret
|
112
|
-
end
|
113
106
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
107
|
+
def set(row, col)
|
108
|
+
@bits[row][col / ARCH] += (1 << (col % ARCH))
|
109
|
+
end
|
110
|
+
|
111
|
+
def to_s
|
112
|
+
s = " "
|
113
|
+
@width.times{|n| s << "%d" % (n % 10)}
|
114
|
+
s << "\n"
|
115
|
+
@bits.each_with_index do |ary, idx|
|
116
|
+
s << "%3d " % idx
|
117
|
+
ary.each_with_index do |a, idx2|
|
118
|
+
s << bit_to_str(a, (idx2 == @n_bytes - 1) ? (@width % ARCH) : ARCH)
|
119
|
+
end
|
120
|
+
s << "\n"
|
121
121
|
end
|
122
|
-
|
122
|
+
s
|
123
123
|
end
|
124
|
-
|
124
|
+
|
125
|
+
def bit_str
|
126
|
+
@bits.flatten.pack("L*")
|
127
|
+
end
|
128
|
+
|
129
|
+
def bit_to_str bits, num
|
130
|
+
s = ""
|
131
|
+
num.times do |n|
|
132
|
+
s << (((1 << n) & bits != 0) ? "*" : ".")
|
133
|
+
end
|
134
|
+
s
|
135
|
+
end
|
136
|
+
private :bit_to_str
|
137
|
+
|
125
138
|
end
|
126
139
|
|
127
|
-
|
140
|
+
# Database Specification
|
141
|
+
# * idx file
|
142
|
+
# 32 bit : n_bytes
|
143
|
+
class CompoundDB
|
144
|
+
|
145
|
+
def initialize(name)
|
146
|
+
@current_id = 0
|
147
|
+
@mat = File.open(name + ".mat", "w")
|
148
|
+
@idx = File.open(name + ".idx", "w")
|
149
|
+
@typ = File.open(name + ".typ", "w")
|
150
|
+
end
|
151
|
+
|
152
|
+
def store(mol)
|
153
|
+
bm = mol.bit_mat
|
154
|
+
@current_id += 1
|
155
|
+
|
156
|
+
if bm.has_matrix
|
157
|
+
@idx.print [bm.height, bm.n_bytes, @mat.tell, 0].pack("l*")
|
158
|
+
@mat.print bm.bit_str
|
159
|
+
else
|
160
|
+
@idx.print [bm.height, bm.n_bytes, @mat.tell, -1].pack("l*")
|
161
|
+
end
|
162
|
+
@typ.print mol.typ_str
|
163
|
+
@current_id
|
164
|
+
end
|
165
|
+
|
166
|
+
def close
|
167
|
+
@idx.print [-1, -1, -1].pack("l*")
|
168
|
+
|
169
|
+
@mat.close
|
170
|
+
@idx.close
|
171
|
+
@typ.close
|
172
|
+
end
|
128
173
|
|
129
|
-
module Chem
|
130
|
-
module Molecule
|
131
|
-
include Graph
|
132
174
|
end
|
175
|
+
|
133
176
|
end
|
134
177
|
|
data/lib/graph.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#
|
2
2
|
# graph.rb - Graph
|
3
3
|
#
|
4
|
-
# Copyright (C)
|
4
|
+
# Copyright (C) 2005, 2006 TANAKA Nobuya <t@chemruby.net>
|
5
5
|
#
|
6
6
|
# $Id: graph.rb 61 2005-10-12 09:17:39Z tanaka $
|
7
7
|
#
|
@@ -9,23 +9,22 @@
|
|
9
9
|
|
10
10
|
require 'graph/morgan'
|
11
11
|
require 'graph/cluster'
|
12
|
+
require 'graph/utils'
|
12
13
|
|
13
14
|
module Graph
|
14
15
|
|
15
16
|
attr_accessor :nodes, :edges, :adjacencies
|
16
17
|
|
17
18
|
def each
|
18
|
-
|
19
|
+
nodes.each do |atom|
|
19
20
|
yield atom
|
20
21
|
end
|
21
22
|
end
|
22
23
|
|
23
24
|
def adjacent_to(atom)
|
24
|
-
# instance_eval "alias :tmp_adjacent_to :adjacent_to"
|
25
|
-
# instance_eval "alias :adjacent_to :adjacencies"
|
26
25
|
if @adjacencies == nil
|
27
|
-
@adjacencies =
|
28
|
-
|
26
|
+
@adjacencies = Hash.new
|
27
|
+
edges.each do |bond, atom_a, atom_b|
|
29
28
|
(@adjacencies[atom_a] ||= []).push([bond, atom_b])
|
30
29
|
(@adjacencies[atom_b] ||= []).push([bond, atom_a])
|
31
30
|
end
|