chemruby 0.9.3 → 1.1.9
Sign up to get free protection for your applications and to get access to all the features.
- data/README +2 -2
- data/Rakefile +67 -63
- data/ext/extconf.rb +2 -0
- data/ext/subcomp.c +461 -320
- data/ext/utils.c +56 -0
- data/ext/utils.h +13 -0
- data/lib/chem.rb +34 -8
- data/lib/chem/db.rb +8 -0
- data/lib/chem/db/cansmi.rb +1 -1
- data/lib/chem/db/cdx.rb +1 -1
- data/lib/chem/db/cml.rb +52 -0
- data/lib/chem/db/gd.rb +64 -0
- data/lib/chem/db/gspan.rb +2 -2
- data/lib/chem/db/kcf_rpair.rb +34 -0
- data/lib/chem/db/kegg.rb +35 -1
- data/lib/chem/db/mdl.rb +75 -34
- data/lib/chem/db/opsin.rb +24 -0
- data/lib/chem/db/pdb.rb +105 -0
- data/lib/chem/db/pdf.rb +2 -0
- data/lib/chem/db/pubchem.rb +1071 -88
- data/lib/chem/db/rmagick.rb +5 -3
- data/lib/chem/db/sdf.rb +28 -2
- data/lib/chem/db/smiles/smiles.ry +27 -25
- data/lib/chem/db/smiles/smiparser.rb +29 -27
- data/lib/chem/db/types/type_gd.rb +35 -0
- data/lib/chem/db/types/type_gspan.rb +2 -2
- data/lib/chem/db/types/type_kcf.rb +19 -0
- data/lib/chem/db/types/type_kegg.rb +2 -0
- data/lib/chem/db/types/type_mdl.rb +1 -1
- data/lib/chem/db/types/type_png.rb +5 -1
- data/lib/chem/db/types/type_rdf.rb +22 -0
- data/lib/chem/db/types/type_xyz.rb +1 -1
- data/lib/chem/db/vector.rb +19 -3
- data/lib/chem/model.rb +5 -2
- data/lib/chem/utils.rb +17 -1
- data/lib/chem/utils/bitdb.rb +49 -0
- data/lib/chem/utils/cas.rb +28 -0
- data/lib/chem/utils/cdk.rb +403 -0
- data/lib/chem/utils/fingerprint.rb +98 -0
- data/lib/chem/utils/geometry.rb +8 -0
- data/lib/chem/utils/net.rb +303 -0
- data/lib/chem/utils/once.rb +28 -0
- data/lib/chem/utils/openbabel.rb +204 -0
- data/lib/chem/utils/sssr.rb +33 -25
- data/lib/chem/utils/sub.rb +6 -0
- data/lib/chem/utils/transform.rb +9 -8
- data/lib/chem/utils/ullmann.rb +138 -95
- data/lib/graph.rb +5 -6
- data/lib/graph/utils.rb +8 -0
- data/sample/calc_maximum_common_subgraph.rb +27 -0
- data/sample/calc_properties.rb +9 -0
- data/sample/data/atp.mol +69 -0
- data/sample/data/pioglitazone.mol +58 -0
- data/sample/data/rosiglitazone.mol +55 -0
- data/sample/data/troglitazone.mol +70 -0
- data/sample/find_compound_by_keggapi.rb +19 -0
- data/sample/generate_inchi.rb +7 -0
- data/sample/generate_substructurekey.rb +11 -0
- data/sample/images/ex6.rb +17 -0
- data/sample/images/ex7.rb +18 -0
- data/sample/iupac2mol.rb +8 -0
- data/sample/kekule.rb +13 -0
- data/sample/logp.rb +4 -0
- data/sample/mcs.rb +13 -0
- data/sample/mol2pdf.rb +8 -0
- data/sample/pubchem_fetch.rb +8 -0
- data/sample/pubchem_search.rb +12 -0
- data/sample/rosiglitazone.mol +57 -0
- data/sample/smarts.rb +10 -0
- data/sample/structure_match.rb +8 -0
- data/sample/structure_match_color.rb +22 -0
- data/sample/thiazolidinedione.mol +19 -0
- data/sample/troglitazone.mol +232 -0
- data/sample/vicinity.rb +8 -0
- data/test/data/CID_704.sdf +236 -0
- data/test/data/CID_994.sdf +146 -0
- data/test/data/db_EXPT03276.txt +321 -0
- data/test/data/pioglitazone.mol +58 -0
- data/test/data/rosiglitazone.mol +55 -0
- data/test/data/thiazolidinedione.mol +19 -0
- data/test/data/troglitazone.mol +70 -0
- data/test/{test_adj.rb → tc_adj.rb} +0 -0
- data/test/{test_canonical_smiles.rb → tc_canonical_smiles.rb} +0 -0
- data/test/tc_casrn.rb +17 -0
- data/test/tc_cdk.rb +89 -0
- data/test/{test_cdx.rb → tc_cdx.rb} +0 -0
- data/test/{test_chem.rb → tc_chem.rb} +0 -0
- data/test/{test_cluster.rb → tc_cluster.rb} +0 -0
- data/test/{test_db.rb → tc_db.rb} +0 -0
- data/test/tc_develop.rb +38 -0
- data/test/tc_drugbank.rb +13 -0
- data/test/{test_eps.rb → tc_eps.rb} +0 -0
- data/test/tc_gd.rb +8 -0
- data/test/{test_geometry.rb → tc_geometry.rb} +0 -0
- data/test/tc_graph.rb +15 -0
- data/test/{test_gspan.rb → tc_gspan.rb} +0 -0
- data/test/{test_iupac.rb → tc_iupac.rb} +0 -0
- data/test/{test_kcf.rb → tc_kcf.rb} +0 -0
- data/test/{test_kcf_glycan.rb → tc_kcf_glycan.rb} +0 -0
- data/test/{test_kegg.rb → tc_kegg.rb} +13 -0
- data/test/{test_linucs.rb → tc_linucs.rb} +0 -0
- data/test/{test_mdl.rb → tc_mdl.rb} +20 -0
- data/test/{test_mol2.rb → tc_mol2.rb} +1 -1
- data/test/{test_morgan.rb → tc_morgan.rb} +0 -0
- data/test/tc_net.rb +5 -0
- data/test/tc_once.rb +29 -0
- data/test/tc_openbabel.rb +57 -0
- data/test/{test_pdf.rb → tc_pdf.rb} +0 -0
- data/test/{test_prop.rb → tc_prop.rb} +1 -1
- data/test/tc_pubchem.rb +32 -0
- data/test/{test_rmagick.rb → tc_rmagick.rb} +0 -0
- data/test/{test_sbdb.rb → tc_sbdb.rb} +0 -0
- data/test/{test_sdf.rb → tc_sdf.rb} +2 -0
- data/test/{test_smiles.rb → tc_smiles.rb} +46 -30
- data/test/tc_sssr.rb +1 -0
- data/test/{test_sub.rb → tc_sub.rb} +0 -0
- data/test/tc_subcomp.rb +59 -0
- data/test/{test_traverse.rb → tc_traverse.rb} +0 -0
- data/test/{test_writer.rb → tc_writer.rb} +0 -0
- data/test/{test_xyz.rb → tc_xyz.rb} +0 -0
- data/test/ts_current.rb +11 -0
- data/test/ts_image.rb +6 -0
- data/test/ts_main.rb +12 -0
- metadata +259 -194
- data/lib/chem/utils/graph_db.rb +0 -146
- data/test/test_sssr.rb +0 -18
- data/test/test_subcomp.rb +0 -37
data/lib/chem/utils/sssr.rb
CHANGED
@@ -31,31 +31,35 @@ module Chem
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
+
# Fix me! This is not sufficient
|
34
35
|
def canonical_ring ring
|
35
|
-
|
36
|
-
ring.sort{|a, b| @atoms.index(a) <=> @atoms.index(b)}
|
36
|
+
ring.sort{|a, b| nodes.index(a) <=> nodes.index(b)}
|
37
37
|
end
|
38
38
|
|
39
|
+
# Returns Smallest Set of Smallest Ring
|
39
40
|
def find_sssr
|
40
|
-
|
41
|
-
fullSet =
|
41
|
+
|
42
|
+
fullSet = nodes.dup
|
42
43
|
trimSet = []
|
43
44
|
rings = []
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
45
|
+
mol = {}
|
46
|
+
|
47
|
+
nodes.each do |node|
|
48
|
+
mol[node] = []
|
49
|
+
adjacent_to(node).each do |bond, atom|
|
50
|
+
mol[node] << atom
|
51
|
+
end
|
52
|
+
end
|
50
53
|
|
51
54
|
loop do
|
52
55
|
nodesN2 = []
|
53
56
|
smallest_degree = 10
|
54
57
|
smallest = nil
|
55
|
-
|
58
|
+
|
59
|
+
mol.each do |k, a|
|
56
60
|
case a.length
|
57
61
|
when 0
|
58
|
-
|
62
|
+
mol.delete(k)# Is this OK?
|
59
63
|
trimSet.push(k)
|
60
64
|
when 2
|
61
65
|
nodesN2.push(k)
|
@@ -65,36 +69,40 @@ module Chem
|
|
65
69
|
smallest_degree = a.length
|
66
70
|
end
|
67
71
|
end
|
72
|
+
|
68
73
|
case smallest_degree
|
69
74
|
when 1
|
70
|
-
trim(smallest)
|
75
|
+
trim(mol, smallest)
|
71
76
|
when 2
|
72
77
|
nodesN2.each do |k|
|
73
78
|
ring = find_smallest_ring(k)
|
74
|
-
|
75
|
-
|
79
|
+
if ring && !rings.include?(canonical_ring(ring))
|
80
|
+
rings.push(canonical_ring(ring))
|
81
|
+
end
|
76
82
|
end
|
77
83
|
nodesN2.each do |k|
|
78
|
-
trim(k)
|
84
|
+
trim(mol, k)
|
79
85
|
end
|
80
86
|
when 3
|
81
87
|
ring = find_smallest_ring(smallest)
|
82
|
-
trim(smallest)
|
88
|
+
trim(mol, smallest)
|
83
89
|
end
|
84
|
-
|
90
|
+
|
91
|
+
break if mol.length == 0
|
85
92
|
end
|
86
|
-
|
93
|
+
rings
|
87
94
|
end
|
88
95
|
|
89
|
-
def trim smallest
|
90
|
-
if
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
96
|
+
def trim mol, smallest
|
97
|
+
if mol.length > 0 && mol.include?(smallest)
|
98
|
+
mol[smallest].each do |n|
|
99
|
+
mol[n] = mol[n] - [smallest]
|
100
|
+
mol.delete(smallest)
|
101
|
+
mol.delete(n) if mol[n].length == 0
|
95
102
|
end
|
96
103
|
end
|
97
104
|
end
|
105
|
+
private :trim
|
98
106
|
|
99
107
|
end
|
100
108
|
end
|
data/lib/chem/utils/sub.rb
CHANGED
data/lib/chem/utils/transform.rb
CHANGED
@@ -8,10 +8,11 @@ module Chem
|
|
8
8
|
|
9
9
|
module TwoDimension
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
11
|
+
# attr_reader :pos
|
12
|
+
# def initialize
|
13
|
+
# super
|
14
|
+
# @pos = Vector[0.0, 0.0]
|
15
|
+
# end
|
15
16
|
|
16
17
|
def pos ; @pos ||= Vector[@x, @y, @z] ; end
|
17
18
|
def x ; pos[0] ; end
|
@@ -23,10 +24,10 @@ module Chem
|
|
23
24
|
module ThreeDimension
|
24
25
|
include TwoDimension
|
25
26
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
27
|
+
# def initialize
|
28
|
+
# super
|
29
|
+
# @pos = Vector[0.0, 0.0]
|
30
|
+
# end
|
30
31
|
|
31
32
|
def z ; pos[2] ; end
|
32
33
|
def z=(z_val) ; pos[2] = z_val ; end
|
data/lib/chem/utils/ullmann.rb
CHANGED
@@ -1,134 +1,177 @@
|
|
1
1
|
#
|
2
|
-
# = chem/utils/
|
2
|
+
# = chem/utils/ullmann.rb - Subgraph isomorphism
|
3
3
|
#
|
4
|
-
# Author:: Nobuya Tanaka <
|
4
|
+
# Author:: Nobuya Tanaka <t@chemruby.org>
|
5
5
|
#
|
6
|
-
# Copyright:: Copyright (c)
|
6
|
+
# Copyright:: Copyright (c) 2005, 2006 ChemRuby project
|
7
7
|
#
|
8
|
-
# $Id: ullmann.rb
|
8
|
+
# $Id: ullmann.rb 180 2006-04-19 08:52:15Z tanaka $
|
9
9
|
#
|
10
10
|
|
11
|
-
require 'subcomp'
|
12
|
-
|
13
11
|
$ARC = 4 # for 32-bit computer
|
14
12
|
|
15
|
-
|
13
|
+
ARCH = 32
|
16
14
|
|
17
|
-
|
18
|
-
n_long = (nodes.length - 1) / 32 + 1
|
19
|
-
mat = Array.new(n_long * @nodes.length, 0)
|
20
|
-
nodes.each_with_index do |node, idx|
|
21
|
-
adjacent_to(node).each do |bond, node|
|
22
|
-
keta = nodes.index(node) / 32
|
23
|
-
mat[idx * n_long + keta] += 1 << (nodes.index(node) - keta * 32)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
mat.pack("L*")
|
27
|
-
end
|
15
|
+
module Chem
|
28
16
|
|
29
|
-
|
30
|
-
m = Array.new("0xff", 100).pack("c*")
|
31
|
-
subcomp_match_by_ullmann(mat, len, self.adjacency_list, self.nodes.length, m)
|
32
|
-
end
|
17
|
+
module Molecule
|
33
18
|
|
34
|
-
|
35
|
-
|
36
|
-
self
|
19
|
+
def match_by_ullmann(target, &block)
|
20
|
+
require 'subcomp'
|
21
|
+
Chem.match_by_ullmann(self, target, &block)
|
37
22
|
end
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
a.element == b.element and not (correspond[a] and correspond[a].include? b)
|
23
|
+
|
24
|
+
def match(target, &block)
|
25
|
+
ary = nil
|
26
|
+
|
27
|
+
if block_given?
|
28
|
+
ary = match_by_ullmann(target){ |i, j|
|
29
|
+
yield(self.nodes[i], target.nodes[j])
|
30
|
+
}
|
31
|
+
else
|
32
|
+
ary = match_by_ullmann(target)
|
49
33
|
end
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
34
|
+
|
35
|
+
ret = []
|
36
|
+
ary.each do |a|
|
37
|
+
hash = {}
|
38
|
+
a.each_with_index do |i, j|
|
39
|
+
hash[nodes[j]] = target.nodes[i]
|
40
|
+
end
|
41
|
+
hash
|
42
|
+
ret << hash
|
54
43
|
end
|
44
|
+
ret
|
45
|
+
end
|
46
|
+
|
47
|
+
def typ_str
|
48
|
+
nodes.collect{|atom| atom.atomic_number}.pack("l*")
|
55
49
|
end
|
56
|
-
result
|
57
|
-
end
|
58
50
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
if node.element == n.element
|
65
|
-
keta = idx / 32
|
66
|
-
mat[index * n_long + keta] += 1 << (idx - keta * 32)
|
51
|
+
def adjacent_index
|
52
|
+
nodes.inject([]) do |ret, node|
|
53
|
+
ary = ret[nodes.index(node)] = []
|
54
|
+
adjacent_to(node).each do |bond, ad_node|
|
55
|
+
ary << nodes.index(ad_node)
|
67
56
|
end
|
57
|
+
ret
|
68
58
|
end
|
69
59
|
end
|
70
|
-
mat.pack("L*")
|
71
|
-
end
|
72
60
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
col_bit = idx - col_byte * 8
|
84
|
-
r[index * row_unit + col_byte] += (1 << col_bit)
|
61
|
+
def bit_mat
|
62
|
+
bm = BitMatrix.new(nodes.length, nodes.length)
|
63
|
+
if edges.length == 0
|
64
|
+
bm.has_matrix = false
|
65
|
+
else
|
66
|
+
adj = {}
|
67
|
+
nodes.each do |node|
|
68
|
+
adj[node] = []
|
69
|
+
adjacent_to(node).each do |bond, to|
|
70
|
+
adj[node] << to
|
85
71
|
end
|
86
72
|
end
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
r[index * row_unit + col_byte] += (1 << col_bit)
|
73
|
+
|
74
|
+
nodes.each_with_index do |atom1, idx1|
|
75
|
+
ary = []
|
76
|
+
nodes.each_with_index do |atom2, idx2|
|
77
|
+
if adj[atom1].include?(atom2)
|
78
|
+
bm.set(idx1, idx2)
|
79
|
+
end
|
95
80
|
end
|
96
81
|
end
|
97
82
|
end
|
83
|
+
bm
|
98
84
|
end
|
99
|
-
|
85
|
+
|
100
86
|
end
|
101
87
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
88
|
+
class BitMatrix
|
89
|
+
|
90
|
+
attr_reader :height, :widht, :n_bytes
|
91
|
+
attr_accessor :has_matrix
|
92
|
+
|
93
|
+
def initialize(height, width)
|
94
|
+
@height = height
|
95
|
+
@width = width
|
96
|
+
@n_bytes = (width - 1) / ARCH + 1
|
97
|
+
@bits = []
|
98
|
+
height.times do |n|
|
99
|
+
@bits[n] = []
|
100
|
+
@n_bytes.times do |m|
|
101
|
+
@bits[n][m] = 0
|
102
|
+
end
|
108
103
|
end
|
109
|
-
|
104
|
+
@has_matrix = true
|
110
105
|
end
|
111
|
-
ret
|
112
|
-
end
|
113
106
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
107
|
+
def set(row, col)
|
108
|
+
@bits[row][col / ARCH] += (1 << (col % ARCH))
|
109
|
+
end
|
110
|
+
|
111
|
+
def to_s
|
112
|
+
s = " "
|
113
|
+
@width.times{|n| s << "%d" % (n % 10)}
|
114
|
+
s << "\n"
|
115
|
+
@bits.each_with_index do |ary, idx|
|
116
|
+
s << "%3d " % idx
|
117
|
+
ary.each_with_index do |a, idx2|
|
118
|
+
s << bit_to_str(a, (idx2 == @n_bytes - 1) ? (@width % ARCH) : ARCH)
|
119
|
+
end
|
120
|
+
s << "\n"
|
121
121
|
end
|
122
|
-
|
122
|
+
s
|
123
123
|
end
|
124
|
-
|
124
|
+
|
125
|
+
def bit_str
|
126
|
+
@bits.flatten.pack("L*")
|
127
|
+
end
|
128
|
+
|
129
|
+
def bit_to_str bits, num
|
130
|
+
s = ""
|
131
|
+
num.times do |n|
|
132
|
+
s << (((1 << n) & bits != 0) ? "*" : ".")
|
133
|
+
end
|
134
|
+
s
|
135
|
+
end
|
136
|
+
private :bit_to_str
|
137
|
+
|
125
138
|
end
|
126
139
|
|
127
|
-
|
140
|
+
# Database Specification
|
141
|
+
# * idx file
|
142
|
+
# 32 bit : n_bytes
|
143
|
+
class CompoundDB
|
144
|
+
|
145
|
+
def initialize(name)
|
146
|
+
@current_id = 0
|
147
|
+
@mat = File.open(name + ".mat", "w")
|
148
|
+
@idx = File.open(name + ".idx", "w")
|
149
|
+
@typ = File.open(name + ".typ", "w")
|
150
|
+
end
|
151
|
+
|
152
|
+
def store(mol)
|
153
|
+
bm = mol.bit_mat
|
154
|
+
@current_id += 1
|
155
|
+
|
156
|
+
if bm.has_matrix
|
157
|
+
@idx.print [bm.height, bm.n_bytes, @mat.tell, 0].pack("l*")
|
158
|
+
@mat.print bm.bit_str
|
159
|
+
else
|
160
|
+
@idx.print [bm.height, bm.n_bytes, @mat.tell, -1].pack("l*")
|
161
|
+
end
|
162
|
+
@typ.print mol.typ_str
|
163
|
+
@current_id
|
164
|
+
end
|
165
|
+
|
166
|
+
def close
|
167
|
+
@idx.print [-1, -1, -1].pack("l*")
|
168
|
+
|
169
|
+
@mat.close
|
170
|
+
@idx.close
|
171
|
+
@typ.close
|
172
|
+
end
|
128
173
|
|
129
|
-
module Chem
|
130
|
-
module Molecule
|
131
|
-
include Graph
|
132
174
|
end
|
175
|
+
|
133
176
|
end
|
134
177
|
|
data/lib/graph.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#
|
2
2
|
# graph.rb - Graph
|
3
3
|
#
|
4
|
-
# Copyright (C)
|
4
|
+
# Copyright (C) 2005, 2006 TANAKA Nobuya <t@chemruby.net>
|
5
5
|
#
|
6
6
|
# $Id: graph.rb 61 2005-10-12 09:17:39Z tanaka $
|
7
7
|
#
|
@@ -9,23 +9,22 @@
|
|
9
9
|
|
10
10
|
require 'graph/morgan'
|
11
11
|
require 'graph/cluster'
|
12
|
+
require 'graph/utils'
|
12
13
|
|
13
14
|
module Graph
|
14
15
|
|
15
16
|
attr_accessor :nodes, :edges, :adjacencies
|
16
17
|
|
17
18
|
def each
|
18
|
-
|
19
|
+
nodes.each do |atom|
|
19
20
|
yield atom
|
20
21
|
end
|
21
22
|
end
|
22
23
|
|
23
24
|
def adjacent_to(atom)
|
24
|
-
# instance_eval "alias :tmp_adjacent_to :adjacent_to"
|
25
|
-
# instance_eval "alias :adjacent_to :adjacencies"
|
26
25
|
if @adjacencies == nil
|
27
|
-
@adjacencies =
|
28
|
-
|
26
|
+
@adjacencies = Hash.new
|
27
|
+
edges.each do |bond, atom_a, atom_b|
|
29
28
|
(@adjacencies[atom_a] ||= []).push([bond, atom_b])
|
30
29
|
(@adjacencies[atom_b] ||= []).push([bond, atom_a])
|
31
30
|
end
|