chemruby 0.9.3 → 1.1.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (127) hide show
  1. data/README +2 -2
  2. data/Rakefile +67 -63
  3. data/ext/extconf.rb +2 -0
  4. data/ext/subcomp.c +461 -320
  5. data/ext/utils.c +56 -0
  6. data/ext/utils.h +13 -0
  7. data/lib/chem.rb +34 -8
  8. data/lib/chem/db.rb +8 -0
  9. data/lib/chem/db/cansmi.rb +1 -1
  10. data/lib/chem/db/cdx.rb +1 -1
  11. data/lib/chem/db/cml.rb +52 -0
  12. data/lib/chem/db/gd.rb +64 -0
  13. data/lib/chem/db/gspan.rb +2 -2
  14. data/lib/chem/db/kcf_rpair.rb +34 -0
  15. data/lib/chem/db/kegg.rb +35 -1
  16. data/lib/chem/db/mdl.rb +75 -34
  17. data/lib/chem/db/opsin.rb +24 -0
  18. data/lib/chem/db/pdb.rb +105 -0
  19. data/lib/chem/db/pdf.rb +2 -0
  20. data/lib/chem/db/pubchem.rb +1071 -88
  21. data/lib/chem/db/rmagick.rb +5 -3
  22. data/lib/chem/db/sdf.rb +28 -2
  23. data/lib/chem/db/smiles/smiles.ry +27 -25
  24. data/lib/chem/db/smiles/smiparser.rb +29 -27
  25. data/lib/chem/db/types/type_gd.rb +35 -0
  26. data/lib/chem/db/types/type_gspan.rb +2 -2
  27. data/lib/chem/db/types/type_kcf.rb +19 -0
  28. data/lib/chem/db/types/type_kegg.rb +2 -0
  29. data/lib/chem/db/types/type_mdl.rb +1 -1
  30. data/lib/chem/db/types/type_png.rb +5 -1
  31. data/lib/chem/db/types/type_rdf.rb +22 -0
  32. data/lib/chem/db/types/type_xyz.rb +1 -1
  33. data/lib/chem/db/vector.rb +19 -3
  34. data/lib/chem/model.rb +5 -2
  35. data/lib/chem/utils.rb +17 -1
  36. data/lib/chem/utils/bitdb.rb +49 -0
  37. data/lib/chem/utils/cas.rb +28 -0
  38. data/lib/chem/utils/cdk.rb +403 -0
  39. data/lib/chem/utils/fingerprint.rb +98 -0
  40. data/lib/chem/utils/geometry.rb +8 -0
  41. data/lib/chem/utils/net.rb +303 -0
  42. data/lib/chem/utils/once.rb +28 -0
  43. data/lib/chem/utils/openbabel.rb +204 -0
  44. data/lib/chem/utils/sssr.rb +33 -25
  45. data/lib/chem/utils/sub.rb +6 -0
  46. data/lib/chem/utils/transform.rb +9 -8
  47. data/lib/chem/utils/ullmann.rb +138 -95
  48. data/lib/graph.rb +5 -6
  49. data/lib/graph/utils.rb +8 -0
  50. data/sample/calc_maximum_common_subgraph.rb +27 -0
  51. data/sample/calc_properties.rb +9 -0
  52. data/sample/data/atp.mol +69 -0
  53. data/sample/data/pioglitazone.mol +58 -0
  54. data/sample/data/rosiglitazone.mol +55 -0
  55. data/sample/data/troglitazone.mol +70 -0
  56. data/sample/find_compound_by_keggapi.rb +19 -0
  57. data/sample/generate_inchi.rb +7 -0
  58. data/sample/generate_substructurekey.rb +11 -0
  59. data/sample/images/ex6.rb +17 -0
  60. data/sample/images/ex7.rb +18 -0
  61. data/sample/iupac2mol.rb +8 -0
  62. data/sample/kekule.rb +13 -0
  63. data/sample/logp.rb +4 -0
  64. data/sample/mcs.rb +13 -0
  65. data/sample/mol2pdf.rb +8 -0
  66. data/sample/pubchem_fetch.rb +8 -0
  67. data/sample/pubchem_search.rb +12 -0
  68. data/sample/rosiglitazone.mol +57 -0
  69. data/sample/smarts.rb +10 -0
  70. data/sample/structure_match.rb +8 -0
  71. data/sample/structure_match_color.rb +22 -0
  72. data/sample/thiazolidinedione.mol +19 -0
  73. data/sample/troglitazone.mol +232 -0
  74. data/sample/vicinity.rb +8 -0
  75. data/test/data/CID_704.sdf +236 -0
  76. data/test/data/CID_994.sdf +146 -0
  77. data/test/data/db_EXPT03276.txt +321 -0
  78. data/test/data/pioglitazone.mol +58 -0
  79. data/test/data/rosiglitazone.mol +55 -0
  80. data/test/data/thiazolidinedione.mol +19 -0
  81. data/test/data/troglitazone.mol +70 -0
  82. data/test/{test_adj.rb → tc_adj.rb} +0 -0
  83. data/test/{test_canonical_smiles.rb → tc_canonical_smiles.rb} +0 -0
  84. data/test/tc_casrn.rb +17 -0
  85. data/test/tc_cdk.rb +89 -0
  86. data/test/{test_cdx.rb → tc_cdx.rb} +0 -0
  87. data/test/{test_chem.rb → tc_chem.rb} +0 -0
  88. data/test/{test_cluster.rb → tc_cluster.rb} +0 -0
  89. data/test/{test_db.rb → tc_db.rb} +0 -0
  90. data/test/tc_develop.rb +38 -0
  91. data/test/tc_drugbank.rb +13 -0
  92. data/test/{test_eps.rb → tc_eps.rb} +0 -0
  93. data/test/tc_gd.rb +8 -0
  94. data/test/{test_geometry.rb → tc_geometry.rb} +0 -0
  95. data/test/tc_graph.rb +15 -0
  96. data/test/{test_gspan.rb → tc_gspan.rb} +0 -0
  97. data/test/{test_iupac.rb → tc_iupac.rb} +0 -0
  98. data/test/{test_kcf.rb → tc_kcf.rb} +0 -0
  99. data/test/{test_kcf_glycan.rb → tc_kcf_glycan.rb} +0 -0
  100. data/test/{test_kegg.rb → tc_kegg.rb} +13 -0
  101. data/test/{test_linucs.rb → tc_linucs.rb} +0 -0
  102. data/test/{test_mdl.rb → tc_mdl.rb} +20 -0
  103. data/test/{test_mol2.rb → tc_mol2.rb} +1 -1
  104. data/test/{test_morgan.rb → tc_morgan.rb} +0 -0
  105. data/test/tc_net.rb +5 -0
  106. data/test/tc_once.rb +29 -0
  107. data/test/tc_openbabel.rb +57 -0
  108. data/test/{test_pdf.rb → tc_pdf.rb} +0 -0
  109. data/test/{test_prop.rb → tc_prop.rb} +1 -1
  110. data/test/tc_pubchem.rb +32 -0
  111. data/test/{test_rmagick.rb → tc_rmagick.rb} +0 -0
  112. data/test/{test_sbdb.rb → tc_sbdb.rb} +0 -0
  113. data/test/{test_sdf.rb → tc_sdf.rb} +2 -0
  114. data/test/{test_smiles.rb → tc_smiles.rb} +46 -30
  115. data/test/tc_sssr.rb +1 -0
  116. data/test/{test_sub.rb → tc_sub.rb} +0 -0
  117. data/test/tc_subcomp.rb +59 -0
  118. data/test/{test_traverse.rb → tc_traverse.rb} +0 -0
  119. data/test/{test_writer.rb → tc_writer.rb} +0 -0
  120. data/test/{test_xyz.rb → tc_xyz.rb} +0 -0
  121. data/test/ts_current.rb +11 -0
  122. data/test/ts_image.rb +6 -0
  123. data/test/ts_main.rb +12 -0
  124. metadata +259 -194
  125. data/lib/chem/utils/graph_db.rb +0 -146
  126. data/test/test_sssr.rb +0 -18
  127. data/test/test_subcomp.rb +0 -37
@@ -31,31 +31,35 @@ module Chem
31
31
  end
32
32
  end
33
33
 
34
+ # Fix me! This is not sufficient
34
35
  def canonical_ring ring
35
- # ring.sort # Fix me! This is not sufficient
36
- ring.sort{|a, b| @atoms.index(a) <=> @atoms.index(b)}
36
+ ring.sort{|a, b| nodes.index(a) <=> nodes.index(b)}
37
37
  end
38
38
 
39
+ # Returns Smallest Set of Smallest Ring
39
40
  def find_sssr
40
- return @sssr if @sssr
41
- fullSet = []
41
+
42
+ fullSet = nodes.dup
42
43
  trimSet = []
43
44
  rings = []
44
- @mol = {}
45
- # mol = {1=>[2, 5], 2=> [1, 3], 3=> [2, 4], 4=>[3, 5], 5=>[4,1]}
46
- # @nodes.each do |k, atom|
47
- # atom.set_neighbor
48
- # @mol[atom] = atom.neighbor
49
- # end
45
+ mol = {}
46
+
47
+ nodes.each do |node|
48
+ mol[node] = []
49
+ adjacent_to(node).each do |bond, atom|
50
+ mol[node] << atom
51
+ end
52
+ end
50
53
 
51
54
  loop do
52
55
  nodesN2 = []
53
56
  smallest_degree = 10
54
57
  smallest = nil
55
- @mol.each do |k, a|
58
+
59
+ mol.each do |k, a|
56
60
  case a.length
57
61
  when 0
58
- @mol.delete(k)# Is this OK?
62
+ mol.delete(k)# Is this OK?
59
63
  trimSet.push(k)
60
64
  when 2
61
65
  nodesN2.push(k)
@@ -65,36 +69,40 @@ module Chem
65
69
  smallest_degree = a.length
66
70
  end
67
71
  end
72
+
68
73
  case smallest_degree
69
74
  when 1
70
- trim(smallest)
75
+ trim(mol, smallest)
71
76
  when 2
72
77
  nodesN2.each do |k|
73
78
  ring = find_smallest_ring(k)
74
- # rings.push(canonical_ring(ring)) if !rings.include?(canonical_ring(ring))
75
- rings.push(canonical_ring(ring)) if ring && !rings.include?(canonical_ring(ring))
79
+ if ring && !rings.include?(canonical_ring(ring))
80
+ rings.push(canonical_ring(ring))
81
+ end
76
82
  end
77
83
  nodesN2.each do |k|
78
- trim(k)
84
+ trim(mol, k)
79
85
  end
80
86
  when 3
81
87
  ring = find_smallest_ring(smallest)
82
- trim(smallest)
88
+ trim(mol, smallest)
83
89
  end
84
- break if @mol.length == 0
90
+
91
+ break if mol.length == 0
85
92
  end
86
- @sssr = rings
93
+ rings
87
94
  end
88
95
 
89
- def trim smallest
90
- if @mol.length > 0 && @mol.include?(smallest)
91
- @mol[smallest].each do |n|
92
- @mol[n] = @mol[n] - [smallest]
93
- @mol.delete(smallest)
94
- @mol.delete(n) if @mol[n].length == 0
96
+ def trim mol, smallest
97
+ if mol.length > 0 && mol.include?(smallest)
98
+ mol[smallest].each do |n|
99
+ mol[n] = mol[n] - [smallest]
100
+ mol.delete(smallest)
101
+ mol.delete(n) if mol[n].length == 0
95
102
  end
96
103
  end
97
104
  end
105
+ private :trim
98
106
 
99
107
  end
100
108
  end
@@ -1,3 +1,9 @@
1
+ #
2
+ # = chem/utils/sub.rb - Subgraph
3
+ #
4
+ # Author:: Nobuya Tanaka <t@chemruby.org>
5
+ #
6
+ # Copyright:: Copyright (c) 2005, 2006 ChemRuby project
1
7
 
2
8
  module Chem
3
9
  module Molecule
@@ -8,10 +8,11 @@ module Chem
8
8
 
9
9
  module TwoDimension
10
10
 
11
- def initialize
12
- super
13
- @pos = Vector[0.0, 0.0]
14
- end
11
+ # attr_reader :pos
12
+ # def initialize
13
+ # super
14
+ # @pos = Vector[0.0, 0.0]
15
+ # end
15
16
 
16
17
  def pos ; @pos ||= Vector[@x, @y, @z] ; end
17
18
  def x ; pos[0] ; end
@@ -23,10 +24,10 @@ module Chem
23
24
  module ThreeDimension
24
25
  include TwoDimension
25
26
 
26
- def initialize
27
- super
28
- @pos = Vector[0.0, 0.0]
29
- end
27
+ # def initialize
28
+ # super
29
+ # @pos = Vector[0.0, 0.0]
30
+ # end
30
31
 
31
32
  def z ; pos[2] ; end
32
33
  def z=(z_val) ; pos[2] = z_val ; end
@@ -1,134 +1,177 @@
1
1
  #
2
- # = chem/utils/subgraph.rb - Subgraph isomorphism
2
+ # = chem/utils/ullmann.rb - Subgraph isomorphism
3
3
  #
4
- # Author:: Nobuya Tanaka <tanaka@chemruby.org>
4
+ # Author:: Nobuya Tanaka <t@chemruby.org>
5
5
  #
6
- # Copyright:: Copyright (c) 2001, 2005 ChemRuby project
6
+ # Copyright:: Copyright (c) 2005, 2006 ChemRuby project
7
7
  #
8
- # $Id: ullmann.rb 139 2006-02-07 07:39:20Z tanaka $
8
+ # $Id: ullmann.rb 180 2006-04-19 08:52:15Z tanaka $
9
9
  #
10
10
 
11
- require 'subcomp'
12
-
13
11
  $ARC = 4 # for 32-bit computer
14
12
 
15
- module Graph
13
+ ARCH = 32
16
14
 
17
- def adj_matrix
18
- n_long = (nodes.length - 1) / 32 + 1
19
- mat = Array.new(n_long * @nodes.length, 0)
20
- nodes.each_with_index do |node, idx|
21
- adjacent_to(node).each do |bond, node|
22
- keta = nodes.index(node) / 32
23
- mat[idx * n_long + keta] += 1 << (nodes.index(node) - keta * 32)
24
- end
25
- end
26
- mat.pack("L*")
27
- end
15
+ module Chem
28
16
 
29
- def match_by_adj_mat mat, len
30
- m = Array.new("0xff", 100).pack("c*")
31
- subcomp_match_by_ullmann(mat, len, self.adjacency_list, self.nodes.length, m)
32
- end
17
+ module Molecule
33
18
 
34
- def match_by_ullmann other, &block
35
- if other.nodes.length == 1
36
- self.nodes.find{|node| node.element == other.nodes[0].element}
19
+ def match_by_ullmann(target, &block)
20
+ require 'subcomp'
21
+ Chem.match_by_ullmann(self, target, &block)
37
22
  end
38
- subcomp_match_by_ullmann(adj_matrix, nodes.length, other.adjacency_list, other.nodes.length, other.matchable(self, &block))
39
- end
40
- alias match match_by_ullmann
41
-
42
- # returns match correspondences without duplicate
43
- def match_exhaustively other
44
- correspond = {}
45
- result = []
46
- while true
47
- match = self.match_by_ullmann(other) do |a, b|
48
- a.element == b.element and not (correspond[a] and correspond[a].include? b)
23
+
24
+ def match(target, &block)
25
+ ary = nil
26
+
27
+ if block_given?
28
+ ary = match_by_ullmann(target){ |i, j|
29
+ yield(self.nodes[i], target.nodes[j])
30
+ }
31
+ else
32
+ ary = match_by_ullmann(target)
49
33
  end
50
- break if not match
51
- result.push(match)
52
- match.each_with_index do |n, m|
53
- (correspond[other.nodes[n]] ||=[]).push @nodes[m]
34
+
35
+ ret = []
36
+ ary.each do |a|
37
+ hash = {}
38
+ a.each_with_index do |i, j|
39
+ hash[nodes[j]] = target.nodes[i]
40
+ end
41
+ hash
42
+ ret << hash
54
43
  end
44
+ ret
45
+ end
46
+
47
+ def typ_str
48
+ nodes.collect{|atom| atom.atomic_number}.pack("l*")
55
49
  end
56
- result
57
- end
58
50
 
59
- def matchable other, exlucde = {}
60
- n_long = (other.nodes.length - 1) / 32 + 1
61
- mat = Array.new(n_long * @nodes.length, 0)
62
- @nodes.each_with_index do |node, index|
63
- other.nodes.each_with_index do |n, idx|
64
- if node.element == n.element
65
- keta = idx / 32
66
- mat[index * n_long + keta] += 1 << (idx - keta * 32)
51
+ def adjacent_index
52
+ nodes.inject([]) do |ret, node|
53
+ ary = ret[nodes.index(node)] = []
54
+ adjacent_to(node).each do |bond, ad_node|
55
+ ary << nodes.index(ad_node)
67
56
  end
57
+ ret
68
58
  end
69
59
  end
70
- mat.pack("L*")
71
- end
72
60
 
73
- #obsolete
74
- def matchable_old other, exlucde = {}
75
- n_long = (other.nodes.length - 1) / 32 + 1
76
- row_unit = n_long * ( 32 / 8)
77
- r = "\0" * 10000
78
- if block_given?
79
- @nodes.each_with_index do |node, index|
80
- other.nodes.each_with_index do |o_node, idx|
81
- if yield(node, o_node)
82
- col_byte = idx / 8
83
- col_bit = idx - col_byte * 8
84
- r[index * row_unit + col_byte] += (1 << col_bit)
61
+ def bit_mat
62
+ bm = BitMatrix.new(nodes.length, nodes.length)
63
+ if edges.length == 0
64
+ bm.has_matrix = false
65
+ else
66
+ adj = {}
67
+ nodes.each do |node|
68
+ adj[node] = []
69
+ adjacent_to(node).each do |bond, to|
70
+ adj[node] << to
85
71
  end
86
72
  end
87
- end
88
- else
89
- @nodes.each_with_index do |node, index|
90
- other.nodes.each_with_index do |o_node, idx|
91
- if node.element == o_node.element or node.element == :R or o_node.element == :R
92
- col_byte = idx / 8
93
- col_bit = idx - col_byte * 8
94
- r[index * row_unit + col_byte] += (1 << col_bit)
73
+
74
+ nodes.each_with_index do |atom1, idx1|
75
+ ary = []
76
+ nodes.each_with_index do |atom2, idx2|
77
+ if adj[atom1].include?(atom2)
78
+ bm.set(idx1, idx2)
79
+ end
95
80
  end
96
81
  end
97
82
  end
83
+ bm
98
84
  end
99
- r
85
+
100
86
  end
101
87
 
102
- def adjacency_list
103
- ret = []
104
- @nodes.each do |node|
105
- r = []
106
- self.adjacent_to(node).each do |bond, to|
107
- r << @nodes.index(to)
88
+ class BitMatrix
89
+
90
+ attr_reader :height, :widht, :n_bytes
91
+ attr_accessor :has_matrix
92
+
93
+ def initialize(height, width)
94
+ @height = height
95
+ @width = width
96
+ @n_bytes = (width - 1) / ARCH + 1
97
+ @bits = []
98
+ height.times do |n|
99
+ @bits[n] = []
100
+ @n_bytes.times do |m|
101
+ @bits[n][m] = 0
102
+ end
108
103
  end
109
- ret << r
104
+ @has_matrix = true
110
105
  end
111
- ret
112
- end
113
106
 
114
- # Obsolete!?
115
- def connection
116
- self_adj = []
117
- @nodes.each do |node|
118
- i = 0
119
- self.adjacent_to(node).each do |bond, to|
120
- i += 1<< @nodes.index(to)
107
+ def set(row, col)
108
+ @bits[row][col / ARCH] += (1 << (col % ARCH))
109
+ end
110
+
111
+ def to_s
112
+ s = " "
113
+ @width.times{|n| s << "%d" % (n % 10)}
114
+ s << "\n"
115
+ @bits.each_with_index do |ary, idx|
116
+ s << "%3d " % idx
117
+ ary.each_with_index do |a, idx2|
118
+ s << bit_to_str(a, (idx2 == @n_bytes - 1) ? (@width % ARCH) : ARCH)
119
+ end
120
+ s << "\n"
121
121
  end
122
- self_adj << i
122
+ s
123
123
  end
124
- self_adj
124
+
125
+ def bit_str
126
+ @bits.flatten.pack("L*")
127
+ end
128
+
129
+ def bit_to_str bits, num
130
+ s = ""
131
+ num.times do |n|
132
+ s << (((1 << n) & bits != 0) ? "*" : ".")
133
+ end
134
+ s
135
+ end
136
+ private :bit_to_str
137
+
125
138
  end
126
139
 
127
- end
140
+ # Database Specification
141
+ # * idx file
142
+ # 32 bit : n_bytes
143
+ class CompoundDB
144
+
145
+ def initialize(name)
146
+ @current_id = 0
147
+ @mat = File.open(name + ".mat", "w")
148
+ @idx = File.open(name + ".idx", "w")
149
+ @typ = File.open(name + ".typ", "w")
150
+ end
151
+
152
+ def store(mol)
153
+ bm = mol.bit_mat
154
+ @current_id += 1
155
+
156
+ if bm.has_matrix
157
+ @idx.print [bm.height, bm.n_bytes, @mat.tell, 0].pack("l*")
158
+ @mat.print bm.bit_str
159
+ else
160
+ @idx.print [bm.height, bm.n_bytes, @mat.tell, -1].pack("l*")
161
+ end
162
+ @typ.print mol.typ_str
163
+ @current_id
164
+ end
165
+
166
+ def close
167
+ @idx.print [-1, -1, -1].pack("l*")
168
+
169
+ @mat.close
170
+ @idx.close
171
+ @typ.close
172
+ end
128
173
 
129
- module Chem
130
- module Molecule
131
- include Graph
132
174
  end
175
+
133
176
  end
134
177
 
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # graph.rb - Graph
3
3
  #
4
- # Copyright (C) 2003-2005 TANAKA Nobuya <tanaka@chemruby.net>
4
+ # Copyright (C) 2005, 2006 TANAKA Nobuya <t@chemruby.net>
5
5
  #
6
6
  # $Id: graph.rb 61 2005-10-12 09:17:39Z tanaka $
7
7
  #
@@ -9,23 +9,22 @@
9
9
 
10
10
  require 'graph/morgan'
11
11
  require 'graph/cluster'
12
+ require 'graph/utils'
12
13
 
13
14
  module Graph
14
15
 
15
16
  attr_accessor :nodes, :edges, :adjacencies
16
17
 
17
18
  def each
18
- @nodes.each do |atom|
19
+ nodes.each do |atom|
19
20
  yield atom
20
21
  end
21
22
  end
22
23
 
23
24
  def adjacent_to(atom)
24
- # instance_eval "alias :tmp_adjacent_to :adjacent_to"
25
- # instance_eval "alias :adjacent_to :adjacencies"
26
25
  if @adjacencies == nil
27
- @adjacencies = {}
28
- @edges.each do |bond, atom_a, atom_b|
26
+ @adjacencies = Hash.new
27
+ edges.each do |bond, atom_a, atom_b|
29
28
  (@adjacencies[atom_a] ||= []).push([bond, atom_b])
30
29
  (@adjacencies[atom_b] ||= []).push([bond, atom_a])
31
30
  end