chemruby 0.9.3 → 1.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. data/README +2 -2
  2. data/Rakefile +67 -63
  3. data/ext/extconf.rb +2 -0
  4. data/ext/subcomp.c +461 -320
  5. data/ext/utils.c +56 -0
  6. data/ext/utils.h +13 -0
  7. data/lib/chem.rb +34 -8
  8. data/lib/chem/db.rb +8 -0
  9. data/lib/chem/db/cansmi.rb +1 -1
  10. data/lib/chem/db/cdx.rb +1 -1
  11. data/lib/chem/db/cml.rb +52 -0
  12. data/lib/chem/db/gd.rb +64 -0
  13. data/lib/chem/db/gspan.rb +2 -2
  14. data/lib/chem/db/kcf_rpair.rb +34 -0
  15. data/lib/chem/db/kegg.rb +35 -1
  16. data/lib/chem/db/mdl.rb +75 -34
  17. data/lib/chem/db/opsin.rb +24 -0
  18. data/lib/chem/db/pdb.rb +105 -0
  19. data/lib/chem/db/pdf.rb +2 -0
  20. data/lib/chem/db/pubchem.rb +1071 -88
  21. data/lib/chem/db/rmagick.rb +5 -3
  22. data/lib/chem/db/sdf.rb +28 -2
  23. data/lib/chem/db/smiles/smiles.ry +27 -25
  24. data/lib/chem/db/smiles/smiparser.rb +29 -27
  25. data/lib/chem/db/types/type_gd.rb +35 -0
  26. data/lib/chem/db/types/type_gspan.rb +2 -2
  27. data/lib/chem/db/types/type_kcf.rb +19 -0
  28. data/lib/chem/db/types/type_kegg.rb +2 -0
  29. data/lib/chem/db/types/type_mdl.rb +1 -1
  30. data/lib/chem/db/types/type_png.rb +5 -1
  31. data/lib/chem/db/types/type_rdf.rb +22 -0
  32. data/lib/chem/db/types/type_xyz.rb +1 -1
  33. data/lib/chem/db/vector.rb +19 -3
  34. data/lib/chem/model.rb +5 -2
  35. data/lib/chem/utils.rb +17 -1
  36. data/lib/chem/utils/bitdb.rb +49 -0
  37. data/lib/chem/utils/cas.rb +28 -0
  38. data/lib/chem/utils/cdk.rb +403 -0
  39. data/lib/chem/utils/fingerprint.rb +98 -0
  40. data/lib/chem/utils/geometry.rb +8 -0
  41. data/lib/chem/utils/net.rb +303 -0
  42. data/lib/chem/utils/once.rb +28 -0
  43. data/lib/chem/utils/openbabel.rb +204 -0
  44. data/lib/chem/utils/sssr.rb +33 -25
  45. data/lib/chem/utils/sub.rb +6 -0
  46. data/lib/chem/utils/transform.rb +9 -8
  47. data/lib/chem/utils/ullmann.rb +138 -95
  48. data/lib/graph.rb +5 -6
  49. data/lib/graph/utils.rb +8 -0
  50. data/sample/calc_maximum_common_subgraph.rb +27 -0
  51. data/sample/calc_properties.rb +9 -0
  52. data/sample/data/atp.mol +69 -0
  53. data/sample/data/pioglitazone.mol +58 -0
  54. data/sample/data/rosiglitazone.mol +55 -0
  55. data/sample/data/troglitazone.mol +70 -0
  56. data/sample/find_compound_by_keggapi.rb +19 -0
  57. data/sample/generate_inchi.rb +7 -0
  58. data/sample/generate_substructurekey.rb +11 -0
  59. data/sample/images/ex6.rb +17 -0
  60. data/sample/images/ex7.rb +18 -0
  61. data/sample/iupac2mol.rb +8 -0
  62. data/sample/kekule.rb +13 -0
  63. data/sample/logp.rb +4 -0
  64. data/sample/mcs.rb +13 -0
  65. data/sample/mol2pdf.rb +8 -0
  66. data/sample/pubchem_fetch.rb +8 -0
  67. data/sample/pubchem_search.rb +12 -0
  68. data/sample/rosiglitazone.mol +57 -0
  69. data/sample/smarts.rb +10 -0
  70. data/sample/structure_match.rb +8 -0
  71. data/sample/structure_match_color.rb +22 -0
  72. data/sample/thiazolidinedione.mol +19 -0
  73. data/sample/troglitazone.mol +232 -0
  74. data/sample/vicinity.rb +8 -0
  75. data/test/data/CID_704.sdf +236 -0
  76. data/test/data/CID_994.sdf +146 -0
  77. data/test/data/db_EXPT03276.txt +321 -0
  78. data/test/data/pioglitazone.mol +58 -0
  79. data/test/data/rosiglitazone.mol +55 -0
  80. data/test/data/thiazolidinedione.mol +19 -0
  81. data/test/data/troglitazone.mol +70 -0
  82. data/test/{test_adj.rb → tc_adj.rb} +0 -0
  83. data/test/{test_canonical_smiles.rb → tc_canonical_smiles.rb} +0 -0
  84. data/test/tc_casrn.rb +17 -0
  85. data/test/tc_cdk.rb +89 -0
  86. data/test/{test_cdx.rb → tc_cdx.rb} +0 -0
  87. data/test/{test_chem.rb → tc_chem.rb} +0 -0
  88. data/test/{test_cluster.rb → tc_cluster.rb} +0 -0
  89. data/test/{test_db.rb → tc_db.rb} +0 -0
  90. data/test/tc_develop.rb +38 -0
  91. data/test/tc_drugbank.rb +13 -0
  92. data/test/{test_eps.rb → tc_eps.rb} +0 -0
  93. data/test/tc_gd.rb +8 -0
  94. data/test/{test_geometry.rb → tc_geometry.rb} +0 -0
  95. data/test/tc_graph.rb +15 -0
  96. data/test/{test_gspan.rb → tc_gspan.rb} +0 -0
  97. data/test/{test_iupac.rb → tc_iupac.rb} +0 -0
  98. data/test/{test_kcf.rb → tc_kcf.rb} +0 -0
  99. data/test/{test_kcf_glycan.rb → tc_kcf_glycan.rb} +0 -0
  100. data/test/{test_kegg.rb → tc_kegg.rb} +13 -0
  101. data/test/{test_linucs.rb → tc_linucs.rb} +0 -0
  102. data/test/{test_mdl.rb → tc_mdl.rb} +20 -0
  103. data/test/{test_mol2.rb → tc_mol2.rb} +1 -1
  104. data/test/{test_morgan.rb → tc_morgan.rb} +0 -0
  105. data/test/tc_net.rb +5 -0
  106. data/test/tc_once.rb +29 -0
  107. data/test/tc_openbabel.rb +57 -0
  108. data/test/{test_pdf.rb → tc_pdf.rb} +0 -0
  109. data/test/{test_prop.rb → tc_prop.rb} +1 -1
  110. data/test/tc_pubchem.rb +32 -0
  111. data/test/{test_rmagick.rb → tc_rmagick.rb} +0 -0
  112. data/test/{test_sbdb.rb → tc_sbdb.rb} +0 -0
  113. data/test/{test_sdf.rb → tc_sdf.rb} +2 -0
  114. data/test/{test_smiles.rb → tc_smiles.rb} +46 -30
  115. data/test/tc_sssr.rb +1 -0
  116. data/test/{test_sub.rb → tc_sub.rb} +0 -0
  117. data/test/tc_subcomp.rb +59 -0
  118. data/test/{test_traverse.rb → tc_traverse.rb} +0 -0
  119. data/test/{test_writer.rb → tc_writer.rb} +0 -0
  120. data/test/{test_xyz.rb → tc_xyz.rb} +0 -0
  121. data/test/ts_current.rb +11 -0
  122. data/test/ts_image.rb +6 -0
  123. data/test/ts_main.rb +12 -0
  124. metadata +259 -194
  125. data/lib/chem/utils/graph_db.rb +0 -146
  126. data/test/test_sssr.rb +0 -18
  127. data/test/test_subcomp.rb +0 -37
@@ -31,31 +31,35 @@ module Chem
31
31
  end
32
32
  end
33
33
 
34
+ # Fix me! This is not sufficient
34
35
  def canonical_ring ring
35
- # ring.sort # Fix me! This is not sufficient
36
- ring.sort{|a, b| @atoms.index(a) <=> @atoms.index(b)}
36
+ ring.sort{|a, b| nodes.index(a) <=> nodes.index(b)}
37
37
  end
38
38
 
39
+ # Returns the Smallest Set of Smallest Rings (SSSR)
39
40
  def find_sssr
40
- return @sssr if @sssr
41
- fullSet = []
41
+
42
+ fullSet = nodes.dup
42
43
  trimSet = []
43
44
  rings = []
44
- @mol = {}
45
- # mol = {1=>[2, 5], 2=> [1, 3], 3=> [2, 4], 4=>[3, 5], 5=>[4,1]}
46
- # @nodes.each do |k, atom|
47
- # atom.set_neighbor
48
- # @mol[atom] = atom.neighbor
49
- # end
45
+ mol = {}
46
+
47
+ nodes.each do |node|
48
+ mol[node] = []
49
+ adjacent_to(node).each do |bond, atom|
50
+ mol[node] << atom
51
+ end
52
+ end
50
53
 
51
54
  loop do
52
55
  nodesN2 = []
53
56
  smallest_degree = 10
54
57
  smallest = nil
55
- @mol.each do |k, a|
58
+
59
+ mol.each do |k, a|
56
60
  case a.length
57
61
  when 0
58
- @mol.delete(k)# Is this OK?
62
+ mol.delete(k)# Is this OK?
59
63
  trimSet.push(k)
60
64
  when 2
61
65
  nodesN2.push(k)
@@ -65,36 +69,40 @@ module Chem
65
69
  smallest_degree = a.length
66
70
  end
67
71
  end
72
+
68
73
  case smallest_degree
69
74
  when 1
70
- trim(smallest)
75
+ trim(mol, smallest)
71
76
  when 2
72
77
  nodesN2.each do |k|
73
78
  ring = find_smallest_ring(k)
74
- # rings.push(canonical_ring(ring)) if !rings.include?(canonical_ring(ring))
75
- rings.push(canonical_ring(ring)) if ring && !rings.include?(canonical_ring(ring))
79
+ if ring && !rings.include?(canonical_ring(ring))
80
+ rings.push(canonical_ring(ring))
81
+ end
76
82
  end
77
83
  nodesN2.each do |k|
78
- trim(k)
84
+ trim(mol, k)
79
85
  end
80
86
  when 3
81
87
  ring = find_smallest_ring(smallest)
82
- trim(smallest)
88
+ trim(mol, smallest)
83
89
  end
84
- break if @mol.length == 0
90
+
91
+ break if mol.length == 0
85
92
  end
86
- @sssr = rings
93
+ rings
87
94
  end
88
95
 
89
- def trim smallest
90
- if @mol.length > 0 && @mol.include?(smallest)
91
- @mol[smallest].each do |n|
92
- @mol[n] = @mol[n] - [smallest]
93
- @mol.delete(smallest)
94
- @mol.delete(n) if @mol[n].length == 0
96
+ def trim mol, smallest
97
+ if mol.length > 0 && mol.include?(smallest)
98
+ mol[smallest].each do |n|
99
+ mol[n] = mol[n] - [smallest]
100
+ mol.delete(smallest)
101
+ mol.delete(n) if mol[n].length == 0
95
102
  end
96
103
  end
97
104
  end
105
+ private :trim
98
106
 
99
107
  end
100
108
  end
@@ -1,3 +1,9 @@
1
+ #
2
+ # = chem/utils/sub.rb - Subgraph
3
+ #
4
+ # Author:: Nobuya Tanaka <t@chemruby.org>
5
+ #
6
+ # Copyright:: Copyright (c) 2005, 2006 ChemRuby project
1
7
 
2
8
  module Chem
3
9
  module Molecule
@@ -8,10 +8,11 @@ module Chem
8
8
 
9
9
  module TwoDimension
10
10
 
11
- def initialize
12
- super
13
- @pos = Vector[0.0, 0.0]
14
- end
11
+ # attr_reader :pos
12
+ # def initialize
13
+ # super
14
+ # @pos = Vector[0.0, 0.0]
15
+ # end
15
16
 
16
17
  def pos ; @pos ||= Vector[@x, @y, @z] ; end
17
18
  def x ; pos[0] ; end
@@ -23,10 +24,10 @@ module Chem
23
24
  module ThreeDimension
24
25
  include TwoDimension
25
26
 
26
- def initialize
27
- super
28
- @pos = Vector[0.0, 0.0]
29
- end
27
+ # def initialize
28
+ # super
29
+ # @pos = Vector[0.0, 0.0]
30
+ # end
30
31
 
31
32
  def z ; pos[2] ; end
32
33
  def z=(z_val) ; pos[2] = z_val ; end
@@ -1,134 +1,177 @@
1
1
  #
2
- # = chem/utils/subgraph.rb - Subgraph isomorphism
2
+ # = chem/utils/ullmann.rb - Subgraph isomorphism
3
3
  #
4
- # Author:: Nobuya Tanaka <tanaka@chemruby.org>
4
+ # Author:: Nobuya Tanaka <t@chemruby.org>
5
5
  #
6
- # Copyright:: Copyright (c) 2001, 2005 ChemRuby project
6
+ # Copyright:: Copyright (c) 2005, 2006 ChemRuby project
7
7
  #
8
- # $Id: ullmann.rb 139 2006-02-07 07:39:20Z tanaka $
8
+ # $Id: ullmann.rb 180 2006-04-19 08:52:15Z tanaka $
9
9
  #
10
10
 
11
- require 'subcomp'
12
-
13
11
  $ARC = 4 # for 32-bit computer
14
12
 
15
- module Graph
13
+ ARCH = 32
16
14
 
17
- def adj_matrix
18
- n_long = (nodes.length - 1) / 32 + 1
19
- mat = Array.new(n_long * @nodes.length, 0)
20
- nodes.each_with_index do |node, idx|
21
- adjacent_to(node).each do |bond, node|
22
- keta = nodes.index(node) / 32
23
- mat[idx * n_long + keta] += 1 << (nodes.index(node) - keta * 32)
24
- end
25
- end
26
- mat.pack("L*")
27
- end
15
+ module Chem
28
16
 
29
- def match_by_adj_mat mat, len
30
- m = Array.new("0xff", 100).pack("c*")
31
- subcomp_match_by_ullmann(mat, len, self.adjacency_list, self.nodes.length, m)
32
- end
17
+ module Molecule
33
18
 
34
- def match_by_ullmann other, &block
35
- if other.nodes.length == 1
36
- self.nodes.find{|node| node.element == other.nodes[0].element}
19
+ def match_by_ullmann(target, &block)
20
+ require 'subcomp'
21
+ Chem.match_by_ullmann(self, target, &block)
37
22
  end
38
- subcomp_match_by_ullmann(adj_matrix, nodes.length, other.adjacency_list, other.nodes.length, other.matchable(self, &block))
39
- end
40
- alias match match_by_ullmann
41
-
42
- # returns match correspondences without duplicate
43
- def match_exhaustively other
44
- correspond = {}
45
- result = []
46
- while true
47
- match = self.match_by_ullmann(other) do |a, b|
48
- a.element == b.element and not (correspond[a] and correspond[a].include? b)
23
+
24
+ def match(target, &block)
25
+ ary = nil
26
+
27
+ if block_given?
28
+ ary = match_by_ullmann(target){ |i, j|
29
+ yield(self.nodes[i], target.nodes[j])
30
+ }
31
+ else
32
+ ary = match_by_ullmann(target)
49
33
  end
50
- break if not match
51
- result.push(match)
52
- match.each_with_index do |n, m|
53
- (correspond[other.nodes[n]] ||=[]).push @nodes[m]
34
+
35
+ ret = []
36
+ ary.each do |a|
37
+ hash = {}
38
+ a.each_with_index do |i, j|
39
+ hash[nodes[j]] = target.nodes[i]
40
+ end
41
+ hash
42
+ ret << hash
54
43
  end
44
+ ret
45
+ end
46
+
47
+ def typ_str
48
+ nodes.collect{|atom| atom.atomic_number}.pack("l*")
55
49
  end
56
- result
57
- end
58
50
 
59
- def matchable other, exlucde = {}
60
- n_long = (other.nodes.length - 1) / 32 + 1
61
- mat = Array.new(n_long * @nodes.length, 0)
62
- @nodes.each_with_index do |node, index|
63
- other.nodes.each_with_index do |n, idx|
64
- if node.element == n.element
65
- keta = idx / 32
66
- mat[index * n_long + keta] += 1 << (idx - keta * 32)
51
+ def adjacent_index
52
+ nodes.inject([]) do |ret, node|
53
+ ary = ret[nodes.index(node)] = []
54
+ adjacent_to(node).each do |bond, ad_node|
55
+ ary << nodes.index(ad_node)
67
56
  end
57
+ ret
68
58
  end
69
59
  end
70
- mat.pack("L*")
71
- end
72
60
 
73
- #obsolete
74
- def matchable_old other, exlucde = {}
75
- n_long = (other.nodes.length - 1) / 32 + 1
76
- row_unit = n_long * ( 32 / 8)
77
- r = "\0" * 10000
78
- if block_given?
79
- @nodes.each_with_index do |node, index|
80
- other.nodes.each_with_index do |o_node, idx|
81
- if yield(node, o_node)
82
- col_byte = idx / 8
83
- col_bit = idx - col_byte * 8
84
- r[index * row_unit + col_byte] += (1 << col_bit)
61
+ def bit_mat
62
+ bm = BitMatrix.new(nodes.length, nodes.length)
63
+ if edges.length == 0
64
+ bm.has_matrix = false
65
+ else
66
+ adj = {}
67
+ nodes.each do |node|
68
+ adj[node] = []
69
+ adjacent_to(node).each do |bond, to|
70
+ adj[node] << to
85
71
  end
86
72
  end
87
- end
88
- else
89
- @nodes.each_with_index do |node, index|
90
- other.nodes.each_with_index do |o_node, idx|
91
- if node.element == o_node.element or node.element == :R or o_node.element == :R
92
- col_byte = idx / 8
93
- col_bit = idx - col_byte * 8
94
- r[index * row_unit + col_byte] += (1 << col_bit)
73
+
74
+ nodes.each_with_index do |atom1, idx1|
75
+ ary = []
76
+ nodes.each_with_index do |atom2, idx2|
77
+ if adj[atom1].include?(atom2)
78
+ bm.set(idx1, idx2)
79
+ end
95
80
  end
96
81
  end
97
82
  end
83
+ bm
98
84
  end
99
- r
85
+
100
86
  end
101
87
 
102
- def adjacency_list
103
- ret = []
104
- @nodes.each do |node|
105
- r = []
106
- self.adjacent_to(node).each do |bond, to|
107
- r << @nodes.index(to)
88
+ class BitMatrix
89
+
90
+ attr_reader :height, :widht, :n_bytes
91
+ attr_accessor :has_matrix
92
+
93
+ def initialize(height, width)
94
+ @height = height
95
+ @width = width
96
+ @n_bytes = (width - 1) / ARCH + 1
97
+ @bits = []
98
+ height.times do |n|
99
+ @bits[n] = []
100
+ @n_bytes.times do |m|
101
+ @bits[n][m] = 0
102
+ end
108
103
  end
109
- ret << r
104
+ @has_matrix = true
110
105
  end
111
- ret
112
- end
113
106
 
114
- # Obsolete!?
115
- def connection
116
- self_adj = []
117
- @nodes.each do |node|
118
- i = 0
119
- self.adjacent_to(node).each do |bond, to|
120
- i += 1<< @nodes.index(to)
107
+ def set(row, col)
108
+ @bits[row][col / ARCH] += (1 << (col % ARCH))
109
+ end
110
+
111
+ def to_s
112
+ s = " "
113
+ @width.times{|n| s << "%d" % (n % 10)}
114
+ s << "\n"
115
+ @bits.each_with_index do |ary, idx|
116
+ s << "%3d " % idx
117
+ ary.each_with_index do |a, idx2|
118
+ s << bit_to_str(a, (idx2 == @n_bytes - 1) ? (@width % ARCH) : ARCH)
119
+ end
120
+ s << "\n"
121
121
  end
122
- self_adj << i
122
+ s
123
123
  end
124
- self_adj
124
+
125
+ def bit_str
126
+ @bits.flatten.pack("L*")
127
+ end
128
+
129
+ def bit_to_str bits, num
130
+ s = ""
131
+ num.times do |n|
132
+ s << (((1 << n) & bits != 0) ? "*" : ".")
133
+ end
134
+ s
135
+ end
136
+ private :bit_to_str
137
+
125
138
  end
126
139
 
127
- end
140
+ # Database Specification
141
+ # * idx file
142
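+ # one record per stored molecule, four 32-bit signed integers: height, n_bytes, offset into the .mat file, flag (0 = matrix stored, -1 = no matrix); the file ends with three -1 values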
143
+ class CompoundDB
144
+
145
+ def initialize(name)
146
+ @current_id = 0
147
+ @mat = File.open(name + ".mat", "w")
148
+ @idx = File.open(name + ".idx", "w")
149
+ @typ = File.open(name + ".typ", "w")
150
+ end
151
+
152
+ def store(mol)
153
+ bm = mol.bit_mat
154
+ @current_id += 1
155
+
156
+ if bm.has_matrix
157
+ @idx.print [bm.height, bm.n_bytes, @mat.tell, 0].pack("l*")
158
+ @mat.print bm.bit_str
159
+ else
160
+ @idx.print [bm.height, bm.n_bytes, @mat.tell, -1].pack("l*")
161
+ end
162
+ @typ.print mol.typ_str
163
+ @current_id
164
+ end
165
+
166
+ def close
167
+ @idx.print [-1, -1, -1].pack("l*")
168
+
169
+ @mat.close
170
+ @idx.close
171
+ @typ.close
172
+ end
128
173
 
129
- module Chem
130
- module Molecule
131
- include Graph
132
174
  end
175
+
133
176
  end
134
177
 
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # graph.rb - Graph
3
3
  #
4
- # Copyright (C) 2003-2005 TANAKA Nobuya <tanaka@chemruby.net>
4
+ # Copyright (C) 2005, 2006 TANAKA Nobuya <t@chemruby.net>
5
5
  #
6
6
  # $Id: graph.rb 61 2005-10-12 09:17:39Z tanaka $
7
7
  #
@@ -9,23 +9,22 @@
9
9
 
10
10
  require 'graph/morgan'
11
11
  require 'graph/cluster'
12
+ require 'graph/utils'
12
13
 
13
14
  module Graph
14
15
 
15
16
  attr_accessor :nodes, :edges, :adjacencies
16
17
 
17
18
  def each
18
- @nodes.each do |atom|
19
+ nodes.each do |atom|
19
20
  yield atom
20
21
  end
21
22
  end
22
23
 
23
24
  def adjacent_to(atom)
24
- # instance_eval "alias :tmp_adjacent_to :adjacent_to"
25
- # instance_eval "alias :adjacent_to :adjacencies"
26
25
  if @adjacencies == nil
27
- @adjacencies = {}
28
- @edges.each do |bond, atom_a, atom_b|
26
+ @adjacencies = Hash.new
27
+ edges.each do |bond, atom_a, atom_b|
29
28
  (@adjacencies[atom_a] ||= []).push([bond, atom_b])
30
29
  (@adjacencies[atom_b] ||= []).push([bond, atom_a])
31
30
  end