chemruby 0.9.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (241) hide show
  1. data/README +120 -0
  2. data/Rakefile +195 -0
  3. data/ext/extconf.rb +4 -0
  4. data/ext/subcomp.c +416 -0
  5. data/lib/chem.rb +130 -0
  6. data/lib/chem/appl.rb +1 -0
  7. data/lib/chem/appl/chem3dole.rb +36 -0
  8. data/lib/chem/appl/tinker/nucleic.rb +40 -0
  9. data/lib/chem/appl/tinker/tinker_reader.rb +43 -0
  10. data/lib/chem/data.rb +4 -0
  11. data/lib/chem/data/atomic_weight.rb +124 -0
  12. data/lib/chem/data/character.rb +2 -0
  13. data/lib/chem/data/electronegativity.rb +14 -0
  14. data/lib/chem/data/periodic_table.rb +6 -0
  15. data/lib/chem/data/prime_numbers.rb +1 -0
  16. data/lib/chem/data/vdw_radii.rb +1 -0
  17. data/lib/chem/db.rb +64 -0
  18. data/lib/chem/db/cansmi.rb +234 -0
  19. data/lib/chem/db/cdx.rb +1525 -0
  20. data/lib/chem/db/eps.rb +164 -0
  21. data/lib/chem/db/g98.rb +909 -0
  22. data/lib/chem/db/gspan.rb +130 -0
  23. data/lib/chem/db/iupac.rb +5 -0
  24. data/lib/chem/db/iupac/a_1.rb +46 -0
  25. data/lib/chem/db/iupac/iuparser.rb +226 -0
  26. data/lib/chem/db/iupac/iuparser.ry +97 -0
  27. data/lib/chem/db/iupac/postfix.rb +2 -0
  28. data/lib/chem/db/kcf.rb +390 -0
  29. data/lib/chem/db/kcf_glycan.rb +19 -0
  30. data/lib/chem/db/kegg.rb +516 -0
  31. data/lib/chem/db/linucs/linparser.rb +144 -0
  32. data/lib/chem/db/linucs/linucs.ry +53 -0
  33. data/lib/chem/db/mdl.rb +379 -0
  34. data/lib/chem/db/molconnz.rb +12 -0
  35. data/lib/chem/db/mopac.rb +88 -0
  36. data/lib/chem/db/msi.rb +107 -0
  37. data/lib/chem/db/pdb_dic.rb +115 -0
  38. data/lib/chem/db/pdf.rb +131 -0
  39. data/lib/chem/db/pubchem.rb +113 -0
  40. data/lib/chem/db/rmagick.rb +70 -0
  41. data/lib/chem/db/sdf.rb +37 -0
  42. data/lib/chem/db/smbl.rb +88 -0
  43. data/lib/chem/db/smiles.rb +2 -0
  44. data/lib/chem/db/smiles/smiles.ry +203 -0
  45. data/lib/chem/db/smiles/smiparser.rb +375 -0
  46. data/lib/chem/db/swf.rb +74 -0
  47. data/lib/chem/db/sybyl.rb +150 -0
  48. data/lib/chem/db/tinker.rb +77 -0
  49. data/lib/chem/db/types/type_cansmi.rb +9 -0
  50. data/lib/chem/db/types/type_cdx.rb +24 -0
  51. data/lib/chem/db/types/type_gspan.rb +31 -0
  52. data/lib/chem/db/types/type_kcf.rb +28 -0
  53. data/lib/chem/db/types/type_kcf_glycan.rb +26 -0
  54. data/lib/chem/db/types/type_kegg.rb +92 -0
  55. data/lib/chem/db/types/type_mdl.rb +31 -0
  56. data/lib/chem/db/types/type_pdf.rb +33 -0
  57. data/lib/chem/db/types/type_png.rb +31 -0
  58. data/lib/chem/db/types/type_rxn.rb +25 -0
  59. data/lib/chem/db/types/type_sdf.rb +25 -0
  60. data/lib/chem/db/types/type_sybyl.rb +30 -0
  61. data/lib/chem/db/types/type_xyz.rb +26 -0
  62. data/lib/chem/db/vector.rb +128 -0
  63. data/lib/chem/db/xyz.rb +39 -0
  64. data/lib/chem/model.rb +119 -0
  65. data/lib/chem/model/skeleton.rb +37 -0
  66. data/lib/chem/utils.rb +11 -0
  67. data/lib/chem/utils/geometry.rb +27 -0
  68. data/lib/chem/utils/graph_db.rb +146 -0
  69. data/lib/chem/utils/math.rb +17 -0
  70. data/lib/chem/utils/prop.rb +123 -0
  71. data/lib/chem/utils/sssr.rb +101 -0
  72. data/lib/chem/utils/sub.rb +78 -0
  73. data/lib/chem/utils/transform.rb +110 -0
  74. data/lib/chem/utils/traverse.rb +37 -0
  75. data/lib/chem/utils/ullmann.rb +134 -0
  76. data/lib/graph.rb +41 -0
  77. data/lib/graph/cluster.rb +20 -0
  78. data/lib/graph/morgan.rb +38 -0
  79. data/sample/frequent_subgraph.rb +46 -0
  80. data/sample/images/ex1.rb +11 -0
  81. data/sample/images/ex2.rb +4 -0
  82. data/sample/images/ex3.rb +5 -0
  83. data/sample/images/ex4.rb +17 -0
  84. data/sample/images/ex5.rb +10 -0
  85. data/sample/images/mol/adenine.mol +26 -0
  86. data/sample/images/mol/atp.mol +69 -0
  87. data/sample/images/temp/ex5.mol +344 -0
  88. data/sample/kegg_db.rb +116 -0
  89. data/setup.rb +1551 -0
  90. data/test/all.rb +6 -0
  91. data/test/coord_test.rb +17 -0
  92. data/test/ctab_test.rb +31 -0
  93. data/test/data/A_21.tar.gz +0 -0
  94. data/test/data/A_21/aceanthrylene.cdx +0 -0
  95. data/test/data/A_21/aceanthrylene.mol +40 -0
  96. data/test/data/A_21/acenaphthylene.cdx +0 -0
  97. data/test/data/A_21/acenaphthylene.mol +31 -0
  98. data/test/data/A_21/acephenanthrylene.cdx +0 -0
  99. data/test/data/A_21/acephenanthrylene.mol +40 -0
  100. data/test/data/A_21/anthracene.cdx +0 -0
  101. data/test/data/A_21/anthracene.mol +35 -0
  102. data/test/data/A_21/as-indacene.cdx +0 -0
  103. data/test/data/A_21/as-indacene.mol +31 -0
  104. data/test/data/A_21/azulene.cdx +0 -0
  105. data/test/data/A_21/azulene.mol +26 -0
  106. data/test/data/A_21/biphenylene.cdx +0 -0
  107. data/test/data/A_21/biphenylene.mol +31 -0
  108. data/test/data/A_21/chrysene.cdx +0 -0
  109. data/test/data/A_21/chrysene.mol +44 -0
  110. data/test/data/A_21/coronen.cdx +0 -0
  111. data/test/data/A_21/coronen.mol +59 -0
  112. data/test/data/A_21/fluoranthene.cdx +0 -0
  113. data/test/data/A_21/fluoranthene.mol +40 -0
  114. data/test/data/A_21/fluorene.cdx +0 -0
  115. data/test/data/A_21/fluorene.mol +33 -0
  116. data/test/data/A_21/heptacene.cdx +0 -0
  117. data/test/data/A_21/heptacene.mol +71 -0
  118. data/test/data/A_21/heptalene.cdx +0 -0
  119. data/test/data/A_21/heptalene.mol +30 -0
  120. data/test/data/A_21/heptaphene.cdx +0 -0
  121. data/test/data/A_21/heptaphene.mol +71 -0
  122. data/test/data/A_21/hexacene.cdx +0 -0
  123. data/test/data/A_21/hexacene.mol +62 -0
  124. data/test/data/A_21/hexaphene.cdx +0 -0
  125. data/test/data/A_21/hexaphene.mol +62 -0
  126. data/test/data/A_21/indene.cdx +0 -0
  127. data/test/data/A_21/indene.mol +24 -0
  128. data/test/data/A_21/iupac.txt +41 -0
  129. data/test/data/A_21/naphthacene.cdx +0 -0
  130. data/test/data/A_21/naphthacene.mol +44 -0
  131. data/test/data/A_21/naphthalene.cdx +0 -0
  132. data/test/data/A_21/naphthalene.mol +26 -0
  133. data/test/data/A_21/ovalene.cdx +0 -0
  134. data/test/data/A_21/ovalene.mol +78 -0
  135. data/test/data/A_21/pentacene.cdx +0 -0
  136. data/test/data/A_21/pentacene.mol +53 -0
  137. data/test/data/A_21/pentalene.cdx +0 -0
  138. data/test/data/A_21/pentalene.mol +22 -0
  139. data/test/data/A_21/pentaphene.cdx +0 -0
  140. data/test/data/A_21/pentaphene.mol +53 -0
  141. data/test/data/A_21/perylene.cdx +0 -0
  142. data/test/data/A_21/perylene.mol +49 -0
  143. data/test/data/A_21/phenalene.cdx +0 -0
  144. data/test/data/A_21/phenalene.mol +33 -0
  145. data/test/data/A_21/phenanthrene.cdx +0 -0
  146. data/test/data/A_21/phenanthrene.mol +35 -0
  147. data/test/data/A_21/picene.cdx +0 -0
  148. data/test/data/A_21/picene.mol +53 -0
  149. data/test/data/A_21/pleiadene.cdx +0 -0
  150. data/test/data/A_21/pleiadene.mol +44 -0
  151. data/test/data/A_21/pyranthrene.cdx +0 -0
  152. data/test/data/A_21/pyranthrene.mol +72 -0
  153. data/test/data/A_21/pyrene.cdx +0 -0
  154. data/test/data/A_21/pyrene.mol +40 -0
  155. data/test/data/A_21/rubicene.cdx +0 -0
  156. data/test/data/A_21/rubicene.mol +63 -0
  157. data/test/data/A_21/s-indacene.cdx +0 -0
  158. data/test/data/A_21/s-indacene.mol +31 -0
  159. data/test/data/A_21/tetraphenylene.cdx +0 -0
  160. data/test/data/A_21/tetraphenylene.mol +57 -0
  161. data/test/data/A_21/trinaphthylene.cdx +0 -0
  162. data/test/data/A_21/trinaphthylene.mol +71 -0
  163. data/test/data/A_21/triphenylene.cdx +0 -0
  164. data/test/data/A_21/triphenylene.mol +44 -0
  165. data/test/data/C00147.kcf +25 -0
  166. data/test/data/G00147.kcf +13 -0
  167. data/test/data/atp.mol +69 -0
  168. data/test/data/cyclohexane.mol +17 -0
  169. data/test/data/cyclohexane.ps +485 -0
  170. data/test/data/fullerene.mol +155 -0
  171. data/test/data/glycan +33 -0
  172. data/test/data/hypericin.cdx +0 -0
  173. data/test/data/hypericin.cdxml +596 -0
  174. data/test/data/hypericin.chm +0 -0
  175. data/test/data/hypericin.ct +85 -0
  176. data/test/data/hypericin.f1d +0 -0
  177. data/test/data/hypericin.f1q +0 -0
  178. data/test/data/hypericin.gif +0 -0
  179. data/test/data/hypericin.mol +88 -0
  180. data/test/data/hypericin.mol2 +159 -0
  181. data/test/data/hypericin.msm +123 -0
  182. data/test/data/hypericin.pdf +359 -0
  183. data/test/data/hypericin.png +0 -0
  184. data/test/data/hypericin.ps +0 -0
  185. data/test/data/hypericin.skc +0 -0
  186. data/test/data/hypericin2.gif +0 -0
  187. data/test/data/hypericin2.ps +0 -0
  188. data/test/data/kegg/genomes/hsa/hsa_enzyme.list +4 -0
  189. data/test/data/kegg/genomes/hsa/hsa_pfam.list +4 -0
  190. data/test/data/kegg/ligand/mol/C00147.mol +26 -0
  191. data/test/data/kegg/ligand/reaction +14 -0
  192. data/test/data/kegg/ligand/reaction.lst +1 -0
  193. data/test/data/kegg/ligand/reaction_mapformula.lst +3 -0
  194. data/test/data/reaction +14 -0
  195. data/test/data/reaction.lst +1 -0
  196. data/test/data/reaction_mapformula.lst +3 -0
  197. data/test/data/rxn/C00001.mol +6 -0
  198. data/test/data/rxn/C00011.mol +10 -0
  199. data/test/data/rxn/C00014.mol +6 -0
  200. data/test/data/rxn/C01010.mol +18 -0
  201. data/test/data/rxn/sample.rxn +50 -0
  202. data/test/data/rxn/substitution.rxn +45 -0
  203. data/test/data/test.eps +0 -0
  204. data/test/data/test.mol +28 -0
  205. data/test/data/test.sdf +143 -0
  206. data/test/data/test.skc +0 -0
  207. data/test/data/test.xyz +4 -0
  208. data/test/data/test_lf.sdf +143 -0
  209. data/test/heavy_test_pubchem.rb +16 -0
  210. data/test/multiple_test.rb +22 -0
  211. data/test/test_adj.rb +54 -0
  212. data/test/test_canonical_smiles.rb +46 -0
  213. data/test/test_cdx.rb +32 -0
  214. data/test/test_chem.rb +18 -0
  215. data/test/test_cluster.rb +19 -0
  216. data/test/test_db.rb +11 -0
  217. data/test/test_eps.rb +24 -0
  218. data/test/test_geometry.rb +11 -0
  219. data/test/test_gspan.rb +28 -0
  220. data/test/test_iupac.rb +36 -0
  221. data/test/test_kcf.rb +24 -0
  222. data/test/test_kcf_glycan.rb +10 -0
  223. data/test/test_kegg.rb +118 -0
  224. data/test/test_linucs.rb +21 -0
  225. data/test/test_mdl.rb +45 -0
  226. data/test/test_mol2.rb +62 -0
  227. data/test/test_morgan.rb +21 -0
  228. data/test/test_pdf.rb +12 -0
  229. data/test/test_prop.rb +86 -0
  230. data/test/test_rmagick.rb +15 -0
  231. data/test/test_sbdb.rb +23 -0
  232. data/test/test_sdf.rb +30 -0
  233. data/test/test_smiles.rb +84 -0
  234. data/test/test_sssr.rb +18 -0
  235. data/test/test_sub.rb +47 -0
  236. data/test/test_subcomp.rb +37 -0
  237. data/test/test_traverse.rb +29 -0
  238. data/test/test_writer.rb +13 -0
  239. data/test/test_xyz.rb +15 -0
  240. data/test/type_test.rb +25 -0
  241. metadata +290 -0
@@ -0,0 +1,144 @@
1
+ #
2
+ # DO NOT MODIFY!!!!
3
+ # This file is automatically generated by racc 1.4.4
4
+ # from racc grammar file "linucs.ry".
5
+ #
6
+
7
+ require 'racc/parser'
8
+
9
+
10
# Racc-generated parser for LINUCS strings.
#
# The hand-written part (tokenizer and entry point) lives in the heredoc
# below and mirrors the "inner" section of linucs.ry; the parse tables
# after it are racc output and are reproduced unchanged.
class LinucsParser < Racc::Parser

module_eval <<'..end linucs.ry modeval..idcf977ebdbe', 'linucs.ry', 11

  # Hands the next queued [type, value] pair to the racc engine
  # (nil once the queue is exhausted).
  def next_token
    @q.shift
  end

  # Tokenizes +line+, runs the racc parser over the tokens and returns a
  # new Chem::SmilesMol that has been passed through +construct+.
  #
  # line:: LINUCS text. It is no longer modified in place, so frozen
  #        strings are accepted.
  #
  # Every pattern is anchored with \A so exactly one token is taken from
  # the head of the remaining input on each pass.
  def parse( line )
    @q = []

    rest = line.strip
    until rest.empty? do
      case rest
      when /\A\[/
        @q.push [:Ll, :Ll]
      when /\A\]/
        # Was /A\]/ : the missing backslash made this match a literal "A]"
        # anywhere in the input instead of a leading "]".
        @q.push [:Lr, :Lr]
      when /\A\[([^\]]+)\]/
        # NOTE(review): never reached, the bare "[" rule above always wins.
        @q.push [:ATOM_SYMBOL, Regexp.last_match(0)]
      when /\A(Cl|C|F|H|N|O|Au|U)/
        @q.push [:SYMBOL, Regexp.last_match(0).intern ]
      when /\A[a-zA-Z_]/
        @q.push [:SYMBOL, Regexp.last_match(0).intern ]
      when /\A\d+/
        @q.push [:NUMBER, Regexp.last_match(0).to_i ]
      when /\A\\/
        # Was unanchored: a backslash further along the input matched and
        # everything in front of it was dropped without being tokenized.
        @q.push [:BSLASH, :BSLASH]
      when /\A./
        @q.push [Regexp.last_match(0), Regexp.last_match(0)]
      else
        raise RuntimeError, 'must not happen'
      end
      rest = Regexp.last_match.post_match
    end
    @q.push [ :EOL, nil ]
    mol = Chem::SmilesMol.new
    construct(mol, do_parse)
    mol
  end

..end linucs.ry modeval..idcf977ebdbe

##### racc 1.4.4 generates ###

racc_reduce_table = [
 0, 0, :racc_error,
 0, 5, :_reduce_none,
 3, 5, :_reduce_none,
 1, 6, :_reduce_none,
 2, 6, :_reduce_none ]

racc_reduce_n = 5

racc_shift_n = 8

racc_action_table = [
     2,     6,     7,     3,     5 ]

racc_action_check = [
     1,     4,     4,     1,     2 ]

racc_action_pointer = [
   nil,     0,     4,   nil,    -1,   nil,   nil,   nil ]

racc_action_default = [
    -1,    -5,    -5,    -3,    -5,     8,    -2,    -4 ]

racc_goto_table = [
     1,     4 ]

racc_goto_check = [
     1,     2 ]

racc_goto_pointer = [
   nil,     0,     0 ]

racc_goto_default = [
   nil,   nil,   nil ]

# Only EOL and node are known to the grammar; the tokenizer above emits
# other token types as well (NOTE(review): grammar looks unfinished).
racc_token_table = {
 false => 0,
 Object.new => 1,
 :EOL => 2,
 :node => 3 }

racc_use_result_var = true

racc_nt_base = 4

Racc_arg = [
 racc_action_table,
 racc_action_check,
 racc_action_default,
 racc_action_pointer,
 racc_goto_table,
 racc_goto_check,
 racc_goto_default,
 racc_goto_pointer,
 racc_nt_base,
 racc_reduce_table,
 racc_token_table,
 racc_shift_n,
 racc_reduce_n,
 racc_use_result_var ]

Racc_token_to_s_table = [
'$end',
'error',
'EOL',
'node',
'$start',
'linucs',
'linuc']

Racc_debug_parser = false

##### racc system variables end #####

 # reduce 0 omitted

 # reduce 1 omitted

 # reduce 2 omitted

 # reduce 3 omitted

 # reduce 4 omitted

 # Shared no-op reduction: returns the running result untouched.
 def _reduce_none( val, _values, result )
  result
 end

end   # class LinucsParser
@@ -0,0 +1,53 @@
1
+
2
+ class LinucsParser
3
+ rule
4
+ linucs :
5
+ | linucs linuc EOL
6
+ linuc : node
7
+ | linuc node
8
+ end
9
+
10
+ ---- inner
11
+
12
+ def next_token
13
+ @q.shift
14
+ end
15
+
16
+ def parse( line )
17
+ @q = []
18
+
19
+ line.strip!
20
+ until line.empty? do
21
+ case line
22
+ when /\A\[/
23
+ @q.push [:Ll, :Ll]
24
+ when /A\]/
25
+ @q.push [:Lr, :Lr]
26
+ when /\A\[([^\]]+)\]/
27
+ @q.push [:ATOM_SYMBOL, $&] # [m[1], m[2], m[3], m[4], m[5], m[6]]]
28
+ when /\A(Cl|C|F|H|N|O|Au|U)/
29
+ @q.push [:SYMBOL, $&.intern ]
30
+ when /\A[a-zA-Z_]/
31
+ @q.push [:SYMBOL, $&.intern ]
32
+ when /\A\d+/
33
+ @q.push [:NUMBER, $&.to_i ]
34
+ when /\\/
35
+ @q.push [:BSLASH, :BSLASH]
36
+ when /\A./
37
+ @q.push [$&, $&]
38
+ else
39
+ raise RuntimeError, 'must not happen'
40
+ end
41
+ line = $'
42
+ end
43
+ @q.push [ :EOL, nil ]
44
+ mol = Chem::SmilesMol.new
45
+ construct(mol, do_parse)
46
+ mol
47
+ end
48
+
49
+
50
+ ---- footer
51
+
52
+
53
+
@@ -0,0 +1,379 @@
1
+ #
2
+ # chem/db/mdl.rb - MDL molfile format class
3
+ #
4
+
5
+
6
+ module Chem
7
+
8
# MDL molfile output for anything that exposes +nodes+ and +edges+.
module Molecule

  # printf template for the molfile counts line. The nine placeholders are
  # filled by the caller; the literal tail supplies the remaining fixed
  # columns ("  0" + "999" + " V2000") so the version tag ends in column 39
  # as the fixed-width CTfile layout requires. The previous literal was one
  # space short, which shifted the tail left by a column.
  MDLCountLineFormat = "%3d%3d%3d%3d%3d%3d%3d%3d%3d  0999 V2000"

  # Writes the receiver to +filename+ in MDL molfile layout.
  #
  # filename:: path of the file to create or overwrite; it is also written
  #            as the first header line.
  #
  # Expects +nodes+ to respond to +to_mdl+ and +edges+ to yield
  # [bond, atom1, atom2] triples whose atoms are members of +nodes+.
  # Returns +edges+, as before.
  def save_as_mdl filename
    File.open(filename, "w") do |out|
      # Three header lines: name, program line, comment.
      out.puts filename
      out.puts # ChemRuby
      out.puts
      out.puts MDLCountLineFormat % [nodes.length, edges.length, 0, 0, 0, 0, 0, 0, 0]
      nodes.each { |node| out.puts node.to_mdl }
      edges.each do |edge, atom1, atom2|
        # Bond lines refer to atoms by 1-based position in the atom block.
        out.puts edge.to_mdl(nodes.index(atom1) + 1, nodes.index(atom2) + 1)
      end
      # Terminator line; it was previously missing from the output.
      out.puts "M  END"
      edges
    end
  end

end
28
+
29
# MDL atom-block output for objects exposing +x+, +y+, +z+ and +element+.
module Atom

  # printf template for one atom line: three 10.4 coordinates, a space,
  # a two-character symbol and twelve three-character integer fields.
  MDLAtomLineFormat = "%10.4f%10.4f%10.4f %2s%3d%3d%3d%3d%3d%3d%3d%3d%3d%3d%3d%3d"

  # Renders the receiver as one atom line.
  #
  # mapping:: integer written into the tenth integer field (default 0);
  #           every other integer field is written as 0.
  def to_mdl mapping = 0
    leading_zeros  = Array.new(9, 0)
    trailing_zeros = [0, 0]
    values = [x, y, z, element] + leading_zeros + [mapping] + trailing_zeros
    MDLAtomLineFormat % values
  end

end
38
+
39
# MDL bond-block output for objects exposing +v+.
module Bond

  # Renders the receiver as one bond line.
  #
  # from:: 1-based index of the first atom
  # to::   1-based index of the second atom
  #
  # The third field is the receiver's +v+; the fourth is always 0.
  def to_mdl from, to
    fields = [from, to, v, 0]
    "%3d%3d%3d%3d " % fields
  end

end
46
+
47
# RXN output support. Currently a stub: see #to_mdl_rxn.
module Reaction

  # Intended to emit the reaction in MDL RXN layout. The method is switched
  # off by its author: it returns nil immediately, and the statements after
  # the early return are an unreachable draft that relies on an +out+ that
  # is never defined.
  def to_mdl_rxn
    return # fix me
    # out = STDOUT
    out.puts "$RXN"
    out.puts
    out.puts "ISIS 112620051015"
    out.puts
    out.puts "%3d%3d" % [@reactants.length, @products.length]
    @reactants.each { |mol| output_mdl_mol(mol, out) }
    @products.each { |mol| output_mdl_mol(mol, out) }
  end

  private

  # Writes a "$MOL" marker, one atom line per node of +mol+ (each with
  # mapping value 10) and a closing "M END" line to +out+.
  def output_mdl_mol mol, out
    out.puts "$MOL"
    mol.nodes.each { |node| out.puts node.to_mdl(10) }
    out.puts "M END"
  end

end
71
+
72
+ module MDL
73
+
74
# One line of a molfile atom block. The raw line is kept and each field is
# sliced out of its fixed column range on first access, then cached.
class MDLAtom

  require 'chem/utils/transform'
  include Atom
  include Chem::Transform::ThreeDimension

  # Names for the integer stereo parity codes.
  Stereo = {
    0 => :not_stereo,
    1 => :odd,
    2 => :even,
    3 => :either
  }

  # Position of the atom within its molecule; assigned by the parser.
  attr_accessor :number

  # line:: the raw atom line
  def initialize line
    @line = line
  end

  # Returns atomic symbol (columns 30..32, stripped, as a Symbol)
  def element
    @element ||= @line[30..32].strip.intern
  end

  # Coordinates for x-axis (columns 0..9)
  def x
    @x ||= @line[0..9].to_f
  end

  # Coordinates for y-axis (columns 10..19)
  def y
    @y ||= @line[10..19].to_f
  end

  # Coordinates for z-axis (columns 20..29)
  def z
    @z ||= @line[20..29].to_f
  end

  # Difference from mass in periodic table.
  # Unlike the other fields this is returned as the raw column text
  # (columns 33..35), not converted to an Integer.
  def mass_difference
    @mass_difference ||= @line[33..35]
  end

  # Columns 36..38 as an Integer
  def charge
    @charge ||= @line[36..38].to_i
  end

  # Columns 39..41 as an Integer
  def stereo_parity
    @stereo_parity ||= @line[39..41].to_i
  end

  # Columns 42..44 as an Integer
  def hydrogen_count
    @hydrogen_count ||= @line[42..44].to_i
  end

  # Columns 45..47 as an Integer
  def stereo_care_box
    @stereo_care_box ||= @line[45..47].to_i
  end

  # Columns 48..50 as an Integer
  def valence
    @valence ||= @line[48..50].to_i
  end

  # Columns 51..53 as an Integer
  def h0_designator
    @h0_designator ||= @line[51..53].to_i
  end

  # 54..56 Not used
  # 57..59 Not used

  # Atom-atom mapping number (columns 60..62) as an Integer
  def mapping
    @mapping ||= @line[60..62].to_i
  end

  # Columns 63..65 as an Integer
  def inversion
    @inversion ||= @line[63..65].to_i
  end

  # Columns 66..68 as an Integer
  def exact_charge
    @exact_charge ||= @line[66..68].to_i
  end

end
113
+
114
# One line of a molfile bond block. The raw line is kept and each field is
# sliced out of its fixed column range on first access.
#
# Bond line layout (0-based columns): 0..2 first atom, 3..5 second atom,
# 6..8 bond type, 9..11 stereo, 12..14 unused, 15..17 topology,
# 18..20 reacting-center status.
class MDLBond

  include Bond

  # Names for the bond stereo codes.
  Stereo = {
    0 => :not_stereo,
    1 => :up,
    3 => :cis_trans,
    4 => :either,
    6 => :down
  }

  # Names for the bond type codes.
  BondType = {
    1 => :single,
    2 => :double,
    3 => :triple,
    4 => :aromatic,
    5 => :single_or_double,
    6 => :single_or_aromatic,
    7 => :double_or_aromatic,
    8 => :any
  }

  # Names for the reacting-center status codes.
  ReactingCenter = {
    0 => :unmarked,
    1 => :center,
    -1 => :not,
    2 => :no_change,
    4 => :made_or_broken,
    8 => :order_changes
  }

  # Names for the bond topology codes.
  Topology = {
    0 => :either,
    1 => :ring,
    2 => :chain
  }

  # line:: the raw bond line
  def initialize line
    @line = line
  end

  # Bond type code (columns 6..8) as an Integer.
  def v
    @v ||= @line[6..8].to_i
  end

  # Topology as a Symbol, or nil for an unknown code. Reads columns
  # 15..17; the earlier 13..15 straddled the unused field in front of it.
  def topology
    @topology ||= Topology[@line[15..17].to_i]
  end

  # Reacting-center status as a Symbol, or nil for an unknown code. Reads
  # columns 18..20; the earlier 16..18 straddled the topology field.
  def reacting_center
    @reacting_center ||= ReactingCenter[@line[18..20].to_i]
  end

  # Stereo as a Symbol (columns 9..11), or nil for an unknown code.
  def stereo
    @stereo ||= Stereo[@line[9..11].to_i]
  end

  # Bond type as a Symbol, or nil for an unknown code. Cached in its own
  # variable: it used to share @v with #v, so it returned the integer code
  # when #v had been called first and otherwise overwrote #v's value with
  # the Symbol.
  def bond_type
    @bond_type ||= BondType[v]
  end

end
162
+
163
# Molfile reader mixed into classes that hold their atoms in @nodes and
# their bonds in @edges (both must already be arrays).
module MdlMolParser

  # Path most recently passed to #open.
  attr_reader :filename

  # Parses the molfile at +filename+ into the receiver and returns the
  # receiver. The file is closed before returning, also when parsing
  # raises.
  def open(filename)
    @filename = filename
    File.open(filename) { |input| parse(input) }
  end

  # Text of the second header line as read by #parse, including its line
  # terminator.
  def entry
    @title
  end
  alias name entry

  # Reads one molecule from +input+ and appends to @nodes and @edges.
  #
  # input:: object responding to +readline+ and +each+ (an IO, a StringIO)
  #
  # Consumes the three header lines, the counts line, the atom and bond
  # blocks and everything up to and including the "M  END" line, so a
  # stream holding several molecules is left positioned after this one.
  #
  # Returns self. Raises RuntimeError when the counts line or a bond line
  # is out of range; +readline+ raises EOFError on a truncated stream.
  def parse input
    input.readline                 # first header line, ignored
    @title = input.readline
    # Third header line, ignored. readline raises EOFError at end of input
    # rather than returning nil, so no explicit nil check is needed here.
    input.readline

    counts = input.readline
    n_atom = counts[0..2].to_i
    n_bond = counts[3..5].to_i

    unless (0..999).include?(n_atom) && (0..999).include?(n_bond)
      raise "counts line format error"
    end

    n_atom.times do |n|
      atom = MDLAtom.new(input.readline)
      atom.number = n + 1
      @nodes.push(atom)
    end

    n_bond.times do
      line = input.readline
      bond = MDLBond.new line
      b_n = line[0..2].to_i
      e_n = line[3..5].to_i
      unless (1..n_atom).include?(b_n) && (1..n_atom).include?(e_n)
        # The offending line is carried in the message instead of being
        # printed to standard output.
        raise "MDL bond line format error: #{line.inspect}"
      end

      @edges.push([bond, @nodes[b_n - 1], @nodes[e_n - 1]])
    end

    # Skip the properties block. The terminator is matched with any run of
    # whitespace between "M" and "END" so the standard two-space form is
    # recognised as well as the single-space form matched previously.
    input.each do |line|
      break if /M\s+END/.match(line)
    end
    self
  end
end
213
+
214
# A molecule read from an MDL molfile: atoms in +nodes+, bonds in +edges+
# as [bond, atom1, atom2] triples.
class MdlMolecule

  include Molecule
  include Enumerable
  include MdlMolParser

  attr_reader :nodes, :edges

  # Starts out with no atoms and no bonds.
  def initialize
    @nodes = []
    @edges = []
  end

  # Builds a molecule from an already opened stream.
  #
  # input:: object accepted by MdlMolParser#parse
  #
  # Returns the parsed MdlMolecule.
  def self.parse_io input
    mol = MdlMolecule.new
    mol.parse input
  end

  # Builds a molecule from the molfile at path +file+.
  #
  # Opens the path with File.open in block form, so the handle is closed
  # once parsing finishes or fails, and a path beginning with "|" is
  # treated as a file name rather than handed to Kernel#open.
  #
  # Returns the parsed MdlMolecule.
  def self.parse file
    File.open(file) { |input| parse_io(input) }
  end

end
239
+
240
# An atom of a reaction, pairing the reactant-side and product-side atoms
# that share a mapping number. Unknown messages are forwarded to the
# "representative": the reactant atom when one was assigned, otherwise the
# product atom.
class RxnAtom
  include Atom

  # Readers only; the writers are defined by hand below because they also
  # maintain the representative.
  attr_reader :reactant, :product

  # Assigns the reactant-side atom and makes it the representative
  # (including when +rct+ is nil).
  def reactant= rct
    @reactant = @representative = rct
  end

  # Assigns the product-side atom; it becomes the representative only when
  # none is set yet.
  def product= prd
    @product = prd
    @representative = prd unless @representative
  end

  # Forwards any other message, with its arguments and block, to the
  # representative. Falls back to the default NoMethodError when there is
  # no representative.
  def method_missing name, *args, &block
    return super unless @representative
    @representative.send(name, *args, &block)
  end

  # Keeps respond_to? consistent with the forwarding in #method_missing.
  def respond_to_missing? name, include_private = false
    (@representative && @representative.respond_to?(name, include_private)) || super
  end

  def x ; @representative.x ; end
  def y ; @representative.y ; end
  def element ; @representative.element ; end

end
263
+
264
# A bond of a reaction, pairing the reactant-side and product-side bonds
# between the same mapped atoms. Either side may be absent.
class RxnBond
  include Bond
  attr_accessor :reactant, :product

  # Change in bond value across the reaction: product minus reactant when
  # both sides exist, the negated reactant value when only the reactant
  # exists, and the product value otherwise.
  def v
    return @product.v - @reactant.v if @reactant && @product
    return - @reactant.v if @reactant

    @product.v
  end

end
280
+
281
# A reaction read from an MDL RXN file. Reactant and product molecules are
# parsed individually, then merged: +nodes+ holds one RxnAtom per
# atom-atom mapping number and +edges+ holds [RxnBond, atom1, atom2]
# triples.
class MdlReaction

  include Molecule
  include Reaction
  include Enumerable

  attr_reader :nodes, :edges

  # Path most recently passed to #open_rxn.
  attr_reader :filename

  def initialize
    @nodes = []
    @edges = []
    @reactants = []
    @products = []
  end

  # Reads the RXN file at +filename+ into the receiver and returns self.
  # The file is closed before the merged graph is built, also when
  # reading raises.
  def open_rxn(filename)
    @filename = filename
    File.open(filename) do |input|
      n_reactants, n_products = parse_header(input)
      read_mol(input, n_reactants, @reactants, @r_atoms = {})
      read_mol(input, n_products, @products, @p_atoms = {})
    end
    construct
    self
  end

  private

  # Merges the parsed reactants and products into @nodes and @edges.
  def construct
    @p2r = {}
    @r2p = {}
    # Union of the mapping numbers: a number present on both sides must
    # produce a single RxnAtom (concatenating the key lists created two).
    (@r_atoms.keys | @p_atoms.keys).each do |k|
      r_atom = @r_atoms[k]
      p_atom = @p_atoms[k]
      ratom = RxnAtom.new
      ratom.reactant = r_atom
      ratom.product = p_atom
      @p2r[p_atom] = r_atom if p_atom
      @r2p[r_atom] = p_atom if r_atom
      @nodes.push(ratom)
    end

    get_edge_hash(@products, p_edge_hash = {})

    # Product bonds already paired with a reactant bond.
    already = []
    @reactants.each do |mol|
      mol.edges.each do |edge, atom1, atom2|
        bond = RxnBond.new
        bond.reactant = edge
        if @r2p[atom1] and @r2p[atom2]
          bond.product = p_edge_hash[[@r2p[atom1], @r2p[atom2]].sort_by{|a| a.number}]
          already.push(bond.product)
        end
        # Stored as a triple like every other edge; the bare bond pushed
        # here before could not be destructured by edge consumers.
        @edges.push([bond, atom1, atom2])
      end
    end

    # Bonds that exist only on the product side.
    @products.each do |mol|
      mol.edges.each do |bond, atom1, atom2|
        next if already.include?(bond)
        r_bond = RxnBond.new
        r_bond.product = bond
        @edges.push([r_bond, atom1, atom2])
      end
    end
  end

  # Fills +hash+ with every bond of +mols+, keyed by its two atoms sorted
  # by atom number.
  def get_edge_hash mols, hash
    mols.each do |mol|
      mol.edges.each do |edge, atom1, atom2|
        hash[[atom1, atom2].sort_by{|a| a.number}] = edge
      end
    end
  end

  # Reads +n+ "$MOL" sections from +input+, appending each parsed molecule
  # to +mols+ and recording every atom with a non-zero mapping number in
  # +atoms+ under that number.
  def read_mol input, n, mols, atoms
    n.times do
      # Skip forward to the next "$MOL" marker line.
      loop do
        line = input.readline # $MOL
        break if /\$MOL/.match(line)
      end
      mol = MdlMolecule.parse_io(input)
      mol.nodes.each do |a|
        next if a.mapping == 0
        atoms[a.mapping] = a
      end
      mols.push mol
    end
  end

  # Skips four header lines and returns the whitespace-separated integers
  # of the fifth (used by the caller as reactant and product counts).
  def parse_header input
    4.times{|n| input.readline}
    input.readline.split.collect{|n| n.to_i}
  end

end
375
+
376
+ end
377
+
378
+ end
379
+