chemruby 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. data/README +120 -0
  2. data/Rakefile +195 -0
  3. data/ext/extconf.rb +4 -0
  4. data/ext/subcomp.c +416 -0
  5. data/lib/chem.rb +130 -0
  6. data/lib/chem/appl.rb +1 -0
  7. data/lib/chem/appl/chem3dole.rb +36 -0
  8. data/lib/chem/appl/tinker/nucleic.rb +40 -0
  9. data/lib/chem/appl/tinker/tinker_reader.rb +43 -0
  10. data/lib/chem/data.rb +4 -0
  11. data/lib/chem/data/atomic_weight.rb +124 -0
  12. data/lib/chem/data/character.rb +2 -0
  13. data/lib/chem/data/electronegativity.rb +14 -0
  14. data/lib/chem/data/periodic_table.rb +6 -0
  15. data/lib/chem/data/prime_numbers.rb +1 -0
  16. data/lib/chem/data/vdw_radii.rb +1 -0
  17. data/lib/chem/db.rb +64 -0
  18. data/lib/chem/db/cansmi.rb +234 -0
  19. data/lib/chem/db/cdx.rb +1525 -0
  20. data/lib/chem/db/eps.rb +164 -0
  21. data/lib/chem/db/g98.rb +909 -0
  22. data/lib/chem/db/gspan.rb +130 -0
  23. data/lib/chem/db/iupac.rb +5 -0
  24. data/lib/chem/db/iupac/a_1.rb +46 -0
  25. data/lib/chem/db/iupac/iuparser.rb +226 -0
  26. data/lib/chem/db/iupac/iuparser.ry +97 -0
  27. data/lib/chem/db/iupac/postfix.rb +2 -0
  28. data/lib/chem/db/kcf.rb +390 -0
  29. data/lib/chem/db/kcf_glycan.rb +19 -0
  30. data/lib/chem/db/kegg.rb +516 -0
  31. data/lib/chem/db/linucs/linparser.rb +144 -0
  32. data/lib/chem/db/linucs/linucs.ry +53 -0
  33. data/lib/chem/db/mdl.rb +379 -0
  34. data/lib/chem/db/molconnz.rb +12 -0
  35. data/lib/chem/db/mopac.rb +88 -0
  36. data/lib/chem/db/msi.rb +107 -0
  37. data/lib/chem/db/pdb_dic.rb +115 -0
  38. data/lib/chem/db/pdf.rb +131 -0
  39. data/lib/chem/db/pubchem.rb +113 -0
  40. data/lib/chem/db/rmagick.rb +70 -0
  41. data/lib/chem/db/sdf.rb +37 -0
  42. data/lib/chem/db/smbl.rb +88 -0
  43. data/lib/chem/db/smiles.rb +2 -0
  44. data/lib/chem/db/smiles/smiles.ry +203 -0
  45. data/lib/chem/db/smiles/smiparser.rb +375 -0
  46. data/lib/chem/db/swf.rb +74 -0
  47. data/lib/chem/db/sybyl.rb +150 -0
  48. data/lib/chem/db/tinker.rb +77 -0
  49. data/lib/chem/db/types/type_cansmi.rb +9 -0
  50. data/lib/chem/db/types/type_cdx.rb +24 -0
  51. data/lib/chem/db/types/type_gspan.rb +31 -0
  52. data/lib/chem/db/types/type_kcf.rb +28 -0
  53. data/lib/chem/db/types/type_kcf_glycan.rb +26 -0
  54. data/lib/chem/db/types/type_kegg.rb +92 -0
  55. data/lib/chem/db/types/type_mdl.rb +31 -0
  56. data/lib/chem/db/types/type_pdf.rb +33 -0
  57. data/lib/chem/db/types/type_png.rb +31 -0
  58. data/lib/chem/db/types/type_rxn.rb +25 -0
  59. data/lib/chem/db/types/type_sdf.rb +25 -0
  60. data/lib/chem/db/types/type_sybyl.rb +30 -0
  61. data/lib/chem/db/types/type_xyz.rb +26 -0
  62. data/lib/chem/db/vector.rb +128 -0
  63. data/lib/chem/db/xyz.rb +39 -0
  64. data/lib/chem/model.rb +119 -0
  65. data/lib/chem/model/skeleton.rb +37 -0
  66. data/lib/chem/utils.rb +11 -0
  67. data/lib/chem/utils/geometry.rb +27 -0
  68. data/lib/chem/utils/graph_db.rb +146 -0
  69. data/lib/chem/utils/math.rb +17 -0
  70. data/lib/chem/utils/prop.rb +123 -0
  71. data/lib/chem/utils/sssr.rb +101 -0
  72. data/lib/chem/utils/sub.rb +78 -0
  73. data/lib/chem/utils/transform.rb +110 -0
  74. data/lib/chem/utils/traverse.rb +37 -0
  75. data/lib/chem/utils/ullmann.rb +134 -0
  76. data/lib/graph.rb +41 -0
  77. data/lib/graph/cluster.rb +20 -0
  78. data/lib/graph/morgan.rb +38 -0
  79. data/sample/frequent_subgraph.rb +46 -0
  80. data/sample/images/ex1.rb +11 -0
  81. data/sample/images/ex2.rb +4 -0
  82. data/sample/images/ex3.rb +5 -0
  83. data/sample/images/ex4.rb +17 -0
  84. data/sample/images/ex5.rb +10 -0
  85. data/sample/images/mol/adenine.mol +26 -0
  86. data/sample/images/mol/atp.mol +69 -0
  87. data/sample/images/temp/ex5.mol +344 -0
  88. data/sample/kegg_db.rb +116 -0
  89. data/setup.rb +1551 -0
  90. data/test/all.rb +6 -0
  91. data/test/coord_test.rb +17 -0
  92. data/test/ctab_test.rb +31 -0
  93. data/test/data/A_21.tar.gz +0 -0
  94. data/test/data/A_21/aceanthrylene.cdx +0 -0
  95. data/test/data/A_21/aceanthrylene.mol +40 -0
  96. data/test/data/A_21/acenaphthylene.cdx +0 -0
  97. data/test/data/A_21/acenaphthylene.mol +31 -0
  98. data/test/data/A_21/acephenanthrylene.cdx +0 -0
  99. data/test/data/A_21/acephenanthrylene.mol +40 -0
  100. data/test/data/A_21/anthracene.cdx +0 -0
  101. data/test/data/A_21/anthracene.mol +35 -0
  102. data/test/data/A_21/as-indacene.cdx +0 -0
  103. data/test/data/A_21/as-indacene.mol +31 -0
  104. data/test/data/A_21/azulene.cdx +0 -0
  105. data/test/data/A_21/azulene.mol +26 -0
  106. data/test/data/A_21/biphenylene.cdx +0 -0
  107. data/test/data/A_21/biphenylene.mol +31 -0
  108. data/test/data/A_21/chrysene.cdx +0 -0
  109. data/test/data/A_21/chrysene.mol +44 -0
  110. data/test/data/A_21/coronen.cdx +0 -0
  111. data/test/data/A_21/coronen.mol +59 -0
  112. data/test/data/A_21/fluoranthene.cdx +0 -0
  113. data/test/data/A_21/fluoranthene.mol +40 -0
  114. data/test/data/A_21/fluorene.cdx +0 -0
  115. data/test/data/A_21/fluorene.mol +33 -0
  116. data/test/data/A_21/heptacene.cdx +0 -0
  117. data/test/data/A_21/heptacene.mol +71 -0
  118. data/test/data/A_21/heptalene.cdx +0 -0
  119. data/test/data/A_21/heptalene.mol +30 -0
  120. data/test/data/A_21/heptaphene.cdx +0 -0
  121. data/test/data/A_21/heptaphene.mol +71 -0
  122. data/test/data/A_21/hexacene.cdx +0 -0
  123. data/test/data/A_21/hexacene.mol +62 -0
  124. data/test/data/A_21/hexaphene.cdx +0 -0
  125. data/test/data/A_21/hexaphene.mol +62 -0
  126. data/test/data/A_21/indene.cdx +0 -0
  127. data/test/data/A_21/indene.mol +24 -0
  128. data/test/data/A_21/iupac.txt +41 -0
  129. data/test/data/A_21/naphthacene.cdx +0 -0
  130. data/test/data/A_21/naphthacene.mol +44 -0
  131. data/test/data/A_21/naphthalene.cdx +0 -0
  132. data/test/data/A_21/naphthalene.mol +26 -0
  133. data/test/data/A_21/ovalene.cdx +0 -0
  134. data/test/data/A_21/ovalene.mol +78 -0
  135. data/test/data/A_21/pentacene.cdx +0 -0
  136. data/test/data/A_21/pentacene.mol +53 -0
  137. data/test/data/A_21/pentalene.cdx +0 -0
  138. data/test/data/A_21/pentalene.mol +22 -0
  139. data/test/data/A_21/pentaphene.cdx +0 -0
  140. data/test/data/A_21/pentaphene.mol +53 -0
  141. data/test/data/A_21/perylene.cdx +0 -0
  142. data/test/data/A_21/perylene.mol +49 -0
  143. data/test/data/A_21/phenalene.cdx +0 -0
  144. data/test/data/A_21/phenalene.mol +33 -0
  145. data/test/data/A_21/phenanthrene.cdx +0 -0
  146. data/test/data/A_21/phenanthrene.mol +35 -0
  147. data/test/data/A_21/picene.cdx +0 -0
  148. data/test/data/A_21/picene.mol +53 -0
  149. data/test/data/A_21/pleiadene.cdx +0 -0
  150. data/test/data/A_21/pleiadene.mol +44 -0
  151. data/test/data/A_21/pyranthrene.cdx +0 -0
  152. data/test/data/A_21/pyranthrene.mol +72 -0
  153. data/test/data/A_21/pyrene.cdx +0 -0
  154. data/test/data/A_21/pyrene.mol +40 -0
  155. data/test/data/A_21/rubicene.cdx +0 -0
  156. data/test/data/A_21/rubicene.mol +63 -0
  157. data/test/data/A_21/s-indacene.cdx +0 -0
  158. data/test/data/A_21/s-indacene.mol +31 -0
  159. data/test/data/A_21/tetraphenylene.cdx +0 -0
  160. data/test/data/A_21/tetraphenylene.mol +57 -0
  161. data/test/data/A_21/trinaphthylene.cdx +0 -0
  162. data/test/data/A_21/trinaphthylene.mol +71 -0
  163. data/test/data/A_21/triphenylene.cdx +0 -0
  164. data/test/data/A_21/triphenylene.mol +44 -0
  165. data/test/data/C00147.kcf +25 -0
  166. data/test/data/G00147.kcf +13 -0
  167. data/test/data/atp.mol +69 -0
  168. data/test/data/cyclohexane.mol +17 -0
  169. data/test/data/cyclohexane.ps +485 -0
  170. data/test/data/fullerene.mol +155 -0
  171. data/test/data/glycan +33 -0
  172. data/test/data/hypericin.cdx +0 -0
  173. data/test/data/hypericin.cdxml +596 -0
  174. data/test/data/hypericin.chm +0 -0
  175. data/test/data/hypericin.ct +85 -0
  176. data/test/data/hypericin.f1d +0 -0
  177. data/test/data/hypericin.f1q +0 -0
  178. data/test/data/hypericin.gif +0 -0
  179. data/test/data/hypericin.mol +88 -0
  180. data/test/data/hypericin.mol2 +159 -0
  181. data/test/data/hypericin.msm +123 -0
  182. data/test/data/hypericin.pdf +359 -0
  183. data/test/data/hypericin.png +0 -0
  184. data/test/data/hypericin.ps +0 -0
  185. data/test/data/hypericin.skc +0 -0
  186. data/test/data/hypericin2.gif +0 -0
  187. data/test/data/hypericin2.ps +0 -0
  188. data/test/data/kegg/genomes/hsa/hsa_enzyme.list +4 -0
  189. data/test/data/kegg/genomes/hsa/hsa_pfam.list +4 -0
  190. data/test/data/kegg/ligand/mol/C00147.mol +26 -0
  191. data/test/data/kegg/ligand/reaction +14 -0
  192. data/test/data/kegg/ligand/reaction.lst +1 -0
  193. data/test/data/kegg/ligand/reaction_mapformula.lst +3 -0
  194. data/test/data/reaction +14 -0
  195. data/test/data/reaction.lst +1 -0
  196. data/test/data/reaction_mapformula.lst +3 -0
  197. data/test/data/rxn/C00001.mol +6 -0
  198. data/test/data/rxn/C00011.mol +10 -0
  199. data/test/data/rxn/C00014.mol +6 -0
  200. data/test/data/rxn/C01010.mol +18 -0
  201. data/test/data/rxn/sample.rxn +50 -0
  202. data/test/data/rxn/substitution.rxn +45 -0
  203. data/test/data/test.eps +0 -0
  204. data/test/data/test.mol +28 -0
  205. data/test/data/test.sdf +143 -0
  206. data/test/data/test.skc +0 -0
  207. data/test/data/test.xyz +4 -0
  208. data/test/data/test_lf.sdf +143 -0
  209. data/test/heavy_test_pubchem.rb +16 -0
  210. data/test/multiple_test.rb +22 -0
  211. data/test/test_adj.rb +54 -0
  212. data/test/test_canonical_smiles.rb +46 -0
  213. data/test/test_cdx.rb +32 -0
  214. data/test/test_chem.rb +18 -0
  215. data/test/test_cluster.rb +19 -0
  216. data/test/test_db.rb +11 -0
  217. data/test/test_eps.rb +24 -0
  218. data/test/test_geometry.rb +11 -0
  219. data/test/test_gspan.rb +28 -0
  220. data/test/test_iupac.rb +36 -0
  221. data/test/test_kcf.rb +24 -0
  222. data/test/test_kcf_glycan.rb +10 -0
  223. data/test/test_kegg.rb +118 -0
  224. data/test/test_linucs.rb +21 -0
  225. data/test/test_mdl.rb +45 -0
  226. data/test/test_mol2.rb +62 -0
  227. data/test/test_morgan.rb +21 -0
  228. data/test/test_pdf.rb +12 -0
  229. data/test/test_prop.rb +86 -0
  230. data/test/test_rmagick.rb +15 -0
  231. data/test/test_sbdb.rb +23 -0
  232. data/test/test_sdf.rb +30 -0
  233. data/test/test_smiles.rb +84 -0
  234. data/test/test_sssr.rb +18 -0
  235. data/test/test_sub.rb +47 -0
  236. data/test/test_subcomp.rb +37 -0
  237. data/test/test_traverse.rb +29 -0
  238. data/test/test_writer.rb +13 -0
  239. data/test/test_xyz.rb +15 -0
  240. data/test/type_test.rb +25 -0
  241. metadata +290 -0
@@ -0,0 +1,144 @@
1
+ #
2
+ # DO NOT MODIFY!!!!
3
+ # This file is automatically generated by racc 1.4.4
4
+ # from racc grammer file "linucs.ry".
5
+ #
6
+
7
+ require 'racc/parser'
8
+
9
+
10
# Racc-generated parser for the "linucs.ry" grammar.
#
# The heredoc below is the grammar file's "inner" section (tokenizer and
# entry point); everything after it is table data emitted by racc. Any change
# made to the inner section here should be mirrored in linucs.ry so that a
# regeneration does not lose it.
class LinucsParser < Racc::Parser

module_eval <<'..end linucs.ry modeval..idcf977ebdbe', 'linucs.ry', 11

  # Hands the racc driver the next queued [type, value] pair, or nil once the
  # queue filled by #parse has been drained.
  def next_token
    @q.shift
  end

  # Tokenizes +line+, runs the generated parser over the tokens and passes the
  # parse result to +construct+ together with a fresh Chem::SmilesMol.
  #
  # line:: the String to parse; it is not modified.
  #
  # Returns the Chem::SmilesMol instance.
  def parse( line )
    @q = []

    # Work on a stripped copy so the caller's string is left untouched (and
    # frozen strings are accepted).
    line = line.strip
    until line.empty? do
      case line
      when /\A\[/
        @q.push [:Ll, :Ll]
      when /\A\]/
        # Anchored: an unanchored pattern would match further into the line
        # and `line = $'` would then discard everything before the match.
        @q.push [:Lr, :Lr]
      when /\A\[([^\]]+)\]/
        # NOTE(review): never selected, because the /\A\[/ branch above already
        # claims every leading "[". Kept pending a decision on whether a
        # bracketed group should become a single token.
        @q.push [:ATOM_SYMBOL, $&] # [m[1], m[2], m[3], m[4], m[5], m[6]]]
      when /\A(Cl|C|F|H|N|O|Au|U)/
        @q.push [:SYMBOL, $&.intern ]
      when /\A[a-zA-Z_]/
        @q.push [:SYMBOL, $&.intern ]
      when /\A\d+/
        @q.push [:NUMBER, $&.to_i ]
      when /\A\\/
        # Anchored for the same reason as the "]" branch.
        @q.push [:BSLASH, :BSLASH]
      when /\A./
        @q.push [$&, $&]
      else
        raise RuntimeError, 'must not happen'
      end
      # Continue with whatever follows the token just consumed.
      line = $'
    end
    @q.push [ :EOL, nil ]
    mol = Chem::SmilesMol.new
    construct(mol, do_parse)
    mol
  end


..end linucs.ry modeval..idcf977ebdbe

##### racc 1.4.4 generates ###

racc_reduce_table = [
 0, 0, :racc_error,
 0, 5, :_reduce_none,
 3, 5, :_reduce_none,
 1, 6, :_reduce_none,
 2, 6, :_reduce_none ]

racc_reduce_n = 5

racc_shift_n = 8

racc_action_table = [
     2,     6,     7,     3,     5 ]

racc_action_check = [
     1,     4,     4,     1,     2 ]

racc_action_pointer = [
   nil,     0,     4,   nil,    -1,   nil,   nil,   nil ]

racc_action_default = [
    -1,    -5,    -5,    -3,    -5,     8,    -2,    -4 ]

racc_goto_table = [
     1,     4 ]

racc_goto_check = [
     1,     2 ]

racc_goto_pointer = [
   nil,     0,     0 ]

racc_goto_default = [
   nil,   nil,   nil ]

racc_token_table = {
 false => 0,
 Object.new => 1,
 :EOL => 2,
 :node => 3 }

racc_use_result_var = true

racc_nt_base = 4

Racc_arg = [
 racc_action_table,
 racc_action_check,
 racc_action_default,
 racc_action_pointer,
 racc_goto_table,
 racc_goto_check,
 racc_goto_default,
 racc_goto_pointer,
 racc_nt_base,
 racc_reduce_table,
 racc_token_table,
 racc_shift_n,
 racc_reduce_n,
 racc_use_result_var ]

Racc_token_to_s_table = [
'$end',
'error',
'EOL',
'node',
'$start',
'linucs',
'linuc']

Racc_debug_parser = false

##### racc system variables end #####

 # reduce 0 omitted

 # reduce 1 omitted

 # reduce 2 omitted

 # reduce 3 omitted

 # reduce 4 omitted

 # Shared reduction action for every rule: passes +result+ through unchanged.
 def _reduce_none( val, _values, result )
  result
 end

end   # class LinucsParser
@@ -0,0 +1,53 @@
1
+
2
+ class LinucsParser
3
+ rule
4
+ linucs : # empty alternative: an input with no entries is accepted
5
+ | linucs linuc EOL # otherwise: the entries so far followed by one more linuc terminated by EOL
6
+ linuc : node # a linuc is a single node token ...
7
+ | linuc node # ... or an existing linuc extended by one further node token
8
+ end # NOTE(review): the tokenizer in the inner section emits :Ll, :Lr, :ATOM_SYMBOL, :SYMBOL, :NUMBER, :BSLASH, single characters and :EOL, but never :node - confirm how node tokens are meant to be produced
9
+
10
+ ---- inner
11
+
12
# Dequeues the oldest pending [type, value] token for the racc driver.
# Evaluates to nil when the queue is empty.
def next_token
  upcoming = @q.shift
  upcoming
end
15
+
16
# Tokenizes +line+, runs the generated parser over the tokens and passes the
# parse result to +construct+ together with a fresh Chem::SmilesMol.
#
# line:: the String to parse; it is not modified.
#
# Returns the Chem::SmilesMol instance.
def parse( line )
  @q = []

  # Work on a stripped copy so the caller's string is left untouched (and
  # frozen strings are accepted).
  line = line.strip
  until line.empty? do
    case line
    when /\A\[/
      @q.push [:Ll, :Ll]
    when /\A\]/
      # Anchored: an unanchored pattern would match further into the line
      # and `line = $'` would then discard everything before the match.
      @q.push [:Lr, :Lr]
    when /\A\[([^\]]+)\]/
      # NOTE(review): never selected, because the /\A\[/ branch above already
      # claims every leading "[". Kept pending a decision on whether a
      # bracketed group should become a single token.
      @q.push [:ATOM_SYMBOL, $&] # [m[1], m[2], m[3], m[4], m[5], m[6]]]
    when /\A(Cl|C|F|H|N|O|Au|U)/
      @q.push [:SYMBOL, $&.intern ]
    when /\A[a-zA-Z_]/
      @q.push [:SYMBOL, $&.intern ]
    when /\A\d+/
      @q.push [:NUMBER, $&.to_i ]
    when /\A\\/
      # Anchored for the same reason as the "]" branch.
      @q.push [:BSLASH, :BSLASH]
    when /\A./
      @q.push [$&, $&]
    else
      raise RuntimeError, 'must not happen'
    end
    # Continue with whatever follows the token just consumed.
    line = $'
  end
  @q.push [ :EOL, nil ]
  mol = Chem::SmilesMol.new
  construct(mol, do_parse)
  mol
end
48
+
49
+
50
+ ---- footer
51
+
52
+
53
+
@@ -0,0 +1,379 @@
1
+ #
2
+ # chem/db/mdl.rb - MDL molfile format class
3
+ #
4
+
5
+
6
+ module Chem
7
+
8
# MDL molfile output for any object that exposes +nodes+ (objects responding
# to +to_mdl+) and +edges+ ([bond, atom1, atom2] triples whose bond responds
# to +to_mdl(from, to)+).
module Molecule

  # Counts line: atom count, bond count and seven further 3-column fields,
  # followed by the fixed "  0999 V2000" tail of the V2000 layout.
  MDLCountLineFormat = "%3d%3d%3d%3d%3d%3d%3d%3d%3d  0999 V2000"

  # Writes the molecule to +filename+ as an MDL molfile.
  #
  # The three header lines are the file name followed by two blank lines;
  # then come the counts line, one line per node, one line per edge and the
  # terminating "M  END" record.
  #
  # filename:: path of the file to create or overwrite.
  #
  # Returns +edges+ (unchanged from earlier versions of this method).
  def save_as_mdl filename
    File.open(filename, "w") do |out|
      out.puts filename
      out.puts # ChemRuby
      out.puts
      out.puts MDLCountLineFormat % [nodes.length, edges.length, 0, 0, 0, 0, 0, 0, 0]
      nodes.each { |node| out.puts node.to_mdl }
      edges.each do |edge, atom1, atom2|
        # Bond lines refer to atoms by their 1-based position in +nodes+.
        out.puts edge.to_mdl(nodes.index(atom1) + 1, nodes.index(atom2) + 1)
      end
      # A molfile is terminated by an "M  END" record.
      out.puts "M  END"
    end
    edges
  end

end
28
+
29
# MDL atom-line rendering for any object that exposes +x+, +y+, +z+ and
# +element+.
module Atom

  # Three 10.4 coordinates, a space, the element in two columns, then twelve
  # 3-column integer fields.
  MDLAtomLineFormat = "%10.4f%10.4f%10.4f %2s%3d%3d%3d%3d%3d%3d%3d%3d%3d%3d%3d%3d"

  # Renders this atom as one MDL atom line.
  #
  # mapping:: value written into the tenth of the twelve integer fields
  #           (default 0); the other eleven are always 0.
  #
  # Returns the formatted String (no trailing newline).
  def to_mdl(mapping = 0)
    integer_fields = Array.new(12, 0)
    integer_fields[9] = mapping
    Kernel.format(MDLAtomLineFormat, x, y, z, element, *integer_fields)
  end

end
38
+
39
# MDL bond-line rendering for any object that exposes +v+.
module Bond

  # Renders this bond as the leading fields of an MDL bond line.
  #
  # from:: number written into the first 3-column field.
  # to::   number written into the second 3-column field.
  #
  # The third field is +v+ and the fourth is always 0; the String ends with a
  # single space and has no trailing newline.
  def to_mdl(from, to)
    fields = [from, to, v, 0]
    Kernel.format("%3d%3d%3d%3d ", *fields)
  end

end
46
+
47
# MDL rxn output hooks for reaction objects.
module Reaction

  # Placeholder for MDL rxn serialization.
  #
  # Not implemented: writes nothing and always returns nil, exactly as the
  # previous version did (its body sat behind an unconditional +return+ and
  # referred to an output stream that was never defined).
  #
  # FIXME: pick an output target +out+ and emit, in order:
  #   out.puts "$RXN"
  #   out.puts
  #   out.puts "ISIS 112620051015"
  #   out.puts
  #   out.puts "%3d%3d" % [@reactants.length, @products.length]
  #   then output_mdl_mol(mol, out) for every reactant and every product.
  def to_mdl_rxn
    nil
  end

  private

  # Writes one "$MOL" section to +out+: the "$MOL" marker, one atom line per
  # node of +mol+ (each rendered with mapping 10) and the "M  END" terminator.
  # NOTE(review): no header, counts line or bond lines are written - confirm
  # what the finished rxn writer is expected to emit here.
  def output_mdl_mol mol, out
    out.puts "$MOL"
    mol.nodes.each { |node| out.puts node.to_mdl(10) }
    out.puts "M  END"
  end

end
71
+
72
+ module MDL
73
+
74
# One atom line of an MDL molfile. The raw line is stored and each field is
# sliced out of it (0-based character columns) the first time it is asked for.
class MDLAtom

  require 'chem/utils/transform'
  include Atom
  include Chem::Transform::ThreeDimension

  Stereo = {
    0 => :not_stereo,
    1 => :odd,
    2 => :even,
    3 => :either
  }

  # Settable by the caller; the field readers below never touch it.
  attr_accessor :number

  # line:: the raw atom line as a String.
  def initialize(line)
    @line = line
  end

  # Returns atomic symbol: columns 30..32, stripped, as a Symbol.
  def element
    @element ||= @line[30, 3].strip.intern
  end

  # Columns 0..9 as a Float.
  def x
    @x ||= @line[0, 10].to_f
  end

  # Columns 10..19 as a Float.
  def y
    @y ||= @line[10, 10].to_f
  end

  # Coordinates for z-axis: columns 20..29 as a Float.
  def z
    @z ||= @line[20, 10].to_f
  end

  # Difference from mass in periodic table: columns 33..35, returned as the
  # raw substring (no numeric conversion is applied).
  def mass_difference
    @mass_difference ||= @line[33, 3]
  end

  # Columns 36..38 as an Integer.
  def charge
    @charge ||= @line[36, 3].to_i
  end

  # Columns 39..41 as an Integer.
  def stereo_parity
    @stereo_parity ||= @line[39, 3].to_i
  end

  # Columns 42..44 as an Integer.
  def hydrogen_count
    @hydrogen_count ||= @line[42, 3].to_i
  end

  # Columns 45..47 as an Integer.
  def stereo_care_box
    @stereo_care_box ||= @line[45, 3].to_i
  end

  # Columns 48..50 as an Integer.
  def valence
    @valence ||= @line[48, 3].to_i
  end

  # Columns 51..53 as an Integer.
  def h0_designator
    @h0_designator ||= @line[51, 3].to_i
  end

  # 54..56 Not used
  # 57..59 Not used

  # Columns 60..62 as an Integer.
  def mapping
    @mapping ||= @line[60, 3].to_i
  end

  # Columns 63..65 as an Integer.
  def inversion
    @inversion ||= @line[63, 3].to_i
  end

  # Columns 66..68 as an Integer.
  def exact_charge
    @exact_charge ||= @line[66, 3].to_i
  end

end
113
+
114
# One bond line of an MDL molfile. The raw line is stored and each field is
# sliced out of it (0-based character columns) on demand.
class MDLBond

  include Bond

  Stereo = {
    0 => :not_stereo,
    1 => :up,
    3 => :cis_trans,
    4 => :either,
    6 => :down
  }

  BondType = {
    1 => :single,
    2 => :double,
    3 => :triple,
    4 => :aromatic,
    5 => :single_or_double,
    6 => :single_or_aromatic,
    7 => :double_or_aromatic,
    8 => :any
  }

  ReactingCenter = {
    0 => :unmarked,
    1 => :center,
    -1 => :not,
    2 => :no_change,
    4 => :made_or_broken,
    8 => :order_changes
  }

  Topology = {
    0 => :either,
    1 => :ring,
    2 => :chain
  }

  # line:: the raw bond line as a String.
  def initialize line ; @line = line ; end

  # Columns 6..8 as an Integer; this is the key #bond_type looks up in
  # BondType and the value Bond#to_mdl writes back out.
  def v ; @v ||= @line[6..8].to_i ; end

  # Columns 13..15 mapped through Topology (nil for an unlisted code).
  def topology ; @topology ||= Topology[@line[13..15].to_i] ; end
  # Columns 16..18 mapped through ReactingCenter (nil for an unlisted code).
  def reacting_center ; @reacting_center ||= ReactingCenter[@line[16..18].to_i] ; end
  # Columns 9..11 mapped through Stereo (nil for an unlisted code).
  def stereo ; @stereo ||= Stereo[@line[9..11].to_i] ; end
  # +v+ mapped through BondType (nil for an unlisted code). Cached in its own
  # variable: caching it in @v made #v return the Symbol when #bond_type was
  # called first, and made #bond_type return the Integer when #v was called
  # first.
  def bond_type ; @bond_type ||= BondType[v] ; end

end
162
+
163
# Molfile reading for classes that keep their atoms in @nodes and their bonds
# in @edges (both Arrays, created by the including class).
module MdlMolParser

  # Path given to the most recent #open call.
  attr_reader :filename

  # Reads the molfile at +filename+ into this object.
  #
  # The file is closed again before returning, including when parsing raises.
  # Returns self.
  def open(filename)
    @filename = filename
    File.open(filename) { |input| parse(input) }
  end

  # The second header line exactly as read by #parse (line terminator
  # included).
  def entry
    @title
  end
  alias name entry

  # Parses one molfile from +input+.
  #
  # input:: an IO-like object providing +readline+ and +each+.
  #
  # Appends one MDLAtom per atom line to @nodes (numbered from 1 through
  # +number=+) and one [MDLBond, atom, atom] triple per bond line to @edges,
  # then consumes lines up to and including the "M  END" record, or to the end
  # of input when there is none.
  #
  # Raises RuntimeError when a count on the counts line is outside 0..999 or a
  # bond line refers to an atom number outside 1..atom count. EOFError from
  # +readline+ propagates when the input is truncated.
  #
  # Returns self.
  def parse input
    input.readline                  # first header line is not kept
    @title = input.readline
    input.readline                  # third header line is not kept
    counts = input.readline
    n_atom = counts[0..2].to_i
    n_bond = counts[3..5].to_i

    if n_atom < 0 || n_atom > 999 || n_bond < 0 || n_bond > 999
      raise "counts line format error"
    end

    n_atom.times do |n|
      atom = MDLAtom.new(input.readline)
      atom.number = n + 1
      @nodes.push(atom)
    end

    n_bond.times do
      line = input.readline
      bond = MDLBond.new(line)
      b_n = line[0..2].to_i
      e_n = line[3..5].to_i
      unless b_n.between?(1, n_atom) && e_n.between?(1, n_atom)
        # The offending line travels with the exception instead of being
        # printed to standard output.
        raise "MDL bond line format error: #{line.inspect}"
      end

      @edges.push([bond, @nodes[b_n - 1], @nodes[e_n - 1]])
    end

    # Accept any run of whitespace between "M" and "END" so that the standard
    # two-space "M  END" terminator is recognized as well as "M END".
    input.each do |rest|
      break if /M\s+END/.match(rest)
    end
    self
  end
end
213
+
214
# A molecule read from an MDL molfile: atoms in +nodes+, bonds in +edges+ as
# [bond, atom, atom] triples (both filled in by MdlMolParser#parse).
class MdlMolecule

  include Molecule
  include Enumerable
  include MdlMolParser

  attr_reader :nodes, :edges

  # Starts with no atoms and no bonds.
  def initialize
    @nodes = []
    @edges = []
  end

  # Builds a molecule from an already opened molfile stream.
  #
  # input:: IO-like object positioned at the first header line.
  #
  # Returns the parsed MdlMolecule.
  def self.parse_io input
    mol = MdlMolecule.new
    mol.parse input
  end

  # Builds a molecule from the molfile at path +file+.
  #
  # File.open is used rather than Kernel#open so that the argument is only
  # ever treated as a path (Kernel#open runs a command for arguments starting
  # with "|"), and the block form closes the file before returning.
  #
  # Returns the parsed MdlMolecule.
  def self.parse file
    File.open(file) do |input|
      mol = MdlMolecule.new
      mol.parse input
    end
  end

end
239
+
240
# An atom of a reaction, pairing the atom on the reactant side with the atom
# on the product side. Calls this class does not define are forwarded to a
# "representative": the reactant-side atom when one has been assigned,
# otherwise the product-side atom.
class RxnAtom
  include Atom
  # Writers are defined by hand below because they also pick the
  # representative.
  attr_reader :reactant, :product

  # Sets the reactant-side atom and makes it the representative (assigning nil
  # clears the representative as well).
  def reactant= rct
    @reactant = @representative = rct
  end

  # Sets the product-side atom; it becomes the representative only when no
  # representative is set yet.
  def product= prd
    @product = prd
    @representative = prd unless @representative
  end

  # Forwards any other message, with its arguments and block, to the
  # representative.
  def method_missing name, *args, &block
    @representative.send(name, *args, &block)
  end

  # Keeps respond_to? in step with the forwarding done by method_missing.
  def respond_to_missing? name, include_private = false
    @representative.respond_to?(name, include_private) || super
  end

  def x ; @representative.x ; end
  def y ; @representative.y ; end
  def element ; @representative.element ; end

end
263
+
264
# A bond of a reaction, pairing the bond on the reactant side with the bond on
# the product side; either side may be missing.
class RxnBond
  include Bond
  attr_accessor :reactant, :product

  # Change in +v+ across the reaction:
  # * both sides present - product v minus reactant v
  # * reactant only      - the reactant v negated
  # * otherwise          - the product v (raises NoMethodError when neither
  #                        side has been assigned)
  def v
    return @product.v - @reactant.v if @reactant && @product
    return -@reactant.v if @reactant

    @product.v
  end

end
280
+
281
# A reaction read from an MDL rxn file.
#
# After #open_rxn, +nodes+ holds one RxnAtom per atom-mapping number found on
# either side, and +edges+ holds [RxnBond, atom1, atom2] triples.
# NOTE(review): the atoms inside the edge triples are the atoms of the parsed
# reactant/product molecules, not the RxnAtom wrappers stored in +nodes+ -
# confirm whether consumers such as Molecule#save_as_mdl expect the wrappers.
class MdlReaction

  include Molecule
  include Reaction
  include Enumerable

  attr_reader :nodes, :edges
  # Path given to the most recent #open_rxn call.
  attr_reader :filename

  def initialize
    @nodes = []
    @edges = []
    @reactants = []
    @products = []
  end

  # Reads the rxn file at +filename+ and builds +nodes+ and +edges+ from it.
  #
  # The file is closed as soon as all molecules have been read, including when
  # reading raises. Returns self.
  def open_rxn(filename)
    @filename = filename
    File.open(filename) do |input|
      n_reactants, n_products = parse_header(input)
      read_mol(input, n_reactants, @reactants, @r_atoms = {})
      read_mol(input, n_products, @products, @p_atoms = {})
    end
    construct
    self
  end

  private

  # Pairs reactant and product atoms by mapping number, then pairs their
  # bonds.
  def construct
    @p2r = {}
    @r2p = {}
    # Union, not concatenation: a mapping number present on both sides must
    # yield a single RxnAtom.
    (@r_atoms.keys | @p_atoms.keys).each do |k|
      r_atom = @r_atoms[k]
      p_atom = @p_atoms[k]
      ratom = RxnAtom.new
      ratom.reactant = r_atom
      ratom.product = p_atom
      @p2r[p_atom] = r_atom if p_atom
      @r2p[r_atom] = p_atom if r_atom
      @nodes.push(ratom)
    end

    get_edge_hash(@products, p_edge_hash = {})
    already = []
    @reactants.each do |mol|
      mol.edges.each do |edge, atom1, atom2|
        bond = RxnBond.new
        bond.reactant = edge
        if @r2p[atom1] && @r2p[atom2]
          # Both ends survive into the products: look for the bond joining
          # their counterparts (nil when the bond is broken).
          bond.product = p_edge_hash[[@r2p[atom1], @r2p[atom2]].sort_by { |a| a.number }]
          already.push(bond.product)
        end
        # Same [bond, atom1, atom2] shape as the product-only edges below.
        @edges.push([bond, atom1, atom2])
      end
    end
    @products.each do |mol|
      mol.edges.each do |bond, atom1, atom2|
        # Skip product bonds already attached to a reactant bond above.
        next if already.include?(bond)
        r_bond = RxnBond.new
        r_bond.product = bond
        @edges.push([r_bond, atom1, atom2])
      end
    end
  end

  # Fills +hash+ with every bond of +mols+, keyed by its two atoms ordered by
  # their +number+.
  def get_edge_hash mols, hash
    mols.each do |mol|
      mol.edges.each do |edge, atom1, atom2|
        hash[[atom1, atom2].sort_by { |a| a.number }] = edge
      end
    end
  end

  # Reads +n+ "$MOL" sections from +input+, appending each parsed molecule to
  # +mols+ and recording every atom with a non-zero mapping number in +atoms+
  # under that number.
  def read_mol input, n, mols, atoms
    n.times do
      # Skip forward to the next "$MOL" marker.
      loop do
        line = input.readline # $MOL
        break if /\$MOL/.match(line)
      end
      mol = MdlMolecule.parse_io(input)
      mol.nodes.each do |a|
        next if a.mapping == 0
        atoms[a.mapping] = a
      end
      mols.push mol
    end
  end

  # Skips the four header lines and returns the whitespace-separated integers
  # of the following line (reactant count, product count).
  def parse_header input
    4.times { input.readline }
    input.readline.split.map { |n| n.to_i }
  end

end
375
+
376
+ end
377
+
378
+ end
379
+