chemruby 0.9.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (241) hide show
  1. data/README +120 -0
  2. data/Rakefile +195 -0
  3. data/ext/extconf.rb +4 -0
  4. data/ext/subcomp.c +416 -0
  5. data/lib/chem.rb +130 -0
  6. data/lib/chem/appl.rb +1 -0
  7. data/lib/chem/appl/chem3dole.rb +36 -0
  8. data/lib/chem/appl/tinker/nucleic.rb +40 -0
  9. data/lib/chem/appl/tinker/tinker_reader.rb +43 -0
  10. data/lib/chem/data.rb +4 -0
  11. data/lib/chem/data/atomic_weight.rb +124 -0
  12. data/lib/chem/data/character.rb +2 -0
  13. data/lib/chem/data/electronegativity.rb +14 -0
  14. data/lib/chem/data/periodic_table.rb +6 -0
  15. data/lib/chem/data/prime_numbers.rb +1 -0
  16. data/lib/chem/data/vdw_radii.rb +1 -0
  17. data/lib/chem/db.rb +64 -0
  18. data/lib/chem/db/cansmi.rb +234 -0
  19. data/lib/chem/db/cdx.rb +1525 -0
  20. data/lib/chem/db/eps.rb +164 -0
  21. data/lib/chem/db/g98.rb +909 -0
  22. data/lib/chem/db/gspan.rb +130 -0
  23. data/lib/chem/db/iupac.rb +5 -0
  24. data/lib/chem/db/iupac/a_1.rb +46 -0
  25. data/lib/chem/db/iupac/iuparser.rb +226 -0
  26. data/lib/chem/db/iupac/iuparser.ry +97 -0
  27. data/lib/chem/db/iupac/postfix.rb +2 -0
  28. data/lib/chem/db/kcf.rb +390 -0
  29. data/lib/chem/db/kcf_glycan.rb +19 -0
  30. data/lib/chem/db/kegg.rb +516 -0
  31. data/lib/chem/db/linucs/linparser.rb +144 -0
  32. data/lib/chem/db/linucs/linucs.ry +53 -0
  33. data/lib/chem/db/mdl.rb +379 -0
  34. data/lib/chem/db/molconnz.rb +12 -0
  35. data/lib/chem/db/mopac.rb +88 -0
  36. data/lib/chem/db/msi.rb +107 -0
  37. data/lib/chem/db/pdb_dic.rb +115 -0
  38. data/lib/chem/db/pdf.rb +131 -0
  39. data/lib/chem/db/pubchem.rb +113 -0
  40. data/lib/chem/db/rmagick.rb +70 -0
  41. data/lib/chem/db/sdf.rb +37 -0
  42. data/lib/chem/db/smbl.rb +88 -0
  43. data/lib/chem/db/smiles.rb +2 -0
  44. data/lib/chem/db/smiles/smiles.ry +203 -0
  45. data/lib/chem/db/smiles/smiparser.rb +375 -0
  46. data/lib/chem/db/swf.rb +74 -0
  47. data/lib/chem/db/sybyl.rb +150 -0
  48. data/lib/chem/db/tinker.rb +77 -0
  49. data/lib/chem/db/types/type_cansmi.rb +9 -0
  50. data/lib/chem/db/types/type_cdx.rb +24 -0
  51. data/lib/chem/db/types/type_gspan.rb +31 -0
  52. data/lib/chem/db/types/type_kcf.rb +28 -0
  53. data/lib/chem/db/types/type_kcf_glycan.rb +26 -0
  54. data/lib/chem/db/types/type_kegg.rb +92 -0
  55. data/lib/chem/db/types/type_mdl.rb +31 -0
  56. data/lib/chem/db/types/type_pdf.rb +33 -0
  57. data/lib/chem/db/types/type_png.rb +31 -0
  58. data/lib/chem/db/types/type_rxn.rb +25 -0
  59. data/lib/chem/db/types/type_sdf.rb +25 -0
  60. data/lib/chem/db/types/type_sybyl.rb +30 -0
  61. data/lib/chem/db/types/type_xyz.rb +26 -0
  62. data/lib/chem/db/vector.rb +128 -0
  63. data/lib/chem/db/xyz.rb +39 -0
  64. data/lib/chem/model.rb +119 -0
  65. data/lib/chem/model/skeleton.rb +37 -0
  66. data/lib/chem/utils.rb +11 -0
  67. data/lib/chem/utils/geometry.rb +27 -0
  68. data/lib/chem/utils/graph_db.rb +146 -0
  69. data/lib/chem/utils/math.rb +17 -0
  70. data/lib/chem/utils/prop.rb +123 -0
  71. data/lib/chem/utils/sssr.rb +101 -0
  72. data/lib/chem/utils/sub.rb +78 -0
  73. data/lib/chem/utils/transform.rb +110 -0
  74. data/lib/chem/utils/traverse.rb +37 -0
  75. data/lib/chem/utils/ullmann.rb +134 -0
  76. data/lib/graph.rb +41 -0
  77. data/lib/graph/cluster.rb +20 -0
  78. data/lib/graph/morgan.rb +38 -0
  79. data/sample/frequent_subgraph.rb +46 -0
  80. data/sample/images/ex1.rb +11 -0
  81. data/sample/images/ex2.rb +4 -0
  82. data/sample/images/ex3.rb +5 -0
  83. data/sample/images/ex4.rb +17 -0
  84. data/sample/images/ex5.rb +10 -0
  85. data/sample/images/mol/adenine.mol +26 -0
  86. data/sample/images/mol/atp.mol +69 -0
  87. data/sample/images/temp/ex5.mol +344 -0
  88. data/sample/kegg_db.rb +116 -0
  89. data/setup.rb +1551 -0
  90. data/test/all.rb +6 -0
  91. data/test/coord_test.rb +17 -0
  92. data/test/ctab_test.rb +31 -0
  93. data/test/data/A_21.tar.gz +0 -0
  94. data/test/data/A_21/aceanthrylene.cdx +0 -0
  95. data/test/data/A_21/aceanthrylene.mol +40 -0
  96. data/test/data/A_21/acenaphthylene.cdx +0 -0
  97. data/test/data/A_21/acenaphthylene.mol +31 -0
  98. data/test/data/A_21/acephenanthrylene.cdx +0 -0
  99. data/test/data/A_21/acephenanthrylene.mol +40 -0
  100. data/test/data/A_21/anthracene.cdx +0 -0
  101. data/test/data/A_21/anthracene.mol +35 -0
  102. data/test/data/A_21/as-indacene.cdx +0 -0
  103. data/test/data/A_21/as-indacene.mol +31 -0
  104. data/test/data/A_21/azulene.cdx +0 -0
  105. data/test/data/A_21/azulene.mol +26 -0
  106. data/test/data/A_21/biphenylene.cdx +0 -0
  107. data/test/data/A_21/biphenylene.mol +31 -0
  108. data/test/data/A_21/chrysene.cdx +0 -0
  109. data/test/data/A_21/chrysene.mol +44 -0
  110. data/test/data/A_21/coronen.cdx +0 -0
  111. data/test/data/A_21/coronen.mol +59 -0
  112. data/test/data/A_21/fluoranthene.cdx +0 -0
  113. data/test/data/A_21/fluoranthene.mol +40 -0
  114. data/test/data/A_21/fluorene.cdx +0 -0
  115. data/test/data/A_21/fluorene.mol +33 -0
  116. data/test/data/A_21/heptacene.cdx +0 -0
  117. data/test/data/A_21/heptacene.mol +71 -0
  118. data/test/data/A_21/heptalene.cdx +0 -0
  119. data/test/data/A_21/heptalene.mol +30 -0
  120. data/test/data/A_21/heptaphene.cdx +0 -0
  121. data/test/data/A_21/heptaphene.mol +71 -0
  122. data/test/data/A_21/hexacene.cdx +0 -0
  123. data/test/data/A_21/hexacene.mol +62 -0
  124. data/test/data/A_21/hexaphene.cdx +0 -0
  125. data/test/data/A_21/hexaphene.mol +62 -0
  126. data/test/data/A_21/indene.cdx +0 -0
  127. data/test/data/A_21/indene.mol +24 -0
  128. data/test/data/A_21/iupac.txt +41 -0
  129. data/test/data/A_21/naphthacene.cdx +0 -0
  130. data/test/data/A_21/naphthacene.mol +44 -0
  131. data/test/data/A_21/naphthalene.cdx +0 -0
  132. data/test/data/A_21/naphthalene.mol +26 -0
  133. data/test/data/A_21/ovalene.cdx +0 -0
  134. data/test/data/A_21/ovalene.mol +78 -0
  135. data/test/data/A_21/pentacene.cdx +0 -0
  136. data/test/data/A_21/pentacene.mol +53 -0
  137. data/test/data/A_21/pentalene.cdx +0 -0
  138. data/test/data/A_21/pentalene.mol +22 -0
  139. data/test/data/A_21/pentaphene.cdx +0 -0
  140. data/test/data/A_21/pentaphene.mol +53 -0
  141. data/test/data/A_21/perylene.cdx +0 -0
  142. data/test/data/A_21/perylene.mol +49 -0
  143. data/test/data/A_21/phenalene.cdx +0 -0
  144. data/test/data/A_21/phenalene.mol +33 -0
  145. data/test/data/A_21/phenanthrene.cdx +0 -0
  146. data/test/data/A_21/phenanthrene.mol +35 -0
  147. data/test/data/A_21/picene.cdx +0 -0
  148. data/test/data/A_21/picene.mol +53 -0
  149. data/test/data/A_21/pleiadene.cdx +0 -0
  150. data/test/data/A_21/pleiadene.mol +44 -0
  151. data/test/data/A_21/pyranthrene.cdx +0 -0
  152. data/test/data/A_21/pyranthrene.mol +72 -0
  153. data/test/data/A_21/pyrene.cdx +0 -0
  154. data/test/data/A_21/pyrene.mol +40 -0
  155. data/test/data/A_21/rubicene.cdx +0 -0
  156. data/test/data/A_21/rubicene.mol +63 -0
  157. data/test/data/A_21/s-indacene.cdx +0 -0
  158. data/test/data/A_21/s-indacene.mol +31 -0
  159. data/test/data/A_21/tetraphenylene.cdx +0 -0
  160. data/test/data/A_21/tetraphenylene.mol +57 -0
  161. data/test/data/A_21/trinaphthylene.cdx +0 -0
  162. data/test/data/A_21/trinaphthylene.mol +71 -0
  163. data/test/data/A_21/triphenylene.cdx +0 -0
  164. data/test/data/A_21/triphenylene.mol +44 -0
  165. data/test/data/C00147.kcf +25 -0
  166. data/test/data/G00147.kcf +13 -0
  167. data/test/data/atp.mol +69 -0
  168. data/test/data/cyclohexane.mol +17 -0
  169. data/test/data/cyclohexane.ps +485 -0
  170. data/test/data/fullerene.mol +155 -0
  171. data/test/data/glycan +33 -0
  172. data/test/data/hypericin.cdx +0 -0
  173. data/test/data/hypericin.cdxml +596 -0
  174. data/test/data/hypericin.chm +0 -0
  175. data/test/data/hypericin.ct +85 -0
  176. data/test/data/hypericin.f1d +0 -0
  177. data/test/data/hypericin.f1q +0 -0
  178. data/test/data/hypericin.gif +0 -0
  179. data/test/data/hypericin.mol +88 -0
  180. data/test/data/hypericin.mol2 +159 -0
  181. data/test/data/hypericin.msm +123 -0
  182. data/test/data/hypericin.pdf +359 -0
  183. data/test/data/hypericin.png +0 -0
  184. data/test/data/hypericin.ps +0 -0
  185. data/test/data/hypericin.skc +0 -0
  186. data/test/data/hypericin2.gif +0 -0
  187. data/test/data/hypericin2.ps +0 -0
  188. data/test/data/kegg/genomes/hsa/hsa_enzyme.list +4 -0
  189. data/test/data/kegg/genomes/hsa/hsa_pfam.list +4 -0
  190. data/test/data/kegg/ligand/mol/C00147.mol +26 -0
  191. data/test/data/kegg/ligand/reaction +14 -0
  192. data/test/data/kegg/ligand/reaction.lst +1 -0
  193. data/test/data/kegg/ligand/reaction_mapformula.lst +3 -0
  194. data/test/data/reaction +14 -0
  195. data/test/data/reaction.lst +1 -0
  196. data/test/data/reaction_mapformula.lst +3 -0
  197. data/test/data/rxn/C00001.mol +6 -0
  198. data/test/data/rxn/C00011.mol +10 -0
  199. data/test/data/rxn/C00014.mol +6 -0
  200. data/test/data/rxn/C01010.mol +18 -0
  201. data/test/data/rxn/sample.rxn +50 -0
  202. data/test/data/rxn/substitution.rxn +45 -0
  203. data/test/data/test.eps +0 -0
  204. data/test/data/test.mol +28 -0
  205. data/test/data/test.sdf +143 -0
  206. data/test/data/test.skc +0 -0
  207. data/test/data/test.xyz +4 -0
  208. data/test/data/test_lf.sdf +143 -0
  209. data/test/heavy_test_pubchem.rb +16 -0
  210. data/test/multiple_test.rb +22 -0
  211. data/test/test_adj.rb +54 -0
  212. data/test/test_canonical_smiles.rb +46 -0
  213. data/test/test_cdx.rb +32 -0
  214. data/test/test_chem.rb +18 -0
  215. data/test/test_cluster.rb +19 -0
  216. data/test/test_db.rb +11 -0
  217. data/test/test_eps.rb +24 -0
  218. data/test/test_geometry.rb +11 -0
  219. data/test/test_gspan.rb +28 -0
  220. data/test/test_iupac.rb +36 -0
  221. data/test/test_kcf.rb +24 -0
  222. data/test/test_kcf_glycan.rb +10 -0
  223. data/test/test_kegg.rb +118 -0
  224. data/test/test_linucs.rb +21 -0
  225. data/test/test_mdl.rb +45 -0
  226. data/test/test_mol2.rb +62 -0
  227. data/test/test_morgan.rb +21 -0
  228. data/test/test_pdf.rb +12 -0
  229. data/test/test_prop.rb +86 -0
  230. data/test/test_rmagick.rb +15 -0
  231. data/test/test_sbdb.rb +23 -0
  232. data/test/test_sdf.rb +30 -0
  233. data/test/test_smiles.rb +84 -0
  234. data/test/test_sssr.rb +18 -0
  235. data/test/test_sub.rb +47 -0
  236. data/test/test_subcomp.rb +37 -0
  237. data/test/test_traverse.rb +29 -0
  238. data/test/test_writer.rb +13 -0
  239. data/test/test_xyz.rb +15 -0
  240. data/test/type_test.rb +25 -0
  241. metadata +290 -0
@@ -0,0 +1,130 @@
1
+ #
2
+ # = gspan.rb - IO modules for gSpan format
3
+ #
4
+ # Copyright:: Copyright (C) 2005-2006
5
+ # Tadashi Kadowaki <kadowaki@kuicr.kyoto-u.ac.jp>
6
+ # Nobuya Tanaka <tanaka@kuicr.kyoto-u.ac.jp>
7
+
8
+ $: << "/home/tanaka/proj/chemruby/lib/"
9
+ $: << "/home/tanaka/proj/chemruby/ext/"
10
+
11
+ require 'chem'
12
+
13
+ module Chem
14
+
15
# Reader and writer for graphs expressed in gSpan notation.
class GSpan

  # Label of the first node of a one-line code, e.g. "(6)".
  FIRST_NODE = /\((\d+)\)/

  # One edge entry of a one-line code: " <bond> (<from><f|b><to>)".
  # Four captures are required because +parse+ reads the bond label, the
  # source node index, the direction flag and the target value. The source
  # index is optional because +parse+ ignores it for backward ('b') edges.
  # NOTE(review): exact grammar inferred from how +parse+ uses the fields;
  # confirm against real gSpan output.
  REG_NODE = / (\d+) \((\d*)([fb])(\d+)\)/

  # Parse a one-line gSpan formatted string and return a molecule object.
  #
  # str  :: the one-line gSpan code
  # name :: value assigned to the molecule's +name+ (default "")
  #
  # Raises ArgumentError when +str+ has no leading "(<label>)" node.
  def self.parse(str, name = "")
    mol = GSpanMolecule.new
    mol.name = name

    head = FIRST_NODE.match(str)
    unless head
      raise ArgumentError, "no leading node label in gSpan string: #{str.inspect}"
    end
    mol.nodes.push(GSpanAtom.new(head[1].to_i))

    str.scan(REG_NODE) do |bond_label, from_idx, direction, target|
      bond = GSpanBond.new(bond_label.to_i)
      if direction == 'f'
        # Forward edge: grows the graph by one new node labelled +target+.
        from_atom = mol.nodes[from_idx.to_i]
        to_atom = GSpanAtom.new(target.to_i)
        mol.nodes.push(to_atom)
      else
        # Backward edge: links the most recently added node to an
        # already existing node whose index is +target+.
        from_atom = mol.nodes.last
        to_atom = mol.nodes[target.to_i]
      end
      mol.edges.push([bond, from_atom, to_atom])
    end

    mol
  end

  # Save molecules as a gSpan formatted file.
  # Example :
  #   Chem::GSpan.save(mols, "filename") # mols : an array of molecules
  #
  # Each molecule is written as a "t # <index> -1 <name>" header, one
  # "v <index> <atomic_number>" line per node, one "e <from> <to> <bond.v>"
  # line per edge, and a trailing blank line.
  def self.save(mols, filename)
    # File.open (not Kernel#open) so +filename+ is always treated as a path.
    File.open(filename, "w") do |out|
      mols.each_with_index do |mol, mol_idx|
        out.puts "t # #{mol_idx} -1 #{mol.name}"
        mol.nodes.each_with_index do |node, idx|
          out.puts "v %d %d" % [idx, node.atomic_number]
        end
        mol.edges.each do |bond, node1, node2|
          out.puts "e %d %d %d" % [mol.nodes.index(node1), mol.nodes.index(node2), bond.v]
        end
        out.puts
      end
    end
  end

end
65
+
66
# Concrete molecule class used for graphs read from or written to gSpan data.
class GSpanMolecule
  include Molecule
  include Enumerable

  # A new molecule starts with empty node and edge lists.
  def initialize
    @nodes, @edges = [], []
  end
end
78
+
79
# Atom (graph node) of a gSpan molecule.
class GSpanAtom
  include Atom

  # element :: integer label; it is looked up in Number2Element and the
  #            result is stored in @element.
  def initialize(element)
    @element = Number2Element[element]
  end

  # Build an atom from one text line, using the third whitespace-separated
  # field (converted with to_i) as the element label.
  def self.parse_line(line)
    fields = line.split
    new(fields[2].to_i)
  end
end
92
+
93
# Bond (graph edge) of a gSpan molecule.
class GSpanBond
  include Bond

  # v :: the edge label, stored unchanged in @v.
  def initialize(v)
    @v = v
  end
end
102
+
103
# Read a multi-graph gSpan file and return an array of GSpanMolecule objects.
#
# file :: path of the file to read
#
# Lines are dispatched on their first character:
#   "t" starts a new molecule, which is appended to the result,
#   "v" adds a node built by GSpanAtom.parse_line,
#   "e" adds an edge [bond, node1, node2]; fields 1 and 2 are node indices
#       and field 3 is the bond label.
# Any other line is ignored.
def self.parse_gspan(file)
  mols = []
  # Receives "v"/"e" lines that appear before the first "t" line; such a
  # molecule is never added to the result.
  mol = GSpanMolecule.new

  # Block form closes the handle; File.open always treats +file+ as a path.
  File.open(file) do |io|
    io.each_line do |line|
      case line[0, 1]
      when "t"
        mol = GSpanMolecule.new
        mols.push(mol)
      when "v"
        mol.nodes.push(GSpanAtom.parse_line(line))
      when "e"
        ary = line.split
        node1 = mol.nodes[ary[1].to_i]
        node2 = mol.nodes[ary[2].to_i]
        mol.edges.push([GSpanBond.new(ary[3].to_i), node1, node2])
      end
    end
  end

  mols
end
128
+
129
+
130
+ end
@@ -0,0 +1,5 @@
1
+
2
+ require 'chem/db/iupac/a_1'
3
+ require 'chem/db/iupac/postfix'
4
+ require 'chem/db/iupac/iuparser'
5
+
@@ -0,0 +1,46 @@
1
# Flat list of (number, name stem) pairs, in ascending numeric order.
# NOTE(review): presumably the numerical terms of IUPAC rule A-1.1 — confirm.
a_1_1 = [1, "meth",
         2, "eth",
         3, "prop",
         4, "but",
         5, "pent",
         6, "hex",
         7, "hept",
         8, "oct",
         9, "non",
         10, "dec",
         11, "undec",
         12, "dodec",
         13, "tridec",
         14, "tetradec",
         15, "pentadec",
         16, "hexadec",
         17, "heptadec",
         18, "octadec",
         19, "nonadec",
         20, "icos",
         21, "henicos",
         22, "docos",
         23, "tricos",
         24, "tetracos",
         25, "pentacos",
         26, "hexacos",
         27, "heptacos",
         28, "octacos",
         29, "nonacos",
         30, "triacont",
         31, "hentriacont",
         32, "dotriacont",
         33, "tritriacont",
         40, "tetracont",
         50, "pentacont",
         60, "hexacont",
         70, "heptacont",
         80, "octacont",
         90, "nonacont",
         100, "hect",
         132, "dotriacontahect"]

# Number => stem lookup built from the flat pair list.
a = Hash[*a_1_1]

# Alternation over every stem in the table. Reverse alphabetical order puts
# a longer stem ahead of any shorter stem that is its prefix (e.g.
# "pentadec" before "pent"), so the longest stem wins at a given position.
# Deriving the pattern from the table keeps the two from drifting apart.
$reg_a_1_1 = Regexp.new(a.values.sort.reverse.join("|"))
46
+
@@ -0,0 +1,226 @@
1
+ #
2
+ # DO NOT MODIFY!!!!
3
+ # This file is automatically generated by racc 1.4.4
4
+ # from racc grammar file "iuparser.ry".
5
+ #
6
+
7
+ require 'racc/parser'
8
+
9
+
10
+ class IupacParser < Racc::Parser
11
+
12
+ module_eval <<'..end iuparser.ry modeval..id57c30dc50e', 'iuparser.ry', 16
13
+
14
# Called by the racc runtime: hands over the next queued [token, value] pair.
def next_token
  @q.shift
end

# Tokenize +line+ (case-insensitively) into @q and run the parser.
#
# line :: the name to parse; the caller's string is not modified
#
# Returns the result of do_parse.
# Raises Racc::ParseError when a character sequence matches no token rule.
def parse( line )
  # Work on a lower-cased copy: downcase! would mutate the caller's string
  # and fail on frozen strings.
  rest = line.downcase
  @q = []

  until rest.empty? do
    case rest
    when /\A(hex|prop|meth|non|but|dec)/
      @q.push([:SKELETON, $&])
    when /\A(ane)/
      @q.push([:POSTFIX, $&])
    when /\Ayl/
      @q.push([:YL, 'yl'])
    when /\A(di|tri)/
      @q.push([:GREEK_NUM, $&])
    when /\A\d+/
      @q.push([:NUMBER, $&.to_i])
    when /\A-/
      @q.push([:HYPHEN, '-'])
    when /\A,/
      @q.push([:COMMA, ','])
    when /\A[()]/
      # NOTE(review): ROUND_B has no entry in racc_token_table, so the
      # parser presumably rejects it — confirm whether brackets are meant
      # to be supported.
      @q.push([:ROUND_B, $&])
    else
      # Without this branch $' is nil after the failed matches and the
      # loop would die with NoMethodError on nil.
      raise Racc::ParseError, "unrecognized input at: #{rest.inspect}"
    end
    # Continue with the text after the token that was just matched.
    rest = $'
  end
  @q.push([:EOL, nil])
  do_parse
end
46
+
47
+ # class IupacParser
48
+ # rule
49
+ # iupac : name EOF
50
+ # {
51
+ # puts 'The END'
52
+ # }
53
+ # name : SKELETON POSTFIX
54
+ # {
55
+ # p val
56
+ # result = val
57
+ # }
58
+ # end
59
+
60
+ # ---- inner
61
+
62
+ # def next_token
63
+ # @q.shift
64
+ # end
65
+
66
+ # def parse( line )
67
+ # @q = []
68
+
69
+ # # line.strip
70
+ # until line.empty? do
71
+ # case line
72
+ # when /hex/# $reg_a_1_1
73
+ # @q.push [:SKELETON, $&]
74
+ # when /ane/# $reg_postfix
75
+ # @q.push [:POSTFIX, $&]
76
+ # when /\A\d+/
77
+ # @q.push [:NUMBER, $&.to_i ]
78
+ # when /\A./
79
+ # @q.push [$&, $&]
80
+ # else
81
+ # raise RuntimeError, 'must not happen'
82
+ # end
83
+ # line = $'
84
+ # end
85
+ # @q.push [ :EOL, nil ]
86
+ # p @q
87
+ # do_parse
88
+ # end
89
+
90
+ ..end iuparser.ry modeval..id57c30dc50e
91
+
92
+ ##### racc 1.4.4 generates ###
93
+
94
+ racc_reduce_table = [
95
+ 0, 0, :racc_error,
96
+ 4, 11, :_reduce_none,
97
+ 0, 12, :_reduce_none,
98
+ 3, 12, :_reduce_none,
99
+ 2, 12, :_reduce_none,
100
+ 3, 13, :_reduce_none,
101
+ 1, 13, :_reduce_none,
102
+ 2, 15, :_reduce_none,
103
+ 3, 15, :_reduce_none,
104
+ 1, 14, :_reduce_none,
105
+ 3, 14, :_reduce_none ]
106
+
107
+ racc_reduce_n = 11
108
+
109
+ racc_shift_n = 23
110
+
111
+ racc_action_table = [
112
+ 5, 18, 16, 8, 16, 9, 10, 9, 10, 9,
113
+ 14, 11, 12, 19, 15, 13, 3, 21, 13, 22 ]
114
+
115
+ racc_action_check = [
116
+ 2, 9, 8, 2, 14, 2, 2, 8, 8, 14,
117
+ 6, 3, 5, 12, 6, 5, 1, 15, 16, 18 ]
118
+
119
+ racc_action_pointer = [
120
+ nil, 16, -2, 11, nil, 9, 5, nil, 0, -1,
121
+ nil, nil, 9, nil, 2, 9, 12, nil, 13, nil,
122
+ nil, nil, nil ]
123
+
124
+ racc_action_default = [
125
+ -2, -11, -11, -11, -4, -11, -11, -6, -11, -11,
126
+ -9, 23, -11, -7, -11, -11, -11, -3, -11, -1,
127
+ -5, -10, -8 ]
128
+
129
+ racc_goto_table = [
130
+ 4, 1, 2, 20, nil, nil, 17 ]
131
+
132
+ racc_goto_check = [
133
+ 3, 1, 2, 5, nil, nil, 3 ]
134
+
135
+ racc_goto_pointer = [
136
+ nil, 1, 2, -2, nil, -11 ]
137
+
138
+ racc_goto_default = [
139
+ nil, nil, nil, nil, 6, 7 ]
140
+
141
+ racc_token_table = {
142
+ false => 0,
143
+ Object.new => 1,
144
+ :SKELETON => 2,
145
+ :POSTFIX => 3,
146
+ :EOL => 4,
147
+ :HYPHEN => 5,
148
+ :YL => 6,
149
+ :GREEK_NUM => 7,
150
+ :NUMBER => 8,
151
+ :COMMA => 9 }
152
+
153
+ racc_use_result_var = true
154
+
155
+ racc_nt_base = 10
156
+
157
+ Racc_arg = [
158
+ racc_action_table,
159
+ racc_action_check,
160
+ racc_action_default,
161
+ racc_action_pointer,
162
+ racc_goto_table,
163
+ racc_goto_check,
164
+ racc_goto_default,
165
+ racc_goto_pointer,
166
+ racc_nt_base,
167
+ racc_reduce_table,
168
+ racc_token_table,
169
+ racc_shift_n,
170
+ racc_reduce_n,
171
+ racc_use_result_var ]
172
+
173
+ Racc_token_to_s_table = [
174
+ '$end',
175
+ 'error',
176
+ 'SKELETON',
177
+ 'POSTFIX',
178
+ 'EOL',
179
+ 'HYPHEN',
180
+ 'YL',
181
+ 'GREEK_NUM',
182
+ 'NUMBER',
183
+ 'COMMA',
184
+ '$start',
185
+ 'name',
186
+ 'prefix',
187
+ 'num_subs',
188
+ 'nums',
189
+ 'substituent']
190
+
191
+ Racc_debug_parser = false
192
+
193
+ ##### racc system variables end #####
194
+
195
+ # reduce 0 omitted
196
+
197
+ # reduce 1 omitted
198
+
199
+ # reduce 2 omitted
200
+
201
+ # reduce 3 omitted
202
+
203
+ # reduce 4 omitted
204
+
205
+ # reduce 5 omitted
206
+
207
+ # reduce 6 omitted
208
+
209
+ # reduce 7 omitted
210
+
211
+ # reduce 8 omitted
212
+
213
+ # reduce 9 omitted
214
+
215
+ # reduce 10 omitted
216
+
217
+ def _reduce_none( val, _values, result )
218
+ result
219
+ end
220
+
221
+ end # class IupacParser
222
+
223
+
224
+ # parser = IupacParser.new
225
+
226
+ # parser.parse("hexane")
@@ -0,0 +1,97 @@
1
+ class IupacParser
2
+ rule
3
+ name : prefix SKELETON POSTFIX EOL
4
+ prefix :
5
+ | prefix HYPHEN num_subs
6
+ | prefix num_subs
7
+ num_subs : nums HYPHEN substituent
8
+ | substituent
9
+ substituent : SKELETON YL
10
+ | GREEK_NUM SKELETON YL
11
+ nums : NUMBER
12
+ | nums COMMA NUMBER
13
+ end
14
+
15
+ ---- inner
16
+
17
# Called by the racc runtime: hands over the next queued [token, value] pair.
def next_token
  @q.shift
end

# Tokenize +line+ (case-insensitively) into @q and run the parser.
#
# line :: the name to parse; the caller's string is not modified
#
# Returns the result of do_parse.
# Raises Racc::ParseError when a character sequence matches no token rule.
def parse( line )
  # Work on a lower-cased copy: downcase! would mutate the caller's string
  # and fail on frozen strings.
  rest = line.downcase
  @q = []

  until rest.empty? do
    case rest
    when /\A(hex|prop|meth|non|but|dec)/
      @q.push([:SKELETON, $&])
    when /\A(ane)/
      @q.push([:POSTFIX, $&])
    when /\Ayl/
      @q.push([:YL, 'yl'])
    when /\A(di|tri)/
      @q.push([:GREEK_NUM, $&])
    when /\A\d+/
      @q.push([:NUMBER, $&.to_i])
    when /\A-/
      @q.push([:HYPHEN, '-'])
    when /\A,/
      @q.push([:COMMA, ','])
    when /\A[()]/
      # NOTE(review): ROUND_B is not used by any grammar rule in this file,
      # so the parser presumably rejects it — confirm whether brackets are
      # meant to be supported.
      @q.push([:ROUND_B, $&])
    else
      # Without this branch $' is nil after the failed matches and the
      # loop would die with NoMethodError on nil.
      raise Racc::ParseError, "unrecognized input at: #{rest.inspect}"
    end
    # Continue with the text after the token that was just matched.
    rest = $'
  end
  @q.push([:EOL, nil])
  do_parse
end
49
+
50
+ # class IupacParser
51
+ # rule
52
+ # iupac : name EOF
53
+ # {
54
+ # puts 'The END'
55
+ # }
56
+ # name : SKELETON POSTFIX
57
+ # {
58
+ # p val
59
+ # result = val
60
+ # }
61
+ # end
62
+
63
+ # ---- inner
64
+
65
+ # def next_token
66
+ # @q.shift
67
+ # end
68
+
69
+ # def parse( line )
70
+ # @q = []
71
+
72
+ # # line.strip
73
+ # until line.empty? do
74
+ # case line
75
+ # when /hex/# $reg_a_1_1
76
+ # @q.push [:SKELETON, $&]
77
+ # when /ane/# $reg_postfix
78
+ # @q.push [:POSTFIX, $&]
79
+ # when /\A\d+/
80
+ # @q.push [:NUMBER, $&.to_i ]
81
+ # when /\A./
82
+ # @q.push [$&, $&]
83
+ # else
84
+ # raise RuntimeError, 'must not happen'
85
+ # end
86
+ # line = $'
87
+ # end
88
+ # @q.push [ :EOL, nil ]
89
+ # p @q
90
+ # do_parse
91
+ # end
92
+
93
+ ---- footer
94
+
95
+ # parser = IupacParser.new
96
+
97
+ # parser.parse("hexane")