chemruby 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. data/README +120 -0
  2. data/Rakefile +195 -0
  3. data/ext/extconf.rb +4 -0
  4. data/ext/subcomp.c +416 -0
  5. data/lib/chem.rb +130 -0
  6. data/lib/chem/appl.rb +1 -0
  7. data/lib/chem/appl/chem3dole.rb +36 -0
  8. data/lib/chem/appl/tinker/nucleic.rb +40 -0
  9. data/lib/chem/appl/tinker/tinker_reader.rb +43 -0
  10. data/lib/chem/data.rb +4 -0
  11. data/lib/chem/data/atomic_weight.rb +124 -0
  12. data/lib/chem/data/character.rb +2 -0
  13. data/lib/chem/data/electronegativity.rb +14 -0
  14. data/lib/chem/data/periodic_table.rb +6 -0
  15. data/lib/chem/data/prime_numbers.rb +1 -0
  16. data/lib/chem/data/vdw_radii.rb +1 -0
  17. data/lib/chem/db.rb +64 -0
  18. data/lib/chem/db/cansmi.rb +234 -0
  19. data/lib/chem/db/cdx.rb +1525 -0
  20. data/lib/chem/db/eps.rb +164 -0
  21. data/lib/chem/db/g98.rb +909 -0
  22. data/lib/chem/db/gspan.rb +130 -0
  23. data/lib/chem/db/iupac.rb +5 -0
  24. data/lib/chem/db/iupac/a_1.rb +46 -0
  25. data/lib/chem/db/iupac/iuparser.rb +226 -0
  26. data/lib/chem/db/iupac/iuparser.ry +97 -0
  27. data/lib/chem/db/iupac/postfix.rb +2 -0
  28. data/lib/chem/db/kcf.rb +390 -0
  29. data/lib/chem/db/kcf_glycan.rb +19 -0
  30. data/lib/chem/db/kegg.rb +516 -0
  31. data/lib/chem/db/linucs/linparser.rb +144 -0
  32. data/lib/chem/db/linucs/linucs.ry +53 -0
  33. data/lib/chem/db/mdl.rb +379 -0
  34. data/lib/chem/db/molconnz.rb +12 -0
  35. data/lib/chem/db/mopac.rb +88 -0
  36. data/lib/chem/db/msi.rb +107 -0
  37. data/lib/chem/db/pdb_dic.rb +115 -0
  38. data/lib/chem/db/pdf.rb +131 -0
  39. data/lib/chem/db/pubchem.rb +113 -0
  40. data/lib/chem/db/rmagick.rb +70 -0
  41. data/lib/chem/db/sdf.rb +37 -0
  42. data/lib/chem/db/smbl.rb +88 -0
  43. data/lib/chem/db/smiles.rb +2 -0
  44. data/lib/chem/db/smiles/smiles.ry +203 -0
  45. data/lib/chem/db/smiles/smiparser.rb +375 -0
  46. data/lib/chem/db/swf.rb +74 -0
  47. data/lib/chem/db/sybyl.rb +150 -0
  48. data/lib/chem/db/tinker.rb +77 -0
  49. data/lib/chem/db/types/type_cansmi.rb +9 -0
  50. data/lib/chem/db/types/type_cdx.rb +24 -0
  51. data/lib/chem/db/types/type_gspan.rb +31 -0
  52. data/lib/chem/db/types/type_kcf.rb +28 -0
  53. data/lib/chem/db/types/type_kcf_glycan.rb +26 -0
  54. data/lib/chem/db/types/type_kegg.rb +92 -0
  55. data/lib/chem/db/types/type_mdl.rb +31 -0
  56. data/lib/chem/db/types/type_pdf.rb +33 -0
  57. data/lib/chem/db/types/type_png.rb +31 -0
  58. data/lib/chem/db/types/type_rxn.rb +25 -0
  59. data/lib/chem/db/types/type_sdf.rb +25 -0
  60. data/lib/chem/db/types/type_sybyl.rb +30 -0
  61. data/lib/chem/db/types/type_xyz.rb +26 -0
  62. data/lib/chem/db/vector.rb +128 -0
  63. data/lib/chem/db/xyz.rb +39 -0
  64. data/lib/chem/model.rb +119 -0
  65. data/lib/chem/model/skeleton.rb +37 -0
  66. data/lib/chem/utils.rb +11 -0
  67. data/lib/chem/utils/geometry.rb +27 -0
  68. data/lib/chem/utils/graph_db.rb +146 -0
  69. data/lib/chem/utils/math.rb +17 -0
  70. data/lib/chem/utils/prop.rb +123 -0
  71. data/lib/chem/utils/sssr.rb +101 -0
  72. data/lib/chem/utils/sub.rb +78 -0
  73. data/lib/chem/utils/transform.rb +110 -0
  74. data/lib/chem/utils/traverse.rb +37 -0
  75. data/lib/chem/utils/ullmann.rb +134 -0
  76. data/lib/graph.rb +41 -0
  77. data/lib/graph/cluster.rb +20 -0
  78. data/lib/graph/morgan.rb +38 -0
  79. data/sample/frequent_subgraph.rb +46 -0
  80. data/sample/images/ex1.rb +11 -0
  81. data/sample/images/ex2.rb +4 -0
  82. data/sample/images/ex3.rb +5 -0
  83. data/sample/images/ex4.rb +17 -0
  84. data/sample/images/ex5.rb +10 -0
  85. data/sample/images/mol/adenine.mol +26 -0
  86. data/sample/images/mol/atp.mol +69 -0
  87. data/sample/images/temp/ex5.mol +344 -0
  88. data/sample/kegg_db.rb +116 -0
  89. data/setup.rb +1551 -0
  90. data/test/all.rb +6 -0
  91. data/test/coord_test.rb +17 -0
  92. data/test/ctab_test.rb +31 -0
  93. data/test/data/A_21.tar.gz +0 -0
  94. data/test/data/A_21/aceanthrylene.cdx +0 -0
  95. data/test/data/A_21/aceanthrylene.mol +40 -0
  96. data/test/data/A_21/acenaphthylene.cdx +0 -0
  97. data/test/data/A_21/acenaphthylene.mol +31 -0
  98. data/test/data/A_21/acephenanthrylene.cdx +0 -0
  99. data/test/data/A_21/acephenanthrylene.mol +40 -0
  100. data/test/data/A_21/anthracene.cdx +0 -0
  101. data/test/data/A_21/anthracene.mol +35 -0
  102. data/test/data/A_21/as-indacene.cdx +0 -0
  103. data/test/data/A_21/as-indacene.mol +31 -0
  104. data/test/data/A_21/azulene.cdx +0 -0
  105. data/test/data/A_21/azulene.mol +26 -0
  106. data/test/data/A_21/biphenylene.cdx +0 -0
  107. data/test/data/A_21/biphenylene.mol +31 -0
  108. data/test/data/A_21/chrysene.cdx +0 -0
  109. data/test/data/A_21/chrysene.mol +44 -0
  110. data/test/data/A_21/coronen.cdx +0 -0
  111. data/test/data/A_21/coronen.mol +59 -0
  112. data/test/data/A_21/fluoranthene.cdx +0 -0
  113. data/test/data/A_21/fluoranthene.mol +40 -0
  114. data/test/data/A_21/fluorene.cdx +0 -0
  115. data/test/data/A_21/fluorene.mol +33 -0
  116. data/test/data/A_21/heptacene.cdx +0 -0
  117. data/test/data/A_21/heptacene.mol +71 -0
  118. data/test/data/A_21/heptalene.cdx +0 -0
  119. data/test/data/A_21/heptalene.mol +30 -0
  120. data/test/data/A_21/heptaphene.cdx +0 -0
  121. data/test/data/A_21/heptaphene.mol +71 -0
  122. data/test/data/A_21/hexacene.cdx +0 -0
  123. data/test/data/A_21/hexacene.mol +62 -0
  124. data/test/data/A_21/hexaphene.cdx +0 -0
  125. data/test/data/A_21/hexaphene.mol +62 -0
  126. data/test/data/A_21/indene.cdx +0 -0
  127. data/test/data/A_21/indene.mol +24 -0
  128. data/test/data/A_21/iupac.txt +41 -0
  129. data/test/data/A_21/naphthacene.cdx +0 -0
  130. data/test/data/A_21/naphthacene.mol +44 -0
  131. data/test/data/A_21/naphthalene.cdx +0 -0
  132. data/test/data/A_21/naphthalene.mol +26 -0
  133. data/test/data/A_21/ovalene.cdx +0 -0
  134. data/test/data/A_21/ovalene.mol +78 -0
  135. data/test/data/A_21/pentacene.cdx +0 -0
  136. data/test/data/A_21/pentacene.mol +53 -0
  137. data/test/data/A_21/pentalene.cdx +0 -0
  138. data/test/data/A_21/pentalene.mol +22 -0
  139. data/test/data/A_21/pentaphene.cdx +0 -0
  140. data/test/data/A_21/pentaphene.mol +53 -0
  141. data/test/data/A_21/perylene.cdx +0 -0
  142. data/test/data/A_21/perylene.mol +49 -0
  143. data/test/data/A_21/phenalene.cdx +0 -0
  144. data/test/data/A_21/phenalene.mol +33 -0
  145. data/test/data/A_21/phenanthrene.cdx +0 -0
  146. data/test/data/A_21/phenanthrene.mol +35 -0
  147. data/test/data/A_21/picene.cdx +0 -0
  148. data/test/data/A_21/picene.mol +53 -0
  149. data/test/data/A_21/pleiadene.cdx +0 -0
  150. data/test/data/A_21/pleiadene.mol +44 -0
  151. data/test/data/A_21/pyranthrene.cdx +0 -0
  152. data/test/data/A_21/pyranthrene.mol +72 -0
  153. data/test/data/A_21/pyrene.cdx +0 -0
  154. data/test/data/A_21/pyrene.mol +40 -0
  155. data/test/data/A_21/rubicene.cdx +0 -0
  156. data/test/data/A_21/rubicene.mol +63 -0
  157. data/test/data/A_21/s-indacene.cdx +0 -0
  158. data/test/data/A_21/s-indacene.mol +31 -0
  159. data/test/data/A_21/tetraphenylene.cdx +0 -0
  160. data/test/data/A_21/tetraphenylene.mol +57 -0
  161. data/test/data/A_21/trinaphthylene.cdx +0 -0
  162. data/test/data/A_21/trinaphthylene.mol +71 -0
  163. data/test/data/A_21/triphenylene.cdx +0 -0
  164. data/test/data/A_21/triphenylene.mol +44 -0
  165. data/test/data/C00147.kcf +25 -0
  166. data/test/data/G00147.kcf +13 -0
  167. data/test/data/atp.mol +69 -0
  168. data/test/data/cyclohexane.mol +17 -0
  169. data/test/data/cyclohexane.ps +485 -0
  170. data/test/data/fullerene.mol +155 -0
  171. data/test/data/glycan +33 -0
  172. data/test/data/hypericin.cdx +0 -0
  173. data/test/data/hypericin.cdxml +596 -0
  174. data/test/data/hypericin.chm +0 -0
  175. data/test/data/hypericin.ct +85 -0
  176. data/test/data/hypericin.f1d +0 -0
  177. data/test/data/hypericin.f1q +0 -0
  178. data/test/data/hypericin.gif +0 -0
  179. data/test/data/hypericin.mol +88 -0
  180. data/test/data/hypericin.mol2 +159 -0
  181. data/test/data/hypericin.msm +123 -0
  182. data/test/data/hypericin.pdf +359 -0
  183. data/test/data/hypericin.png +0 -0
  184. data/test/data/hypericin.ps +0 -0
  185. data/test/data/hypericin.skc +0 -0
  186. data/test/data/hypericin2.gif +0 -0
  187. data/test/data/hypericin2.ps +0 -0
  188. data/test/data/kegg/genomes/hsa/hsa_enzyme.list +4 -0
  189. data/test/data/kegg/genomes/hsa/hsa_pfam.list +4 -0
  190. data/test/data/kegg/ligand/mol/C00147.mol +26 -0
  191. data/test/data/kegg/ligand/reaction +14 -0
  192. data/test/data/kegg/ligand/reaction.lst +1 -0
  193. data/test/data/kegg/ligand/reaction_mapformula.lst +3 -0
  194. data/test/data/reaction +14 -0
  195. data/test/data/reaction.lst +1 -0
  196. data/test/data/reaction_mapformula.lst +3 -0
  197. data/test/data/rxn/C00001.mol +6 -0
  198. data/test/data/rxn/C00011.mol +10 -0
  199. data/test/data/rxn/C00014.mol +6 -0
  200. data/test/data/rxn/C01010.mol +18 -0
  201. data/test/data/rxn/sample.rxn +50 -0
  202. data/test/data/rxn/substitution.rxn +45 -0
  203. data/test/data/test.eps +0 -0
  204. data/test/data/test.mol +28 -0
  205. data/test/data/test.sdf +143 -0
  206. data/test/data/test.skc +0 -0
  207. data/test/data/test.xyz +4 -0
  208. data/test/data/test_lf.sdf +143 -0
  209. data/test/heavy_test_pubchem.rb +16 -0
  210. data/test/multiple_test.rb +22 -0
  211. data/test/test_adj.rb +54 -0
  212. data/test/test_canonical_smiles.rb +46 -0
  213. data/test/test_cdx.rb +32 -0
  214. data/test/test_chem.rb +18 -0
  215. data/test/test_cluster.rb +19 -0
  216. data/test/test_db.rb +11 -0
  217. data/test/test_eps.rb +24 -0
  218. data/test/test_geometry.rb +11 -0
  219. data/test/test_gspan.rb +28 -0
  220. data/test/test_iupac.rb +36 -0
  221. data/test/test_kcf.rb +24 -0
  222. data/test/test_kcf_glycan.rb +10 -0
  223. data/test/test_kegg.rb +118 -0
  224. data/test/test_linucs.rb +21 -0
  225. data/test/test_mdl.rb +45 -0
  226. data/test/test_mol2.rb +62 -0
  227. data/test/test_morgan.rb +21 -0
  228. data/test/test_pdf.rb +12 -0
  229. data/test/test_prop.rb +86 -0
  230. data/test/test_rmagick.rb +15 -0
  231. data/test/test_sbdb.rb +23 -0
  232. data/test/test_sdf.rb +30 -0
  233. data/test/test_smiles.rb +84 -0
  234. data/test/test_sssr.rb +18 -0
  235. data/test/test_sub.rb +47 -0
  236. data/test/test_subcomp.rb +37 -0
  237. data/test/test_traverse.rb +29 -0
  238. data/test/test_writer.rb +13 -0
  239. data/test/test_xyz.rb +15 -0
  240. data/test/type_test.rb +25 -0
  241. metadata +290 -0
@@ -0,0 +1,2 @@
1
+
2
+ $reg_postfix = /(ane|anol)/
@@ -0,0 +1,390 @@
1
+ #
2
+ #
3
+ # = chem/db/kcf.rb - KEGG Chemical Function (KCF) parser
4
+ #
5
+
6
+ module Chem
7
+
8
+ module KEGG
9
+
10
+ class ANumber
11
+
12
+ def self.open filename
13
+ @input = File.open(filename)
14
+ KCFCorrespondence.new(@input)
15
+ end
16
+
17
+ end
18
+
19
+ class KCFAtom
20
+
21
+ include Atom
22
+ attr_accessor :kcf_type, :atom_id, :next_atom
23
+
24
+ def initialize line
25
+ @line = line
26
+ @next_atom = {}
27
+ end
28
+
29
+ def x ; @x || @x = @line[22...32].to_f ; end
30
+ def y ; @y || @y = @line[32...42].to_f ; end
31
+ def kcf_type ; @kcf_type || @kcf_type = @line[16...19].strip ; end
32
+
33
+ def element ; @element || @element = @line[19...22].strip.intern ; end
34
+
35
+ def atom_id ; @atom_id || @atom_id = @line[0...16].to_i ; end
36
+
37
+ end
38
+
39
+ class KCFBond
40
+
41
+ include Bond
42
+ attr_accessor :bond_id, :property
43
+
44
+ def initialize line
45
+ @line = line
46
+ end
47
+
48
+ def bond_id ; @bond_id ||= @line[0...16].to_i ; end
49
+
50
+ def v ; @v ||= @line[23...25].to_i ; end
51
+ def property ; @property ||= @line[27..-1] ; end
52
+
53
+ end
54
+
55
+ class KCF
56
+
57
+ include Molecule
58
+ include Enumerable
59
+
60
+ def initialize input
61
+ @nodes = []
62
+ @edges = []
63
+ hash = {}
64
+ while ! /\/\/\//.match(line = input.readline)
65
+ case line[0...12]
66
+ when 'ENTRY '
67
+ when 'ATOM '
68
+ line.split[1].to_i.times do |n|
69
+ atom = KCFAtom.new input.readline
70
+
71
+ hash[atom.atom_id] = atom
72
+ @nodes.push(atom)
73
+ end
74
+ when 'BOND '
75
+ line.split[1].to_i.times do |n|
76
+ bond = KCFBond.new input.readline
77
+ @edges.push([bond, hash[line[16...19].to_i], hash[line[19...23].to_i]])
78
+ end
79
+ end
80
+ end
81
+ end
82
+
83
+ def KCF.open filename
84
+ @input = File.open(filename)
85
+ KCF.new(@input)
86
+ end
87
+
88
+ end
89
+
90
+ class KeggReaction
91
+
92
+ class ReactionEntry
93
+ attr_accessor :entry, :name, :definition, :reactants, :products, :rpair, :ec, :comment, :pathway
94
+ def initialize
95
+ @comment = []
96
+ @name = []
97
+ @definition = []
98
+ end
99
+ end
100
+
101
+ def initialize input
102
+ @input = input
103
+ end
104
+
105
+ def KeggReaction.open filename
106
+ KeggReaction.new(File.open(filename))
107
+ end
108
+
109
# Yields one ReactionEntry per "///"-terminated record read from @input.
#
# The keyword of a line is its first 12 columns with padding removed; the
# value is everything after column 12 minus the final character (the line
# terminator). A line whose keyword column is blank continues the previous
# section, which is honoured for NAME, DEFINITION, COMMENT and PATHWAY only;
# any other blank-keyword or unrecognised line is reported on stdout.
#
# The section is decided from the line's own keyword first, so a NAME line no
# longer captures every line that follows it. Blank lines are skipped and a
# stream that ends without a terminator does not raise EOFError.
def each
  until @input.eof?
    entry = ReactionEntry.new
    section = nil
    terminated = false
    while (line = @input.gets)
      if %r{///}.match(line)
        terminated = true
        break
      end
      next if line.strip.empty?

      keyword = line[0...12].to_s.strip
      if keyword.empty?
        # Continuation line: inherit the section only where that is meaningful.
        keyword = section if %w[NAME DEFINITION COMMENT PATHWAY].include?(section)
      else
        section = keyword
      end
      value = line[12...-1].to_s

      case keyword
      when 'ENTRY'
        entry.entry = value
      when 'NAME'
        entry.name = value
      when 'DEFINITION'
        entry.definition.push(value)
      when 'EQUATION'
        left, right = value.split('<=>', 2)
        entry.reactants = left.to_s.split('+').collect { |mol| mol.strip }
        entry.products = right.to_s.split('+').collect { |mol| mol.strip }
      when 'RPAIR'
        entry.rpair = value
      when 'ENZYME'
        entry.ec = value.split('.').collect { |n| n.to_i }
      when 'COMMENT'
        entry.comment.push(value)
      when 'PATHWAY'
        # Recognised so it is not reported as unknown; nothing is stored.
      else
        puts "Error Unknown line : %s" % line
      end
    end
    # Trailing blank lines after the last record must not produce an entry.
    yield entry if terminated || section
  end
end
144
+ end
145
+
146
+ class KCFRXN
147
+ def initialize reactant, product
148
+ @reactant = reactant
149
+ @product = product
150
+ @matched_reactants = []
151
+ @matched_products = []
152
+ @nodes = []
153
+ end
154
+
155
# Records that reactant atom number +from+ corresponds to product atom
# number +to+.
#
# Both atoms are remembered as matched (setup_bonds later adds unmatched
# atoms as one-sided nodes) and a paired RXNNode is appended to @nodes.
# The product atom is looked up with +to+; indexing the product with +from+
# marked the wrong product atom as matched.
def corresponds(from, to)
  reactant_atom = @reactant.atoms[from]
  product_atom = @product.atoms[to]
  @matched_reactants.push(reactant_atom)
  @matched_products.push(product_atom)
  @nodes.push(RXNNode.new(reactant_atom, product_atom))
end
160
+
161
+ def setup_bonds
162
+ @edges = []
163
+ @reactant.atoms.each do |atom|
164
+ if atom && ! @matched_reactants.member?(atom)
165
+ @nodes.push(RXNNode.new(atom, nil))
166
+ end
167
+ end
168
+ @product.atoms.each do |atom|
169
+ if atom && ! @matched_products.member?(atom)
170
+ @nodes.push(RXNNode.new(nil, atom))
171
+ end
172
+ end
173
+
174
+ @reactant.bonds.each do |bond|
175
+ bond.e.next_atom[bond.b] = bond
176
+ bond.b.next_atom[bond.e] = bond
177
+ end
178
+ @product.bonds.each do |bond|
179
+ bond.e.next_atom[bond.b] = bond
180
+ bond.b.next_atom[bond.e] = bond
181
+ end
182
+ @nodes.each_with_index do |node, index|
183
+ index.upto(@nodes.length - 1) do |n|
184
+ r_edge = p_edge = nil
185
+ if @nodes[n].reactant_node && @nodes[n].reactant_node.next_atom.has_key?(node.reactant_node)
186
+ r_edge = @nodes[n].reactant_node.next_atom[node.reactant_node]
187
+ end
188
+ if @nodes[n].product_node && @nodes[n].product_node.next_atom.has_key?(node.product_node)
189
+ p_edge = @nodes[n].product_node.next_atom[node.product_node]
190
+ end
191
+ if r_edge || p_edge
192
+ edge = RXNEdge.new
193
+ edge.reactant_edge = r_edge
194
+ edge.product_edge = p_edge
195
+ @edges.push(edge)
196
+ end
197
+ end
198
+ end
199
+ @edges.each do |edge|
200
+ from = edge.reactant_edge ? edge.reactant_edge.multiplicity : 0
201
+ to = edge.product_edge ? edge.product_edge.multiplicity : 0
202
+ puts "%3d %3d" % [from, to]
203
+ end
204
+ end
205
+
206
+ class RXNNode
207
+ attr_reader :reactant_node, :product_node
208
+ def initialize reactant, product
209
+ @reactant_node = reactant
210
+ @product_node = product
211
+ end
212
+ end
213
+ class RXNEdge
214
+ attr_accessor :product_edge, :reactant_edge
215
+ end
216
+ end
217
+
218
+ class KCFCorrespondence
219
+
220
+ attr_reader :compounds, :correspondence
221
+
222
+ def initialize input
223
+ @name = []
224
+ @input = input
225
+ @compounds = []
226
+ @correspondence = {}
227
+ parse(input)
228
+ end
229
+
230
+ def make_rxn dir
231
+ reactant = KCF.open("#{dir}#{@compounds[0]}.kcf")
232
+ product = KCF.open("#{dir}#{@compounds[1]}.kcf")
233
+ rxn = KCFRXN.new(reactant, product)
234
+ @correspondence.each do |k, corres|
235
+ rxn.corresponds(corres[0][0], corres[1][0])
236
+ end
237
+ rxn.setup_bonds
238
+ end
239
+
240
+ def parse input
241
+ while ! /\/\/\//.match(line = input.readline)
242
+ case line[0...12]
243
+ when 'ENTRY '
244
+ @no = /(\d+)/.match(line)[1].to_i
245
+ when 'NAME '
246
+ @name.push(line[12...-1])
247
+ when 'COMPOUND '
248
+ @compounds.push(line[12...-1])
249
+ when 'TYPE '
250
+ @type = line[12...-1]
251
+ when 'ALIGN '
252
+ @align = line[12...-1].to_i
253
+ alignment_mode = true
254
+ else
255
+ ary = line[12...-1].split
256
+ @correspondence[ary[0].to_i] = ary[1..2].collect{|e| a = e.split(':'); [a[0].to_i, a[1]]}
257
+ end
258
+ end
259
+ end
260
+
261
+ end
262
+
263
+ module Atom
264
+ attr_accessor :kcf_type, :kcf_prop
265
+
266
+ # Returns KCF formatted line
267
+ def kcf_line
268
+ if @kcf_prop
269
+ "%14d %3s%2s %10.4f%10.4f #%s" % [@number, @kcf_type, @element, @x, @y, @kcf_prop]
270
+ else
271
+ "%14d %3s%2s %10.4f%10.4f" % [@number, @kcf_type, @element, @x, @y]
272
+ end
273
+ end
274
+ end
275
+
276
+ module Bond
277
+ attr_accessor :kcf_prop
278
+
279
+ # Returns KCF formatted line
280
+ def kcf_line
281
+ if @kcf_prop
282
+ "%13d %4d%4d%2d #%s" % [@number, @b.number, @e.number, @multiplicity, @kcf_prop]
283
+ else
284
+ "%13d %4d%4d%2d" % [@number, @b.number, @e.number, @multiplicity, @kcf_prop]
285
+ end
286
+ end
287
+
288
+ end
289
+
290
+ class KCFReader
291
+
292
+ def KCFReader.open(file, &method)
293
+ input = File.open(file, 'r')
294
+ KCFReader.new.read(input, &method)
295
+ end
296
+
297
+ def read input, &method
298
+ # 0.upto(2) do |m|
299
+ # 0.upto(9) do |n|
300
+ # print n
301
+ # end
302
+ # end
303
+ # puts
304
+ status = :NEW
305
+ mol = KCFMolecule.new
306
+ input.each do |line|
307
+ case line[0..11]
308
+ when /ANUMBER/
309
+ mol.a_no = /A(\d+)/.match(line)[1].to_i
310
+ when /ENTRY/
311
+ entry = /C(\d+)/.match(line)[1].to_i
312
+ when /ATOM/
313
+ n_atoms = /(\d+)/.match(line)[1].to_i
314
+ status = :ATOM
315
+ when /BOND/
316
+ n_bonds = /(\d+)/.match(line)[1].to_i
317
+ status = :BOND
318
+ when /\/\/\//
319
+ if(method)
320
+ yield mol
321
+ end
322
+ mol = KCFMolecule.new
323
+ status = :NEW
324
+ else
325
+ case status
326
+ when :ATOM
327
+ atom = KCFAtom.new
328
+ atom.number, atom.kcf_type, atom.element, atom.x, atom.y, = line[12..-1].scanf("%d%s%s%f%f%s")
329
+ mol.atoms[atom.number] = atom
330
+ when :BOND
331
+ bond = KCFBond.new
332
+ no, b, e, bond.multiplicity, prop = line[12..-1].scanf("%d%d%d%d%s")
333
+ bond.b = mol.atoms[b]
334
+ bond.e = mol.atoms[e]
335
+ mol.bonds.push(bond)
336
+ end
337
+ end
338
+ end
339
+ end
340
+ end
341
+
342
+ class KCFMolecule
343
+
344
+ include Molecule
345
+ attr_accessor :a_no
346
+
347
# Prints the KCF lines of every atom and then every bond of +molecule+ to
# standard output, using each object's +kcf_line+.
#
# molecule:: object responding to +atoms+ and +bonds+
#
# The previous body called an undefined +kcf+ method on atoms and referenced
# an undefined local +kcf+ inside the bond loop, so it could not run.
# NOTE(review): +atoms+ is indexed by atom number elsewhere in this file and
# may be a Hash or a sparse Array; both are handled here and nil slots are
# skipped - confirm against Molecule.
def KCFMolecule.write_kcf(molecule)
  atoms = molecule.atoms
  atoms = atoms.values if atoms.respond_to?(:values)
  atoms.each do |atom|
    puts atom.kcf_line if atom
  end
  molecule.bonds.each do |bond|
    puts bond.kcf_line
  end
end
361
+
362
+ def KCFMolecule.open file
363
+ input = File.open(file, 'r')
364
+ KCFMolecule.new.read(input)
365
+ end
366
+
367
+ def read input
368
+ @entry = input.readline
369
+ number_of_atom = input.readline.split[1].to_i
370
+ 1.upto(number_of_atom) do |n|
371
+ atom = KCFAtom.new
372
+ atom.number, atom.kcf_type, atom.element, atom.x, atom.y, = input.readline.scanf("%d%s%s%f%f%s")
373
+ @atoms[atom.number] = atom
374
+ end
375
+ number_of_bond = input.readline.split[1].to_i
376
+ 1.upto(number_of_bond) do |n|
377
+ bond = KCFBond.new
378
+ no, b, e, bond.multiplicity, prop = input.readline.scanf("%d%d%d%d%s")
379
+ bond.b = @atoms[b]
380
+ bond.e = @atoms[e]
381
+ @bonds.push(bond)
382
+ end
383
+ self
384
+ end
385
+
386
+ end
387
+ end
388
+ end
389
+
390
+
@@ -0,0 +1,19 @@
1
+ # = KEGG Chemical Function (KCF) Glycan parser
2
+ # Not implemented
3
+
4
+ module Chem
5
+
6
+ module KEGG
7
+
8
+ class KCFGlycan
9
+
10
+ def initialize filename
11
+ # filename.each do |line|
12
+ # puts line
13
+ # end
14
+ end
15
+
16
+ end
17
+ end
18
+
19
+ end
@@ -0,0 +1,516 @@
1
+ #
2
+ # = chem/db/kegg.rb - KEGG (Kyoto Encyclopedia of Genes and Genomes)
3
+ #
4
+ # Author:: Nobuya Tanaka <tanaka@chemruby.org>
5
+ #
6
+ # $Id:$
7
+ #
8
+
9
+ require 'chem/db/mdl'
10
+
11
+ module Chem
12
+
13
+ module KEGG
14
+ class KeggDirectory
15
+
16
+ attr_reader :dir
17
+ def initialize dir
18
+ @dir = dir
19
+ @compounds = {}
20
+ @ligand_dir = File.join(@dir, "ligand")
21
+ @mol_dir = File.join(@ligand_dir, "mol")
22
+ @parsed_file = []
23
+ end
24
+
25
+ def get_organism organism, file
26
+ File.join(@dir, "genomes", organism, file)
27
+ end
28
+
29
# Returns the Pfam-to-genes table, loading
# <dir>/genomes/<organism>/<organism>_pfam.list on first use.
#
# organism:: organism code used both as directory and file-name prefix
#
# Each line is "gene<TAB>pfam". The result maps a Pfam key to an Array of
# KeggGene objects; @gene2pfam receives the reverse mapping. A file is
# parsed at most once per directory object.
#
# The file is read with File.foreach so the handle is closed, the line
# terminator is removed with chomp (chop also ate the last character of a
# final line lacking a newline), lines without a tab are skipped, and the
# file is only marked as parsed after it has been read successfully.
def gene_to_pfam(organism)
  filename = File.join(@dir, "genomes", organism, organism + "_pfam.list")
  return @pfam2gene if @parsed_file.include?(filename)

  @gene2pfam ||= {}
  @pfam2gene ||= {}
  File.foreach(filename) do |line|
    gene, pfam = line.chomp.split("\t")
    next unless gene && pfam

    @gene2pfam[gene] = pfam
    (@pfam2gene[pfam] ||= []).push(KeggGene.new(gene, organism, self))
  end
  @parsed_file.push filename
  @pfam2gene
end
42
+
43
# Returns the enzyme (EC) identifier recorded for +gene+, or nil when the
# gene is not listed.
#
# gene:: object responding to +organism+ and +gene+
#
# On first use for an organism the file
# <dir>/genomes/<organism>/<organism>_enzyme.list is read; each line is
# "gene<TAB>ec". Both directions are cached in @gene2enzyme / @enzyme2gene.
#
# The file is read with File.foreach so the handle is closed, chomp replaces
# chop so a final line without a newline keeps its last character, malformed
# lines are skipped, and the file is marked as parsed only after a
# successful read.
def get_ec_number(gene)
  @gene2enzyme ||= {}
  @enzyme2gene ||= {}
  filename = File.join(@dir, "genomes", gene.organism, gene.organism + "_enzyme.list")
  unless @parsed_file.include?(filename)
    File.foreach(filename) do |line|
      gn, ec = line.chomp.split("\t")
      next unless gn && ec

      @gene2enzyme[gn] = ec
      @enzyme2gene[ec] = gn
    end
    @parsed_file.push filename
  end
  @gene2enzyme[gene.gene]
end
57
+
58
+ def [](key)
59
+ case key
60
+ when /(R\d+)/
61
+ get_reaction $1
62
+ when /(C\d+)/
63
+ get_compound $1
64
+ when /pf:(.+)/
65
+ KeggPfam.new($1, self)
66
+ when /^([^:]{3,4}):(\d+)/
67
+ # gene
68
+ raise "Parser for Organism not implemented!"
69
+ when /^([^:]{3,4})/
70
+ # organism
71
+ KeggOrganism.new($1, self)
72
+ else
73
+ raise "unknown KEGG key type : #{key}"
74
+ end
75
+ end
76
+
77
+ def map_formula
78
+ @reaction_map_formula = parse_reaction_map_formula unless @reaction_map_formula
79
+ @reaction_map_formula
80
+ end
81
+
82
+ def parse_reaction_map_formula
83
+ rxns = {}
84
+ parser = Chem.parse_file(File.join(@dir, "ligand", "reaction_mapformula.lst"))
85
+ parser.each do |rxn|
86
+ rxns[rxn.entry] = rxn
87
+ end
88
+ rxns
89
+ end
90
+
91
+ # Private methods
92
+ private
93
+ class KeggOrganism
94
+
95
+ def initialize organism, kegg
96
+ @organism = organism
97
+ @kegg = kegg
98
+ end
99
+
100
+ def pfam
101
+ pfam2gene = @kegg.gene_to_pfam(@organism)
102
+ pfam2gene
103
+ end
104
+
105
+ def [](key)
106
+ @kegg
107
+ end
108
+
109
+ end
110
+
111
+ private
112
+ class KeggGene
113
+
114
+ attr_reader :organism, :gene
115
+ def initialize gene, organism, kegg
116
+ @gene = gene
117
+ @organism = organism
118
+ @kegg = kegg
119
+ end
120
+
121
+ def ec_number
122
+ @kegg.get_ec_number(self).inspect
123
+ end
124
+
125
+ end
126
+
127
+ private
128
+ class KeggPfam
129
+
130
+ def initialize pfam_key, kegg
131
+ @kegg = kegg
132
+ @pfam_key = pfam_key
133
+ end
134
+
135
+ def [](organism)
136
+ @kegg[organism][@pfam_key]
137
+ end
138
+
139
+ end
140
+
141
+ private
142
+ def get_compound name
143
+ unless @compounds[name]
144
+ @compounds[name] = Chem.open_mol(File.join(@dir, "ligand", "mol", name) + ".mol")
145
+ end
146
+ @compounds[name]
147
+ end
148
+
149
+ def get_reaction name
150
+ @reactions ||= parse_reaction
151
+ @reactions[name]
152
+ end
153
+
154
+ def parse_reaction
155
+ rxns = {}
156
+ parser = Chem.parse_file(File.join(@dir, "ligand", "reaction"))
157
+ parser.each do |reaction|
158
+ reaction.kegg = self
159
+ rxns[reaction.entry] = reaction
160
+ end
161
+ rxns
162
+ end
163
+
164
+ end
165
+
166
+ #obsolete
167
+ @@kegg_compound_folder = nil
168
+ def self.kegg_compound_folder= (folder)
169
+ @@kegg_compound_folder = folder
170
+ end
171
+
172
+ def self.kegg_compound_folder
173
+ @@kegg_compound_folder
174
+ end
175
+
176
+ # Duplication definition!
177
+ class KEGGReaction
178
+
179
+ include Chem::Reaction
180
+ attr_accessor :entry, :name, :ecs, :compounds, :direction
181
+ def initialize
182
+ @ecs = []
183
+ @compounds = []
184
+ end
185
+
186
+ def kegg= kegg
187
+ @kegg = kegg
188
+ end
189
+
190
+ def map_formula
191
+ return nil unless @kegg.map_formula[@entry]
192
+ @kegg.map_formula[@entry].compounds
193
+ end
194
+ end
195
+
196
+ class KeggCompound
197
+ include Molecule
198
+ include Enumerable
199
+ include MDL::MdlMolParser
200
+ attr_reader :entry
201
+
202
+ def initialize
203
+ @nodes = []
204
+ @edges = []
205
+ end
206
+
207
+ @@entries = {}
208
+ def entry= entry_no
209
+ @entry = entry_no
210
+ if @@entries[entry_no] == nil
211
+ if Chem::Kegg.kegg_compound_folder == nil
212
+ raise ArgumentError.new("Chem::Kegg.kegg_compound_folder" +
213
+ " not specified")
214
+ end
215
+ # mol = KeggCompound.new
216
+ # mol.open(Chem::Kegg.kegg_compound_folder + entry_no + ".mol")
217
+ filename = File.join(Chem::Kegg.kegg_compound_folder, entry_no + ".mol")
218
+ mol = nil
219
+ if File.exist?(filename)
220
+ mol = Chem.open_mol(filename)
221
+ end
222
+
223
+ @@entries[entry_no] = mol
224
+ end
225
+ @fly_weight = @@entries[entry_no]
226
+ if @fly_weight
227
+ @nodes = @fly_weight.nodes
228
+ @edges = @fly_weight.edges
229
+ end
230
+ end
231
+
232
+ end
233
+
234
+ class KeggGlycan
235
+ attr_accessor :entry, :name
236
+ end
237
+
238
+ class KeggEc
239
+ attr_accessor :entry, :number
240
+ end
241
+
242
+ module KeggFormat
243
+
244
+ def compound_folder= (folder)
245
+ Chem::Kegg.kegg_compound_folder = folder
246
+ end
247
+
248
+ def each_entry
249
+ state = nil
250
+ str = ''
251
+ @input.each do |line|
252
+ if line[0..11] == ' '
253
+ str += line[12..-1]
254
+ else
255
+ yield(str, state) if state # Not first state
256
+ str = line[12..-1]
257
+ state = line[0..11].strip
258
+ end
259
+ end
260
+ end
261
+ end
262
+
263
+ class KeggReactionParser
264
+
265
+ include KeggFormat
266
+ include Enumerable
267
+
268
+ def initialize filename
269
+ @input = File.open(filename)
270
+ end
271
+
272
+ def parse_compounds species
273
+ ary = []
274
+ species.split(" + ").each do |mol|
275
+ stoichiometry = 1
276
+ if m = /(\d+) *[CG]/.match(mol)
277
+ stoichiometry = m[1].to_i
278
+ end
279
+ compound_entry = ""
280
+ if m = /(C\d+)/.match(mol)
281
+ compound_entry = m[1]
282
+ elsif m = /(G\d+)/.match(mol)
283
+ compound_entry = m[1]
284
+ end
285
+ ary.push([compound_entry, stoichiometry])
286
+ end
287
+ ary
288
+ end
289
+
290
+ def each
291
+ reaction = nil
292
+ each_entry do |str, state|
293
+ case state
294
+ when "ENTRY"
295
+ # reaction = Reaction.find(:first, :conditions => ["entry = ?", str.split[0]])
296
+ # if reaction == nil
297
+ reaction = KEGGReaction.new
298
+ reaction.entry = str.split[0]
299
+ # end
300
+ when "NAME"
301
+ reaction.name = str
302
+ when "DEFINITION"
303
+ #@definition = str
304
+ when "EQUATION"
305
+ c = str.split("<=>")
306
+ reaction.compounds << parse_compounds(c[0])
307
+ reaction.compounds << parse_compounds(c[1])
308
+ when "RPAIR"
309
+ # @rpair = str
310
+ when "ENZYME"
311
+ str.split.each do |e|
312
+ ec = KeggEc.new
313
+ ec.entry = "EC" + e
314
+ sp = e.split(".")
315
+ ec.number = sp.collect{|i| i.to_i}
316
+ reaction.ecs << ec
317
+ end
318
+ when "///"
319
+ # reaction.save
320
+ yield reaction
321
+ when "PATHWAY"
322
+ when "COMMENT"
323
+ when "REFERENCE"
324
+ else
325
+ p state
326
+ end
327
+ end
328
+ end
329
+
330
+ end
331
+
332
+ class KeggReactionLstParser
333
+
334
+ include Enumerable
335
+ include KeggFormat
336
+
337
+ def initialize filename
338
+ @input = open(filename)
339
+ end
340
+
341
+ def each
342
+ @input.each do |line|
343
+ rxn = KEGGReaction.new
344
+ r_number, comps = line.split(":")
345
+ rxn.entry = r_number
346
+ cc = comps.split(/<=>/)
347
+
348
+ reactant = cc[0].split("+").collect do |c|
349
+ ary = c.split
350
+ #compound = KeggCompound.new
351
+ if ary.length == 1
352
+ #compound.entry = c.strip
353
+ [c.strip, 1]
354
+ else
355
+ #compound.entry = ary[1].strip
356
+ [c.strip, ary[0].to_i]
357
+ end
358
+ end
359
+ product = cc[1].split("+").collect do |c|
360
+ ary = c.split
361
+ #compound = KeggCompound.new
362
+ if ary.length == 1
363
+ #compound.entry = c.strip
364
+ [c.strip, 1]
365
+ else
366
+ #compound.entry = ary[1].strip
367
+ [c.strip, ary[0].to_i]
368
+ end
369
+ end
370
+ rxn.compounds = [reactant, product]
371
+ yield rxn
372
+ end
373
+
374
+ end
375
+
376
+ end
377
+
378
+ # ftp://ftp.genome.ad.jp/pub/kegg/ligand/reaction_mapformula.lst
379
+ class KeggReactionMapParser
380
+
381
+ include Enumerable
382
+ include KeggFormat
383
+
384
+ def initialize filename
385
+ @input = open(filename)
386
+ @reactions = @input.inject({}) do |ret, line|
387
+ ary = line.split(":")
388
+ ret[ary[0]] = ary[1..-1]
389
+ ret
390
+ end
391
+ end
392
+
393
+ def each
394
+ @reactions.each do |r_number, (map_number, comps)|
395
+ yield self[r_number]
396
+ end
397
+ end
398
+
399
+ def [](r_number)
400
+ return nil if @reactions[r_number] == nil
401
+ map_number, comps = @reactions[r_number]
402
+ rxn = KEGGReaction.new
403
+ # r_number, map_number, comps = line.split(":")
404
+ rxn.entry = r_number
405
+ cc = comps.split(/(<?=>?)/)
406
+ case cc[1]
407
+ when "<="
408
+ rxn.direction = -1
409
+ when "<=>"
410
+ rxn.direction = 0
411
+ when "=>"
412
+ rxn.direction = 1
413
+ end
414
+ reactant = cc[0].split("+").collect do |c|
415
+ #compound = KeggCompound.new
416
+ #compound.entry = c.strip
417
+ [c.strip, 1]
418
+ end
419
+ product = cc[2].split("+").collect do |c|
420
+ #compound = KeggCompound.new
421
+ #compound.entry = c.strip
422
+ [c.strip, 1]
423
+ end
424
+ rxn.compounds = [reactant, product]
425
+ rxn
426
+ end
427
+
428
+ end
429
+
430
+ # Parses KEGG Glycan format
431
+ # http://www.genome.jp/ligand/kcam/kcam/kcf.html
432
+ # Not fully implemented
433
+ class KeggGlycanParser
434
+
435
+ include Enumerable
436
+ include KeggFormat
437
+
438
+ def initialize filename
439
+ @input = open(filename)
440
+ end
441
+
442
# Yields one KeggGlycan per record delivered by +each_entry+.
#
# +each_entry+ hands over (text, keyword) pairs. An "ENTRY" pair starts a new
# glycan whose entry is the first whitespace separated token of the text;
# "NAME" text is appended to the glycan's name with embedded newlines
# removed; a "///" pair finishes the record and yields it.
#
# The entry used to be assigned only inside an +if glycan == nil+ guard that
# could never be true right after KeggGlycan.new, and nothing was ever
# yielded, so the Enumerable methods of this class produced no elements.
# Calling without a block remains harmless.
def each
  glycan = nil
  each_entry do |str, state|
    case state
    when "ENTRY"
      glycan = KeggGlycan.new
      glycan.entry = str.to_s.split[0]
    when "NAME"
      next unless glycan && str

      joined = str.split("\n").join
      glycan.name = glycan.name ? glycan.name + joined : joined
    when "///"
      yield glycan if glycan && block_given?
      glycan = nil
    end
  end
end
464
+
465
+ end
466
+
467
+ def self.parse_compound_file
468
+ compound = nil
469
+ parse($home + "compound") do |str, state|
470
+ case state
471
+ when "ENTRY"
472
+ compound = Compound.find(:first, :conditions => ["entry = ?", str.split[0]])
473
+ if compound == nil
474
+ compound = Compound.new
475
+ compound.entry = str.split[0]
476
+ end
477
+ when "NAME"
478
+ compound.name = str.split("\n").join if str
479
+ when "DBLINKS"
480
+ str.split("\n").each do |line|
481
+ if m = /ChEBI: (\d+)/.match(line)
482
+ compound.chebi = m[1].to_i
483
+ elsif m = /PubChem: (\d+)/.match(line)
484
+ compound.pubchem = m[1].to_i
485
+ end
486
+ end
487
+ when "GLYCAN"
488
+ compound.glycan_entry = str
489
+ when "///"
490
+ #compound.save
491
+ end
492
+ end
493
+ end
494
+
495
+ def set_compounds
496
+ require 'util'
497
+ Dir.glob($home + "/mol/*.mol").each do |mol|
498
+ entry = /(.\d+).mol/.match(mol)[1]
499
+ # comp = KeggCompound.find(:first, :conditions => ["entry = ?", entry])
500
+ mol = Chem.open_mol(mol)
501
+ if comp == nil
502
+ puts mol
503
+ next
504
+ end
505
+ if comp.ctab == nil
506
+ comp.ctab = Marshal.dump(mol)
507
+ comp.save
508
+ end
509
+ #p comp
510
+ end
511
+ end
512
+
513
+ end
514
+ end
515
+
516
+