chemruby 0.9.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (241) hide show
  1. data/README +120 -0
  2. data/Rakefile +195 -0
  3. data/ext/extconf.rb +4 -0
  4. data/ext/subcomp.c +416 -0
  5. data/lib/chem.rb +130 -0
  6. data/lib/chem/appl.rb +1 -0
  7. data/lib/chem/appl/chem3dole.rb +36 -0
  8. data/lib/chem/appl/tinker/nucleic.rb +40 -0
  9. data/lib/chem/appl/tinker/tinker_reader.rb +43 -0
  10. data/lib/chem/data.rb +4 -0
  11. data/lib/chem/data/atomic_weight.rb +124 -0
  12. data/lib/chem/data/character.rb +2 -0
  13. data/lib/chem/data/electronegativity.rb +14 -0
  14. data/lib/chem/data/periodic_table.rb +6 -0
  15. data/lib/chem/data/prime_numbers.rb +1 -0
  16. data/lib/chem/data/vdw_radii.rb +1 -0
  17. data/lib/chem/db.rb +64 -0
  18. data/lib/chem/db/cansmi.rb +234 -0
  19. data/lib/chem/db/cdx.rb +1525 -0
  20. data/lib/chem/db/eps.rb +164 -0
  21. data/lib/chem/db/g98.rb +909 -0
  22. data/lib/chem/db/gspan.rb +130 -0
  23. data/lib/chem/db/iupac.rb +5 -0
  24. data/lib/chem/db/iupac/a_1.rb +46 -0
  25. data/lib/chem/db/iupac/iuparser.rb +226 -0
  26. data/lib/chem/db/iupac/iuparser.ry +97 -0
  27. data/lib/chem/db/iupac/postfix.rb +2 -0
  28. data/lib/chem/db/kcf.rb +390 -0
  29. data/lib/chem/db/kcf_glycan.rb +19 -0
  30. data/lib/chem/db/kegg.rb +516 -0
  31. data/lib/chem/db/linucs/linparser.rb +144 -0
  32. data/lib/chem/db/linucs/linucs.ry +53 -0
  33. data/lib/chem/db/mdl.rb +379 -0
  34. data/lib/chem/db/molconnz.rb +12 -0
  35. data/lib/chem/db/mopac.rb +88 -0
  36. data/lib/chem/db/msi.rb +107 -0
  37. data/lib/chem/db/pdb_dic.rb +115 -0
  38. data/lib/chem/db/pdf.rb +131 -0
  39. data/lib/chem/db/pubchem.rb +113 -0
  40. data/lib/chem/db/rmagick.rb +70 -0
  41. data/lib/chem/db/sdf.rb +37 -0
  42. data/lib/chem/db/smbl.rb +88 -0
  43. data/lib/chem/db/smiles.rb +2 -0
  44. data/lib/chem/db/smiles/smiles.ry +203 -0
  45. data/lib/chem/db/smiles/smiparser.rb +375 -0
  46. data/lib/chem/db/swf.rb +74 -0
  47. data/lib/chem/db/sybyl.rb +150 -0
  48. data/lib/chem/db/tinker.rb +77 -0
  49. data/lib/chem/db/types/type_cansmi.rb +9 -0
  50. data/lib/chem/db/types/type_cdx.rb +24 -0
  51. data/lib/chem/db/types/type_gspan.rb +31 -0
  52. data/lib/chem/db/types/type_kcf.rb +28 -0
  53. data/lib/chem/db/types/type_kcf_glycan.rb +26 -0
  54. data/lib/chem/db/types/type_kegg.rb +92 -0
  55. data/lib/chem/db/types/type_mdl.rb +31 -0
  56. data/lib/chem/db/types/type_pdf.rb +33 -0
  57. data/lib/chem/db/types/type_png.rb +31 -0
  58. data/lib/chem/db/types/type_rxn.rb +25 -0
  59. data/lib/chem/db/types/type_sdf.rb +25 -0
  60. data/lib/chem/db/types/type_sybyl.rb +30 -0
  61. data/lib/chem/db/types/type_xyz.rb +26 -0
  62. data/lib/chem/db/vector.rb +128 -0
  63. data/lib/chem/db/xyz.rb +39 -0
  64. data/lib/chem/model.rb +119 -0
  65. data/lib/chem/model/skeleton.rb +37 -0
  66. data/lib/chem/utils.rb +11 -0
  67. data/lib/chem/utils/geometry.rb +27 -0
  68. data/lib/chem/utils/graph_db.rb +146 -0
  69. data/lib/chem/utils/math.rb +17 -0
  70. data/lib/chem/utils/prop.rb +123 -0
  71. data/lib/chem/utils/sssr.rb +101 -0
  72. data/lib/chem/utils/sub.rb +78 -0
  73. data/lib/chem/utils/transform.rb +110 -0
  74. data/lib/chem/utils/traverse.rb +37 -0
  75. data/lib/chem/utils/ullmann.rb +134 -0
  76. data/lib/graph.rb +41 -0
  77. data/lib/graph/cluster.rb +20 -0
  78. data/lib/graph/morgan.rb +38 -0
  79. data/sample/frequent_subgraph.rb +46 -0
  80. data/sample/images/ex1.rb +11 -0
  81. data/sample/images/ex2.rb +4 -0
  82. data/sample/images/ex3.rb +5 -0
  83. data/sample/images/ex4.rb +17 -0
  84. data/sample/images/ex5.rb +10 -0
  85. data/sample/images/mol/adenine.mol +26 -0
  86. data/sample/images/mol/atp.mol +69 -0
  87. data/sample/images/temp/ex5.mol +344 -0
  88. data/sample/kegg_db.rb +116 -0
  89. data/setup.rb +1551 -0
  90. data/test/all.rb +6 -0
  91. data/test/coord_test.rb +17 -0
  92. data/test/ctab_test.rb +31 -0
  93. data/test/data/A_21.tar.gz +0 -0
  94. data/test/data/A_21/aceanthrylene.cdx +0 -0
  95. data/test/data/A_21/aceanthrylene.mol +40 -0
  96. data/test/data/A_21/acenaphthylene.cdx +0 -0
  97. data/test/data/A_21/acenaphthylene.mol +31 -0
  98. data/test/data/A_21/acephenanthrylene.cdx +0 -0
  99. data/test/data/A_21/acephenanthrylene.mol +40 -0
  100. data/test/data/A_21/anthracene.cdx +0 -0
  101. data/test/data/A_21/anthracene.mol +35 -0
  102. data/test/data/A_21/as-indacene.cdx +0 -0
  103. data/test/data/A_21/as-indacene.mol +31 -0
  104. data/test/data/A_21/azulene.cdx +0 -0
  105. data/test/data/A_21/azulene.mol +26 -0
  106. data/test/data/A_21/biphenylene.cdx +0 -0
  107. data/test/data/A_21/biphenylene.mol +31 -0
  108. data/test/data/A_21/chrysene.cdx +0 -0
  109. data/test/data/A_21/chrysene.mol +44 -0
  110. data/test/data/A_21/coronen.cdx +0 -0
  111. data/test/data/A_21/coronen.mol +59 -0
  112. data/test/data/A_21/fluoranthene.cdx +0 -0
  113. data/test/data/A_21/fluoranthene.mol +40 -0
  114. data/test/data/A_21/fluorene.cdx +0 -0
  115. data/test/data/A_21/fluorene.mol +33 -0
  116. data/test/data/A_21/heptacene.cdx +0 -0
  117. data/test/data/A_21/heptacene.mol +71 -0
  118. data/test/data/A_21/heptalene.cdx +0 -0
  119. data/test/data/A_21/heptalene.mol +30 -0
  120. data/test/data/A_21/heptaphene.cdx +0 -0
  121. data/test/data/A_21/heptaphene.mol +71 -0
  122. data/test/data/A_21/hexacene.cdx +0 -0
  123. data/test/data/A_21/hexacene.mol +62 -0
  124. data/test/data/A_21/hexaphene.cdx +0 -0
  125. data/test/data/A_21/hexaphene.mol +62 -0
  126. data/test/data/A_21/indene.cdx +0 -0
  127. data/test/data/A_21/indene.mol +24 -0
  128. data/test/data/A_21/iupac.txt +41 -0
  129. data/test/data/A_21/naphthacene.cdx +0 -0
  130. data/test/data/A_21/naphthacene.mol +44 -0
  131. data/test/data/A_21/naphthalene.cdx +0 -0
  132. data/test/data/A_21/naphthalene.mol +26 -0
  133. data/test/data/A_21/ovalene.cdx +0 -0
  134. data/test/data/A_21/ovalene.mol +78 -0
  135. data/test/data/A_21/pentacene.cdx +0 -0
  136. data/test/data/A_21/pentacene.mol +53 -0
  137. data/test/data/A_21/pentalene.cdx +0 -0
  138. data/test/data/A_21/pentalene.mol +22 -0
  139. data/test/data/A_21/pentaphene.cdx +0 -0
  140. data/test/data/A_21/pentaphene.mol +53 -0
  141. data/test/data/A_21/perylene.cdx +0 -0
  142. data/test/data/A_21/perylene.mol +49 -0
  143. data/test/data/A_21/phenalene.cdx +0 -0
  144. data/test/data/A_21/phenalene.mol +33 -0
  145. data/test/data/A_21/phenanthrene.cdx +0 -0
  146. data/test/data/A_21/phenanthrene.mol +35 -0
  147. data/test/data/A_21/picene.cdx +0 -0
  148. data/test/data/A_21/picene.mol +53 -0
  149. data/test/data/A_21/pleiadene.cdx +0 -0
  150. data/test/data/A_21/pleiadene.mol +44 -0
  151. data/test/data/A_21/pyranthrene.cdx +0 -0
  152. data/test/data/A_21/pyranthrene.mol +72 -0
  153. data/test/data/A_21/pyrene.cdx +0 -0
  154. data/test/data/A_21/pyrene.mol +40 -0
  155. data/test/data/A_21/rubicene.cdx +0 -0
  156. data/test/data/A_21/rubicene.mol +63 -0
  157. data/test/data/A_21/s-indacene.cdx +0 -0
  158. data/test/data/A_21/s-indacene.mol +31 -0
  159. data/test/data/A_21/tetraphenylene.cdx +0 -0
  160. data/test/data/A_21/tetraphenylene.mol +57 -0
  161. data/test/data/A_21/trinaphthylene.cdx +0 -0
  162. data/test/data/A_21/trinaphthylene.mol +71 -0
  163. data/test/data/A_21/triphenylene.cdx +0 -0
  164. data/test/data/A_21/triphenylene.mol +44 -0
  165. data/test/data/C00147.kcf +25 -0
  166. data/test/data/G00147.kcf +13 -0
  167. data/test/data/atp.mol +69 -0
  168. data/test/data/cyclohexane.mol +17 -0
  169. data/test/data/cyclohexane.ps +485 -0
  170. data/test/data/fullerene.mol +155 -0
  171. data/test/data/glycan +33 -0
  172. data/test/data/hypericin.cdx +0 -0
  173. data/test/data/hypericin.cdxml +596 -0
  174. data/test/data/hypericin.chm +0 -0
  175. data/test/data/hypericin.ct +85 -0
  176. data/test/data/hypericin.f1d +0 -0
  177. data/test/data/hypericin.f1q +0 -0
  178. data/test/data/hypericin.gif +0 -0
  179. data/test/data/hypericin.mol +88 -0
  180. data/test/data/hypericin.mol2 +159 -0
  181. data/test/data/hypericin.msm +123 -0
  182. data/test/data/hypericin.pdf +359 -0
  183. data/test/data/hypericin.png +0 -0
  184. data/test/data/hypericin.ps +0 -0
  185. data/test/data/hypericin.skc +0 -0
  186. data/test/data/hypericin2.gif +0 -0
  187. data/test/data/hypericin2.ps +0 -0
  188. data/test/data/kegg/genomes/hsa/hsa_enzyme.list +4 -0
  189. data/test/data/kegg/genomes/hsa/hsa_pfam.list +4 -0
  190. data/test/data/kegg/ligand/mol/C00147.mol +26 -0
  191. data/test/data/kegg/ligand/reaction +14 -0
  192. data/test/data/kegg/ligand/reaction.lst +1 -0
  193. data/test/data/kegg/ligand/reaction_mapformula.lst +3 -0
  194. data/test/data/reaction +14 -0
  195. data/test/data/reaction.lst +1 -0
  196. data/test/data/reaction_mapformula.lst +3 -0
  197. data/test/data/rxn/C00001.mol +6 -0
  198. data/test/data/rxn/C00011.mol +10 -0
  199. data/test/data/rxn/C00014.mol +6 -0
  200. data/test/data/rxn/C01010.mol +18 -0
  201. data/test/data/rxn/sample.rxn +50 -0
  202. data/test/data/rxn/substitution.rxn +45 -0
  203. data/test/data/test.eps +0 -0
  204. data/test/data/test.mol +28 -0
  205. data/test/data/test.sdf +143 -0
  206. data/test/data/test.skc +0 -0
  207. data/test/data/test.xyz +4 -0
  208. data/test/data/test_lf.sdf +143 -0
  209. data/test/heavy_test_pubchem.rb +16 -0
  210. data/test/multiple_test.rb +22 -0
  211. data/test/test_adj.rb +54 -0
  212. data/test/test_canonical_smiles.rb +46 -0
  213. data/test/test_cdx.rb +32 -0
  214. data/test/test_chem.rb +18 -0
  215. data/test/test_cluster.rb +19 -0
  216. data/test/test_db.rb +11 -0
  217. data/test/test_eps.rb +24 -0
  218. data/test/test_geometry.rb +11 -0
  219. data/test/test_gspan.rb +28 -0
  220. data/test/test_iupac.rb +36 -0
  221. data/test/test_kcf.rb +24 -0
  222. data/test/test_kcf_glycan.rb +10 -0
  223. data/test/test_kegg.rb +118 -0
  224. data/test/test_linucs.rb +21 -0
  225. data/test/test_mdl.rb +45 -0
  226. data/test/test_mol2.rb +62 -0
  227. data/test/test_morgan.rb +21 -0
  228. data/test/test_pdf.rb +12 -0
  229. data/test/test_prop.rb +86 -0
  230. data/test/test_rmagick.rb +15 -0
  231. data/test/test_sbdb.rb +23 -0
  232. data/test/test_sdf.rb +30 -0
  233. data/test/test_smiles.rb +84 -0
  234. data/test/test_sssr.rb +18 -0
  235. data/test/test_sub.rb +47 -0
  236. data/test/test_subcomp.rb +37 -0
  237. data/test/test_traverse.rb +29 -0
  238. data/test/test_writer.rb +13 -0
  239. data/test/test_xyz.rb +15 -0
  240. data/test/type_test.rb +25 -0
  241. metadata +290 -0
@@ -0,0 +1,2 @@
1
+
2
# Pattern for the IUPAC name postfixes handled so far ("ane" or "anol").
# The matched postfix is exposed as capture group 1.
$reg_postfix = Regexp.new('(ane|anol)')
@@ -0,0 +1,390 @@
1
+ #
2
+ #
3
+ # = chem/db/kcf.rb - KEGG Compound Function parser
4
+ #
5
+
6
+ module Chem
7
+
8
+ module KEGG
9
+
10
# Entry point for reading an atom-correspondence ("A number") file.
class ANumber

  # Opens +filename+ and parses it into a KCFCorrespondence.
  #
  # KCFCorrespondence reads the whole stream from its constructor, so the
  # file is opened in block form and closed as soon as parsing has finished
  # (the handle used to stay open for the lifetime of the process).
  #
  # filename:: path of the correspondence file
  # Returns the populated KCFCorrespondence.
  def self.open(filename)
    File.open(filename) { |input| KCFCorrespondence.new(input) }
  end

end
18
+
19
# One atom record of a KCF file.
#
# The raw record text is kept and its fixed-width columns are decoded lazily
# on first access; a value assigned through a setter takes precedence over
# the text.
class KCFAtom

  include Atom
  attr_writer :kcf_type, :atom_id
  attr_accessor :next_atom

  # line:: raw text of the atom record. Optional, because the readers in this
  #        file also build atoms with a bare KCFAtom.new and then fill them in
  #        through setters; in that case the lazy getters return nil until a
  #        value has been assigned.
  def initialize(line = nil)
    @line = line
    @next_atom = {}
  end

  # X coordinate, columns 22...32 of the record.
  def x ; @x ||= (@line[22...32].to_f if @line) ; end

  # Y coordinate, columns 32...42 of the record.
  def y ; @y ||= (@line[32...42].to_f if @line) ; end

  # KCF atom type label, columns 16...19 of the record.
  def kcf_type ; @kcf_type ||= (@line[16...19].strip if @line) ; end

  # Element symbol as a Symbol, columns 19...22 of the record.
  def element ; @element ||= (@line[19...22].strip.intern if @line) ; end

  # Atom serial number, columns 0...16 of the record.
  def atom_id ; @atom_id ||= (@line[0...16].to_i if @line) ; end

end
38
+
39
# One bond record of a KCF file.
#
# The raw record text is kept and its fixed-width columns are decoded lazily
# on first access; a value assigned through a setter takes precedence.
class KCFBond

  include Bond
  attr_writer :bond_id, :property

  # line:: raw text of the bond record. Optional, because the readers in this
  #        file also build bonds with a bare KCFBond.new and then fill them in
  #        through setters; the lazy getters then return nil until assigned.
  def initialize(line = nil)
    @line = line
  end

  # Bond serial number, columns 0...16 of the record.
  def bond_id ; @bond_id ||= (@line[0...16].to_i if @line) ; end

  # Integer read from columns 23...25 of the record.
  # NOTE(review): presumably the bond order - confirm against the KCF spec.
  def v ; @v ||= (@line[23...25].to_i if @line) ; end

  # Everything from column 27 to the end of the record.
  def property ; @property ||= (@line[27..-1] if @line) ; end

end
54
+
55
# A compound read from a KCF stream: atoms end up in @nodes and bonds in
# @edges, each edge being a [bond, first_atom, second_atom] triple.
class KCF

  include Molecule
  include Enumerable

  # Reads one KCF entry from +input+ (any object responding to gets and
  # readline) up to the "///" terminator or the end of the stream.
  #
  # Fixes over the previous version:
  # * bond end points are taken from the bond record itself; they used to be
  #   sliced out of the "BOND" header line, so both atoms were always nil
  # * the section keyword is stripped before comparing, so the 12-column
  #   slice actually matches "ATOM" / "BOND"
  # * a stream without a terminator ends the loop instead of raising EOFError
  def initialize(input)
    @nodes = []
    @edges = []
    atoms_by_id = {}
    while (line = input.gets) && !line.include?('///')
      case line[0...12].to_s.strip
      when 'ATOM'
        # The header carries the number of atom records that follow.
        line.split[1].to_i.times do
          atom = KCFAtom.new(input.readline)
          atoms_by_id[atom.atom_id] = atom
          @nodes.push(atom)
        end
      when 'BOND'
        # The header carries the number of bond records that follow.
        line.split[1].to_i.times do
          bond_line = input.readline
          bond = KCFBond.new(bond_line)
          first = atoms_by_id[bond_line[16...19].to_i]
          second = atoms_by_id[bond_line[19...23].to_i]
          @edges.push([bond, first, second])
        end
      end
    end
  end

  # Parses the file at +filename+; the handle is closed once the entry has
  # been read.
  def KCF.open(filename)
    File.open(filename) { |input| KCF.new(input) }
  end

end
89
+
90
# Streaming reader for a KEGG "reaction" flat file.
class KeggReaction

  # Plain record for the fields of one reaction entry.
  class ReactionEntry
    attr_accessor :entry, :name, :definition, :reactants, :products, :rpair, :ec, :comment, :pathway
    def initialize
      @comment = []
      @name = []
      @definition = []
    end
  end

  # Fields whose value may continue on following lines with a blank keyword.
  MULTILINE_FIELDS = ['NAME', 'DEFINITION', 'COMMENT', 'PATHWAY'].freeze

  # input:: IO-like object positioned at the start of an entry
  def initialize(input)
    @input = input
  end

  # Opens +filename+ for reading. The stream is consumed lazily by #each, so
  # it is left open here.
  def KeggReaction.open(filename)
    KeggReaction.new(File.open(filename))
  end

  # Yields one ReactionEntry per "///"-terminated entry.
  #
  # Each line is split into a keyword (first 12 columns, stripped) and a
  # value (the rest, without the line terminator). A blank keyword continues
  # the previous field. The old implementation tested "state == :NAME"
  # before looking at the other keywords, so every line after NAME was
  # swallowed as a name; the keyword now always wins over the state.
  #
  # Also changed: NAME continuation lines are appended to the name, PATHWAY
  # lines are collected in entry.pathway, blank lines are skipped, and a
  # missing final terminator no longer raises EOFError.
  def each
    until @input.eof?
      entry = ReactionEntry.new
      state = nil
      seen = false
      while (line = @input.gets) && !line.include?('///')
        line = line.chomp
        keyword = line[0...12].to_s.strip
        value = line[12..-1].to_s
        continuation = keyword.empty?
        next if continuation && value.strip.empty?
        state = keyword unless continuation
        seen = true
        if continuation && !MULTILINE_FIELDS.include?(state)
          puts "Error Unknown line : %s" % line
          next
        end
        case state
        when 'ENTRY'
          entry.entry = value
        when 'NAME'
          entry.name = continuation ? "#{entry.name} #{value.strip}" : value
        when 'DEFINITION'
          entry.definition.push(value)
        when 'EQUATION'
          left, right = value.split('<=>')
          entry.reactants = left.to_s.split('+').collect { |mol| mol.strip }
          entry.products = right.to_s.split('+').collect { |mol| mol.strip }
        when 'RPAIR'
          entry.rpair = value
        when 'ENZYME'
          entry.ec = value.split('.').collect { |n| n.to_i }
        when 'COMMENT'
          entry.comment.push(value)
        when 'PATHWAY'
          (entry.pathway ||= []).push(value)
        else
          puts "Error Unknown line : %s" % line
        end
      end
      # Trailing blank lines after the last terminator produce no entry.
      yield entry if seen
    end
  end
end
145
+
146
# Pairs the atoms of a reactant and a product molecule and derives, for every
# pair of mapped nodes, the bond on the reactant side and on the product side.
class KCFRXN

  # reactant, product:: molecule objects responding to atoms and bonds
  def initialize(reactant, product)
    @reactant = reactant
    @product = product
    @matched_reactants = []
    @matched_products = []
    @nodes = []
  end

  # Records that reactant atom +from+ corresponds to product atom +to+
  # (both are keys into the respective atoms collection).
  #
  # The matched product atom is looked up with +to+; it used to be looked up
  # with +from+, which marked the wrong product atom as matched and made
  # setup_bonds add the really matched atom a second time.
  def corresponds(from, to)
    reactant_atom = @reactant.atoms[from]
    product_atom = @product.atoms[to]
    @matched_reactants.push(reactant_atom)
    @matched_products.push(product_atom)
    @nodes.push(RXNNode.new(reactant_atom, product_atom))
  end

  # Adds a node for every atom left unmatched, builds the reaction edges and
  # prints the reactant/product multiplicity of each edge. Returns the edges.
  def setup_bonds
    @edges = []
    add_unmatched_nodes
    index_neighbours(@reactant)
    index_neighbours(@product)
    @nodes.each_with_index do |node, index|
      index.upto(@nodes.length - 1) do |n|
        other = @nodes[n]
        r_edge = shared_bond(other.reactant_node, node.reactant_node)
        p_edge = shared_bond(other.product_node, node.product_node)
        next unless r_edge || p_edge
        edge = RXNEdge.new
        edge.reactant_edge = r_edge
        edge.product_edge = p_edge
        @edges.push(edge)
      end
    end
    @edges.each do |edge|
      from = edge.reactant_edge ? edge.reactant_edge.multiplicity : 0
      to = edge.product_edge ? edge.product_edge.multiplicity : 0
      puts "%3d %3d" % [from, to]
    end
  end

  # A reactant atom paired with a product atom; either side may be nil.
  class RXNNode
    attr_reader :reactant_node, :product_node
    def initialize(reactant, product)
      @reactant_node = reactant
      @product_node = product
    end
  end

  # The reactant-side and product-side bond between two nodes.
  class RXNEdge
    attr_accessor :product_edge, :reactant_edge
  end

  private

  # Appends one-sided nodes for atoms that corresponds never paired up.
  def add_unmatched_nodes
    @reactant.atoms.each do |atom|
      @nodes.push(RXNNode.new(atom, nil)) if atom && !@matched_reactants.member?(atom)
    end
    @product.atoms.each do |atom|
      @nodes.push(RXNNode.new(nil, atom)) if atom && !@matched_products.member?(atom)
    end
  end

  # Registers every bond of +molecule+ in the next_atom table of both ends.
  def index_neighbours(molecule)
    molecule.bonds.each do |bond|
      bond.e.next_atom[bond.b] = bond
      bond.b.next_atom[bond.e] = bond
    end
  end

  # Bond stored on +atom+ for +neighbour+, or nil if there is none.
  def shared_bond(atom, neighbour)
    return nil unless atom && atom.next_atom.has_key?(neighbour)
    atom.next_atom[neighbour]
  end
end
217
+
218
# Parsed atom-correspondence entry: the compounds involved and, per row
# number, the pair of [atom number, label] mappings.
class KCFCorrespondence

  attr_reader :compounds, :correspondence

  # Parses one entry from +input+ immediately.
  def initialize(input)
    @name = []
    @input = input
    @compounds = []
    @correspondence = {}
    parse(input)
  end

  # Loads the first two compounds as "<dir><compound>.kcf", registers every
  # correspondence row with a KCFRXN and returns the result of its
  # setup_bonds. +dir+ is used as a plain string prefix, so it must end with
  # a path separator.
  def make_rxn(dir)
    reactant = KCF.open("#{dir}#{@compounds[0]}.kcf")
    product = KCF.open("#{dir}#{@compounds[1]}.kcf")
    rxn = KCFRXN.new(reactant, product)
    @correspondence.each do |k, corres|
      rxn.corresponds(corres[0][0], corres[1][0])
    end
    rxn.setup_bonds
  end

  # Reads lines up to the "///" terminator or the end of the stream.
  #
  # The keyword is the stripped first 12 columns, so it is compared without
  # padding (the padded comparison never matched). Any line that is not a
  # known keyword is treated as a correspondence row of the form
  # "<row> <n>:<label> <n>:<label>"; lines without fields are skipped
  # instead of raising.
  def parse(input)
    while (line = input.gets) && !line.include?('///')
      line = line.chomp
      value = line[12..-1].to_s
      case line[0...12].to_s.strip
      when 'ENTRY'
        number = /(\d+)/.match(line)
        @no = number[1].to_i if number
      when 'NAME'
        @name.push(value)
      when 'COMPOUND'
        @compounds.push(value)
      when 'TYPE'
        @type = value
      when 'ALIGN'
        @align = value.to_i
      else
        fields = value.split
        next if fields.empty?
        @correspondence[fields[0].to_i] = fields[1..2].collect do |e|
          a = e.split(':')
          [a[0].to_i, a[1]]
        end
      end
    end
  end

end
262
+
263
# KCF output support for atoms.
module Atom
  attr_accessor :kcf_type, :kcf_prop

  # Returns the atom as a KCF formatted line, built from @number, @kcf_type,
  # @element, @x and @y; "#<kcf_prop>" is appended when a property is set.
  def kcf_line
    values = [@number, @kcf_type, @element, @x, @y]
    return "%14d %3s%2s %10.4f%10.4f" % values unless @kcf_prop
    "%14d %3s%2s %10.4f%10.4f #%s" % (values << @kcf_prop)
  end
end
275
+
276
# KCF output support for bonds.
module Bond
  attr_accessor :kcf_prop

  # Returns the bond as a KCF formatted line, built from @number, the numbers
  # of the two end atoms (@b, @e) and @multiplicity; "#<kcf_prop>" is
  # appended when a property is set.
  #
  # The branch without a property used to pass five values to a four-field
  # format string, which warns under -w and raises ArgumentError when $DEBUG
  # is set; it now passes exactly the values it formats.
  def kcf_line
    values = [@number, @b.number, @e.number, @multiplicity]
    if @kcf_prop
      "%13d %4d%4d%2d #%s" % (values << @kcf_prop)
    else
      "%13d %4d%4d%2d" % values
    end
  end

end
289
+
290
# Reads a stream of KCF entries and hands each finished KCFMolecule to a
# block.
class KCFReader

  # Opens +file+, reads every entry from it and closes the file again (the
  # handle used to be left open).
  def KCFReader.open(file, &method)
    File.open(file, 'r') { |input| KCFReader.new.read(input, &method) }
  end

  # Yields one KCFMolecule per "///" terminator found in +input+.
  #
  # Header lines are recognised by a keyword in the first 12 columns; all
  # other lines are atom or bond records, depending on the section last seen.
  #
  # Changes from the previous version:
  # * the ENTRY number and the ATOM/BOND counts were parsed but never used;
  #   the ENTRY match also raised for entries without a "C<digits>" id
  # * records are split on whitespace instead of String#scanf, which needs
  #   the separate scanf library
  # * atoms and bonds are built from the record line, which satisfies the
  #   one-argument constructors of KCFAtom and KCFBond
  # * records without any field are skipped
  def read(input, &method)
    status = :NEW
    mol = KCFMolecule.new
    input.each do |line|
      case line[0..11]
      when /ANUMBER/
        mol.a_no = /A(\d+)/.match(line)[1].to_i
      when /ENTRY/
        # Nothing from the ENTRY line is stored.
      when /ATOM/
        status = :ATOM
      when /BOND/
        status = :BOND
      when /\/\/\//
        yield mol if method
        mol = KCFMolecule.new
        status = :NEW
      else
        fields = line[12..-1].to_s.split
        next if fields.empty?
        case status
        when :ATOM
          atom = KCFAtom.new(line)
          atom.number = fields[0].to_i
          atom.kcf_type = fields[1]
          atom.element = fields[2]
          atom.x = fields[3].to_f
          atom.y = fields[4].to_f
          mol.atoms[atom.number] = atom
        when :BOND
          bond = KCFBond.new(line)
          bond.multiplicity = fields[3].to_i
          bond.b = mol.atoms[fields[1].to_i]
          bond.e = mol.atoms[fields[2].to_i]
          mol.bonds.push(bond)
        end
      end
    end
  end
end
341
+
342
# Molecule built from, or written out as, KCF records.
class KCFMolecule

  include Molecule
  attr_accessor :a_no

  # Prints the KCF line of every atom and then of every bond of +molecule+.
  #
  # The previous version called the nonexistent atom.kcf and assigned to an
  # undefined local "kcf" inside the bond loop, so it could never run to
  # completion; both loops now use kcf_line. The unused counters are gone.
  def KCFMolecule.write_kcf(molecule)
    molecule.atoms.each do |k, atom|
      puts atom.kcf_line
    end
    molecule.bonds.each do |bond|
      puts bond.kcf_line
    end
  end

  # Reads the molecule stored in +file+; the handle is closed afterwards.
  def KCFMolecule.open(file)
    File.open(file, 'r') { |input| KCFMolecule.new.read(input) }
  end

  # Reads one entry from +input+: an entry line, an atom header carrying the
  # atom count, that many atom records, a bond header carrying the bond
  # count, and that many bond records. Returns self.
  #
  # Records are split on whitespace instead of String#scanf (which needs the
  # separate scanf library), and atoms/bonds are built from the record line
  # so that the one-argument constructors are satisfied.
  def read(input)
    # NOTE(review): @atoms/@bonds are presumably set up by Molecule; the
    # fallbacks only apply when they are still nil - confirm the container
    # types against Molecule.
    @atoms ||= {}
    @bonds ||= []
    @entry = input.readline
    number_of_atom = input.readline.split[1].to_i
    number_of_atom.times do
      line = input.readline
      fields = line.split
      atom = KCFAtom.new(line)
      atom.number = fields[0].to_i
      atom.kcf_type = fields[1]
      atom.element = fields[2]
      atom.x = fields[3].to_f
      atom.y = fields[4].to_f
      @atoms[atom.number] = atom
    end
    number_of_bond = input.readline.split[1].to_i
    number_of_bond.times do
      line = input.readline
      fields = line.split
      bond = KCFBond.new(line)
      bond.multiplicity = fields[3].to_i
      bond.b = @atoms[fields[1].to_i]
      bond.e = @atoms[fields[2].to_i]
      @bonds.push(bond)
    end
    self
  end

end
387
+ end
388
+ end
389
+
390
+
@@ -0,0 +1,19 @@
1
+ # = KEGG Compound Function Glycan parser
2
+ # Not implemented
3
+
4
+ module Chem
5
+
6
+ module KEGG
7
+
8
# Placeholder for a KCF glycan parser.
class KCFGlycan

  # Accepts +filename+ but does nothing with it: parsing is not implemented.
  def initialize(filename)
    # filename.each do |line|
    #   puts line
    # end
  end

end
17
+ end
18
+
19
+ end
@@ -0,0 +1,516 @@
1
+ #
2
+ # = chem/db/kegg.rb - KEGG (Kyoto Encyclopedia of Genes and Genomes)
3
+ #
4
+ # Author:: Nobuya Tanaka <tanaka@chemruby.org>
5
+ #
6
+ # $Id:$
7
+ #
8
+
9
+ require 'chem/db/mdl'
10
+
11
+ module Chem
12
+
13
+ module KEGG
14
# Access to a local mirror of the KEGG directory tree rooted at +dir+
# ("genomes/<organism>/..." and "ligand/...").
class KeggDirectory

  attr_reader :dir

  # dir:: root directory of the KEGG mirror
  def initialize(dir)
    @dir = dir
    @compounds = {}
    @ligand_dir = File.join(@dir, "ligand")
    @mol_dir = File.join(@ligand_dir, "mol")
    @parsed_file = []
  end

  # Path of +file+ inside the genome directory of +organism+.
  def get_organism(organism, file)
    File.join(@dir, "genomes", organism, file)
  end

  # Loads "<organism>_pfam.list" (tab separated: gene, pfam) once and returns
  # the accumulated table pfam id => [KeggGene, ...].
  #
  # The file is read with File.foreach, which closes it afterwards and,
  # unlike Kernel#open, never treats the name as a command. Lines are
  # chomped before splitting, so a last line without a newline keeps its
  # final character.
  def gene_to_pfam(organism)
    filename = File.join(@dir, "genomes", organism, organism + "_pfam.list")
    return @pfam2gene if @parsed_file.include?(filename)
    @parsed_file.push filename
    @gene2pfam ||= {}
    @pfam2gene ||= {}
    File.foreach(filename) do |line|
      gene, pfam = line.chomp.split("\t")
      next unless gene && pfam
      @gene2pfam[gene] = pfam
      (@pfam2gene[pfam] ||= []).push(KeggGene.new(gene, organism, self))
    end
    @pfam2gene
  end

  # Returns the enzyme id listed for +gene+ (an object responding to
  # organism and gene) in "<organism>_enzyme.list", loading that file on
  # first use; nil when the gene is not listed.
  def get_ec_number(gene)
    @gene2enzyme ||= {}
    @enzyme2gene ||= {}
    filename = File.join(@dir, "genomes", gene.organism, gene.organism + "_enzyme.list")
    return @gene2enzyme[gene.gene] if @parsed_file.include?(filename)
    @parsed_file.push filename

    File.foreach(filename) do |line|
      gn, ec = line.chomp.split("\t")
      @gene2enzyme[gn] = ec
      @enzyme2gene[ec] = gn
    end
    @gene2enzyme[gene.gene]
  end

  # Looks up a KEGG object by key. The patterns are tried in this order:
  # "R<digits>" reaction, "C<digits>" compound, "pf:<id>" Pfam entry,
  # "<org>:<digits>" gene (not implemented, raises), "<org>" organism.
  def [](key)
    case key
    when /(R\d+)/
      get_reaction $1
    when /(C\d+)/
      get_compound $1
    when /pf:(.+)/
      KeggPfam.new($1, self)
    when /^([^:]{3,4}):(\d+)/
      # gene
      raise "Parser for Organism not implemented!"
    when /^([^:]{3,4})/
      # organism
      KeggOrganism.new($1, self)
    else
      raise "unknown KEGG key type : #{key}"
    end
  end

  # Table reaction entry => reaction, parsed from
  # "ligand/reaction_mapformula.lst" on first use.
  def map_formula
    @reaction_map_formula = parse_reaction_map_formula unless @reaction_map_formula
    @reaction_map_formula
  end

  # Parses "ligand/reaction_mapformula.lst" into a Hash keyed by entry.
  def parse_reaction_map_formula
    rxns = {}
    parser = Chem.parse_file(File.join(@dir, "ligand", "reaction_mapformula.lst"))
    parser.each do |rxn|
      rxns[rxn.entry] = rxn
    end
    rxns
  end

  # Helper classes. A bare "private" has no effect on class definitions, so
  # the markers that preceded them were dropped; the classes stay reachable
  # as before.

  # An organism inside a KeggDirectory.
  class KeggOrganism

    def initialize(organism, kegg)
      @organism = organism
      @kegg = kegg
    end

    # Table pfam id => genes for this organism.
    def pfam
      @kegg.gene_to_pfam(@organism)
    end

    # NOTE(review): ignores +key+ and returns the KeggDirectory itself;
    # looks like an unfinished lookup - confirm the intended behaviour.
    def [](key)
      @kegg
    end

  end

  # A gene of one organism inside a KeggDirectory.
  class KeggGene

    attr_reader :organism, :gene
    def initialize(gene, organism, kegg)
      @gene = gene
      @organism = organism
      @kegg = kegg
    end

    # inspect-formatted enzyme id of this gene.
    def ec_number
      @kegg.get_ec_number(self).inspect
    end

  end

  # A Pfam entry inside a KeggDirectory.
  class KeggPfam

    def initialize(pfam_key, kegg)
      @kegg = kegg
      @pfam_key = pfam_key
    end

    # Delegates to kegg[organism][pfam_key].
    def [](organism)
      @kegg[organism][@pfam_key]
    end

  end

  private

  # Molecule for compound +name+, read from "ligand/mol/<name>.mol" and
  # cached.
  def get_compound(name)
    unless @compounds[name]
      @compounds[name] = Chem.open_mol(File.join(@dir, "ligand", "mol", name) + ".mol")
    end
    @compounds[name]
  end

  # Reaction +name+ from "ligand/reaction", parsed on first use.
  def get_reaction(name)
    @reactions ||= parse_reaction
    @reactions[name]
  end

  # Parses "ligand/reaction" into a Hash keyed by entry; every reaction gets
  # a back reference to this directory.
  def parse_reaction
    rxns = {}
    parser = Chem.parse_file(File.join(@dir, "ligand", "reaction"))
    parser.each do |reaction|
      reaction.kegg = self
      rxns[reaction.entry] = reaction
    end
    rxns
  end

end
165
+
166
+ #obsolete
167
# Directory holding the KEGG compound .mol files; nil until configured.
@@kegg_compound_folder = nil

# Sets the compound directory.
def self.kegg_compound_folder=(folder)
  @@kegg_compound_folder = folder
end

# Returns the configured compound directory, or nil.
def self.kegg_compound_folder
  @@kegg_compound_folder
end
175
+
176
+ # Duplicate definition!
177
# A KEGG reaction: entry id, name, EC numbers, compounds and direction.
class KEGGReaction

  include Chem::Reaction
  attr_accessor :entry, :name, :ecs, :compounds, :direction

  # Directory object used by #map_formula.
  attr_writer :kegg

  def initialize
    @ecs = []
    @compounds = []
  end

  # Compounds of the map-formula record stored for this entry in the
  # directory, or nil when there is no such record.
  def map_formula
    mapped = @kegg.map_formula[@entry]
    mapped ? mapped.compounds : nil
  end
end
195
+
196
# A KEGG compound whose structure is loaded from "<entry>.mol" in the
# configured compound folder and shared between instances with the same
# entry.
class KeggCompound
  include Molecule
  include Enumerable
  include MDL::MdlMolParser
  attr_reader :entry

  def initialize
    @nodes = []
    @edges = []
  end

  # Parsed molecules by entry id, shared by all instances.
  @@entries = {}

  # Assigns the entry id and attaches the nodes and edges of the matching
  # molecule, loading it on first use. A missing .mol file leaves the
  # instance without nodes and edges.
  #
  # The folder is read from Chem::KEGG, the module that defines
  # kegg_compound_folder in this file; the former constant Chem::Kegg does
  # not match it.
  #
  # Raises ArgumentError when no compound folder has been configured.
  def entry=(entry_no)
    @entry = entry_no
    if @@entries[entry_no].nil?
      folder = Chem::KEGG.kegg_compound_folder
      if folder.nil?
        raise ArgumentError, "Chem::KEGG.kegg_compound_folder not specified"
      end
      filename = File.join(folder, entry_no + ".mol")
      mol = nil
      mol = Chem.open_mol(filename) if File.exist?(filename)

      @@entries[entry_no] = mol
    end
    @fly_weight = @@entries[entry_no]
    if @fly_weight
      @nodes = @fly_weight.nodes
      @edges = @fly_weight.edges
    end
  end

end
233
+
234
# Plain record for a KEGG glycan: entry id and name.
class KeggGlycan
  attr_accessor :entry
  attr_accessor :name
end
237
+
238
# Plain record for an EC number: entry id and its numeric components.
class KeggEc
  attr_accessor :entry
  attr_accessor :number
end
241
+
242
# Shared reader for KEGG flat files, in which a field starts with a keyword
# in the first 12 columns and may continue on following lines whose first 12
# columns are blank. Including classes provide the stream as @input.
module KeggFormat

  # Sets the folder used to locate compound .mol files. Delegates to
  # Chem::KEGG, the module that defines the accessor in this file (the
  # former constant Chem::Kegg does not match it).
  def compound_folder=(folder)
    Chem::KEGG.kegg_compound_folder = folder
  end

  # Yields (text, keyword) for every field read from @input. +text+ is the
  # field value including its line terminators, with continuation lines
  # appended; it is nil for a keyword line shorter than 13 characters, such
  # as the "///" terminator.
  #
  # Fixes over the previous version:
  # * the field still pending at the end of the stream (normally the final
  #   "///") is yielded too; it used to be dropped
  # * a continuation is detected by a blank keyword column rather than by
  #   comparison with a fixed-width literal, so short and blank lines are
  #   handled as well
  def each_entry
    state = nil
    str = ''
    @input.each do |line|
      if line[0..11].to_s.strip.empty?
        str = str.to_s + line[12..-1].to_s
      else
        yield(str, state) if state # Not first state
        str = line[12..-1]
        state = line[0..11].strip
      end
    end
    yield(str, state) if state
  end
end
262
+
263
# Parser for the KEGG "reaction" flat file; enumerates KEGGReaction objects.
class KeggReactionParser

  include KeggFormat
  include Enumerable

  # filename:: path of the reaction file; it is opened here and read by #each
  def initialize(filename)
    @input = File.open(filename)
  end

  # Splits one side of an equation on " + " and returns an array of
  # [entry, stoichiometry] pairs. The entry is the first "C<digits>" found,
  # otherwise the first "G<digits>", otherwise ""; the stoichiometry is the
  # integer directly preceding a C or G, or 1 when there is none.
  def parse_compounds(species)
    pairs = []
    species.split(" + ").each do |mol|
      count = /(\d+) *[CG]/.match(mol)
      found = /(C\d+)/.match(mol) || /(G\d+)/.match(mol)
      pairs.push([found ? found[1] : "", count ? count[1].to_i : 1])
    end
    pairs
  end

  # Yields a KEGGReaction whenever a "///" field is delivered by each_entry.
  # ENTRY starts a new reaction; NAME, EQUATION and ENZYME fill it in;
  # DEFINITION, RPAIR, PATHWAY, COMMENT and REFERENCE are ignored; any other
  # keyword is printed with p.
  def each
    reaction = nil
    each_entry do |str, state|
      case state
      when "ENTRY"
        reaction = KEGGReaction.new
        reaction.entry = str.split[0]
      when "NAME"
        reaction.name = str
      when "EQUATION"
        sides = str.split("<=>")
        reaction.compounds << parse_compounds(sides[0])
        reaction.compounds << parse_compounds(sides[1])
      when "ENZYME"
        str.split.each do |e|
          ec = KeggEc.new
          ec.entry = "EC" + e
          ec.number = e.split(".").collect { |i| i.to_i }
          reaction.ecs << ec
        end
      when "///"
        yield reaction
      when "DEFINITION", "RPAIR", "PATHWAY", "COMMENT", "REFERENCE"
        # recognised but not stored
      else
        p state
      end
    end
  end

end
331
+
332
# Parser for "reaction.lst": one reaction per line in the form
# "<entry>: <compounds> <=> <compounds>".
class KeggReactionLstParser

  include Enumerable
  include KeggFormat

  # filename:: path of the list file. File.open is used instead of
  # Kernel#open so that the name is never interpreted as a command.
  def initialize(filename)
    @input = File.open(filename)
  end

  # Yields a KEGGReaction per line; its compounds are
  # [reactants, products], each a list of [entry, stoichiometry] pairs.
  # Lines without a ":" separator are skipped.
  def each
    @input.each do |line|
      r_number, comps = line.split(":")
      next if comps.nil?
      sides = comps.split(/<=>/)
      rxn = KEGGReaction.new
      rxn.entry = r_number
      rxn.compounds = [parse_side(sides[0]), parse_side(sides[1])]
      yield rxn
    end

  end

  private

  # Turns "2 C00011 + C00014" into [["C00011", 2], ["C00014", 1]].
  #
  # With a leading coefficient the entry is the second token; the previous
  # code stored the whole term ("2 C00011") as the entry.
  def parse_side(side)
    side.to_s.split("+").collect do |term|
      tokens = term.split
      tokens.length > 1 ? [tokens[1], tokens[0].to_i] : [term.strip, 1]
    end
  end

end
377
+
378
+ # ftp://ftp.genome.ad.jp/pub/kegg/ligand/reaction_mapformula.lst
379
# Parser for "reaction_mapformula.lst": colon separated lines of the form
# "<entry>:<map>:<compounds> <arrow> <compounds>", the arrow being "<=",
# "<=>" or "=>".
class KeggReactionMapParser

  include Enumerable
  include KeggFormat

  # Reads the whole file into a table entry => remaining fields.
  #
  # File.foreach closes the file afterwards (the handle used to stay open)
  # and never treats the name as a command. Blank lines are skipped. Only
  # the last line for a given entry is kept, as before.
  def initialize(filename)
    @reactions = {}
    File.foreach(filename) do |line|
      fields = line.chomp.split(":")
      next if fields.empty?
      @reactions[fields[0]] = fields[1..-1]
    end
  end

  # Yields the KEGGReaction of every entry in the table.
  def each
    @reactions.each_key do |r_number|
      yield self[r_number]
    end
  end

  # Builds the KEGGReaction for +r_number+; nil when the entry is unknown or
  # its line has no formula field.
  #
  # direction is -1 for "<=", 0 for "<=>" and 1 for "=>". compounds is
  # [reactants, products], every compound paired with stoichiometry 1.
  def [](r_number)
    return nil if @reactions[r_number] == nil
    map_number, comps = @reactions[r_number]
    return nil if comps.nil?
    rxn = KEGGReaction.new
    rxn.entry = r_number
    # The capture group keeps the arrow as the middle element.
    left, arrow, right = comps.split(/(<?=>?)/)
    case arrow
    when "<="
      rxn.direction = -1
    when "<=>"
      rxn.direction = 0
    when "=>"
      rxn.direction = 1
    end
    reactant = left.to_s.split("+").collect { |c| [c.strip, 1] }
    product = right.to_s.split("+").collect { |c| [c.strip, 1] }
    rxn.compounds = [reactant, product]
    rxn
  end

end
429
+
430
+ # Parses KEGG Glycan format
431
+ # http://www.genome.jp/ligand/kcam/kcam/kcf.html
432
+ # Not fully implemented
433
# Parser for the KEGG glycan flat file; only ENTRY and NAME are read.
class KeggGlycanParser

  include Enumerable
  include KeggFormat

  # filename:: path of the glycan file. File.open is used instead of
  # Kernel#open so that the name is never interpreted as a command.
  def initialize(filename)
    @input = File.open(filename)
  end

  # Yields one KeggGlycan per entry.
  #
  # Previously the entry id was assigned inside "if glycan == nil", directly
  # after the object had been created, so it was never set, and nothing was
  # ever yielded, which left the Enumerable methods empty. A glycan is now
  # yielded at its "///" terminator, or at the end of the stream if the
  # terminator was never delivered.
  def each
    glycan = nil
    each_entry do |str, state|
      case state
      when "ENTRY"
        glycan = KeggGlycan.new
        glycan.entry = str.split[0]
      when "NAME"
        next if glycan.nil? || str.nil?
        glycan.name = glycan.name.to_s + str.split("\n").join
      when "///"
        yield glycan if glycan
        glycan = nil
      end
    end
    yield glycan if glycan
  end

end
466
+
467
# Walks the fields of the "compound" file under $home and fills a Compound
# record per entry: name, ChEBI and PubChem ids from DBLINKS, and the glycan
# entry. Nothing is saved.
# NOTE(review): relies on parse, $home and Compound, none of which are
# defined in the visible part of this file - confirm where they come from.
def self.parse_compound_file
  compound = nil
  parse($home + "compound") do |str, state|
    case state
    when "ENTRY"
      compound = Compound.find(:first, :conditions => ["entry = ?", str.split[0]])
      if compound.nil?
        compound = Compound.new
        compound.entry = str.split[0]
      end
    when "NAME"
      compound.name = str.split("\n").join if str
    when "DBLINKS"
      str.split("\n").each do |line|
        chebi = /ChEBI: (\d+)/.match(line)
        if chebi
          compound.chebi = chebi[1].to_i
        else
          pubchem = /PubChem: (\d+)/.match(line)
          compound.pubchem = pubchem[1].to_i if pubchem
        end
      end
    when "GLYCAN"
      compound.glycan_entry = str
    when "///"
      #compound.save
    end
  end
end
494
+
495
# For every .mol file under "$home/mol", opens the molecule and stores its
# Marshal dump on the matching compound record when that record has no ctab
# yet.
# NOTE(review): "comp" is never assigned in the visible code (the lookup is
# commented out), so this method cannot run as written - confirm whether it
# is still needed.
def set_compounds
  require 'util'
  Dir.glob($home + "/mol/*.mol").each do |path|
    entry = /(.\d+).mol/.match(path)[1]
    # comp = KeggCompound.find(:first, :conditions => ["entry = ?", entry])
    mol = Chem.open_mol(path)
    if comp == nil
      puts mol
      next
    end
    next unless comp.ctab == nil
    comp.ctab = Marshal.dump(mol)
    comp.save
  end
end
512
+
513
+ end
514
+ end
515
+
516
+