chemruby 0.9.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README +120 -0
- data/Rakefile +195 -0
- data/ext/extconf.rb +4 -0
- data/ext/subcomp.c +416 -0
- data/lib/chem.rb +130 -0
- data/lib/chem/appl.rb +1 -0
- data/lib/chem/appl/chem3dole.rb +36 -0
- data/lib/chem/appl/tinker/nucleic.rb +40 -0
- data/lib/chem/appl/tinker/tinker_reader.rb +43 -0
- data/lib/chem/data.rb +4 -0
- data/lib/chem/data/atomic_weight.rb +124 -0
- data/lib/chem/data/character.rb +2 -0
- data/lib/chem/data/electronegativity.rb +14 -0
- data/lib/chem/data/periodic_table.rb +6 -0
- data/lib/chem/data/prime_numbers.rb +1 -0
- data/lib/chem/data/vdw_radii.rb +1 -0
- data/lib/chem/db.rb +64 -0
- data/lib/chem/db/cansmi.rb +234 -0
- data/lib/chem/db/cdx.rb +1525 -0
- data/lib/chem/db/eps.rb +164 -0
- data/lib/chem/db/g98.rb +909 -0
- data/lib/chem/db/gspan.rb +130 -0
- data/lib/chem/db/iupac.rb +5 -0
- data/lib/chem/db/iupac/a_1.rb +46 -0
- data/lib/chem/db/iupac/iuparser.rb +226 -0
- data/lib/chem/db/iupac/iuparser.ry +97 -0
- data/lib/chem/db/iupac/postfix.rb +2 -0
- data/lib/chem/db/kcf.rb +390 -0
- data/lib/chem/db/kcf_glycan.rb +19 -0
- data/lib/chem/db/kegg.rb +516 -0
- data/lib/chem/db/linucs/linparser.rb +144 -0
- data/lib/chem/db/linucs/linucs.ry +53 -0
- data/lib/chem/db/mdl.rb +379 -0
- data/lib/chem/db/molconnz.rb +12 -0
- data/lib/chem/db/mopac.rb +88 -0
- data/lib/chem/db/msi.rb +107 -0
- data/lib/chem/db/pdb_dic.rb +115 -0
- data/lib/chem/db/pdf.rb +131 -0
- data/lib/chem/db/pubchem.rb +113 -0
- data/lib/chem/db/rmagick.rb +70 -0
- data/lib/chem/db/sdf.rb +37 -0
- data/lib/chem/db/smbl.rb +88 -0
- data/lib/chem/db/smiles.rb +2 -0
- data/lib/chem/db/smiles/smiles.ry +203 -0
- data/lib/chem/db/smiles/smiparser.rb +375 -0
- data/lib/chem/db/swf.rb +74 -0
- data/lib/chem/db/sybyl.rb +150 -0
- data/lib/chem/db/tinker.rb +77 -0
- data/lib/chem/db/types/type_cansmi.rb +9 -0
- data/lib/chem/db/types/type_cdx.rb +24 -0
- data/lib/chem/db/types/type_gspan.rb +31 -0
- data/lib/chem/db/types/type_kcf.rb +28 -0
- data/lib/chem/db/types/type_kcf_glycan.rb +26 -0
- data/lib/chem/db/types/type_kegg.rb +92 -0
- data/lib/chem/db/types/type_mdl.rb +31 -0
- data/lib/chem/db/types/type_pdf.rb +33 -0
- data/lib/chem/db/types/type_png.rb +31 -0
- data/lib/chem/db/types/type_rxn.rb +25 -0
- data/lib/chem/db/types/type_sdf.rb +25 -0
- data/lib/chem/db/types/type_sybyl.rb +30 -0
- data/lib/chem/db/types/type_xyz.rb +26 -0
- data/lib/chem/db/vector.rb +128 -0
- data/lib/chem/db/xyz.rb +39 -0
- data/lib/chem/model.rb +119 -0
- data/lib/chem/model/skeleton.rb +37 -0
- data/lib/chem/utils.rb +11 -0
- data/lib/chem/utils/geometry.rb +27 -0
- data/lib/chem/utils/graph_db.rb +146 -0
- data/lib/chem/utils/math.rb +17 -0
- data/lib/chem/utils/prop.rb +123 -0
- data/lib/chem/utils/sssr.rb +101 -0
- data/lib/chem/utils/sub.rb +78 -0
- data/lib/chem/utils/transform.rb +110 -0
- data/lib/chem/utils/traverse.rb +37 -0
- data/lib/chem/utils/ullmann.rb +134 -0
- data/lib/graph.rb +41 -0
- data/lib/graph/cluster.rb +20 -0
- data/lib/graph/morgan.rb +38 -0
- data/sample/frequent_subgraph.rb +46 -0
- data/sample/images/ex1.rb +11 -0
- data/sample/images/ex2.rb +4 -0
- data/sample/images/ex3.rb +5 -0
- data/sample/images/ex4.rb +17 -0
- data/sample/images/ex5.rb +10 -0
- data/sample/images/mol/adenine.mol +26 -0
- data/sample/images/mol/atp.mol +69 -0
- data/sample/images/temp/ex5.mol +344 -0
- data/sample/kegg_db.rb +116 -0
- data/setup.rb +1551 -0
- data/test/all.rb +6 -0
- data/test/coord_test.rb +17 -0
- data/test/ctab_test.rb +31 -0
- data/test/data/A_21.tar.gz +0 -0
- data/test/data/A_21/aceanthrylene.cdx +0 -0
- data/test/data/A_21/aceanthrylene.mol +40 -0
- data/test/data/A_21/acenaphthylene.cdx +0 -0
- data/test/data/A_21/acenaphthylene.mol +31 -0
- data/test/data/A_21/acephenanthrylene.cdx +0 -0
- data/test/data/A_21/acephenanthrylene.mol +40 -0
- data/test/data/A_21/anthracene.cdx +0 -0
- data/test/data/A_21/anthracene.mol +35 -0
- data/test/data/A_21/as-indacene.cdx +0 -0
- data/test/data/A_21/as-indacene.mol +31 -0
- data/test/data/A_21/azulene.cdx +0 -0
- data/test/data/A_21/azulene.mol +26 -0
- data/test/data/A_21/biphenylene.cdx +0 -0
- data/test/data/A_21/biphenylene.mol +31 -0
- data/test/data/A_21/chrysene.cdx +0 -0
- data/test/data/A_21/chrysene.mol +44 -0
- data/test/data/A_21/coronen.cdx +0 -0
- data/test/data/A_21/coronen.mol +59 -0
- data/test/data/A_21/fluoranthene.cdx +0 -0
- data/test/data/A_21/fluoranthene.mol +40 -0
- data/test/data/A_21/fluorene.cdx +0 -0
- data/test/data/A_21/fluorene.mol +33 -0
- data/test/data/A_21/heptacene.cdx +0 -0
- data/test/data/A_21/heptacene.mol +71 -0
- data/test/data/A_21/heptalene.cdx +0 -0
- data/test/data/A_21/heptalene.mol +30 -0
- data/test/data/A_21/heptaphene.cdx +0 -0
- data/test/data/A_21/heptaphene.mol +71 -0
- data/test/data/A_21/hexacene.cdx +0 -0
- data/test/data/A_21/hexacene.mol +62 -0
- data/test/data/A_21/hexaphene.cdx +0 -0
- data/test/data/A_21/hexaphene.mol +62 -0
- data/test/data/A_21/indene.cdx +0 -0
- data/test/data/A_21/indene.mol +24 -0
- data/test/data/A_21/iupac.txt +41 -0
- data/test/data/A_21/naphthacene.cdx +0 -0
- data/test/data/A_21/naphthacene.mol +44 -0
- data/test/data/A_21/naphthalene.cdx +0 -0
- data/test/data/A_21/naphthalene.mol +26 -0
- data/test/data/A_21/ovalene.cdx +0 -0
- data/test/data/A_21/ovalene.mol +78 -0
- data/test/data/A_21/pentacene.cdx +0 -0
- data/test/data/A_21/pentacene.mol +53 -0
- data/test/data/A_21/pentalene.cdx +0 -0
- data/test/data/A_21/pentalene.mol +22 -0
- data/test/data/A_21/pentaphene.cdx +0 -0
- data/test/data/A_21/pentaphene.mol +53 -0
- data/test/data/A_21/perylene.cdx +0 -0
- data/test/data/A_21/perylene.mol +49 -0
- data/test/data/A_21/phenalene.cdx +0 -0
- data/test/data/A_21/phenalene.mol +33 -0
- data/test/data/A_21/phenanthrene.cdx +0 -0
- data/test/data/A_21/phenanthrene.mol +35 -0
- data/test/data/A_21/picene.cdx +0 -0
- data/test/data/A_21/picene.mol +53 -0
- data/test/data/A_21/pleiadene.cdx +0 -0
- data/test/data/A_21/pleiadene.mol +44 -0
- data/test/data/A_21/pyranthrene.cdx +0 -0
- data/test/data/A_21/pyranthrene.mol +72 -0
- data/test/data/A_21/pyrene.cdx +0 -0
- data/test/data/A_21/pyrene.mol +40 -0
- data/test/data/A_21/rubicene.cdx +0 -0
- data/test/data/A_21/rubicene.mol +63 -0
- data/test/data/A_21/s-indacene.cdx +0 -0
- data/test/data/A_21/s-indacene.mol +31 -0
- data/test/data/A_21/tetraphenylene.cdx +0 -0
- data/test/data/A_21/tetraphenylene.mol +57 -0
- data/test/data/A_21/trinaphthylene.cdx +0 -0
- data/test/data/A_21/trinaphthylene.mol +71 -0
- data/test/data/A_21/triphenylene.cdx +0 -0
- data/test/data/A_21/triphenylene.mol +44 -0
- data/test/data/C00147.kcf +25 -0
- data/test/data/G00147.kcf +13 -0
- data/test/data/atp.mol +69 -0
- data/test/data/cyclohexane.mol +17 -0
- data/test/data/cyclohexane.ps +485 -0
- data/test/data/fullerene.mol +155 -0
- data/test/data/glycan +33 -0
- data/test/data/hypericin.cdx +0 -0
- data/test/data/hypericin.cdxml +596 -0
- data/test/data/hypericin.chm +0 -0
- data/test/data/hypericin.ct +85 -0
- data/test/data/hypericin.f1d +0 -0
- data/test/data/hypericin.f1q +0 -0
- data/test/data/hypericin.gif +0 -0
- data/test/data/hypericin.mol +88 -0
- data/test/data/hypericin.mol2 +159 -0
- data/test/data/hypericin.msm +123 -0
- data/test/data/hypericin.pdf +359 -0
- data/test/data/hypericin.png +0 -0
- data/test/data/hypericin.ps +0 -0
- data/test/data/hypericin.skc +0 -0
- data/test/data/hypericin2.gif +0 -0
- data/test/data/hypericin2.ps +0 -0
- data/test/data/kegg/genomes/hsa/hsa_enzyme.list +4 -0
- data/test/data/kegg/genomes/hsa/hsa_pfam.list +4 -0
- data/test/data/kegg/ligand/mol/C00147.mol +26 -0
- data/test/data/kegg/ligand/reaction +14 -0
- data/test/data/kegg/ligand/reaction.lst +1 -0
- data/test/data/kegg/ligand/reaction_mapformula.lst +3 -0
- data/test/data/reaction +14 -0
- data/test/data/reaction.lst +1 -0
- data/test/data/reaction_mapformula.lst +3 -0
- data/test/data/rxn/C00001.mol +6 -0
- data/test/data/rxn/C00011.mol +10 -0
- data/test/data/rxn/C00014.mol +6 -0
- data/test/data/rxn/C01010.mol +18 -0
- data/test/data/rxn/sample.rxn +50 -0
- data/test/data/rxn/substitution.rxn +45 -0
- data/test/data/test.eps +0 -0
- data/test/data/test.mol +28 -0
- data/test/data/test.sdf +143 -0
- data/test/data/test.skc +0 -0
- data/test/data/test.xyz +4 -0
- data/test/data/test_lf.sdf +143 -0
- data/test/heavy_test_pubchem.rb +16 -0
- data/test/multiple_test.rb +22 -0
- data/test/test_adj.rb +54 -0
- data/test/test_canonical_smiles.rb +46 -0
- data/test/test_cdx.rb +32 -0
- data/test/test_chem.rb +18 -0
- data/test/test_cluster.rb +19 -0
- data/test/test_db.rb +11 -0
- data/test/test_eps.rb +24 -0
- data/test/test_geometry.rb +11 -0
- data/test/test_gspan.rb +28 -0
- data/test/test_iupac.rb +36 -0
- data/test/test_kcf.rb +24 -0
- data/test/test_kcf_glycan.rb +10 -0
- data/test/test_kegg.rb +118 -0
- data/test/test_linucs.rb +21 -0
- data/test/test_mdl.rb +45 -0
- data/test/test_mol2.rb +62 -0
- data/test/test_morgan.rb +21 -0
- data/test/test_pdf.rb +12 -0
- data/test/test_prop.rb +86 -0
- data/test/test_rmagick.rb +15 -0
- data/test/test_sbdb.rb +23 -0
- data/test/test_sdf.rb +30 -0
- data/test/test_smiles.rb +84 -0
- data/test/test_sssr.rb +18 -0
- data/test/test_sub.rb +47 -0
- data/test/test_subcomp.rb +37 -0
- data/test/test_traverse.rb +29 -0
- data/test/test_writer.rb +13 -0
- data/test/test_xyz.rb +15 -0
- data/test/type_test.rb +25 -0
- metadata +290 -0
data/lib/chem/db/kcf.rb
ADDED
@@ -0,0 +1,390 @@
|
|
1
|
+
#
|
2
|
+
#
|
3
|
+
# = chem/db/kcf.rb - KEGG Chemical Function (KCF) format parser
|
4
|
+
#
|
5
|
+
|
6
|
+
module Chem
|
7
|
+
|
8
|
+
module KEGG
|
9
|
+
|
10
|
+
# Entry point for reading a KEGG alignment ("A number") file.
class ANumber

  # Parses +filename+ and returns the resulting KCFCorrespondence.
  #
  # KCFCorrespondence#initialize runs its parser before returning, so the
  # file is opened in block form and closed again as soon as the object has
  # been built. The returned object still holds a reference to the (now
  # closed) IO.
  def self.open(filename)
    File.open(filename) { |input| KCFCorrespondence.new(input) }
  end

end
|
18
|
+
|
19
|
+
# One atom record of a KCF ATOM section.
#
# The raw text line is kept and each field is sliced out of its fixed column
# range the first time it is read; the result is cached in an instance
# variable of the same name. A value stored through +kcf_type=+ or
# +atom_id=+ is returned as-is instead of being parsed.
class KCFAtom

  include Atom
  attr_accessor :next_atom
  # Only the writers are generated here: the readers below parse lazily.
  attr_writer :kcf_type, :atom_id

  # line:: the raw ATOM record. It may be omitted when the caller is going
  #        to assign every field itself; reading an unassigned field of such
  #        an atom fails because there is no text to parse.
  def initialize(line = nil)
    @line = line
    @next_atom = {}
  end

  # Columns 22...32 of the record, as a Float.
  def x ; @x ||= @line[22...32].to_f ; end

  # Columns 32...42 of the record, as a Float.
  def y ; @y ||= @line[32...42].to_f ; end

  # Columns 16...19 of the record, stripped.
  def kcf_type ; @kcf_type ||= @line[16...19].strip ; end

  # Columns 19...22 of the record, stripped and interned as a Symbol.
  def element ; @element ||= @line[19...22].strip.intern ; end

  # Columns 0...16 of the record, as an Integer.
  def atom_id ; @atom_id ||= @line[0...16].to_i ; end

end
|
38
|
+
|
39
|
+
# One bond record of a KCF BOND section.
#
# The raw text line is kept and each field is sliced out of its fixed column
# range on first access, then cached. A value stored through +bond_id=+ or
# +property=+ is returned as-is instead of being parsed.
class KCFBond

  include Bond
  # Only the writers are generated here: the readers below parse lazily.
  attr_writer :bond_id, :property

  # line:: the raw BOND record. It may be omitted when the caller assigns
  #        the fields itself; reading an unassigned field of such a bond
  #        fails because there is no text to parse.
  def initialize(line = nil)
    @line = line
  end

  # Columns 0...16 of the record, as an Integer.
  def bond_id ; @bond_id ||= @line[0...16].to_i ; end

  # Columns 23...25 of the record, as an Integer.
  def v ; @v ||= @line[23...25].to_i ; end

  # Everything from column 27 to the end of the record (nil when the record
  # is shorter than that).
  def property ; @property ||= @line[27..-1] ; end

end
|
54
|
+
|
55
|
+
# A molecule read from one KCF record.
#
# Atoms are collected in @nodes; @edges holds one
# [bond, begin_atom, end_atom] triple per bond.
class KCF

  include Molecule
  include Enumerable

  # Reads one record from +input+ (anything responding to +gets+ and
  # +readline+) up to the "///" terminator or end of input.
  #
  # A section header carries the number of records that follow it; exactly
  # that many lines are consumed, so a truncated section raises EOFError.
  def initialize(input)
    @nodes = []
    @edges = []
    atoms_by_id = {}
    while (line = input.gets) && line !~ /\/\/\//
      # Compare the stripped keyword so the test does not depend on the
      # exact amount of padding in the 12-column keyword field.
      case line[0...12].strip
      when 'ATOM'
        line.split[1].to_i.times do
          atom = KCFAtom.new(input.readline)
          atoms_by_id[atom.atom_id] = atom
          @nodes.push(atom)
        end
      when 'BOND'
        line.split[1].to_i.times do
          bond_line = input.readline
          bond = KCFBond.new(bond_line)
          # The atom ids live in the bond record itself, not in the
          # "BOND  n" header line that introduced the section.
          from = atoms_by_id[bond_line[16...19].to_i]
          to = atoms_by_id[bond_line[19...23].to_i]
          @edges.push([bond, from, to])
        end
      end
    end
  end

  # Reads the first record of the file +filename+. The file is closed before
  # returning.
  def KCF.open(filename)
    File.open(filename) { |input| KCF.new(input) }
  end

end
|
89
|
+
|
90
|
+
# Streaming reader for a KEGG reaction flat file.
#
# Each record is a run of lines whose first 12 columns hold a keyword (or are
# blank for a continuation line) and is terminated by a line containing
# "///".
class KeggReaction

  include Enumerable

  # Plain container for the fields of one reaction record. +name+,
  # +definition+ and +comment+ are arrays with one element per source line.
  class ReactionEntry
    attr_accessor :entry, :name, :definition, :reactants, :products, :rpair, :ec, :comment, :pathway

    def initialize
      @comment = []
      @name = []
      @definition = []
    end
  end

  # Fields whose value may continue on following lines with a blank keyword.
  MULTILINE_FIELDS = %w(NAME DEFINITION COMMENT PATHWAY).freeze

  # input:: an IO-like object responding to +eof?+ and +gets+.
  def initialize(input)
    @input = input
  end

  # Opens +filename+ for streaming. The file stays open because #each reads
  # from it lazily; the handle is not closed by this class.
  def KeggReaction.open(filename)
    KeggReaction.new(File.open(filename))
  end

  # Yields one ReactionEntry per record.
  #
  # A record that is cut off by end of input is still yielded; blank lines
  # are skipped. Lines that cannot be attributed to a known field are
  # reported on standard output, as before.
  def each
    until @input.eof?
      entry = ReactionEntry.new
      continued = nil
      seen = false
      while (line = @input.gets) && line !~ /\/\/\//
        next if line.strip.empty?
        seen = true
        keyword = line[0...12].strip
        # A blank keyword continues the previous field, but only for the
        # fields that are allowed to span lines. An explicit keyword always
        # wins, so DEFINITION after NAME is no longer swallowed by NAME.
        field = keyword.empty? ? continued : keyword
        continued = MULTILINE_FIELDS.include?(field) ? field : nil
        # chomp instead of dropping the last character blindly, so an
        # unterminated final line keeps its last character.
        value = line[12..-1].to_s.chomp
        case field
        when 'ENTRY'
          entry.entry = value
        when 'NAME'
          entry.name.push(value)
        when 'DEFINITION'
          entry.definition.push(value)
        when 'EQUATION'
          left, right = value.split('<=>')
          entry.reactants = left.to_s.split('+').collect { |mol| mol.strip }
          entry.products = right.to_s.split('+').collect { |mol| mol.strip }
        when 'RPAIR'
          entry.rpair = value
        when 'ENZYME'
          entry.ec = value.split('.').collect { |n| n.to_i }
        when 'COMMENT'
          entry.comment.push(value)
        when 'PATHWAY'
          # Recognised so that it is not reported as unknown; not stored.
        else
          puts "Error Unknown line : %s" % line
        end
      end
      # +line+ is nil only when the input ran out; do not emit an empty
      # entry for trailing blank lines after the last terminator.
      yield entry if seen || line
    end
  end

end
|
145
|
+
|
146
|
+
# Pairs up the atoms and bonds of a reactant and a product molecule.
#
# Both molecules must respond to +atoms+ (indexable, iterable) and +bonds+;
# atoms must expose a +next_atom+ hash and bonds +b+, +e+ and
# +multiplicity+.
class KCFRXN

  # RXNNode / RXNEdge lists built by #corresponds and #setup_bonds.
  attr_reader :nodes, :edges

  def initialize(reactant, product)
    @reactant = reactant
    @product = product
    @matched_reactants = []
    @matched_products = []
    @nodes = []
  end

  # Declares that reactant atom +from+ corresponds to product atom +to+.
  def corresponds(from, to)
    reactant_atom = @reactant.atoms[from]
    # The product side is looked up with +to+; using +from+ here marked the
    # wrong product atom as matched.
    product_atom = @product.atoms[to]
    @matched_reactants.push(reactant_atom)
    @matched_products.push(product_atom)
    @nodes.push(RXNNode.new(reactant_atom, product_atom))
  end

  # Adds a node for every atom that was not matched by #corresponds, fills
  # the +next_atom+ tables of all atoms from the bond lists, builds one
  # RXNEdge for each pair of nodes bonded on either side, and prints the
  # reactant/product multiplicity of every edge. Returns the edge list.
  def setup_bonds
    @edges = []
    add_unmatched_nodes
    index_bonds(@reactant)
    index_bonds(@product)
    pair_edges
    @edges.each do |edge|
      from = edge.reactant_edge ? edge.reactant_edge.multiplicity : 0
      to = edge.product_edge ? edge.product_edge.multiplicity : 0
      puts "%3d %3d" % [from, to]
    end
  end

  # A reactant atom and the product atom it maps to; either side may be nil.
  class RXNNode
    attr_reader :reactant_node, :product_node
    def initialize(reactant, product)
      @reactant_node = reactant
      @product_node = product
    end
  end

  # The reactant-side and product-side bond between the same pair of nodes;
  # either side may be nil.
  class RXNEdge
    attr_accessor :product_edge, :reactant_edge
  end

  private

  # Appends a one-sided node for every atom without a counterpart.
  def add_unmatched_nodes
    @reactant.atoms.each do |atom|
      @nodes.push(RXNNode.new(atom, nil)) if atom && !@matched_reactants.member?(atom)
    end
    @product.atoms.each do |atom|
      @nodes.push(RXNNode.new(nil, atom)) if atom && !@matched_products.member?(atom)
    end
  end

  # Registers every bond of +molecule+ in the +next_atom+ table of both of
  # its end atoms.
  def index_bonds(molecule)
    molecule.bonds.each do |bond|
      bond.e.next_atom[bond.b] = bond
      bond.b.next_atom[bond.e] = bond
    end
  end

  # Creates an RXNEdge for each pair of nodes joined by a bond on at least
  # one side of the reaction.
  def pair_edges
    @nodes.each_with_index do |node, index|
      index.upto(@nodes.length - 1) do |n|
        other = @nodes[n]
        r_edge = bond_between(other.reactant_node, node.reactant_node)
        p_edge = bond_between(other.product_node, node.product_node)
        next unless r_edge || p_edge
        edge = RXNEdge.new
        edge.reactant_edge = r_edge
        edge.product_edge = p_edge
        @edges.push(edge)
      end
    end
  end

  # The bond recorded from +atom+ to +neighbour+, or nil.
  def bond_between(atom, neighbour)
    atom ? atom.next_atom[neighbour] : nil
  end

end
|
217
|
+
|
218
|
+
# Atom-to-atom alignment between two compounds, read from one alignment
# record.
#
# +compounds+ lists the COMPOUND values in file order. +correspondence+ maps
# the alignment row number to two [atom_index, label] pairs, one per
# compound.
class KCFCorrespondence

  attr_reader :compounds, :correspondence

  # Parses one record from +input+ immediately.
  def initialize(input)
    @name = []
    @input = input
    @compounds = []
    @correspondence = {}
    parse(input)
  end

  # Loads "<dir><compound>.kcf" for the first two compounds, registers every
  # aligned atom pair with a KCFRXN and runs its bond set-up.
  #
  # +dir+ is concatenated as-is, so it must end with a path separator.
  def make_rxn(dir)
    reactant = KCF.open("#{dir}#{@compounds[0]}.kcf")
    product = KCF.open("#{dir}#{@compounds[1]}.kcf")
    rxn = KCFRXN.new(reactant, product)
    @correspondence.each_value do |corres|
      rxn.corresponds(corres[0][0], corres[1][0])
    end
    rxn.setup_bonds
  end

  # Reads lines from +input+ until the "///" terminator or end of input.
  #
  # Lines with a blank keyword are alignment rows of the form
  # "<row> <index>:<label> <index>:<label>"; rows with fewer than three
  # fields, and lines with an unrecognised keyword, are ignored.
  def parse(input)
    while (line = input.gets) && line !~ /\/\/\//
      # Compare the stripped keyword so the test does not depend on the
      # exact amount of padding in the 12-column keyword field.
      keyword = line[0...12].strip
      value = line[12..-1].to_s.chomp
      case keyword
      when 'ENTRY'
        digits = value[/\d+/]
        @no = digits.to_i if digits
      when 'NAME'
        @name.push(value)
      when 'COMPOUND'
        @compounds.push(value)
      when 'TYPE'
        @type = value
      when 'ALIGN'
        @align = value.to_i
      when ''
        fields = value.split
        next if fields.length < 3
        @correspondence[fields[0].to_i] = fields[1..2].collect do |pair|
          index, label = pair.split(':')
          [index.to_i, label]
        end
      end
    end
  end

end
|
262
|
+
|
263
|
+
# KCF output support for atoms.
#
# The including object is expected to carry @number, @element, @x and @y in
# addition to the two attributes declared here.
module Atom
  attr_accessor :kcf_type, :kcf_prop

  # Returns the atom as one KCF formatted line. When +kcf_prop+ is set it is
  # appended after a '#'.
  def kcf_line
    return "%14d %3s%2s %10.4f%10.4f" % [@number, @kcf_type, @element, @x, @y] unless @kcf_prop
    "%14d %3s%2s %10.4f%10.4f #%s" % [@number, @kcf_type, @element, @x, @y, @kcf_prop]
  end
end
|
275
|
+
|
276
|
+
# KCF output support for bonds.
#
# The including object is expected to carry @number, @multiplicity and the
# end atoms @b and @e (each responding to +number+).
module Bond
  attr_accessor :kcf_prop

  # Returns the bond as one KCF formatted line. When +kcf_prop+ is set it is
  # appended after a '#'.
  def kcf_line
    # The record is formatted once with exactly four values; passing a
    # surplus argument makes Kernel#format raise when $DEBUG is set.
    record = "%13d %4d%4d%2d" % [@number, @b.number, @e.number, @multiplicity]
    @kcf_prop ? "%s #%s" % [record, @kcf_prop] : record
  end

end
|
289
|
+
|
290
|
+
# Reads a stream of KCF records and hands each finished molecule to a block.
class KCFReader

  # Reads every record of +file+, yielding each KCFMolecule to the block.
  # The file is closed before returning.
  def KCFReader.open(file, &method)
    File.open(file, 'r') { |input| KCFReader.new.read(input, &method) }
  end

  # Reads records from +input+ (anything responding to +each+ with lines).
  #
  # The first 12 columns of a line select the section; lines that match no
  # keyword are data rows of the current ATOM or BOND section. When a "///"
  # line is reached the molecule built so far is yielded (if a block was
  # given) and a fresh one is started.
  def read(input, &method)
    status = :NEW
    mol = KCFMolecule.new
    input.each do |line|
      case line[0..11]
      when /ANUMBER/
        mol.a_no = /A(\d+)/.match(line)[1].to_i
      when /ENTRY/
        # The entry id is not stored; the branch only keeps ENTRY lines from
        # being treated as section data.
      when /ATOM/
        status = :ATOM
      when /BOND/
        status = :BOND
      when /\/\/\//
        yield mol if method
        mol = KCFMolecule.new
        status = :NEW
      else
        # Whitespace-separated fields after the 12-column keyword area.
        fields = line[12..-1].to_s.split
        case status
        when :ATOM
          # number, type, element, x, y
          next if fields.length < 5
          # KCFAtom's constructor takes the raw record.
          atom = KCFAtom.new(line)
          atom.number = fields[0].to_i
          atom.kcf_type = fields[1]
          atom.element = fields[2]
          atom.x = fields[3].to_f
          atom.y = fields[4].to_f
          mol.atoms[atom.number] = atom
        when :BOND
          # number, begin atom, end atom, multiplicity
          next if fields.length < 4
          bond = KCFBond.new(line)
          bond.multiplicity = fields[3].to_i
          bond.b = mol.atoms[fields[1].to_i]
          bond.e = mol.atoms[fields[2].to_i]
          mol.bonds.push(bond)
        end
      end
    end
  end
end
|
341
|
+
|
342
|
+
# A molecule read from, or written as, a single KCF record.
class KCFMolecule

  include Molecule
  attr_accessor :a_no

  # Prints the KCF line of every atom and then of every bond of +molecule+
  # to standard output.
  #
  # +molecule.atoms+ is iterated as key/value pairs and +molecule.bonds+ as
  # a plain list.
  # NOTE(review): the previous body kept unused atom/bond counters and
  # assigned to an undefined +kcf+ local, which suggests records were meant
  # to be renumbered here; no renumbering is done - confirm intent.
  def KCFMolecule.write_kcf(molecule)
    molecule.atoms.each do |k, atom|
      puts atom.kcf_line
    end
    molecule.bonds.each do |bond|
      puts bond.kcf_line
    end
  end

  # Reads the single record stored in +file+ and returns the molecule. The
  # file is closed before returning.
  def KCFMolecule.open(file)
    File.open(file, 'r') { |input| KCFMolecule.new.read(input) }
  end

  # Fills this molecule from +input+ and returns self.
  #
  # Expected layout: an entry line, an "ATOM n" header followed by n atom
  # rows, then a "BOND n" header followed by n bond rows. Rows are split on
  # whitespace: number/type/element/x/y for atoms,
  # number/begin/end/multiplicity for bonds.
  def read(input)
    # NOTE(review): nothing visible in this class initialises these; it is
    # assumed the Molecule mixin's accessors read the same instance
    # variables - confirm against chem/model.
    @atoms ||= {}
    @bonds ||= []
    @entry = input.readline
    number_of_atom = input.readline.split[1].to_i
    number_of_atom.times do
      line = input.readline
      fields = line.split
      # KCFAtom's constructor takes the raw record.
      atom = KCFAtom.new(line)
      atom.number = fields[0].to_i
      atom.kcf_type = fields[1]
      atom.element = fields[2]
      atom.x = fields[3].to_f
      atom.y = fields[4].to_f
      @atoms[atom.number] = atom
    end
    number_of_bond = input.readline.split[1].to_i
    number_of_bond.times do
      line = input.readline
      fields = line.split
      bond = KCFBond.new(line)
      bond.multiplicity = fields[3].to_i
      bond.b = @atoms[fields[1].to_i]
      bond.e = @atoms[fields[2].to_i]
      @bonds.push(bond)
    end
    self
  end

end
|
387
|
+
end
|
388
|
+
end
|
389
|
+
|
390
|
+
|
data/lib/chem/db/kegg.rb
ADDED
@@ -0,0 +1,516 @@
|
|
1
|
+
#
|
2
|
+
# = chem/db/kegg.rb - KEGG (Kyoto Encyclopedia of Genes and Genomes)
|
3
|
+
#
|
4
|
+
# Author:: Nobuya Tanaka <tanaka@chemruby.org>
|
5
|
+
#
|
6
|
+
# $Id:$
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'chem/db/mdl'
|
10
|
+
|
11
|
+
module Chem
|
12
|
+
|
13
|
+
module KEGG
|
14
|
+
# Access to a local copy of the KEGG directory tree rooted at +dir+
# ("genomes/<organism>/..." and "ligand/...").
class KeggDirectory

  attr_reader :dir

  def initialize(dir)
    @dir = dir
    @compounds = {}
    @ligand_dir = File.join(@dir, "ligand")
    @mol_dir = File.join(@ligand_dir, "mol")
    @parsed_file = []
  end

  # Path of +file+ inside the genome directory of +organism+.
  def get_organism(organism, file)
    File.join(@dir, "genomes", organism, file)
  end

  # Returns a hash from Pfam key to the list of KeggGene objects carrying
  # it, loaded from "<organism>_pfam.list" (tab separated: gene, pfam).
  #
  # The table accumulates across organisms; each list file is read once.
  def gene_to_pfam(organism)
    filename = get_organism(organism, organism + "_pfam.list")
    return @pfam2gene if @parsed_file.include?(filename)
    @gene2pfam ||= {}
    @pfam2gene ||= {}
    File.foreach(filename) do |line|
      # Strip the line ending before splitting so that the Pfam column is
      # intact whether or not it is the last column on the line.
      gene, pfam = line.chomp.split("\t")
      next unless gene && pfam
      @gene2pfam[gene] = pfam
      (@pfam2gene[pfam] ||= []).push(KeggGene.new(gene, organism, self))
    end
    # Recorded only after a successful read, so a failed attempt can be
    # retried.
    @parsed_file.push filename
    @pfam2gene
  end

  # Returns the EC number listed for +gene+ (a KeggGene) in
  # "<organism>_enzyme.list", or nil. Each list file is read once.
  def get_ec_number(gene)
    @gene2enzyme ||= {}
    @enzyme2gene ||= {}
    filename = get_organism(gene.organism, gene.organism + "_enzyme.list")
    return @gene2enzyme[gene.gene] if @parsed_file.include?(filename)

    File.foreach(filename) do |line|
      gn, ec = line.chomp.split("\t")
      next unless gn && ec
      @gene2enzyme[gn] = ec
      @enzyme2gene[ec] = gn
    end
    @parsed_file.push filename
    @gene2enzyme[gene.gene]
  end

  # Looks up +key+ by its shape: "R<digits>" is a reaction, "C<digits>" a
  # compound, "pf:<name>" a Pfam handle and a 3-4 character prefix an
  # organism handle. "<organism>:<digits>" (a gene) is not implemented and
  # raises.
  def [](key)
    case key
    when /(R\d+)/
      get_reaction $1
    when /(C\d+)/
      get_compound $1
    when /pf:(.+)/
      KeggPfam.new($1, self)
    when /^([^:]{3,4}):(\d+)/
      # gene
      raise "Parser for Organism not implemented!"
    when /^([^:]{3,4})/
      # organism
      KeggOrganism.new($1, self)
    else
      raise "unknown KEGG key type : #{key}"
    end
  end

  # Reactions of "ligand/reaction_mapformula.lst" keyed by entry, loaded on
  # first use.
  def map_formula
    @reaction_map_formula = parse_reaction_map_formula unless @reaction_map_formula
    @reaction_map_formula
  end

  def parse_reaction_map_formula
    rxns = {}
    parser = Chem.parse_file(File.join(@dir, "ligand", "reaction_mapformula.lst"))
    parser.each do |rxn|
      rxns[rxn.entry] = rxn
    end
    rxns
  end

  # Handle for one organism of this directory. (+private+ has no effect on
  # nested classes, so these remain reachable as KeggDirectory::...)
  class KeggOrganism

    def initialize(organism, kegg)
      @organism = organism
      @kegg = kegg
    end

    # The Pfam table of the owning directory, after loading this organism.
    def pfam
      @kegg.gene_to_pfam(@organism)
    end

    # NOTE(review): ignores +key+ and returns the owning KeggDirectory;
    # looks unfinished - confirm whether pfam[key] was intended.
    def [](key)
      @kegg
    end

  end

  # A gene id together with its organism.
  class KeggGene

    attr_reader :organism, :gene
    def initialize(gene, organism, kegg)
      @gene = gene
      @organism = organism
      @kegg = kegg
    end

    # The +inspect+ form of the EC number found for this gene.
    def ec_number
      @kegg.get_ec_number(self).inspect
    end

  end

  # Handle for one Pfam key; indexing by organism delegates to the
  # organism handle.
  class KeggPfam

    def initialize(pfam_key, kegg)
      @kegg = kegg
      @pfam_key = pfam_key
    end

    def [](organism)
      @kegg[organism][@pfam_key]
    end

  end

  # Private methods
  private

  # The molecule stored in "ligand/mol/<name>.mol", loaded once per name.
  def get_compound(name)
    @compounds[name] ||= Chem.open_mol(File.join(@mol_dir, name) + ".mol")
  end

  def get_reaction(name)
    @reactions ||= parse_reaction
    @reactions[name]
  end

  # Reads "ligand/reaction" into a hash keyed by entry and attaches this
  # directory to every reaction.
  def parse_reaction
    rxns = {}
    parser = Chem.parse_file(File.join(@dir, "ligand", "reaction"))
    parser.each do |reaction|
      reaction.kegg = self
      rxns[reaction.entry] = reaction
    end
    rxns
  end

end
|
165
|
+
|
166
|
+
# obsolete
#
# Default folder holding the KEGG compound .mol files. The value lives in an
# instance variable of the enclosing module rather than a class variable, so
# it is not shared with classes that include the module; it is reached only
# through the two accessors below.
@kegg_compound_folder = nil

# Sets the folder that holds the compound .mol files.
def self.kegg_compound_folder=(folder)
  @kegg_compound_folder = folder
end

# The folder set through kegg_compound_folder=, or nil.
def self.kegg_compound_folder
  @kegg_compound_folder
end
|
175
|
+
|
176
|
+
# Duplicate definition!
|
177
|
+
# One reaction of the KEGG reaction files.
#
# +compounds+ holds the two sides of the equation and +ecs+ the associated
# EC entries; both start out empty.
class KEGGReaction

  include Chem::Reaction
  attr_accessor :entry, :name, :ecs, :compounds, :direction
  # The object asked for the map formula table (responds to +map_formula+).
  attr_writer :kegg

  def initialize
    @ecs = []
    @compounds = []
  end

  # The compounds of the map-formula reaction that shares this entry, or nil
  # when the table has no such entry.
  def map_formula
    mapped = @kegg.map_formula[@entry]
    mapped ? mapped.compounds : nil
  end
end
|
195
|
+
|
196
|
+
# A KEGG compound whose structure is loaded on demand from the configured
# compound folder and shared between all instances with the same entry.
class KeggCompound
  include Molecule
  include Enumerable
  include MDL::MdlMolParser
  attr_reader :entry

  def initialize
    @nodes = []
    @edges = []
  end

  # Molecules already loaded, keyed by entry. A missing file is stored as
  # nil, so it is looked for again on the next assignment.
  @@entries = {}

  # Assigns the entry and adopts the nodes and edges of the molecule stored
  # in "<folder>/<entry>.mol", if that file exists.
  #
  # Raises ArgumentError when the molecule still has to be loaded and no
  # compound folder has been configured.
  # NOTE(review): the folder is read from Chem::Kegg while this file defines
  # the accessor on Chem::KEGG - confirm that Chem::Kegg exists.
  def entry=(entry_no)
    @entry = entry_no
    if @@entries[entry_no].nil?
      folder = Chem::Kegg.kegg_compound_folder
      raise ArgumentError, "Chem::Kegg.kegg_compound_folder not specified" if folder.nil?
      filename = File.join(folder, entry_no + ".mol")
      @@entries[entry_no] = File.exist?(filename) ? Chem.open_mol(filename) : nil
    end
    @fly_weight = @@entries[entry_no]
    return unless @fly_weight
    @nodes = @fly_weight.nodes
    @edges = @fly_weight.edges
  end

end
|
233
|
+
|
234
|
+
# Plain record for a KEGG glycan: its entry id and name.
class KeggGlycan
  attr_accessor :entry
  attr_accessor :name
end

# Plain record for an EC classification: its entry id and numeric parts.
class KeggEc
  attr_accessor :entry
  attr_accessor :number
end
|
241
|
+
|
242
|
+
# Shared reader for KEGG flat files whose lines start with a 12-column
# keyword field. The including class must set @input to something that
# yields lines from +each+.
module KeggFormat

  # NOTE(review): writes through Chem::Kegg while this file defines the
  # accessor on Chem::KEGG - confirm that Chem::Kegg exists.
  def compound_folder=(folder)
    Chem::Kegg.kegg_compound_folder = folder
  end

  # Yields (text, keyword) once per field.
  #
  # A line whose first 12 columns are blank continues the current field and
  # its remainder is appended to the text. Any other line starts a new
  # field: the keyword is the stripped first 12 columns and the text is the
  # rest of the line (nil when the line is shorter than 12 columns, as for
  # "///"). The field in progress when the input ends is yielded as well, so
  # the closing "///" of the last record is not lost.
  def each_entry
    state = nil
    str = ''
    blank_keyword = ' ' * 12
    @input.each do |line|
      if line[0..11] == blank_keyword
        str = str.to_s + line[12..-1]
      else
        yield(str, state) if state # Not first state
        str = line[12..-1]
        state = line[0..11].strip
      end
    end
    # Flush the last field; a trailing blank line leaves an empty keyword
    # behind, which is not worth reporting.
    yield(str, state) if state && !state.empty?
  end
end
|
262
|
+
|
263
|
+
# Parser for the KEGG "reaction" flat file; yields one KEGGReaction per
# record.
class KeggReactionParser

  include KeggFormat
  include Enumerable

  # Opens +filename+; the handle is kept open for #each and is not closed by
  # this class.
  def initialize(filename)
    @input = File.open(filename)
  end

  # Splits one side of an equation on " + " and returns
  # [compound_entry, stoichiometry] pairs.
  #
  # The entry is the first "C<digits>" (or else "G<digits>") token, or ""
  # when neither is present. The stoichiometry is the integer directly in
  # front of the entry letter and defaults to 1. A nil side gives [].
  def parse_compounds(species)
    species.to_s.split(" + ").collect do |mol|
      count = /(\d+) *[CG]/.match(mol)
      id = /(C\d+)/.match(mol) || /(G\d+)/.match(mol)
      [id ? id[1] : "", count ? count[1].to_i : 1]
    end
  end

  # Yields each reaction once its record is complete.
  #
  # A record is complete when its "///" field is delivered. A reaction still
  # pending when the input ends (no terminator delivered for it) is yielded
  # too, so the last record of a file is not dropped. Fields that arrive
  # before any ENTRY are skipped.
  def each
    reaction = nil
    each_entry do |str, state|
      if state == "ENTRY"
        reaction = KEGGReaction.new
        reaction.entry = str.to_s.split[0]
        next
      end
      next if reaction.nil?
      case state
      when "NAME"
        reaction.name = str
      when "EQUATION"
        sides = str.to_s.split("<=>")
        reaction.compounds << parse_compounds(sides[0])
        reaction.compounds << parse_compounds(sides[1])
      when "ENZYME"
        str.to_s.split.each do |code|
          ec = KeggEc.new
          ec.entry = "EC" + code
          ec.number = code.split(".").collect { |part| part.to_i }
          reaction.ecs << ec
        end
      when "///"
        yield reaction
        # Cleared so the record is not emitted a second time below.
        reaction = nil
      when "DEFINITION", "RPAIR", "PATHWAY", "COMMENT", "REFERENCE"
        # Known fields that are not stored.
      else
        p state
      end
    end
    yield reaction if reaction
  end

end
|
331
|
+
|
332
|
+
# Parser for "reaction.lst": one reaction per line in the form
# "<entry>: <reactants> <=> <products>".
class KeggReactionLstParser

  include Enumerable
  include KeggFormat

  # Opens +filename+; the handle is kept open for #each and is not closed by
  # this class.
  def initialize(filename)
    @input = File.open(filename)
  end

  # Yields a KEGGReaction per line whose +compounds+ is
  # [reactants, products], each a list of [compound_entry, stoichiometry].
  # Lines without a ':' are skipped.
  def each
    @input.each do |line|
      r_number, comps = line.split(":", 2)
      next unless comps
      left, right = comps.split(/<=>/)
      rxn = KEGGReaction.new
      rxn.entry = r_number
      rxn.compounds = [parse_side(left), parse_side(right)]
      yield rxn
    end

  end

  private

  # Turns one side of the equation into [compound_entry, stoichiometry]
  # pairs. A term written as "<n> <entry>" contributes the entry alone with
  # n as its coefficient, so the coefficient no longer leaks into the
  # compound id; a bare term has coefficient 1.
  def parse_side(side)
    side.to_s.split("+").collect do |term|
      fields = term.split
      if fields.length >= 2
        [fields[1], fields[0].to_i]
      else
        [term.strip, 1]
      end
    end
  end

end
|
377
|
+
|
378
|
+
# ftp://ftp.genome.ad.jp/pub/kegg/ligand/reaction_mapformula.lst
|
379
|
+
# Parser for "reaction_mapformula.lst": one reaction per line in the form
# "<entry>: <map>: <left> <arrow> <right>" where the arrow is "<=", "<=>"
# or "=>".
class KeggReactionMapParser

  include Enumerable
  include KeggFormat

  # Reads the whole file up front and indexes the remaining fields of each
  # line by its entry. @input keeps the list of lines rather than an open
  # file handle, so nothing is left to close. Lines with fewer than three
  # ':'-separated fields are skipped.
  def initialize(filename)
    @input = File.readlines(filename)
    @reactions = @input.inject({}) do |ret, line|
      ary = line.split(":")
      ret[ary[0]] = ary[1..-1] if ary.length >= 3
      ret
    end
  end

  # Yields the KEGGReaction of every indexed entry.
  def each
    @reactions.each_key do |r_number|
      yield self[r_number]
    end
  end

  # Builds the KEGGReaction for +r_number+, or returns nil when the entry is
  # unknown.
  #
  # +direction+ is -1 for "<=", 0 for "<=>", 1 for "=>" and left unset
  # otherwise; every compound gets stoichiometry 1.
  def [](r_number)
    record = @reactions[r_number]
    return nil if record.nil?
    map_number, comps = record
    rxn = KEGGReaction.new
    rxn.entry = r_number
    # The capture group keeps the arrow itself as the middle element.
    left, arrow, right = comps.to_s.split(/(<?=>?)/)
    case arrow
    when "<="
      rxn.direction = -1
    when "<=>"
      rxn.direction = 0
    when "=>"
      rxn.direction = 1
    end
    rxn.compounds = [unit_terms(left), unit_terms(right)]
    rxn
  end

  private

  # [compound, 1] for every '+'-separated term of +side+.
  def unit_terms(side)
    side.to_s.split("+").collect { |term| [term.strip, 1] }
  end

end
|
429
|
+
|
430
|
+
# Parses KEGG Glycan format
|
431
|
+
# http://www.genome.jp/ligand/kcam/kcam/kcf.html
|
432
|
+
# Not fully implemented
|
433
|
+
# Parser for the KEGG glycan flat file. Only the ENTRY and NAME fields are
# read; everything else is ignored.
class KeggGlycanParser

  include Enumerable
  include KeggFormat

  # Opens +filename+; the handle is kept open for #each and is not closed by
  # this class.
  def initialize(filename)
    @input = File.open(filename)
  end

  # Yields one KeggGlycan per record.
  #
  # The entry id is the first token of the ENTRY field and the name is the
  # NAME text with its line breaks removed. A record is emitted when its
  # "///" field is delivered; a glycan still pending when the input ends is
  # emitted as well.
  def each
    glycan = nil
    each_entry do |str, state|
      case state
      when "ENTRY"
        glycan = KeggGlycan.new
        glycan.entry = str.to_s.split[0]
      when "NAME"
        # Appends to a name that is already there, otherwise starts one.
        glycan.name = glycan.name.to_s + str.split("\n").join if glycan && str
      when "///"
        yield glycan if glycan
        # Cleared so the record is not emitted a second time below.
        glycan = nil
      end
    end
    yield glycan if glycan
  end

end
|
466
|
+
|
467
|
+
# Walks the fields of the "compound" file below $home and fills in a
# Compound record per entry: name, ChEBI and PubChem ids taken from the
# DBLINKS field, and the glycan entry. An existing record with the same
# entry is reused. Nothing is saved.
#
# NOTE(review): relies on a +parse+ method, a +Compound+ model and the $home
# global, none of which are defined in this file - confirm they still exist.
def self.parse_compound_file
  compound = nil
  parse($home + "compound") do |str, state|
    case state
    when "ENTRY"
      entry_id = str.split[0]
      compound = Compound.find(:first, :conditions => ["entry = ?", entry_id])
      if compound.nil?
        compound = Compound.new
        compound.entry = entry_id
      end
    when "NAME"
      compound.name = str.split("\n").join if str
    when "DBLINKS"
      str.split("\n").each do |line|
        chebi = /ChEBI: (\d+)/.match(line)
        if chebi
          compound.chebi = chebi[1].to_i
        else
          pubchem = /PubChem: (\d+)/.match(line)
          compound.pubchem = pubchem[1].to_i if pubchem
        end
      end
    when "GLYCAN"
      compound.glycan_entry = str
    when "///"
      #compound.save
    end
  end
end
|
494
|
+
|
495
|
+
# Stores the marshalled molecule of every "$home/mol/*.mol" file in the
# +ctab+ field of the Compound record with the same entry, unless that field
# is already set. Files without a matching record are printed and skipped.
#
# NOTE(review): the record lookup had been commented out, leaving +comp+
# undefined. It is restored here against the Compound model used by
# parse_compound_file (the commented-out line named KeggCompound) - confirm
# which model is meant.
def set_compounds
  require 'util'
  Dir.glob($home + "/mol/*.mol").each do |path|
    # Take the entry from the file name only; a pattern match over the whole
    # path could hit a directory component first.
    entry = File.basename(path, ".mol")
    comp = Compound.find(:first, :conditions => ["entry = ?", entry])
    mol = Chem.open_mol(path)
    if comp.nil?
      puts mol
      next
    end
    if comp.ctab.nil?
      comp.ctab = Marshal.dump(mol)
      comp.save
    end
  end
end
|
512
|
+
|
513
|
+
end
|
514
|
+
end
|
515
|
+
|
516
|
+
|