chemruby 0.9.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README +120 -0
- data/Rakefile +195 -0
- data/ext/extconf.rb +4 -0
- data/ext/subcomp.c +416 -0
- data/lib/chem.rb +130 -0
- data/lib/chem/appl.rb +1 -0
- data/lib/chem/appl/chem3dole.rb +36 -0
- data/lib/chem/appl/tinker/nucleic.rb +40 -0
- data/lib/chem/appl/tinker/tinker_reader.rb +43 -0
- data/lib/chem/data.rb +4 -0
- data/lib/chem/data/atomic_weight.rb +124 -0
- data/lib/chem/data/character.rb +2 -0
- data/lib/chem/data/electronegativity.rb +14 -0
- data/lib/chem/data/periodic_table.rb +6 -0
- data/lib/chem/data/prime_numbers.rb +1 -0
- data/lib/chem/data/vdw_radii.rb +1 -0
- data/lib/chem/db.rb +64 -0
- data/lib/chem/db/cansmi.rb +234 -0
- data/lib/chem/db/cdx.rb +1525 -0
- data/lib/chem/db/eps.rb +164 -0
- data/lib/chem/db/g98.rb +909 -0
- data/lib/chem/db/gspan.rb +130 -0
- data/lib/chem/db/iupac.rb +5 -0
- data/lib/chem/db/iupac/a_1.rb +46 -0
- data/lib/chem/db/iupac/iuparser.rb +226 -0
- data/lib/chem/db/iupac/iuparser.ry +97 -0
- data/lib/chem/db/iupac/postfix.rb +2 -0
- data/lib/chem/db/kcf.rb +390 -0
- data/lib/chem/db/kcf_glycan.rb +19 -0
- data/lib/chem/db/kegg.rb +516 -0
- data/lib/chem/db/linucs/linparser.rb +144 -0
- data/lib/chem/db/linucs/linucs.ry +53 -0
- data/lib/chem/db/mdl.rb +379 -0
- data/lib/chem/db/molconnz.rb +12 -0
- data/lib/chem/db/mopac.rb +88 -0
- data/lib/chem/db/msi.rb +107 -0
- data/lib/chem/db/pdb_dic.rb +115 -0
- data/lib/chem/db/pdf.rb +131 -0
- data/lib/chem/db/pubchem.rb +113 -0
- data/lib/chem/db/rmagick.rb +70 -0
- data/lib/chem/db/sdf.rb +37 -0
- data/lib/chem/db/smbl.rb +88 -0
- data/lib/chem/db/smiles.rb +2 -0
- data/lib/chem/db/smiles/smiles.ry +203 -0
- data/lib/chem/db/smiles/smiparser.rb +375 -0
- data/lib/chem/db/swf.rb +74 -0
- data/lib/chem/db/sybyl.rb +150 -0
- data/lib/chem/db/tinker.rb +77 -0
- data/lib/chem/db/types/type_cansmi.rb +9 -0
- data/lib/chem/db/types/type_cdx.rb +24 -0
- data/lib/chem/db/types/type_gspan.rb +31 -0
- data/lib/chem/db/types/type_kcf.rb +28 -0
- data/lib/chem/db/types/type_kcf_glycan.rb +26 -0
- data/lib/chem/db/types/type_kegg.rb +92 -0
- data/lib/chem/db/types/type_mdl.rb +31 -0
- data/lib/chem/db/types/type_pdf.rb +33 -0
- data/lib/chem/db/types/type_png.rb +31 -0
- data/lib/chem/db/types/type_rxn.rb +25 -0
- data/lib/chem/db/types/type_sdf.rb +25 -0
- data/lib/chem/db/types/type_sybyl.rb +30 -0
- data/lib/chem/db/types/type_xyz.rb +26 -0
- data/lib/chem/db/vector.rb +128 -0
- data/lib/chem/db/xyz.rb +39 -0
- data/lib/chem/model.rb +119 -0
- data/lib/chem/model/skeleton.rb +37 -0
- data/lib/chem/utils.rb +11 -0
- data/lib/chem/utils/geometry.rb +27 -0
- data/lib/chem/utils/graph_db.rb +146 -0
- data/lib/chem/utils/math.rb +17 -0
- data/lib/chem/utils/prop.rb +123 -0
- data/lib/chem/utils/sssr.rb +101 -0
- data/lib/chem/utils/sub.rb +78 -0
- data/lib/chem/utils/transform.rb +110 -0
- data/lib/chem/utils/traverse.rb +37 -0
- data/lib/chem/utils/ullmann.rb +134 -0
- data/lib/graph.rb +41 -0
- data/lib/graph/cluster.rb +20 -0
- data/lib/graph/morgan.rb +38 -0
- data/sample/frequent_subgraph.rb +46 -0
- data/sample/images/ex1.rb +11 -0
- data/sample/images/ex2.rb +4 -0
- data/sample/images/ex3.rb +5 -0
- data/sample/images/ex4.rb +17 -0
- data/sample/images/ex5.rb +10 -0
- data/sample/images/mol/adenine.mol +26 -0
- data/sample/images/mol/atp.mol +69 -0
- data/sample/images/temp/ex5.mol +344 -0
- data/sample/kegg_db.rb +116 -0
- data/setup.rb +1551 -0
- data/test/all.rb +6 -0
- data/test/coord_test.rb +17 -0
- data/test/ctab_test.rb +31 -0
- data/test/data/A_21.tar.gz +0 -0
- data/test/data/A_21/aceanthrylene.cdx +0 -0
- data/test/data/A_21/aceanthrylene.mol +40 -0
- data/test/data/A_21/acenaphthylene.cdx +0 -0
- data/test/data/A_21/acenaphthylene.mol +31 -0
- data/test/data/A_21/acephenanthrylene.cdx +0 -0
- data/test/data/A_21/acephenanthrylene.mol +40 -0
- data/test/data/A_21/anthracene.cdx +0 -0
- data/test/data/A_21/anthracene.mol +35 -0
- data/test/data/A_21/as-indacene.cdx +0 -0
- data/test/data/A_21/as-indacene.mol +31 -0
- data/test/data/A_21/azulene.cdx +0 -0
- data/test/data/A_21/azulene.mol +26 -0
- data/test/data/A_21/biphenylene.cdx +0 -0
- data/test/data/A_21/biphenylene.mol +31 -0
- data/test/data/A_21/chrysene.cdx +0 -0
- data/test/data/A_21/chrysene.mol +44 -0
- data/test/data/A_21/coronen.cdx +0 -0
- data/test/data/A_21/coronen.mol +59 -0
- data/test/data/A_21/fluoranthene.cdx +0 -0
- data/test/data/A_21/fluoranthene.mol +40 -0
- data/test/data/A_21/fluorene.cdx +0 -0
- data/test/data/A_21/fluorene.mol +33 -0
- data/test/data/A_21/heptacene.cdx +0 -0
- data/test/data/A_21/heptacene.mol +71 -0
- data/test/data/A_21/heptalene.cdx +0 -0
- data/test/data/A_21/heptalene.mol +30 -0
- data/test/data/A_21/heptaphene.cdx +0 -0
- data/test/data/A_21/heptaphene.mol +71 -0
- data/test/data/A_21/hexacene.cdx +0 -0
- data/test/data/A_21/hexacene.mol +62 -0
- data/test/data/A_21/hexaphene.cdx +0 -0
- data/test/data/A_21/hexaphene.mol +62 -0
- data/test/data/A_21/indene.cdx +0 -0
- data/test/data/A_21/indene.mol +24 -0
- data/test/data/A_21/iupac.txt +41 -0
- data/test/data/A_21/naphthacene.cdx +0 -0
- data/test/data/A_21/naphthacene.mol +44 -0
- data/test/data/A_21/naphthalene.cdx +0 -0
- data/test/data/A_21/naphthalene.mol +26 -0
- data/test/data/A_21/ovalene.cdx +0 -0
- data/test/data/A_21/ovalene.mol +78 -0
- data/test/data/A_21/pentacene.cdx +0 -0
- data/test/data/A_21/pentacene.mol +53 -0
- data/test/data/A_21/pentalene.cdx +0 -0
- data/test/data/A_21/pentalene.mol +22 -0
- data/test/data/A_21/pentaphene.cdx +0 -0
- data/test/data/A_21/pentaphene.mol +53 -0
- data/test/data/A_21/perylene.cdx +0 -0
- data/test/data/A_21/perylene.mol +49 -0
- data/test/data/A_21/phenalene.cdx +0 -0
- data/test/data/A_21/phenalene.mol +33 -0
- data/test/data/A_21/phenanthrene.cdx +0 -0
- data/test/data/A_21/phenanthrene.mol +35 -0
- data/test/data/A_21/picene.cdx +0 -0
- data/test/data/A_21/picene.mol +53 -0
- data/test/data/A_21/pleiadene.cdx +0 -0
- data/test/data/A_21/pleiadene.mol +44 -0
- data/test/data/A_21/pyranthrene.cdx +0 -0
- data/test/data/A_21/pyranthrene.mol +72 -0
- data/test/data/A_21/pyrene.cdx +0 -0
- data/test/data/A_21/pyrene.mol +40 -0
- data/test/data/A_21/rubicene.cdx +0 -0
- data/test/data/A_21/rubicene.mol +63 -0
- data/test/data/A_21/s-indacene.cdx +0 -0
- data/test/data/A_21/s-indacene.mol +31 -0
- data/test/data/A_21/tetraphenylene.cdx +0 -0
- data/test/data/A_21/tetraphenylene.mol +57 -0
- data/test/data/A_21/trinaphthylene.cdx +0 -0
- data/test/data/A_21/trinaphthylene.mol +71 -0
- data/test/data/A_21/triphenylene.cdx +0 -0
- data/test/data/A_21/triphenylene.mol +44 -0
- data/test/data/C00147.kcf +25 -0
- data/test/data/G00147.kcf +13 -0
- data/test/data/atp.mol +69 -0
- data/test/data/cyclohexane.mol +17 -0
- data/test/data/cyclohexane.ps +485 -0
- data/test/data/fullerene.mol +155 -0
- data/test/data/glycan +33 -0
- data/test/data/hypericin.cdx +0 -0
- data/test/data/hypericin.cdxml +596 -0
- data/test/data/hypericin.chm +0 -0
- data/test/data/hypericin.ct +85 -0
- data/test/data/hypericin.f1d +0 -0
- data/test/data/hypericin.f1q +0 -0
- data/test/data/hypericin.gif +0 -0
- data/test/data/hypericin.mol +88 -0
- data/test/data/hypericin.mol2 +159 -0
- data/test/data/hypericin.msm +123 -0
- data/test/data/hypericin.pdf +359 -0
- data/test/data/hypericin.png +0 -0
- data/test/data/hypericin.ps +0 -0
- data/test/data/hypericin.skc +0 -0
- data/test/data/hypericin2.gif +0 -0
- data/test/data/hypericin2.ps +0 -0
- data/test/data/kegg/genomes/hsa/hsa_enzyme.list +4 -0
- data/test/data/kegg/genomes/hsa/hsa_pfam.list +4 -0
- data/test/data/kegg/ligand/mol/C00147.mol +26 -0
- data/test/data/kegg/ligand/reaction +14 -0
- data/test/data/kegg/ligand/reaction.lst +1 -0
- data/test/data/kegg/ligand/reaction_mapformula.lst +3 -0
- data/test/data/reaction +14 -0
- data/test/data/reaction.lst +1 -0
- data/test/data/reaction_mapformula.lst +3 -0
- data/test/data/rxn/C00001.mol +6 -0
- data/test/data/rxn/C00011.mol +10 -0
- data/test/data/rxn/C00014.mol +6 -0
- data/test/data/rxn/C01010.mol +18 -0
- data/test/data/rxn/sample.rxn +50 -0
- data/test/data/rxn/substitution.rxn +45 -0
- data/test/data/test.eps +0 -0
- data/test/data/test.mol +28 -0
- data/test/data/test.sdf +143 -0
- data/test/data/test.skc +0 -0
- data/test/data/test.xyz +4 -0
- data/test/data/test_lf.sdf +143 -0
- data/test/heavy_test_pubchem.rb +16 -0
- data/test/multiple_test.rb +22 -0
- data/test/test_adj.rb +54 -0
- data/test/test_canonical_smiles.rb +46 -0
- data/test/test_cdx.rb +32 -0
- data/test/test_chem.rb +18 -0
- data/test/test_cluster.rb +19 -0
- data/test/test_db.rb +11 -0
- data/test/test_eps.rb +24 -0
- data/test/test_geometry.rb +11 -0
- data/test/test_gspan.rb +28 -0
- data/test/test_iupac.rb +36 -0
- data/test/test_kcf.rb +24 -0
- data/test/test_kcf_glycan.rb +10 -0
- data/test/test_kegg.rb +118 -0
- data/test/test_linucs.rb +21 -0
- data/test/test_mdl.rb +45 -0
- data/test/test_mol2.rb +62 -0
- data/test/test_morgan.rb +21 -0
- data/test/test_pdf.rb +12 -0
- data/test/test_prop.rb +86 -0
- data/test/test_rmagick.rb +15 -0
- data/test/test_sbdb.rb +23 -0
- data/test/test_sdf.rb +30 -0
- data/test/test_smiles.rb +84 -0
- data/test/test_sssr.rb +18 -0
- data/test/test_sub.rb +47 -0
- data/test/test_subcomp.rb +37 -0
- data/test/test_traverse.rb +29 -0
- data/test/test_writer.rb +13 -0
- data/test/test_xyz.rb +15 -0
- data/test/type_test.rb +25 -0
- metadata +290 -0
@@ -0,0 +1,113 @@
|
|
1
|
+
#
|
2
|
+
# chem/db/pubchem.rb - PubChem database class
|
3
|
+
#
|
4
|
+
# Copyright (C) 2005 KADOWAKI Tadashi <kado@kuicr.kyoto-u.ac.jp>
|
5
|
+
# TANAKA Nobuya <tanaka@kuicr.kyoto-u.ac.jp>
|
6
|
+
#
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'uri'
|
10
|
+
require 'net/http'
|
11
|
+
|
12
|
+
module Chem
|
13
|
+
|
14
|
+
# Extension point for molecule objects.
module Molecule
  # Placeholder for a PubChem lookup on the receiver.
  # The body is intentionally empty, so the method always returns nil.
  def search_pubchem; end
end
|
18
|
+
|
19
|
+
module PubChem
|
20
|
+
|
21
|
+
# Host name of the PubChem web front end and path of its summary CGI.
Host = "pubchem.ncbi.nlm.nih.gov"
Summary = "/summary/summary.cgi"

# Screen-scraping client for the PubChem structure search form.
class PubChem
  Searchpath = "/search/"
  Query = "PreQSrv.cgi"
  Boundary = "-----boundary-----"

  # Builds the four lines of one simple form field: boundary, disposition
  # header, blank separator line and the field value.
  form_field = lambda do |name, value|
    [Boundary, "Content-Disposition: form-data; name=\"#{name}\"", "", value]
  end

  # Template of the multipart request body.  It contains two '%s'
  # placeholders, filled (in this order) with the query string and the
  # maximum number of hits.
  Data = [
    form_field.call("mode", "simplequery"),
    form_field.call("check", "remote"),
    form_field.call("execution", "remote"),
    form_field.call("queue", "ssquery"),
    form_field.call("simple_searchdata", '%s'),
    form_field.call("simple_cid", ""),
    form_field.call("simple_sid", ""),
    [Boundary, "Content-Disposition: form-data; name=\"file\"; filename=\"\"",
     "Content-Type: application/octet-stream", "", ""],
    form_field.call("simple_searchtype", "fs"),
    form_field.call("maxhits", '%s'),
    Boundary
  ].flatten.join("\x0d\x0a")

  # Submits +smiles+ to the search form and collects the reported hits.
  #
  # smiles::  query string inserted into the "simple_searchdata" field
  # maxhits:: value inserted into the "maxhits" field (default 100)
  #
  # Returns an Array of PubChemEntry objects, one per "CID: <n>" line of
  # the text report; the Array is empty when the report says "No hits".
  def self.smiles_search(smiles, maxhits=100)
    entries = []
    report_url = ""
    Net::HTTP.version_1_2
    Net::HTTP.start(Host, 80) do |http|
      headers = {
        'Content-Type' => "multipart/form-data; boundary=#{Boundary}",
        'Referer' => "http://pubchem.ncbi.nlm.nih.gov/search/"
      }
      page = http.post(Searchpath + Query, Data % [smiles, maxhits], headers).body

      # The reply may point at a follow-up page through a url="..." attribute.
      redirect = /url="([^"]+)"/.match(page)
      page = http.get(Searchpath + redirect[1]).body if redirect

      # Keep following the JavaScript "replace location after N" pages.
      while /setTimeout\('document.location.replace\("([^"]+)"\);', (\d+)\)/ =~ page
        target, delay = $1, $2
        sleep(delay.to_f/100)
        response = http.get(URI.parse(target))
        page = response.body
        report_url = response['location']
      end

      unless /PubChem structure search report:(\s|\S)+No hits/ =~ page
        # Ask for the brief text report instead of the HTML history view.
        report_url.sub!(/cmd=Select\+from\+History/, 'cmd=Text&dopt=Brief')
        page = http.get(report_url).body
        page.scan(/\d+: CID: (\d+)/).each do |captures|
          entries.push(PubChemEntry.new(captures[0].to_i))
        end
      end
    end
    entries
  end
end
|
80
|
+
|
81
|
+
# One PubChem compound, identified by its compound id (CID).
class PubChemEntry

  # The compound id this entry was created with.
  attr_reader :cid

  # cid:: PubChem compound id (formatted with %d when building URLs)
  def initialize cid
    @cid = cid
  end

  # Downloads the summary page of this compound in XML display mode and
  # returns the response body as a String.
  def get_xml
    fetch_summary("DisplayXML")
  end

  # Downloads the summary page of this compound in SDF display mode and
  # returns the response body as a String.
  def get_sdf
    fetch_summary("DisplaySDF")
  end

  private

  # Fetches Summary from Host with the given display option.
  # The CID is formatted with a plain %d; the previous XML URL used "%dd",
  # which appended a stray "d" to the compound id.
  def fetch_summary(disopt)
    Net::HTTP.version_1_2
    Net::HTTP.get(Host, Summary + "?disopt=%s&cid=%d" % [disopt, @cid])
  end

end
|
98
|
+
end
|
99
|
+
|
100
|
+
end
|
101
|
+
|
102
|
+
# Manual smoke test, run only when this file is executed directly.
# It performs live HTTP requests against the PubChem host.
if $0 == __FILE__
  smiles = "CC23(CCC1c4ccc(O)cc4(CCC1C3(CC(O)C2(O))))"
  puts "===== CID(s) for SMILES, #{smiles} ====="
  # smiles_search is a class method of the PubChem class nested inside the
  # Chem::PubChem module, so the receiver needs both levels.
  entries = Chem::PubChem::PubChem.smiles_search(smiles)
  p entries
  puts "===== MOL format data ===="
  entries.each do |entry|
    puts entry.get_sdf
  end
  # XML variant of the same download:
  # puts entries.first.get_xml
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# RMagick adaptor for chem/db/vector.rb
|
2
|
+
|
3
|
+
require 'chem/db/vector'
|
4
|
+
|
5
|
+
module Chem
|
6
|
+
class RMagickWriter
|
7
|
+
|
8
|
+
include Writer
|
9
|
+
|
10
|
+
# Constructor for the RMagick adaptor.
# See chem/db/vector.rb for the meaning of the individual parameters.
#
# Fills in defaults for :size, :orig_point and :margin directly in the
# +params+ hash supplied by the caller, remembers the default point size
# (14 unless params[:pointsize] is given) and flips params[:upside_down]
# before handing both arguments on to the superclass constructor.
def initialize(mol, params)
  params[:size]       ||= [350, 350]
  params[:orig_point] ||= [10, 10]
  params[:margin]     ||= [10, 10]
  @default_pointsize = params[:pointsize] || 14
  # The flag is inverted for this writer: a truthy value becomes false and
  # a missing/false value becomes true.
  params[:upside_down] = !params[:upside_down]
  super
end
|
20
|
+
|
21
|
+
# Draws a straight line between two points.
# This method may be invoked from chem/db/vector.rb.
#
# from::  start point, read as from[0] (x) and from[1] (y)
# to::    end point, read as to[0] (x) and to[1] (y)
# color:: three numbers formatted into an "rgb(r, g, b)" stroke colour
def line(from, to, color)
  stroke_color = "rgb(%f, %f, %f)" % color
  @canvas.stroke(stroke_color)
  x1 = from[0]
  y1 = from[1]
  x2 = to[0]
  y2 = to[1]
  @canvas.line(x1, y1, x2, y2)
end
|
27
|
+
|
28
|
+
# Draws a polygon through +nodes+.
#
# nodes::  sequence of points; node[0] and node[1] of each are used
# color::  accepted but not read by this method
#          NOTE(review): only params[:color] influences the fill colour -
#          confirm whether the positional argument was meant to be used.
# params:: optional hash; params[:color] (three numbers) sets the fill
#          colour before the polygon is drawn
def fill(nodes, color, params = {})
  fill_color = params[:color]
  @canvas.fill("rgb(%f, %f, %f)" % fill_color) if fill_color

  # Flatten the points into x1, y1, x2, y2, ... as expected by polygon.
  coordinates = []
  nodes.each do |node|
    coordinates.push(node[0], node[1])
  end
  @canvas.polygon(*coordinates)
end
|
33
|
+
|
34
|
+
# Draws +str+ horizontally centred on (x, y).
#
# str::    text to render
# x, y::   anchor position; the text is shifted left by half its measured
#          width and down by a quarter of its measured height
# params:: optional hash
#          :pointsize - point size used for this text only
#          :color     - three numbers formatted into an "rgb(r, g, b)" fill
#
# The draw-list point size and fill colour are restored to the default
# point size and 'black' afterwards when they were overridden.
def text(str, x, y, params = {})
  custom_size = params[:pointsize]
  text_color  = params[:color]

  # Measure with the size that is actually going to be rendered, so that
  # the centring offset matches the drawn glyphs.
  @canvas.pointsize = custom_size || @default_pointsize
  metrics = @canvas.get_type_metrics(@img, str)

  @canvas.stroke('transparent')
  # The key was previously misspelled (:pontsize), which passed nil here.
  @canvas.pointsize(custom_size) if custom_size
  @canvas.fill("rgb(%f, %f, %f)" % text_color) if text_color

  @canvas.text(x - metrics.width / 2.0,
               y + metrics.height / 4.0,
               str)

  @canvas.pointsize(@default_pointsize) if custom_size
  @canvas.fill('black') if text_color
end
|
48
|
+
|
49
|
+
# Convenience entry point: builds a writer for +mol+ with +params+ and
# immediately renders it to +filename+.  The same +params+ object is passed
# to both the constructor and #draw.  Returns whatever #draw returns.
def self.save(mol, filename, params)
  new(mol, params).draw(filename, params)
end
|
53
|
+
|
54
|
+
# Renders the molecule into a new image and writes it to +filename+.
#
# filename:: output path handed to Magick::ImageList#write
# params::   hash providing :size ([width, height]) and :margin
#            ([horizontal, vertical]); the image is enlarged by twice the
#            margin in each direction
#
# Returns the result of writing the image.
def draw(filename, params)
  width, height = params[:size]
  margin_x, margin_y = params[:margin]
  # Fall back to the horizontal margin when only one value was supplied.
  margin_y = margin_x if margin_y.nil?

  @img = Magick::ImageList.new
  # The height uses the vertical margin; it previously reused margin[0].
  @img.new_image(width + margin_x * 2, height + margin_y * 2)

  @canvas = Magick::Draw.new
  draw_body

  @canvas.draw(@img)
  @img.write(filename)
end
|
67
|
+
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
data/lib/chem/db/sdf.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
module Chem
|
4
|
+
|
5
|
+
module MDL
|
6
|
+
|
7
|
+
# Iterates over the records of an SD file, i.e. molfile entries that are
# terminated by a "$$$$" line.
class SdfParser
  include Enumerable

  # file:: anything accepted by Kernel#open; the handle is kept open so
  #        that #each can be called repeatedly.
  def initialize(file)
    require 'chem/db/mdl'
    require 'stringio'
    @input = open(file)
  end

  # Yields one MdlMolecule per record.  Returns an enumerator when called
  # without a block.
  #
  # The input is split on "$$$$", so every chunk except the first starts
  # with the line terminator that followed the previous delimiter.  That
  # remainder is dropped by skipping to just past the first "\n", which
  # copes with both "\n" and "\r\n" files.
  def each
    return to_enum(:each) unless block_given?

    @input.rewind
    first_entry = true
    @input.each("$$$$") do |entry|
      from = 0
      unless first_entry
        newline = entry.index("\n")
        # Trailing garbage without any line break cannot hold a record.
        next if newline.nil?
        from = newline + 1
      end
      first_entry = false

      record = entry[from..-1]
      next if record.length < 3
      yield MdlMolecule.parse_io(StringIO.new(record))
    end
  end

  # Shortcut for SdfParser.new(file).
  def self.parse(file)
    new(file)
  end

end
|
35
|
+
|
36
|
+
end
|
37
|
+
end
|
data/lib/chem/db/smbl.rb
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
#!/usr/local/bin/ruby
|
2
|
+
# obsolete class
|
3
|
+
require 'rexml/document'
|
4
|
+
|
5
|
+
include REXML
|
6
|
+
|
7
|
+
|
8
|
+
module Chem
|
9
|
+
module SMBL
|
10
|
+
|
11
|
+
# One species entry of a model.
class Specie
  attr_reader :name

  # All four arguments are expected as the attribute strings read from the
  # document.
  #
  # name::              species name
  # compartment::       compartment name, stored unchanged
  # boundaryCondition:: stored as true exactly when the string is "false"
  #                     NOTE(review): this looks inverted - confirm before
  #                     relying on @boundaryCondition.
  # initialAmount::     converted with to_f
  def initialize name, compartment, boundaryCondition, initialAmount
    @name              = name
    @compartment       = compartment
    @boundaryCondition = (boundaryCondition == "false")
    @initialAmount     = initialAmount.to_f
  end
end
|
20
|
+
|
21
|
+
# A named reaction with separate lists for its reactants and products.
class SMBLReaction

  attr_reader :reactants, :products, :name

  # name::       reaction name
  # reversible:: flag stored as given
  #
  # Both participant lists start out empty and are filled by the caller.
  def initialize name, reversible
    @reactants, @products = [], []
    @name = name
    @reversible = reversible
  end
end
|
31
|
+
|
32
|
+
# Container for everything read from one document: species indexed by
# name and the reactions in document order.
class Model
  attr_reader :species, :reactions

  def initialize
    @reactions = []
    @species = {}
  end
end
|
39
|
+
|
40
|
+
# Reads the document and fills +model+ with its species and reactions.
# NOTE(review): +file+ is not defined anywhere in the visible source; it
# has to be supplied before this module body can run - confirm the
# intended input.
doc = Document.new(file)

model = Model.new

# Species are indexed by their "name" attribute.
doc.elements.each("*/model/listOfSpecies/specie") do |s|
  species_name = s.attribute("name").to_s
  model.species[species_name] = Specie.new(species_name,
                                           s.attribute("compartment").to_s,
                                           s.attribute("boundaryCondition").to_s,
                                           s.attribute("initialAmount").to_s)
end

doc.elements.each("*/model/listOfReactions/reaction") do |r|
  # The reaction class of this module is SMBLReaction; the bare name
  # "Reaction" is not defined here.
  # NOTE(review): the attribute name below is empty, so the comparison is
  # always false - presumably "reversible" was intended; confirm.
  reaction = SMBLReaction.new(r.attribute("name").to_s,
                              r.attribute("").to_s == "false")
  # Each participant is stored as [Specie, stoichiometry attribute].
  r.elements.each("listOfReactants/specieReference") do |r_sp|
    reaction.reactants.push([model.species[r_sp.attribute("specie").to_s], r_sp.attribute("stoichiometry")])
  end
  r.elements.each("listOfProducts/specieReference") do |r_sp|
    reaction.products.push([model.species[r_sp.attribute("specie").to_s], r_sp.attribute("stoichiometry")])
  end
  model.reactions.push(reaction)
end
|
62
|
+
|
63
|
+
# Writes +m+ as a Graphviz digraph named SPN to +out+ and closes +out+.
#
# m::   object with #species (hash keyed by species name) and #reactions
#       (each with #name, #reactants and #products, whose elements are
#       [specie, stoichiometry] pairs)
# out:: writable IO; it is closed before the method returns
#
# Species become circles and reactions boxes.  Reactant edges carry the
# stoichiometry as label, product edges are unlabeled.
def make_SPN(m, out)
  pad = " " * 3

  out.puts "digraph SPN {"
  m.species.each_key do |species_name|
    out.puts "%s \"%s\" [shape=circle];" % [pad, species_name]
  end
  out.puts

  m.reactions.each do |rxn|
    out.puts "%s \"%s\" [shape=box]" % [pad, rxn.name]
    rxn.reactants.each do |pair|
      out.puts "%s \"%s\" -> \"%s\" [label=\"%s\"];" % [pad, pair[0].name, rxn.name, pair[1]]
    end
    out.puts
    rxn.products.each do |pair|
      out.puts "%s \"%s\" -> \"%s\"" % [pad, rxn.name, pair[0].name]
    end
  end

  out.puts "}"
  out.close
end
|
83
|
+
|
84
|
+
# Write the model as test.dot (make_SPN closes the file), then let the
# Graphviz "dot" command line tool convert it to PostScript in out.eps.
dot_file = open("test.dot", "w")
make_SPN(model, dot_file)

system("dot -Tps test.dot >out.eps")
|
87
|
+
end
|
88
|
+
end
|
@@ -0,0 +1,203 @@
|
|
1
|
+
#http://www.daylight.com/dayhtml/doc/theory/theory.smiles.html
|
2
|
+
|
3
|
+
class SmilesParser
rule
  # Input is a sequence of chains, each terminated by EOL.  The action
  # returns the chain (a nested Array of atoms) that was just completed.
  smiles   :
           | smiles smile EOL
             {
               return val[1]
             }

  # A chain is a flat list of nodes; branches appear as nested Arrays.
  smile    : node
             {
               result = val
             }
           | smile node
             {
               result.push(val[1])
             }

  # A node is an atom, an atom preceded by an explicit bond symbol, or a
  # parenthesised branch.
  node     : atom_pos
           | bond atom_pos
             {
               val[1].bond = val[0]
               result = val[1]
             }
           | branch

  # SYMBOL carries [element, properties] as built by the tokenizer.
  # ATOM_SYMBOL carries the raw bracket expression, brackets included,
  # with optional mass, chirality marks, hydrogen count and charge.
  atom     : SYMBOL
             {
               node = Chem::SmilesAtom.new
               node.element = val[0][0]
               node.is_aromatic = val[0][1][:is_aromatic]
               result = node
             }
           | ATOM_SYMBOL
             {
               # The charge may be written with the digits before or after
               # the sign, or as repeated signs.
               m = /(\d*)([A-Za-z][a-z]?)(@*)(H\d*)?(\d*[+-]+\d*)?/.match(val[0])
               node = Chem::SmilesAtom.new
               node.mass = m[1].to_i
               node.element = m[2].capitalize.intern
               # Lower-case symbols denote aromatic atoms.  Compare strings
               # rather than character codes so that this works whether
               # indexing a String yields a number or a String.
               node.is_aromatic = ("a" <= m[2][0, 1])
               node.chiral = m[3]

               if m[4] and not m[4] == ""
                 # Capture the whole number, not only its last digit.
                 hydrogen = /H(\d+)/.match(m[4])
                 node.hydrogen_count = hydrogen ? hydrogen[1].to_i : 1
               end
               if m[5]
                 charge = /(\d*)([+-]+)(\d*)/.match(m[5])
                 sign = (charge[2][0..0] == "+") ? 1 : -1
                 digits = charge[1].empty? ? charge[3] : charge[1]
                 # Without digits the number of sign characters is the
                 # magnitude.
                 n = digits.empty? ? charge[2].length : digits.to_i
                 node.charge = sign * n
               end

               result = node
             }

  # Digits directly after an atom are ring-closure labels, one per digit.
  atom_pos : atom
           | atom NUMBER # Ring
             {
               val[0].smiles_pos = val[1].split("").collect{|c| c.to_i}
               result = val[0]
             }

  bond     : '-' # single bond
           | '=' # double bond
           | '#' # triple bond
           | ':'
           | '/'
           | BSLASH
           | '.' # zero bond

  branch   : '(' smile ')'
             {
               result = val[1]
             }
end
|
73
|
+
|
74
|
+
|
75
|
+
# atom : '[' <mass> symbol <chiral> <hcount> <sign<charge>> ']'
|
76
|
+
# bond : <empty> | '-' | '=' | '#' | ':'
|
77
|
+
# branch : '(' <chain> ')'
|
78
|
+
# | '(' <chain> <branch> ')'
|
79
|
+
# | '(' <branch> <chain> ')'
|
80
|
+
# | '(' <chain> <branch> <chain> ')'
|
81
|
+
# ;
|
82
|
+
|
83
|
+
---- inner
|
84
|
+
|
85
|
+
# Hands the next queued token to the generated parser.  Tokens are taken
# from the front of @q; nil is returned once the queue is empty.
def next_token
  token = @q.shift
  token
end
|
88
|
+
|
89
|
+
# Walks the parse +tree+ and builds +mol+ from it.
#
# mol::  receives every atom through mol.nodes.push and every connection
#        through mol.join(from, to)
# tree:: Array of atoms; a nested Array is a branch hanging off the atom
#        that precedes it
# prev:: atom the first element of +tree+ is attached to (nil for none)
#
# Ring-closure digits are tracked in @ring: the first atom carrying a
# digit is remembered, the second one is joined to it and the digit is
# released for re-use.  Higher-numbered ring closures are not supported
# yet.
# NOTE(review): the ring-closing join passes the ring-opening atom as
# +to+, so join reads that atom's own bond symbol for the closure -
# confirm this is intended.
def construct(mol, tree, prev = nil)
  tree.each do |node|
    if node.instance_of?(Array)
      # A branch starts from the current atom and does not advance it.
      construct(mol, node, prev)
      next
    end

    mol.nodes.push(node)
    mol.join(prev, node) if prev

    (node.smiles_pos || []).each do |digit|
      opener = @ring[digit]
      if opener
        mol.join(node, opener)
        @ring[digit] = nil
      else
        @ring[digit] = node
      end
    end

    prev = node
  end
end
|
113
|
+
|
114
|
+
# Sets up the parser and starts with an empty ring-closure table, which
# construct indexes by ring digit.
def initialize
  super
  @ring = Array.new
end
|
118
|
+
|
119
|
+
# One parser instance shared by every call to parse_smiles.
@@parser = SmilesParser.new

# Parses +smiles+ with the shared parser instance and returns the result
# of its #parse method.
def self.parse_smiles(smiles)
  @@parser.parse(smiles)
end

# Matches an element symbol at the start of the remaining input, ignoring
# case.  The names are sorted in reverse order so that a longer symbol is
# tried before a shorter one that is its prefix.
element_names = Chem::Number2Element.map { |el| el.to_s }
ElementRegex = Regexp.new('\A(' + element_names.sort.reverse.join("|") + ')', Regexp::IGNORECASE)
|
126
|
+
|
127
|
+
# Tokenizes +line+, runs the generated parser and returns the molecule.
#
# line:: SMILES string; surrounding whitespace is ignored and the
#        caller's string is left untouched
#
# Tokens pushed onto @q:
#   [:ATOM_SYMBOL, "[...]"]            bracket atom, brackets included
#   [:SYMBOL, [element, properties]]   bare element symbol
#   [:NUMBER, digits]                  ring-closure digits
#   [:BSLASH, :BSLASH]                 a backslash bond
#   [char, char]                       any other single character
#   [:EOL, nil]                        end marker
#
# Returns a Chem::SmilesMol populated by construct.
def parse(line)
  @q = []
  # The parser instance may be shared between calls; forget ring-closure
  # digits that an earlier input left open.
  @ring = []

  # Work on a copy: strip! would modify the caller's string and fails on
  # frozen input.
  line = line.strip
  until line.empty? do
    case line
    when /\A\[([^\]]+)\]/
      # [nH] : aromatic N-H
      @q.push [:ATOM_SYMBOL, $&]
    when ElementRegex
      symbol = $&
      # Lower-case symbols are aromatic.  Strings are compared instead of
      # character codes so the test does not depend on what String#[]
      # returns.
      prop = { :is_aromatic => ("a" <= symbol[0, 1]) }
      @q.push [:SYMBOL, [symbol.capitalize.intern, prop]]
    when /\A\d+/
      @q.push [:NUMBER, $&]
    when /\A\\/
      # Anchored like the other patterns; an unanchored match would jump
      # to a later backslash and drop the characters in front of it.
      @q.push [:BSLASH, :BSLASH]
    when /\A./
      @q.push [$&, $&]
    else
      raise RuntimeError, 'must not happen'
    end
    # Continue after the text consumed by the pattern that matched above.
    line = $'
  end
  @q.push [:EOL, nil]

  mol = Chem::SmilesMol.new
  construct(mol, do_parse)
  mol
end
|
157
|
+
|
158
|
+
---- footer
|
159
|
+
|
160
|
+
module Chem
|
161
|
+
|
162
|
+
# Atom as produced by the SMILES parser.
class SmilesAtom
  include Atom

  # bond is the bond symbol written in front of the atom, smiles_pos the
  # list of ring-closure digits written after it.
  attr_accessor :bond, :element, :chiral, :hydrogen_count, :charge, :smiles_pos, :is_aromatic

  # Compact debugging form: "{<bond> <element>(<ring digits>)}".
  def inspect
    ring_digits = smiles_pos ? smiles_pos.join : ""
    "{%s %s(%s)}" % [@bond, @element, ring_digits]
  end
end
|
169
|
+
|
170
|
+
# Bond created by SmilesMol#join; all behaviour comes from the Bond mixin.
class SmilesBond
  include Bond
end
|
173
|
+
|
174
|
+
# Molecule assembled by the SMILES parser.
class SmilesMol

  include Molecule
  include Enumerable

  # nodes holds the atoms in input order, edges holds [bond, from, to]
  # triples.
  attr_reader :nodes, :edges

  def initialize
    @edges = []
    @nodes = []
  end

  # Connects +from+ and +to+.  The bond symbol stored on +to+ decides the
  # bond: '.' creates no edge at all, '=' gives v = 2, '#' gives v = 3 and
  # every other value gives v = 1.
  def join(from, to)
    symbol = to.bond
    return if symbol == '.'

    bond = SmilesBond.new
    bond.v = { '=' => 2, '#' => 3 }.fetch(symbol, 1)
    @edges.push([bond, from, to])
  end

end
|
202
|
+
|
203
|
+
end
|