chemruby 0.9.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README +120 -0
- data/Rakefile +195 -0
- data/ext/extconf.rb +4 -0
- data/ext/subcomp.c +416 -0
- data/lib/chem.rb +130 -0
- data/lib/chem/appl.rb +1 -0
- data/lib/chem/appl/chem3dole.rb +36 -0
- data/lib/chem/appl/tinker/nucleic.rb +40 -0
- data/lib/chem/appl/tinker/tinker_reader.rb +43 -0
- data/lib/chem/data.rb +4 -0
- data/lib/chem/data/atomic_weight.rb +124 -0
- data/lib/chem/data/character.rb +2 -0
- data/lib/chem/data/electronegativity.rb +14 -0
- data/lib/chem/data/periodic_table.rb +6 -0
- data/lib/chem/data/prime_numbers.rb +1 -0
- data/lib/chem/data/vdw_radii.rb +1 -0
- data/lib/chem/db.rb +64 -0
- data/lib/chem/db/cansmi.rb +234 -0
- data/lib/chem/db/cdx.rb +1525 -0
- data/lib/chem/db/eps.rb +164 -0
- data/lib/chem/db/g98.rb +909 -0
- data/lib/chem/db/gspan.rb +130 -0
- data/lib/chem/db/iupac.rb +5 -0
- data/lib/chem/db/iupac/a_1.rb +46 -0
- data/lib/chem/db/iupac/iuparser.rb +226 -0
- data/lib/chem/db/iupac/iuparser.ry +97 -0
- data/lib/chem/db/iupac/postfix.rb +2 -0
- data/lib/chem/db/kcf.rb +390 -0
- data/lib/chem/db/kcf_glycan.rb +19 -0
- data/lib/chem/db/kegg.rb +516 -0
- data/lib/chem/db/linucs/linparser.rb +144 -0
- data/lib/chem/db/linucs/linucs.ry +53 -0
- data/lib/chem/db/mdl.rb +379 -0
- data/lib/chem/db/molconnz.rb +12 -0
- data/lib/chem/db/mopac.rb +88 -0
- data/lib/chem/db/msi.rb +107 -0
- data/lib/chem/db/pdb_dic.rb +115 -0
- data/lib/chem/db/pdf.rb +131 -0
- data/lib/chem/db/pubchem.rb +113 -0
- data/lib/chem/db/rmagick.rb +70 -0
- data/lib/chem/db/sdf.rb +37 -0
- data/lib/chem/db/smbl.rb +88 -0
- data/lib/chem/db/smiles.rb +2 -0
- data/lib/chem/db/smiles/smiles.ry +203 -0
- data/lib/chem/db/smiles/smiparser.rb +375 -0
- data/lib/chem/db/swf.rb +74 -0
- data/lib/chem/db/sybyl.rb +150 -0
- data/lib/chem/db/tinker.rb +77 -0
- data/lib/chem/db/types/type_cansmi.rb +9 -0
- data/lib/chem/db/types/type_cdx.rb +24 -0
- data/lib/chem/db/types/type_gspan.rb +31 -0
- data/lib/chem/db/types/type_kcf.rb +28 -0
- data/lib/chem/db/types/type_kcf_glycan.rb +26 -0
- data/lib/chem/db/types/type_kegg.rb +92 -0
- data/lib/chem/db/types/type_mdl.rb +31 -0
- data/lib/chem/db/types/type_pdf.rb +33 -0
- data/lib/chem/db/types/type_png.rb +31 -0
- data/lib/chem/db/types/type_rxn.rb +25 -0
- data/lib/chem/db/types/type_sdf.rb +25 -0
- data/lib/chem/db/types/type_sybyl.rb +30 -0
- data/lib/chem/db/types/type_xyz.rb +26 -0
- data/lib/chem/db/vector.rb +128 -0
- data/lib/chem/db/xyz.rb +39 -0
- data/lib/chem/model.rb +119 -0
- data/lib/chem/model/skeleton.rb +37 -0
- data/lib/chem/utils.rb +11 -0
- data/lib/chem/utils/geometry.rb +27 -0
- data/lib/chem/utils/graph_db.rb +146 -0
- data/lib/chem/utils/math.rb +17 -0
- data/lib/chem/utils/prop.rb +123 -0
- data/lib/chem/utils/sssr.rb +101 -0
- data/lib/chem/utils/sub.rb +78 -0
- data/lib/chem/utils/transform.rb +110 -0
- data/lib/chem/utils/traverse.rb +37 -0
- data/lib/chem/utils/ullmann.rb +134 -0
- data/lib/graph.rb +41 -0
- data/lib/graph/cluster.rb +20 -0
- data/lib/graph/morgan.rb +38 -0
- data/sample/frequent_subgraph.rb +46 -0
- data/sample/images/ex1.rb +11 -0
- data/sample/images/ex2.rb +4 -0
- data/sample/images/ex3.rb +5 -0
- data/sample/images/ex4.rb +17 -0
- data/sample/images/ex5.rb +10 -0
- data/sample/images/mol/adenine.mol +26 -0
- data/sample/images/mol/atp.mol +69 -0
- data/sample/images/temp/ex5.mol +344 -0
- data/sample/kegg_db.rb +116 -0
- data/setup.rb +1551 -0
- data/test/all.rb +6 -0
- data/test/coord_test.rb +17 -0
- data/test/ctab_test.rb +31 -0
- data/test/data/A_21.tar.gz +0 -0
- data/test/data/A_21/aceanthrylene.cdx +0 -0
- data/test/data/A_21/aceanthrylene.mol +40 -0
- data/test/data/A_21/acenaphthylene.cdx +0 -0
- data/test/data/A_21/acenaphthylene.mol +31 -0
- data/test/data/A_21/acephenanthrylene.cdx +0 -0
- data/test/data/A_21/acephenanthrylene.mol +40 -0
- data/test/data/A_21/anthracene.cdx +0 -0
- data/test/data/A_21/anthracene.mol +35 -0
- data/test/data/A_21/as-indacene.cdx +0 -0
- data/test/data/A_21/as-indacene.mol +31 -0
- data/test/data/A_21/azulene.cdx +0 -0
- data/test/data/A_21/azulene.mol +26 -0
- data/test/data/A_21/biphenylene.cdx +0 -0
- data/test/data/A_21/biphenylene.mol +31 -0
- data/test/data/A_21/chrysene.cdx +0 -0
- data/test/data/A_21/chrysene.mol +44 -0
- data/test/data/A_21/coronen.cdx +0 -0
- data/test/data/A_21/coronen.mol +59 -0
- data/test/data/A_21/fluoranthene.cdx +0 -0
- data/test/data/A_21/fluoranthene.mol +40 -0
- data/test/data/A_21/fluorene.cdx +0 -0
- data/test/data/A_21/fluorene.mol +33 -0
- data/test/data/A_21/heptacene.cdx +0 -0
- data/test/data/A_21/heptacene.mol +71 -0
- data/test/data/A_21/heptalene.cdx +0 -0
- data/test/data/A_21/heptalene.mol +30 -0
- data/test/data/A_21/heptaphene.cdx +0 -0
- data/test/data/A_21/heptaphene.mol +71 -0
- data/test/data/A_21/hexacene.cdx +0 -0
- data/test/data/A_21/hexacene.mol +62 -0
- data/test/data/A_21/hexaphene.cdx +0 -0
- data/test/data/A_21/hexaphene.mol +62 -0
- data/test/data/A_21/indene.cdx +0 -0
- data/test/data/A_21/indene.mol +24 -0
- data/test/data/A_21/iupac.txt +41 -0
- data/test/data/A_21/naphthacene.cdx +0 -0
- data/test/data/A_21/naphthacene.mol +44 -0
- data/test/data/A_21/naphthalene.cdx +0 -0
- data/test/data/A_21/naphthalene.mol +26 -0
- data/test/data/A_21/ovalene.cdx +0 -0
- data/test/data/A_21/ovalene.mol +78 -0
- data/test/data/A_21/pentacene.cdx +0 -0
- data/test/data/A_21/pentacene.mol +53 -0
- data/test/data/A_21/pentalene.cdx +0 -0
- data/test/data/A_21/pentalene.mol +22 -0
- data/test/data/A_21/pentaphene.cdx +0 -0
- data/test/data/A_21/pentaphene.mol +53 -0
- data/test/data/A_21/perylene.cdx +0 -0
- data/test/data/A_21/perylene.mol +49 -0
- data/test/data/A_21/phenalene.cdx +0 -0
- data/test/data/A_21/phenalene.mol +33 -0
- data/test/data/A_21/phenanthrene.cdx +0 -0
- data/test/data/A_21/phenanthrene.mol +35 -0
- data/test/data/A_21/picene.cdx +0 -0
- data/test/data/A_21/picene.mol +53 -0
- data/test/data/A_21/pleiadene.cdx +0 -0
- data/test/data/A_21/pleiadene.mol +44 -0
- data/test/data/A_21/pyranthrene.cdx +0 -0
- data/test/data/A_21/pyranthrene.mol +72 -0
- data/test/data/A_21/pyrene.cdx +0 -0
- data/test/data/A_21/pyrene.mol +40 -0
- data/test/data/A_21/rubicene.cdx +0 -0
- data/test/data/A_21/rubicene.mol +63 -0
- data/test/data/A_21/s-indacene.cdx +0 -0
- data/test/data/A_21/s-indacene.mol +31 -0
- data/test/data/A_21/tetraphenylene.cdx +0 -0
- data/test/data/A_21/tetraphenylene.mol +57 -0
- data/test/data/A_21/trinaphthylene.cdx +0 -0
- data/test/data/A_21/trinaphthylene.mol +71 -0
- data/test/data/A_21/triphenylene.cdx +0 -0
- data/test/data/A_21/triphenylene.mol +44 -0
- data/test/data/C00147.kcf +25 -0
- data/test/data/G00147.kcf +13 -0
- data/test/data/atp.mol +69 -0
- data/test/data/cyclohexane.mol +17 -0
- data/test/data/cyclohexane.ps +485 -0
- data/test/data/fullerene.mol +155 -0
- data/test/data/glycan +33 -0
- data/test/data/hypericin.cdx +0 -0
- data/test/data/hypericin.cdxml +596 -0
- data/test/data/hypericin.chm +0 -0
- data/test/data/hypericin.ct +85 -0
- data/test/data/hypericin.f1d +0 -0
- data/test/data/hypericin.f1q +0 -0
- data/test/data/hypericin.gif +0 -0
- data/test/data/hypericin.mol +88 -0
- data/test/data/hypericin.mol2 +159 -0
- data/test/data/hypericin.msm +123 -0
- data/test/data/hypericin.pdf +359 -0
- data/test/data/hypericin.png +0 -0
- data/test/data/hypericin.ps +0 -0
- data/test/data/hypericin.skc +0 -0
- data/test/data/hypericin2.gif +0 -0
- data/test/data/hypericin2.ps +0 -0
- data/test/data/kegg/genomes/hsa/hsa_enzyme.list +4 -0
- data/test/data/kegg/genomes/hsa/hsa_pfam.list +4 -0
- data/test/data/kegg/ligand/mol/C00147.mol +26 -0
- data/test/data/kegg/ligand/reaction +14 -0
- data/test/data/kegg/ligand/reaction.lst +1 -0
- data/test/data/kegg/ligand/reaction_mapformula.lst +3 -0
- data/test/data/reaction +14 -0
- data/test/data/reaction.lst +1 -0
- data/test/data/reaction_mapformula.lst +3 -0
- data/test/data/rxn/C00001.mol +6 -0
- data/test/data/rxn/C00011.mol +10 -0
- data/test/data/rxn/C00014.mol +6 -0
- data/test/data/rxn/C01010.mol +18 -0
- data/test/data/rxn/sample.rxn +50 -0
- data/test/data/rxn/substitution.rxn +45 -0
- data/test/data/test.eps +0 -0
- data/test/data/test.mol +28 -0
- data/test/data/test.sdf +143 -0
- data/test/data/test.skc +0 -0
- data/test/data/test.xyz +4 -0
- data/test/data/test_lf.sdf +143 -0
- data/test/heavy_test_pubchem.rb +16 -0
- data/test/multiple_test.rb +22 -0
- data/test/test_adj.rb +54 -0
- data/test/test_canonical_smiles.rb +46 -0
- data/test/test_cdx.rb +32 -0
- data/test/test_chem.rb +18 -0
- data/test/test_cluster.rb +19 -0
- data/test/test_db.rb +11 -0
- data/test/test_eps.rb +24 -0
- data/test/test_geometry.rb +11 -0
- data/test/test_gspan.rb +28 -0
- data/test/test_iupac.rb +36 -0
- data/test/test_kcf.rb +24 -0
- data/test/test_kcf_glycan.rb +10 -0
- data/test/test_kegg.rb +118 -0
- data/test/test_linucs.rb +21 -0
- data/test/test_mdl.rb +45 -0
- data/test/test_mol2.rb +62 -0
- data/test/test_morgan.rb +21 -0
- data/test/test_pdf.rb +12 -0
- data/test/test_prop.rb +86 -0
- data/test/test_rmagick.rb +15 -0
- data/test/test_sbdb.rb +23 -0
- data/test/test_sdf.rb +30 -0
- data/test/test_smiles.rb +84 -0
- data/test/test_sssr.rb +18 -0
- data/test/test_sub.rb +47 -0
- data/test/test_subcomp.rb +37 -0
- data/test/test_traverse.rb +29 -0
- data/test/test_writer.rb +13 -0
- data/test/test_xyz.rb +15 -0
- data/test/type_test.rb +25 -0
- metadata +290 -0
@@ -0,0 +1,144 @@
|
|
1
|
+
#
|
2
|
+
# DO NOT MODIFY!!!!
|
3
|
+
# This file is automatically generated by racc 1.4.4
|
4
|
+
# from racc grammer file "linucs.ry".
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'racc/parser'
|
8
|
+
|
9
|
+
|
10
|
+
# Racc-generated parser for the "linucs.ry" grammar.
#
# NOTE: this file is generated output; any change to the hand-written
# section below must also be made in linucs.ry, otherwise regenerating
# the parser will discard it.
class LinucsParser < Racc::Parser

module_eval <<'..end linucs.ry modeval..idcf977ebdbe', 'linucs.ry', 11

  # Hands the next queued [token, value] pair to the racc runtime.
  # Returns nil once the queue filled by #parse is exhausted.
  def next_token
    @q.shift
  end

  # Splits +line+ into tokens, runs the generated parser over them and
  # returns a new Chem::SmilesMol that was handed to #construct together
  # with the parse result.
  #
  # line:: the input String; it is not modified.
  #
  # Raises RuntimeError ('must not happen') when no lexer rule matches the
  # head of the remaining input (e.g. an embedded newline).
  #
  # NOTE(review): #construct is not defined in this file and the grammar
  # only declares the EOL and node tokens - confirm against the rest of
  # the library before relying on this parser.
  def parse( line )
    @q = []

    # Work on a stripped copy so the caller's string is left untouched
    # (and frozen strings are accepted).
    line = line.strip
    until line.empty? do
      case line
      when /\A\[/
        @q.push [:Ll, :Ll]
      when /\A\]/
        # Anchored at the head of the input: an unanchored match would make
        # "line = $'" below drop everything in front of the match.
        @q.push [:Lr, :Lr]
      when /\A\[([^\]]+)\]/
        # Never reached: the bare "[" rule above always matches first.
        @q.push [:ATOM_SYMBOL, $&]
      when /\A(Cl|C|F|H|N|O|Au|U)/
        @q.push [:SYMBOL, $&.intern ]
      when /\A[a-zA-Z_]/
        @q.push [:SYMBOL, $&.intern ]
      when /\A\d+/
        @q.push [:NUMBER, $&.to_i ]
      when /\A\\/
        # Anchored for the same reason as the "]" rule.
        @q.push [:BSLASH, :BSLASH]
      when /\A./
        @q.push [$&, $&]
      else
        raise RuntimeError, 'must not happen'
      end
      # Continue with whatever follows the token that was just consumed.
      line = $'
    end
    @q.push [ :EOL, nil ]
    mol = Chem::SmilesMol.new
    construct(mol, do_parse)
    mol
  end

..end linucs.ry modeval..idcf977ebdbe

##### racc 1.4.4 generates ###

racc_reduce_table = [
 0, 0, :racc_error,
 0, 5, :_reduce_none,
 3, 5, :_reduce_none,
 1, 6, :_reduce_none,
 2, 6, :_reduce_none ]

racc_reduce_n = 5

racc_shift_n = 8

racc_action_table = [
     2,     6,     7,     3,     5 ]

racc_action_check = [
     1,     4,     4,     1,     2 ]

racc_action_pointer = [
   nil,     0,     4,   nil,    -1,   nil,   nil,   nil ]

racc_action_default = [
    -1,    -5,    -5,    -3,    -5,     8,    -2,    -4 ]

racc_goto_table = [
     1,     4 ]

racc_goto_check = [
     1,     2 ]

racc_goto_pointer = [
   nil,     0,     0 ]

racc_goto_default = [
   nil,   nil,   nil ]

racc_token_table = {
 false => 0,
 Object.new => 1,
 :EOL => 2,
 :node => 3 }

racc_use_result_var = true

racc_nt_base = 4

Racc_arg = [
 racc_action_table,
 racc_action_check,
 racc_action_default,
 racc_action_pointer,
 racc_goto_table,
 racc_goto_check,
 racc_goto_default,
 racc_goto_pointer,
 racc_nt_base,
 racc_reduce_table,
 racc_token_table,
 racc_shift_n,
 racc_reduce_n,
 racc_use_result_var ]

Racc_token_to_s_table = [
'$end',
'error',
'EOL',
'node',
'$start',
'linucs',
'linuc']

Racc_debug_parser = false

##### racc system variables end #####

 # reduce 0 omitted

 # reduce 1 omitted

 # reduce 2 omitted

 # reduce 3 omitted

 # reduce 4 omitted

 # Shared no-op semantic action: every rule just passes +result+ through.
 def _reduce_none( val, _values, result )
  result
 end

end   # class LinucsParser
|
@@ -0,0 +1,53 @@
|
|
1
|
+
|
2
|
+
class LinucsParser
|
3
|
+
rule
|
4
|
+
linucs :
|
5
|
+
| linucs linuc EOL
|
6
|
+
linuc : node
|
7
|
+
| linuc node
|
8
|
+
end
|
9
|
+
|
10
|
+
---- inner
|
11
|
+
|
12
|
+
def next_token
|
13
|
+
@q.shift
|
14
|
+
end
|
15
|
+
|
16
|
+
def parse( line )
|
17
|
+
@q = []
|
18
|
+
|
19
|
+
line.strip!
|
20
|
+
until line.empty? do
|
21
|
+
case line
|
22
|
+
when /\A\[/
|
23
|
+
@q.push [:Ll, :Ll]
|
24
|
+
when /A\]/
|
25
|
+
@q.push [:Lr, :Lr]
|
26
|
+
when /\A\[([^\]]+)\]/
|
27
|
+
@q.push [:ATOM_SYMBOL, $&] # [m[1], m[2], m[3], m[4], m[5], m[6]]]
|
28
|
+
when /\A(Cl|C|F|H|N|O|Au|U)/
|
29
|
+
@q.push [:SYMBOL, $&.intern ]
|
30
|
+
when /\A[a-zA-Z_]/
|
31
|
+
@q.push [:SYMBOL, $&.intern ]
|
32
|
+
when /\A\d+/
|
33
|
+
@q.push [:NUMBER, $&.to_i ]
|
34
|
+
when /\\/
|
35
|
+
@q.push [:BSLASH, :BSLASH]
|
36
|
+
when /\A./
|
37
|
+
@q.push [$&, $&]
|
38
|
+
else
|
39
|
+
raise RuntimeError, 'must not happen'
|
40
|
+
end
|
41
|
+
line = $'
|
42
|
+
end
|
43
|
+
@q.push [ :EOL, nil ]
|
44
|
+
mol = Chem::SmilesMol.new
|
45
|
+
construct(mol, do_parse)
|
46
|
+
mol
|
47
|
+
end
|
48
|
+
|
49
|
+
|
50
|
+
---- footer
|
51
|
+
|
52
|
+
|
53
|
+
|
data/lib/chem/db/mdl.rb
ADDED
@@ -0,0 +1,379 @@
|
|
1
|
+
#
|
2
|
+
# chem/db/mdl.rb - MDL molfile format class
|
3
|
+
#
|
4
|
+
|
5
|
+
|
6
|
+
module Chem
|
7
|
+
|
8
|
+
# MDL molfile export for any object that exposes +nodes+ and +edges+.
module Molecule

  # printf template for the counts line: nine 3-wide integer columns
  # followed by a fixed tail.
  MDLCountLineFormat = "%3d%3d%3d%3d%3d%3d%3d%3d%3d 0999 V2000"

  # Writes the receiver to +filename+ in MDL molfile layout.
  #
  # The file starts with +filename+ itself, two empty lines and the counts
  # line, followed by one line per node (node.to_mdl) and one line per
  # edge.  Each entry of +edges+ is expected to be [edge, atom1, atom2];
  # the atoms are written as 1-based positions within +nodes+.
  #
  # Returns whatever +edges.each+ returns (the last expression evaluated
  # inside the File.open block).
  def save_as_mdl filename
    File.open(filename, "w") do |mdl_file|
      # Three header lines: the file name, then two blank lines.
      mdl_file.puts filename
      2.times { mdl_file.puts }

      # Only the atom and bond counts are filled in; the rest stay 0.
      counts = [nodes.length, edges.length] + [0] * 7
      mdl_file.puts MDLCountLineFormat % counts

      nodes.each { |atom| mdl_file.puts atom.to_mdl }

      edges.each do |bond, first_atom, second_atom|
        first_no  = nodes.index(first_atom) + 1
        second_no = nodes.index(second_atom) + 1
        mdl_file.puts bond.to_mdl(first_no, second_no)
      end
    end
  end

end
|
28
|
+
|
29
|
+
# MDL atom-line formatting for objects that respond to x, y, z and element.
module Atom

  # printf template for one atom line: three 10.4 float coordinates, a
  # separator, the element symbol and twelve 3-wide integer columns.
  MDLAtomLineFormat = "%10.4f%10.4f%10.4f %2s%3d%3d%3d%3d%3d%3d%3d%3d%3d%3d%3d%3d"

  # Renders the receiver as one atom line.
  #
  # mapping:: integer written into the tenth integer column (default 0);
  #           every other integer column is written as 0.
  #
  # Returns the formatted String (no trailing newline).
  def to_mdl mapping = 0
    coordinates   = [x, y, z]
    before_mapping = [0] * 9
    after_mapping  = [0, 0]
    fields = coordinates + [element] + before_mapping + [mapping] + after_mapping
    MDLAtomLineFormat % fields
  end

end
|
38
|
+
|
39
|
+
# MDL bond-line formatting for objects that respond to +v+.
module Bond

  # Renders the receiver as one bond line.
  #
  # from:: number written in the first column
  # to::   number written in the second column
  #
  # The third column is the receiver's +v+ and the fourth is always 0.
  # Returns the formatted String, which ends with a single space.
  def to_mdl from, to
    columns = [from, to, v, 0]
    "%3d%3d%3d%3d " % columns
  end

end
|
46
|
+
|
47
|
+
# Draft MDL rxn export for objects that keep @reactants and @products.
module Reaction

  # Currently disabled: returns nil straight away.
  #
  # The statements after the early return are an unfinished draft that is
  # never executed; they refer to an +out+ stream that is not defined here.
  def to_mdl_rxn
    return # fix me
    # out = STDOUT
    out.puts "$RXN"
    out.puts
    out.puts "ISIS 112620051015"
    out.puts
    out.puts "%3d%3d" % [@reactants.length, @products.length]
    [@reactants, @products].each do |side|
      side.each { |mol| output_mdl_mol(mol, out) }
    end
  end

  private

  # Writes one molecule section to +out+: a "$MOL" line, one atom line per
  # node of +mol+ (each rendered with mapping 10) and a closing "M END".
  def output_mdl_mol mol, out
    out.puts "$MOL"
    mol.nodes.each { |atom| out.puts atom.to_mdl(10) }
    out.puts "M END"
  end

end
|
71
|
+
|
72
|
+
module MDL
|
73
|
+
|
74
|
+
# One atom line of an MDL molfile.  Fields are sliced out of the raw line
# by fixed column position on first access and cached afterwards.
class MDLAtom

  require 'chem/utils/transform'
  include Atom
  include Chem::Transform::ThreeDimension

  # Symbolic names for stereo-parity codes.
  Stereo = {
    0 => :not_stereo,
    1 => :odd,
    2 => :even,
    3 => :either
  }

  # Number of this atom; assigned by whoever builds the molecule.
  attr_accessor :number

  # line:: the raw atom line; it is kept as-is and parsed lazily.
  def initialize line
    @line = line
  end

  # Returns atomic symbol (columns 30-32, stripped) as a Symbol.
  def element
    @element ||= @line[30, 3].strip.to_sym
  end

  # Coordinate for x-axis (columns 0-9).
  def x
    @x ||= @line[0, 10].to_f
  end

  # Coordinate for y-axis (columns 10-19).
  def y
    @y ||= @line[10, 10].to_f
  end

  # Coordinates for z-axis (columns 20-29).
  def z
    @z ||= @line[20, 10].to_f
  end

  # Difference from mass in periodic table.
  # Unlike the other fields this is returned as the raw column text
  # (columns 33-35), not converted to an Integer.
  def mass_difference
    @mass_difference ||= @line[33, 3]
  end

  # Columns 36-38 as Integer.
  def charge
    @charge ||= @line[36, 3].to_i
  end

  # Columns 39-41 as Integer.
  def stereo_parity
    @stereo_parity ||= @line[39, 3].to_i
  end

  # Columns 42-44 as Integer.
  def hydrogen_count
    @hydrogen_count ||= @line[42, 3].to_i
  end

  # Columns 45-47 as Integer.
  def stereo_care_box
    @stereo_care_box ||= @line[45, 3].to_i
  end

  # Columns 48-50 as Integer.
  def valence
    @valence ||= @line[48, 3].to_i
  end

  # Columns 51-53 as Integer.
  def h0_designator
    @h0_designator ||= @line[51, 3].to_i
  end

  # 54..56 Not used
  # 57..59 Not used

  # Columns 60-62 as Integer.
  def mapping
    @mapping ||= @line[60, 3].to_i
  end

  # Columns 63-65 as Integer.
  def inversion
    @inversion ||= @line[63, 3].to_i
  end

  # Columns 66-68 as Integer.
  def exact_charge
    @exact_charge ||= @line[66, 3].to_i
  end

end
|
113
|
+
|
114
|
+
# One bond line of an MDL molfile.  Fields are sliced out of the raw line
# by fixed column position on first access and cached afterwards.
#
# Column positions follow the V2000 bond line layout
# "111222tttsssxxxrrrccc" (0-based: type 6-8, stereo 9-11, unused 12-14,
# topology 15-17, reacting center 18-20).
class MDLBond

  include Bond

  # Symbolic names for the bond stereo codes.
  Stereo = {
    0 => :not_stereo,
    1 => :up,
    3 => :cis_trans,
    4 => :either,
    6 => :down
  }

  # Symbolic names for the bond type codes.
  BondType = {
    1 => :single,
    2 => :double,
    3 => :triple,
    4 => :aromatic,
    5 => :single_or_double,
    6 => :single_or_aromatic,
    7 => :double_or_aromatic,
    8 => :any
  }

  # Symbolic names for the reacting-center codes.
  ReactingCenter = {
    0 => :unmarked,
    1 => :center,
    -1 => :not,
    2 => :no_change,
    4 => :made_or_broken,
    8 => :order_changes
  }

  # Symbolic names for the bond topology codes.
  Topology = {
    0 => :either,
    1 => :ring,
    2 => :chain
  }

  # line:: the raw bond line; it is kept as-is and parsed lazily.
  def initialize line
    @line = line
  end

  # Numeric bond type code (columns 6-8) as Integer.
  def v
    @v ||= @line[6..8].to_i
  end

  # Topology symbol looked up from columns 15-17 (nil for unknown codes).
  def topology
    @topology ||= Topology[@line[15..17].to_i]
  end

  # Reacting-center symbol looked up from columns 18-20 (nil for unknown
  # codes).
  def reacting_center
    @reacting_center ||= ReactingCenter[@line[18..20].to_i]
  end

  # Stereo symbol looked up from columns 9-11 (nil for unknown codes).
  def stereo
    @stereo ||= Stereo[@line[9..11].to_i]
  end

  # Bond type symbol for #v (nil for unknown codes).
  # Cached in its own variable so that it can never overwrite the numeric
  # code returned by #v.
  def bond_type
    @bond_type ||= BondType[v]
  end

end
|
162
|
+
|
163
|
+
# Reader for a single MDL molfile section.  The including object must
# provide @nodes and @edges arrays; parsed atoms and bonds are appended to
# them.
module MdlMolParser

  # Path handed to the last #open call (nil when only #parse was used).
  attr_reader :filename

  # Parses the molfile at +filename+ into the receiver and returns the
  # receiver.  The file is closed again before this method returns, also
  # when parsing raises.
  def open(filename)
    @filename = filename
    File.open(filename) { |input| parse(input) }
  end

  # The title line captured by #parse (including its line terminator).
  def entry
    @title
  end
  alias name entry

  # Reads one molecule from +input+ (any object with readline and each)
  # and returns self.
  #
  # The first and third header lines are skipped, the second one is kept
  # as the title.  After the atom and bond blocks the stream is consumed
  # up to and including the "M  END" line (or to EOF when there is none),
  # so that a following molecule can be parsed from the same stream.
  #
  # Raises RuntimeError for an out-of-range counts line or a bond that
  # refers to a non-existent atom, and EOFError when the input ends early.
  def parse input
    input.readline
    @title = input.readline
    input.readline
    counts = input.readline
    n_atom = counts[0..2].to_i
    n_bond = counts[3..5].to_i

    unless n_atom.between?(0, 999) && n_bond.between?(0, 999)
      raise "counts line format error"
    end

    n_atom.times do |n|
      atom = MDLAtom.new(input.readline)
      atom.number = n + 1
      @nodes.push(atom)
    end

    n_bond.times do
      line = input.readline
      bond = MDLBond.new line
      b_n = line[0..2].to_i
      e_n = line[3..5].to_i
      unless b_n.between?(1, n_atom) && e_n.between?(1, n_atom)
        # Report the offending line through the exception instead of
        # printing it to stdout.
        raise "MDL bond line format error: #{line.inspect}"
      end

      @edges.push([bond, @nodes[b_n - 1], @nodes[e_n - 1]])
    end

    # Accept any amount of whitespace between "M" and "END"; the standard
    # terminator is written with two spaces.
    input.each do |line|
      break if /M\s+END/.match(line)
    end
    self
  end

end
|
213
|
+
|
214
|
+
# A molecule read from an MDL molfile: +nodes+ holds the atoms and +edges+
# holds [bond, atom1, atom2] triples, both filled in by MdlMolParser#parse.
class MdlMolecule

  include Molecule
  include Enumerable
  include MdlMolParser

  attr_reader :nodes, :edges

  # Creates an empty molecule.
  def initialize
    @nodes = []
    @edges = []
  end

  # Parses one molecule from the already opened stream +input+ and returns
  # the new MdlMolecule.  The stream is left open.
  def self.parse_io input
    MdlMolecule.new.parse(input)
  end

  # Parses the molfile stored at path +file+ and returns the new
  # MdlMolecule.
  #
  # File.open with a block is used on purpose: it only ever opens a file
  # (the bare Kernel#open may run a command for names starting with "|")
  # and it closes the handle even when parsing raises.
  def self.parse file
    File.open(file) { |input| MdlMolecule.new.parse(input) }
  end

end
|
239
|
+
|
240
|
+
# An atom of a reaction: pairs the reactant-side and the product-side atom
# and answers atom queries through a "representative" - the reactant atom
# when one was assigned, otherwise the product atom.
class RxnAtom
  include Atom

  # Writers are defined by hand below, so only readers are generated here.
  attr_reader :reactant, :product

  # Sets the reactant-side atom; it also becomes the representative
  # (including when +rct+ is nil).
  def reactant= rct
    @reactant = @representative = rct
  end

  # Sets the product-side atom; it becomes the representative only when
  # there is none yet.
  def product= prd
    @product = prd
    @representative = prd unless @representative
  end

  # Forwards any unknown message (with its arguments and block) to the
  # representative atom.
  def method_missing name, *args, &block
    @representative.send(name, *args, &block)
  end

  # Keeps respond_to? in step with #method_missing.
  def respond_to_missing? name, include_private = false
    (!@representative.nil? && @representative.respond_to?(name, include_private)) || super
  end

  # Coordinates and element symbol of the representative atom.
  def x ; @representative.x ; end
  def y ; @representative.y ; end
  def z ; @representative.z ; end
  def element ; @representative.element ; end

end
|
263
|
+
|
264
|
+
# A bond of a reaction: pairs the reactant-side and the product-side bond.
class RxnBond
  include Bond
  attr_accessor :reactant, :product

  # Change in bond order across the reaction:
  # product.v - reactant.v when both sides exist, -reactant.v when only
  # the reactant side exists, and product.v otherwise.
  #
  # Raises NoMethodError when neither side has been assigned.
  # (No attr_reader is generated for +v+: this method is the only reader.)
  def v
    if @reactant && @product
      @product.v - @reactant.v
    elsif @reactant
      -@reactant.v
    else
      @product.v
    end
  end

end
|
280
|
+
|
281
|
+
# A reaction read from an MDL rxn file.
#
# +nodes+ holds one RxnAtom per atom-atom mapping number found on either
# side; +edges+ holds [RxnBond, atom1, atom2] triples, where the atoms are
# the molecule-level atoms of the side the bond was read from.
class MdlReaction

  include Molecule
  include Reaction
  include Enumerable

  attr_reader :nodes, :edges

  # Path handed to the last #open_rxn call.
  attr_reader :filename

  # Creates an empty reaction.
  def initialize
    @nodes = []
    @edges = []
    @reactants = []
    @products = []
  end

  # Reads the rxn file at +filename+ into the receiver and returns self.
  # The file is closed before the reaction graph is built, also when
  # reading raises.
  def open_rxn(filename)
    @filename = filename
    File.open(filename) do |input|
      n_reactants, n_products = parse_header(input)
      read_mol(input, n_reactants, @reactants, @r_atoms = {})
      read_mol(input, n_products, @products, @p_atoms = {})
    end
    construct
    self
  end

  private

  # Builds +nodes+ and +edges+ from the molecules read by #open_rxn.
  def construct
    @p2r = {}
    @r2p = {}
    # Union of the mapping numbers, so an atom that is mapped on both sides
    # yields exactly one RxnAtom.
    (@r_atoms.keys | @p_atoms.keys).each do |k|
      r_atom = @r_atoms[k]
      p_atom = @p_atoms[k]
      ratom = RxnAtom.new
      ratom.reactant = r_atom
      ratom.product = p_atom
      @p2r[p_atom] = r_atom if p_atom
      @r2p[r_atom] = p_atom if r_atom
      @nodes.push(ratom)
    end

    get_edge_hash(@products, p_edge_hash = {})

    # Product bonds that were paired with a reactant bond (may contain nil
    # when the mapped atoms are not bonded on the product side).
    paired = []
    @reactants.each do |mol|
      mol.edges.each do |edge, atom1, atom2|
        bond = RxnBond.new
        bond.reactant = edge
        if @r2p[atom1] && @r2p[atom2]
          key = [@r2p[atom1], @r2p[atom2]].sort_by { |a| a.number }
          bond.product = p_edge_hash[key]
          paired.push(bond.product)
        end
        # Same [bond, atom1, atom2] shape as the product-only edges below.
        @edges.push([bond, atom1, atom2])
      end
    end

    # Bonds that exist only on the product side.
    @products.each do |mol|
      mol.edges.each do |bond, atom1, atom2|
        next if paired.include?(bond)
        r_bond = RxnBond.new
        r_bond.product = bond
        @edges.push([r_bond, atom1, atom2])
      end
    end
  end

  # Fills +hash+ with every bond of +mols+, keyed by its two atoms sorted
  # by their +number+.
  def get_edge_hash mols, hash
    mols.each do |mol|
      mol.edges.each do |edge, atom1, atom2|
        hash[[atom1, atom2].sort_by { |a| a.number }] = edge
      end
    end
  end

  # Reads +n+ molecules from +input+ and appends them to +mols+; every
  # atom with a non-zero mapping number is also stored in +atoms+ under
  # that number.
  def read_mol input, n, mols, atoms
    n.times do
      # Skip ahead to just after the next "$MOL" line.
      loop do
        line = input.readline # $MOL
        break if /\$MOL/.match(line)
      end
      mol = MdlMolecule.parse_io(input)
      mol.nodes.each do |a|
        next if a.mapping == 0
        atoms[a.mapping] = a
      end
      mols.push mol
    end
  end

  # Skips the four header lines and returns the whitespace-separated
  # integers of the following line (reactant count, product count).
  def parse_header input
    4.times { input.readline }
    input.readline.split.map { |n| n.to_i }
  end

end
|
375
|
+
|
376
|
+
end
|
377
|
+
|
378
|
+
end
|
379
|
+
|