chemruby 0.9.3 → 1.1.9
Sign up to get free protection for your applications and to get access to all the features.
- data/README +2 -2
- data/Rakefile +67 -63
- data/ext/extconf.rb +2 -0
- data/ext/subcomp.c +461 -320
- data/ext/utils.c +56 -0
- data/ext/utils.h +13 -0
- data/lib/chem.rb +34 -8
- data/lib/chem/db.rb +8 -0
- data/lib/chem/db/cansmi.rb +1 -1
- data/lib/chem/db/cdx.rb +1 -1
- data/lib/chem/db/cml.rb +52 -0
- data/lib/chem/db/gd.rb +64 -0
- data/lib/chem/db/gspan.rb +2 -2
- data/lib/chem/db/kcf_rpair.rb +34 -0
- data/lib/chem/db/kegg.rb +35 -1
- data/lib/chem/db/mdl.rb +75 -34
- data/lib/chem/db/opsin.rb +24 -0
- data/lib/chem/db/pdb.rb +105 -0
- data/lib/chem/db/pdf.rb +2 -0
- data/lib/chem/db/pubchem.rb +1071 -88
- data/lib/chem/db/rmagick.rb +5 -3
- data/lib/chem/db/sdf.rb +28 -2
- data/lib/chem/db/smiles/smiles.ry +27 -25
- data/lib/chem/db/smiles/smiparser.rb +29 -27
- data/lib/chem/db/types/type_gd.rb +35 -0
- data/lib/chem/db/types/type_gspan.rb +2 -2
- data/lib/chem/db/types/type_kcf.rb +19 -0
- data/lib/chem/db/types/type_kegg.rb +2 -0
- data/lib/chem/db/types/type_mdl.rb +1 -1
- data/lib/chem/db/types/type_png.rb +5 -1
- data/lib/chem/db/types/type_rdf.rb +22 -0
- data/lib/chem/db/types/type_xyz.rb +1 -1
- data/lib/chem/db/vector.rb +19 -3
- data/lib/chem/model.rb +5 -2
- data/lib/chem/utils.rb +17 -1
- data/lib/chem/utils/bitdb.rb +49 -0
- data/lib/chem/utils/cas.rb +28 -0
- data/lib/chem/utils/cdk.rb +403 -0
- data/lib/chem/utils/fingerprint.rb +98 -0
- data/lib/chem/utils/geometry.rb +8 -0
- data/lib/chem/utils/net.rb +303 -0
- data/lib/chem/utils/once.rb +28 -0
- data/lib/chem/utils/openbabel.rb +204 -0
- data/lib/chem/utils/sssr.rb +33 -25
- data/lib/chem/utils/sub.rb +6 -0
- data/lib/chem/utils/transform.rb +9 -8
- data/lib/chem/utils/ullmann.rb +138 -95
- data/lib/graph.rb +5 -6
- data/lib/graph/utils.rb +8 -0
- data/sample/calc_maximum_common_subgraph.rb +27 -0
- data/sample/calc_properties.rb +9 -0
- data/sample/data/atp.mol +69 -0
- data/sample/data/pioglitazone.mol +58 -0
- data/sample/data/rosiglitazone.mol +55 -0
- data/sample/data/troglitazone.mol +70 -0
- data/sample/find_compound_by_keggapi.rb +19 -0
- data/sample/generate_inchi.rb +7 -0
- data/sample/generate_substructurekey.rb +11 -0
- data/sample/images/ex6.rb +17 -0
- data/sample/images/ex7.rb +18 -0
- data/sample/iupac2mol.rb +8 -0
- data/sample/kekule.rb +13 -0
- data/sample/logp.rb +4 -0
- data/sample/mcs.rb +13 -0
- data/sample/mol2pdf.rb +8 -0
- data/sample/pubchem_fetch.rb +8 -0
- data/sample/pubchem_search.rb +12 -0
- data/sample/rosiglitazone.mol +57 -0
- data/sample/smarts.rb +10 -0
- data/sample/structure_match.rb +8 -0
- data/sample/structure_match_color.rb +22 -0
- data/sample/thiazolidinedione.mol +19 -0
- data/sample/troglitazone.mol +232 -0
- data/sample/vicinity.rb +8 -0
- data/test/data/CID_704.sdf +236 -0
- data/test/data/CID_994.sdf +146 -0
- data/test/data/db_EXPT03276.txt +321 -0
- data/test/data/pioglitazone.mol +58 -0
- data/test/data/rosiglitazone.mol +55 -0
- data/test/data/thiazolidinedione.mol +19 -0
- data/test/data/troglitazone.mol +70 -0
- data/test/{test_adj.rb → tc_adj.rb} +0 -0
- data/test/{test_canonical_smiles.rb → tc_canonical_smiles.rb} +0 -0
- data/test/tc_casrn.rb +17 -0
- data/test/tc_cdk.rb +89 -0
- data/test/{test_cdx.rb → tc_cdx.rb} +0 -0
- data/test/{test_chem.rb → tc_chem.rb} +0 -0
- data/test/{test_cluster.rb → tc_cluster.rb} +0 -0
- data/test/{test_db.rb → tc_db.rb} +0 -0
- data/test/tc_develop.rb +38 -0
- data/test/tc_drugbank.rb +13 -0
- data/test/{test_eps.rb → tc_eps.rb} +0 -0
- data/test/tc_gd.rb +8 -0
- data/test/{test_geometry.rb → tc_geometry.rb} +0 -0
- data/test/tc_graph.rb +15 -0
- data/test/{test_gspan.rb → tc_gspan.rb} +0 -0
- data/test/{test_iupac.rb → tc_iupac.rb} +0 -0
- data/test/{test_kcf.rb → tc_kcf.rb} +0 -0
- data/test/{test_kcf_glycan.rb → tc_kcf_glycan.rb} +0 -0
- data/test/{test_kegg.rb → tc_kegg.rb} +13 -0
- data/test/{test_linucs.rb → tc_linucs.rb} +0 -0
- data/test/{test_mdl.rb → tc_mdl.rb} +20 -0
- data/test/{test_mol2.rb → tc_mol2.rb} +1 -1
- data/test/{test_morgan.rb → tc_morgan.rb} +0 -0
- data/test/tc_net.rb +5 -0
- data/test/tc_once.rb +29 -0
- data/test/tc_openbabel.rb +57 -0
- data/test/{test_pdf.rb → tc_pdf.rb} +0 -0
- data/test/{test_prop.rb → tc_prop.rb} +1 -1
- data/test/tc_pubchem.rb +32 -0
- data/test/{test_rmagick.rb → tc_rmagick.rb} +0 -0
- data/test/{test_sbdb.rb → tc_sbdb.rb} +0 -0
- data/test/{test_sdf.rb → tc_sdf.rb} +2 -0
- data/test/{test_smiles.rb → tc_smiles.rb} +46 -30
- data/test/tc_sssr.rb +1 -0
- data/test/{test_sub.rb → tc_sub.rb} +0 -0
- data/test/tc_subcomp.rb +59 -0
- data/test/{test_traverse.rb → tc_traverse.rb} +0 -0
- data/test/{test_writer.rb → tc_writer.rb} +0 -0
- data/test/{test_xyz.rb → tc_xyz.rb} +0 -0
- data/test/ts_current.rb +11 -0
- data/test/ts_image.rb +6 -0
- data/test/ts_main.rb +12 -0
- metadata +259 -194
- data/lib/chem/utils/graph_db.rb +0 -146
- data/test/test_sssr.rb +0 -18
- data/test/test_subcomp.rb +0 -37
@@ -0,0 +1,24 @@
|
|
1
|
+
|
2
|
+
module Chem
|
3
|
+
|
4
|
+
def self.opsin_parse(iupac_name)
|
5
|
+
OpsinMolecule.new(iupac_name)
|
6
|
+
end
|
7
|
+
|
8
|
+
class OpsinMolecule
|
9
|
+
include Molecule
|
10
|
+
|
11
|
+
def initialize(iupac_name)
|
12
|
+
require 'rcdk'
|
13
|
+
@iupac_name = iupac_name
|
14
|
+
name2struct = Rjb::import('uk.ac.cam.ch.wwmm.opsin.NameToStructure').new
|
15
|
+
@cml = name2struct.parseToCML(iupac_name).toXML.to_s
|
16
|
+
@mol = Chem::CMLMolecule.new(@cml)
|
17
|
+
end
|
18
|
+
|
19
|
+
def nodes ; @mol.nodes ; end
|
20
|
+
|
21
|
+
def edges ; @mol.edges ; end
|
22
|
+
|
23
|
+
end
|
24
|
+
end
|
data/lib/chem/db/pdb.rb
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
$: << "/Users/tanaka/proj/chemruby/lib"
|
2
|
+
$: << "/Users/tanaka/proj/chemruby/ext"
|
3
|
+
$: << "/Users/tanaka/temp/bioruby/lib"
|
4
|
+
|
5
|
+
require 'bio'
|
6
|
+
require 'chem'
|
7
|
+
|
8
|
+
module Chem
|
9
|
+
module PDB
|
10
|
+
|
11
|
+
class PDBBond
|
12
|
+
include Bond
|
13
|
+
end
|
14
|
+
|
15
|
+
class PDBMolecule
|
16
|
+
include Chem::Molecule
|
17
|
+
|
18
|
+
def initialize name
|
19
|
+
@name = name
|
20
|
+
@nodes = []
|
21
|
+
@edges = []
|
22
|
+
end
|
23
|
+
|
24
|
+
# Set connection using het_dictionary
|
25
|
+
def set_connection het_dic
|
26
|
+
atom_hash = @nodes.inject({}){|ret, atom| ret[atom.name.strip] = atom ; ret}
|
27
|
+
con = het_dic.find{|entry| entry.entry_id == @name}
|
28
|
+
con.record["CONECT"].each do |b|
|
29
|
+
if from = atom_hash[b.name.strip]
|
30
|
+
b.other_atoms.each do |to_atom|
|
31
|
+
if to = atom_hash[to_atom.strip]
|
32
|
+
bond = PDBBond.new
|
33
|
+
@edges.push([bond, from, to])
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
module Bio
|
46
|
+
|
47
|
+
class PDB
|
48
|
+
|
49
|
+
def mols
|
50
|
+
mols = {}
|
51
|
+
@hash["HETATM"].each do |atom|
|
52
|
+
mol = (mols[[atom.resName, atom.chainID]] ||= Chem::PDB::PDBMolecule.new(atom.resName))
|
53
|
+
mol.nodes.push(atom)
|
54
|
+
end
|
55
|
+
mols
|
56
|
+
end
|
57
|
+
|
58
|
+
# represents one entry of het_dictionary.txt
|
59
|
+
class ChemicalComponent
|
60
|
+
end
|
61
|
+
|
62
|
+
class Record::HETATM
|
63
|
+
include Chem::Atom
|
64
|
+
include Chem::Transform::ThreeDimension
|
65
|
+
def pos ; @pos ||= Vector[@x, @y, @z] ; end
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
|
72
|
+
if __FILE__ == $0
|
73
|
+
dir = "/Users/tanaka/data/"
|
74
|
+
|
75
|
+
enzyme = Bio::FlatFile.auto(dir + "/pdb/1j4r.ent")
|
76
|
+
|
77
|
+
mols = {}
|
78
|
+
enzyme.each do |entry|
|
79
|
+
entry.mols.each do |key, mol|
|
80
|
+
p mol.nodes.length
|
81
|
+
dic = Bio::FlatFile.auto(dir + "het_dictionary.txt")
|
82
|
+
mol.set_connection(dic)
|
83
|
+
mol.save("#{key.join('_')}.png")
|
84
|
+
end
|
85
|
+
exit
|
86
|
+
entry.record("HETATM").each do |atom|
|
87
|
+
(mols[atom.resName] ||= []).push atom
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
# p mols.keys
|
92
|
+
end
|
93
|
+
|
94
|
+
#c001 = dic.find{|entry| entry.entry_id == "001"}
|
95
|
+
|
96
|
+
#p c001.hello#.record["CONECT"]
|
97
|
+
|
98
|
+
#p mols["001"]
|
99
|
+
|
100
|
+
__END__
|
101
|
+
|
102
|
+
|
103
|
+
pdb.each do |entry|
|
104
|
+
p entry.entry_id
|
105
|
+
end
|
data/lib/chem/db/pdf.rb
CHANGED
@@ -101,6 +101,8 @@ EOL
|
|
101
101
|
|
102
102
|
def text(str, x, y, params = {})
|
103
103
|
@vect << "BT"
|
104
|
+
color = params[:color].nil? ? "0 0 0" : params[:color].join(" ")
|
105
|
+
@vect << "#{color} rg"
|
104
106
|
@vect << "/F1 #{@params[:font]} Tf"
|
105
107
|
@vect << "1 0 0 1 #{x - @params[:font] * 0.4} #{y - @params[:font] * 0.4} Tm"
|
106
108
|
@vect << "(#{str}) Tj"
|
data/lib/chem/db/pubchem.rb
CHANGED
@@ -1,113 +1,1096 @@
|
|
1
1
|
#
|
2
2
|
# chem/db/pubchem.rb - PubChem database class
|
3
3
|
#
|
4
|
-
# Copyright (C) 2005
|
5
|
-
# TANAKA Nobuya <tanaka@kuicr.kyoto-u.ac.jp>
|
6
|
-
#
|
4
|
+
# Copyright (C) 2005-2007 TANAKA Nobuya <nobuya.tanaka@gmail.com>
|
7
5
|
#
|
8
6
|
|
9
|
-
require '
|
10
|
-
|
7
|
+
require 'chem'
|
8
|
+
# ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.txt
|
11
9
|
|
12
10
|
module Chem
|
11
|
+
# Section 1.
|
12
|
+
HierarchicElementCounts = {
|
13
|
+
:H => [
|
14
|
+
[4, 0],
|
15
|
+
[8, 1],
|
16
|
+
[16, 2],
|
17
|
+
[32, 3]],
|
18
|
+
:Li => [
|
19
|
+
[ 1, 4],
|
20
|
+
[ 2, 5]],
|
21
|
+
:B => [
|
22
|
+
[ 1, 6],
|
23
|
+
[ 2, 7],
|
24
|
+
[ 4, 8]],
|
25
|
+
:C => [
|
26
|
+
[ 2, 9],
|
27
|
+
[ 4, 10],
|
28
|
+
[ 8, 11],
|
29
|
+
[ 16, 12],
|
30
|
+
[ 32, 13]],
|
31
|
+
:N => [
|
32
|
+
[ 1, 14],
|
33
|
+
[ 2, 15],
|
34
|
+
[ 4, 16],
|
35
|
+
[ 8, 17]],
|
36
|
+
:O => [
|
37
|
+
[ 1, 18],
|
38
|
+
[ 2, 19],
|
39
|
+
[ 4, 20],
|
40
|
+
[ 8, 21],
|
41
|
+
[ 16, 22]],
|
42
|
+
:F => [
|
43
|
+
[ 1, 23],
|
44
|
+
[ 2, 24],
|
45
|
+
[ 4, 25]],
|
46
|
+
:Na => [
|
47
|
+
[ 1, 26],
|
48
|
+
[ 2, 27]],
|
49
|
+
:Si => [
|
50
|
+
[ 1, 28],
|
51
|
+
[ 2, 29]],
|
52
|
+
:P => [
|
53
|
+
[ 1, 30],
|
54
|
+
[ 2, 31],
|
55
|
+
[ 4, 32]],
|
56
|
+
:S => [
|
57
|
+
[ 1, 33],
|
58
|
+
[ 2, 34],
|
59
|
+
[ 4, 35],
|
60
|
+
[ 8, 36]],
|
61
|
+
:Cl => [
|
62
|
+
[ 1, 37],
|
63
|
+
[ 2, 38],
|
64
|
+
[ 4, 39],
|
65
|
+
[ 8, 40]],
|
66
|
+
:K => [
|
67
|
+
[ 1, 41],
|
68
|
+
[ 2, 42]],
|
69
|
+
:Br => [
|
70
|
+
[ 1, 43],
|
71
|
+
[ 2, 44],
|
72
|
+
[ 4, 45]],
|
73
|
+
:I => [
|
74
|
+
[ 1, 46],
|
75
|
+
[ 2, 47],
|
76
|
+
[ 4, 48]],
|
77
|
+
:Be => [[ 1, 49 ]],
|
78
|
+
:Mg => [[ 1, 50]],
|
79
|
+
:Al => [[ 1, 51]],
|
80
|
+
:Ca => [[ 1, 52]],
|
81
|
+
:Sc => [[ 1, 53]],
|
82
|
+
:Ti => [[ 1, 54]],
|
83
|
+
:V => [[ 1, 55]],
|
84
|
+
:Cr => [[ 1, 56]],
|
85
|
+
:Mn => [[ 1, 57]],
|
86
|
+
:Fe => [[ 1, 58]],
|
87
|
+
:Co => [[ 1, 59]],
|
88
|
+
:Ni => [[ 1, 60]],
|
89
|
+
:Cu => [[ 1, 61]],
|
90
|
+
:Zn => [[ 1, 62]],
|
91
|
+
:Ga => [[ 1, 63]],
|
92
|
+
:Ge => [[ 1, 64]],
|
93
|
+
:As => [[ 1, 65]],
|
94
|
+
:Se => [[ 1, 66]],
|
95
|
+
:Kr => [[ 1, 67]],
|
96
|
+
:Rb => [[ 1, 68]],
|
97
|
+
:Sr => [[ 1, 69]],
|
98
|
+
:Y => [[ 1, 70]],
|
99
|
+
:Zr => [[ 1, 71]],
|
100
|
+
:Nb => [[ 1, 72]],
|
101
|
+
:Mo => [[ 1, 73]],
|
102
|
+
:Ru => [[ 1, 74]],
|
103
|
+
:Rh => [[ 1, 75]],
|
104
|
+
:Pd => [[ 1, 76]],
|
105
|
+
:Ag => [[ 1, 77]],
|
106
|
+
:Cd => [[ 1, 78]],
|
107
|
+
:In => [[ 1, 79]],
|
108
|
+
:Sn => [[ 1, 80]],
|
109
|
+
:Sb => [[ 1, 81]],
|
110
|
+
:Te => [[ 1, 82]],
|
111
|
+
:Xe => [[ 1, 83]],
|
112
|
+
:Cs => [[ 1, 84]],
|
113
|
+
:Ba => [[ 1, 85]],
|
114
|
+
:Lu => [[ 1, 86]],
|
115
|
+
:Hf => [[ 1, 87]],
|
116
|
+
:Ta => [[ 1, 88]],
|
117
|
+
:W => [[ 1, 89]],
|
118
|
+
:Re => [[ 1, 90]],
|
119
|
+
:Os => [[ 1, 91]],
|
120
|
+
:Ir => [[ 1, 92]],
|
121
|
+
:Pt => [[ 1, 93]],
|
122
|
+
:Au => [[ 1, 94]],
|
123
|
+
:Hg => [[ 1, 95]],
|
124
|
+
:Tl => [[ 1, 96]],
|
125
|
+
:Pb => [[ 1, 97]],
|
126
|
+
:Bi => [[ 1, 98]],
|
127
|
+
:La => [[ 1, 99]],
|
128
|
+
:Ce => [[ 1, 100]],
|
129
|
+
:Pr => [[ 1, 101]],
|
130
|
+
:Nd => [[ 1, 102]],
|
131
|
+
:Pm => [[ 1, 103]],
|
132
|
+
:Sm => [[ 1, 104]],
|
133
|
+
:Eu => [[ 1, 105]],
|
134
|
+
:Gd => [[ 1, 106]],
|
135
|
+
:Tb => [[ 1, 107]],
|
136
|
+
:Dy => [[ 1, 108]],
|
137
|
+
:Ho => [[ 1, 109]],
|
138
|
+
:Er => [[ 1, 110]],
|
139
|
+
:Tm => [[ 1, 111]],
|
140
|
+
:Yb => [[ 1, 112]],
|
141
|
+
:Tc => [[ 1, 113]],
|
142
|
+
:U => [[ 1, 114]],
|
143
|
+
}
|
13
144
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
145
|
+
# Section 2
|
146
|
+
|
147
|
+
# Section 3
|
148
|
+
Section3 = {
|
149
|
+
'H-Li' => 263,
|
150
|
+
'Li-Li' => 264,
|
151
|
+
'B-Li' => 265,
|
152
|
+
'C-Li' => 266,
|
153
|
+
'Li-O' => 267,
|
154
|
+
'F-Li' => 268,
|
155
|
+
'Li-P' => 269,
|
156
|
+
'Li-S' => 270,
|
157
|
+
'Cl-Li' => 271,
|
158
|
+
'B-H' => 272,
|
159
|
+
'B-B' => 273,
|
160
|
+
'B-C' => 274,
|
161
|
+
'B-N' => 275,
|
162
|
+
'B-O' => 276,
|
163
|
+
'B-F' => 277,
|
164
|
+
'B-Si' => 278,
|
165
|
+
'B-P' => 279,
|
166
|
+
'B-S' => 280,
|
167
|
+
'B-Cl' => 281,
|
168
|
+
'B-Br' => 282,
|
169
|
+
'C-H' => 283,
|
170
|
+
'C-C' => 284,
|
171
|
+
'C-N' => 285,
|
172
|
+
'C-O' => 286,
|
173
|
+
'C-F' => 287,
|
174
|
+
'C-Na' => 288,
|
175
|
+
'C-Mg' => 289,
|
176
|
+
'Al-C' => 290,
|
177
|
+
'C-Si' => 291,
|
178
|
+
'C-P' => 292,
|
179
|
+
'C-S' => 293,
|
180
|
+
'C-Cl' => 294,
|
181
|
+
'As-C' => 295,
|
182
|
+
'C-Se' => 296,
|
183
|
+
'Br-C' => 297,
|
184
|
+
'C-I' => 298,
|
185
|
+
'H-N' => 299,
|
186
|
+
'N-N' => 300,
|
187
|
+
'N-O' => 301,
|
188
|
+
'F-N' => 302,
|
189
|
+
'N-Si' => 303,
|
190
|
+
'N-P' => 304,
|
191
|
+
'N-S' => 305,
|
192
|
+
'Cl-N' => 306,
|
193
|
+
'Br-N' => 307,
|
194
|
+
'H-O' => 308,
|
195
|
+
'O-O' => 309,
|
196
|
+
'Mg-O' => 310,
|
197
|
+
'Na-O' => 311,
|
198
|
+
'Al-O' => 312,
|
199
|
+
'O-Si' => 313,
|
200
|
+
'O-P' => 314,
|
201
|
+
'K-O' => 315,
|
202
|
+
'F-P' => 316,
|
203
|
+
'F-S' => 317,
|
204
|
+
'Al-H' => 318,
|
205
|
+
'Al-Cl' => 319,
|
206
|
+
'H-Si' => 320,
|
207
|
+
'Si-Si' => 321,
|
208
|
+
'Cl-Si' => 322,
|
209
|
+
'H-P' => 323,
|
210
|
+
'P-P' => 324,
|
211
|
+
'As-H' => 325,
|
212
|
+
'As-As' => 326,
|
213
|
+
}
|
214
|
+
|
215
|
+
# Section 4 Simple atom nearest neighbors
|
216
|
+
|
217
|
+
Section4 = {
|
218
|
+
:C => [
|
219
|
+
["-Br", "-C", 327],
|
220
|
+
["-Br", "-C", "-C", 328],
|
221
|
+
["-Br", "-H", 329],
|
222
|
+
["-Br", "~C", 330],
|
223
|
+
["-Br", "~N", 331],
|
224
|
+
["-C", "-C", 332],
|
225
|
+
["-C", "-C", "-C", 333],
|
226
|
+
["-C", "-C", "-C", "-C", 334],
|
227
|
+
["-C", "-C", "-C", "-H", 335],
|
228
|
+
["-C", "-C", "-C", "-N", 336],
|
229
|
+
["-C", "-C", "-C", "-O", 337],
|
230
|
+
["-C", "-C", "-H", "-N", 338],
|
231
|
+
["-C", "-C", "-H", "-O", 339],
|
232
|
+
["-C", "-C", "-N", 340],
|
233
|
+
["-C", "-C", "-O", 341],
|
234
|
+
["-C", "-Cl", 342],
|
235
|
+
["-C", "-Cl", "-H", 343],
|
236
|
+
["-C", "-H", 344],
|
237
|
+
["-C", "-H", "-N", 345],
|
238
|
+
["-C", "-H", "-O", 346],
|
239
|
+
["-C", "-H", "-O", "-O", 347],
|
240
|
+
["-C", "-H", "-P", 348],
|
241
|
+
["-C", "-H", "-S", 349],
|
242
|
+
["-C", "-I", 350],
|
243
|
+
["-C", "-N", 351],
|
244
|
+
["-C", "-O", 352],
|
245
|
+
["-C", "-S", 353],
|
246
|
+
["-C", "-Si", 354],
|
247
|
+
["-C", "~C", 355],
|
248
|
+
["-C", "~C", "~C", 356],
|
249
|
+
["-C", "~C", "~N", 357],
|
250
|
+
["-C", "~N", 358],
|
251
|
+
["-C", "~N", "~N", 359],
|
252
|
+
["-Cl", "-Cl", 360],
|
253
|
+
["-Cl", "-H", 361],
|
254
|
+
["-Cl", "~C", 362],
|
255
|
+
["-F", "-F", 363],
|
256
|
+
["-F", "~C", 364],
|
257
|
+
["-H", "-N", 365],
|
258
|
+
["-H", "-O", 366],
|
259
|
+
["-H", "-O", "-O", 367],
|
260
|
+
["-H", "-S", 368],
|
261
|
+
["-H", "-Si", 369],
|
262
|
+
["-H", "~C", 370],
|
263
|
+
["-H", "~C", "~C", 371],
|
264
|
+
["-H", "~C", "~N", 372],
|
265
|
+
["-H", "~N", 373],
|
266
|
+
["-H", "-H", "-H", 374],
|
267
|
+
["-N", "-N", 375],
|
268
|
+
["-N", "~C", 376],
|
269
|
+
["-N", "~C", "~C", 377],
|
270
|
+
["-N", "~C", "~N", 378],
|
271
|
+
["-N", "~N", 379],
|
272
|
+
["-O", "-O", 380],
|
273
|
+
["-O", "~C", 381],
|
274
|
+
["-O", "~C", "~C", 382],
|
275
|
+
["-S", "~C", 383],
|
276
|
+
["~C", "~C", 384],
|
277
|
+
["~C", "~C", "~C", 385],
|
278
|
+
["~C", "~C", "~N", 386],
|
279
|
+
["~C", "~N", 387],
|
280
|
+
["~C", "~N", "~N", 388],
|
281
|
+
["~N", "~N", 389]],
|
282
|
+
:N => [
|
283
|
+
["-C", "-C", 390],
|
284
|
+
["-C", "-C", "-C", 391],
|
285
|
+
["-C", "-C", "-H", 392],
|
286
|
+
["-C", "-H", 393],
|
287
|
+
["-C", "-H", "-N", 394],
|
288
|
+
["-C", "-O", 395],
|
289
|
+
["-C", "~C", 396],
|
290
|
+
["-C", "~C", "~C", 397],
|
291
|
+
["-H", "-N", 398],
|
292
|
+
["-H", "~C", 399],
|
293
|
+
["-H", "~C", "~C", 400],
|
294
|
+
["-O", "-O", 401],
|
295
|
+
["-O", "~O", 402],
|
296
|
+
["~C", "~C", 403],
|
297
|
+
["~C", "~C", "~C", 404]],
|
298
|
+
:O => [
|
299
|
+
["-C", "-C", 405],
|
300
|
+
["-C", "-H", 406],
|
301
|
+
["-C", "-P", 407],
|
302
|
+
["-H", "-S", 408],
|
303
|
+
["~C", "~C", 409]],
|
304
|
+
:P => [
|
305
|
+
["-C", "-C", 410],
|
306
|
+
["-O", "-O", 411]],
|
307
|
+
:S => [
|
308
|
+
["-C", "-C", 412],
|
309
|
+
["-C", "-H", 413],
|
310
|
+
["-C", "-O", 414]],
|
311
|
+
:Si => [
|
312
|
+
["-C", "-C", 415]
|
313
|
+
]
|
314
|
+
}
|
315
|
+
|
316
|
+
|
317
|
+
|
318
|
+
# Section 5 Detailed atom neighborhoods
|
319
|
+
|
320
|
+
# Section 6 Simple SMARTS patterns
|
321
|
+
|
322
|
+
Section6 = {
|
323
|
+
"C-C-C#C" => 460,
|
324
|
+
"O-C-C=N" => 461,
|
325
|
+
"O-C-C=O" => 462,
|
326
|
+
"N:C-S-[#1]" => 463,
|
327
|
+
"N-C-C=C" => 464,
|
328
|
+
"O=S-C-C" => 465,
|
329
|
+
"N#C-C=C" => 466,
|
330
|
+
"C=N-N-C" => 467,
|
331
|
+
"O=S-C-N" => 468,
|
332
|
+
"S-S-C:C" => 469,
|
333
|
+
"C:C-C=C" => 470,
|
334
|
+
"S:C:C:C" => 471,
|
335
|
+
"C:N:C-C" => 472,
|
336
|
+
"S-C:N:C" => 473,
|
337
|
+
"S:C:C:N" => 474,
|
338
|
+
"S-C=N-C" => 475,
|
339
|
+
"C-O-C=C" => 476,
|
340
|
+
"N-N-C:C" => 477,
|
341
|
+
"S-C=N-[#1]" => 478,
|
342
|
+
"S-C-S-C" => 479,
|
343
|
+
"C:S:C-C" => 480,
|
344
|
+
"O-S-C:C" => 481,
|
345
|
+
"C:N-C:C" => 482,
|
346
|
+
"N-S-C:C" => 483,
|
347
|
+
"N-C:N:C" => 484,
|
348
|
+
"N:C:C:N" => 485,
|
349
|
+
"N-C:N:N" => 486,
|
350
|
+
"N-C=N-C" => 487,
|
351
|
+
"N-C=N-[#1]" => 488,
|
352
|
+
"N-C-S-C" => 489,
|
353
|
+
"C-C-C=C" => 490,
|
354
|
+
"C-N:C-[#1]" => 491,
|
355
|
+
"N-C:O:C" => 492,
|
356
|
+
"O=C-C:C" => 493,
|
357
|
+
"O=C-C:N" => 494,
|
358
|
+
"C-N-C:C" => 495,
|
359
|
+
"N:N-C-[#1]" => 496,
|
360
|
+
"O-C:C:N" => 497,
|
361
|
+
"O-C=C-C" => 498,
|
362
|
+
"N-C:C:N" => 499,
|
363
|
+
"C-S-C:C" => 500,
|
364
|
+
"Cl-C:C-C" => 501,
|
365
|
+
"N-C=C-[#1]" => 502,
|
366
|
+
"Cl-C:C-[#1]" => 503,
|
367
|
+
"N:C:N-C" => 504,
|
368
|
+
"Cl-C:C-O" => 505,
|
369
|
+
"C-C:N:C" => 506,
|
370
|
+
"C-C-S-C" => 507,
|
371
|
+
"S=C-N-C" => 508,
|
372
|
+
"Br-C:C-C" => 509,
|
373
|
+
"[#1]-N-N-[#1]" => 510,
|
374
|
+
"S=C-N-[#1]" => 511,
|
375
|
+
"C-[As]-O-[#1]" => 512,
|
376
|
+
"S:C:C-[#1]" => 513,
|
377
|
+
"O-N-C-C" => 514,
|
378
|
+
"N-N-C-C" => 515,
|
379
|
+
"[#1]-C=C-[#1]" => 516,
|
380
|
+
"N-N-C-N" => 517,
|
381
|
+
"O=C-N-N" => 518,
|
382
|
+
"N=C-N-C" => 519,
|
383
|
+
"C=C-C:C" => 520,
|
384
|
+
"C:N-C-[#1]" => 521,
|
385
|
+
"C-N-N-[#1]" => 522,
|
386
|
+
"N:C:C-C" => 523,
|
387
|
+
"C-C=C-C" => 524,
|
388
|
+
"[As]-C:C-[#1]" => 525,
|
389
|
+
"Cl-C:C-Cl" => 526,
|
390
|
+
"C:C:N-[#1]" => 527,
|
391
|
+
"[#1]-N-C-[#1]" => 528,
|
392
|
+
"Cl-C-C-Cl" => 529,
|
393
|
+
"N:C-C:C" => 530,
|
394
|
+
"S-C:C-C" => 531,
|
395
|
+
"S-C:C-[#1]" => 532,
|
396
|
+
"S-C:C-N" => 533,
|
397
|
+
"S-C:C-O" => 534,
|
398
|
+
"O=C-C-C" => 535,
|
399
|
+
"O=C-C-N" => 536,
|
400
|
+
"O=C-C-O" => 537,
|
401
|
+
"N=C-C-C" => 538,
|
402
|
+
"N=C-C-[#1]" => 539,
|
403
|
+
"C-N-C-[#1]" => 540,
|
404
|
+
"O-C:C-C" => 541,
|
405
|
+
"O-C:C-[#1]" => 542,
|
406
|
+
"O-C:C-N" => 543,
|
407
|
+
"O-C:C-O" => 544,
|
408
|
+
"N-C:C-C" => 545,
|
409
|
+
"N-C:C-[#1]" => 546,
|
410
|
+
"N-C:C-N" => 547,
|
411
|
+
"O-C-C:C" => 548,
|
412
|
+
"N-C-C:C" => 549,
|
413
|
+
"Cl-C-C-C" => 550,
|
414
|
+
"Cl-C-C-O" => 551,
|
415
|
+
"C:C-C:C" => 552,
|
416
|
+
"O=C-C=C" => 553,
|
417
|
+
"Br-C-C-C" => 554,
|
418
|
+
"N=C-C=C" => 555,
|
419
|
+
"C=C-C-C" => 556,
|
420
|
+
"N:C-O-[#1]" => 557,
|
421
|
+
"O=N-C:C" => 558,
|
422
|
+
"O-C-N-[#1]" => 559,
|
423
|
+
"N-C-N-C" => 560,
|
424
|
+
"Cl-C-C=O" => 561,
|
425
|
+
"Br-C-C=O" => 562,
|
426
|
+
"O-C-O-C" => 563,
|
427
|
+
"C=C-C=C" => 564,
|
428
|
+
"C:C-O-C" => 565,
|
429
|
+
"O-C-C-N" => 566,
|
430
|
+
"O-C-C-O" => 567,
|
431
|
+
"N#C-C-C" => 568,
|
432
|
+
"N-C-C-N" => 569,
|
433
|
+
"C:C-C-C" => 570,
|
434
|
+
"[#1]-C-O-[#1]" => 571,
|
435
|
+
"N:C:N:C" => 572,
|
436
|
+
"O-C-C=C" => 573,
|
437
|
+
"O-C-C:C-C" => 574,
|
438
|
+
"O-C-C:C-O" => 575,
|
439
|
+
"N=C-C:C-[#1]" => 576,
|
440
|
+
"C:C-N-C:C" => 577,
|
441
|
+
"C-C:C-C:C" => 578,
|
442
|
+
"O=C-C-C-C" => 579,
|
443
|
+
"O=C-C-C-N" => 580,
|
444
|
+
"O=C-C-C-O" => 581,
|
445
|
+
"C-C-C-C-C" => 582,
|
446
|
+
"Cl-C:C-O-C" => 583,
|
447
|
+
"C:C-C=C-C" => 584,
|
448
|
+
"C-C:C-N-C" => 585,
|
449
|
+
"C-S-C-C-C" => 586,
|
450
|
+
"N-C:C-O-[#1]" => 587,
|
451
|
+
"O=C-C-C=O" => 588,
|
452
|
+
"C-C:C-O-C" => 589,
|
453
|
+
"C-C:C-O-[#1]" => 590,
|
454
|
+
"Cl-C-C-C-C" => 591,
|
455
|
+
"N-C-C-C-C" => 592,
|
456
|
+
"N-C-C-C-N" => 593,
|
457
|
+
"C-O-C-C=C" => 594,
|
458
|
+
"C:C-C-C-C" => 595,
|
459
|
+
"N=C-N-C-C" => 596,
|
460
|
+
"O=C-C-C:C" => 597,
|
461
|
+
"Cl-C:C:C-C" => 598,
|
462
|
+
"[#1]-C-C=C-[#1]" => 599,
|
463
|
+
"N-C:C:C-C" => 600,
|
464
|
+
"N-C:C:C-N" => 601,
|
465
|
+
"O=C-C-N-C" => 602,
|
466
|
+
"C-C:C:C-C" => 603,
|
467
|
+
"C-O-C-C:C" => 604,
|
468
|
+
"O=C-C-O-C" => 605,
|
469
|
+
"O-C:C-C-C" => 606,
|
470
|
+
"N-C-C-C:C" => 607,
|
471
|
+
"C-C-C-C:C" => 608,
|
472
|
+
"Cl-C-C-N-C" => 609,
|
473
|
+
"C-O-C-O-C" => 610,
|
474
|
+
"N-C-C-N-C" => 611,
|
475
|
+
"N-C-O-C-C" => 612,
|
476
|
+
"C-N-C-C-C" => 613,
|
477
|
+
"C-C-O-C-C" => 614,
|
478
|
+
"N-C-C-O-C" => 615,
|
479
|
+
"C:C:N:N:C" => 616,
|
480
|
+
"C-C-C-O-[#1]" => 617,
|
481
|
+
"C:C-C-C:C" => 618,
|
482
|
+
"O-C-C=C-C" => 619,
|
483
|
+
"C:C-O-C-C" => 620,
|
484
|
+
"N-C:C:C:N" => 621,
|
485
|
+
"O=C-O-C:C" => 622,
|
486
|
+
"O=C-C:C-C" => 623,
|
487
|
+
"O=C-C:C-N" => 624,
|
488
|
+
"O=C-C:C-O" => 625,
|
489
|
+
"C-O-C:C-C" => 626,
|
490
|
+
"O=[As]-C:C:C" => 627,
|
491
|
+
"C-N-C-C:C" => 628,
|
492
|
+
"S-C:C:C-N" => 629,
|
493
|
+
"O-C:C-O-C" => 630,
|
494
|
+
"O-C:C-O-[#1]" => 631,
|
495
|
+
"C-C-O-C:C" => 632,
|
496
|
+
"N-C-C:C-C" => 633,
|
497
|
+
"C-C-C:C-C" => 634,
|
498
|
+
"N-N-C-N-[#1]" => 635,
|
499
|
+
"C-N-C-N-C" => 636,
|
500
|
+
"O-C-C-C-C" => 637,
|
501
|
+
"O-C-C-C-N" => 638,
|
502
|
+
"O-C-C-C-O" => 639,
|
503
|
+
"C=C-C-C-C" => 640,
|
504
|
+
"O-C-C-C=C" => 641,
|
505
|
+
"O-C-C-C=O" => 642,
|
506
|
+
"[#1]-C-C-N-[#1]" => 643,
|
507
|
+
"C-C=N-N-C" => 644,
|
508
|
+
"O=C-N-C-C" => 645,
|
509
|
+
"O=C-N-C-[#1]" => 646,
|
510
|
+
"O=C-N-C-N" => 647,
|
511
|
+
"O=N-C:C-N" => 648,
|
512
|
+
"O=N-C:C-O" => 649,
|
513
|
+
"O=C-N-C=O" => 650,
|
514
|
+
"O-C:C:C-C" => 651,
|
515
|
+
"O-C:C:C-N" => 652,
|
516
|
+
"O-C:C:C-O" => 653,
|
517
|
+
"N-C-N-C-C" => 654,
|
518
|
+
"O-C-C-C:C" => 655,
|
519
|
+
"C-C-N-C-C" => 656,
|
520
|
+
"C-N-C:C-C" => 657,
|
521
|
+
"C-C-S-C-C" => 658,
|
522
|
+
"O-C-C-N-C" => 659,
|
523
|
+
"C-C=C-C-C" => 660,
|
524
|
+
"O-C-O-C-C" => 661,
|
525
|
+
"O-C-C-O-C" => 662,
|
526
|
+
"O-C-C-O-[#1]" => 663,
|
527
|
+
"C-C=C-C=C" => 664,
|
528
|
+
"N-C:C-C-C" => 665,
|
529
|
+
"C=C-C-O-C" => 666,
|
530
|
+
"C=C-C-O-[#1]" => 667,
|
531
|
+
"C-C:C-C-C" => 668,
|
532
|
+
"Cl-C:C-C=O" => 669,
|
533
|
+
"Br-C:C:C-C" => 670,
|
534
|
+
"O=C-C=C-C" => 671,
|
535
|
+
"O=C-C=C-[#1]" => 672,
|
536
|
+
"O=C-C=C-N" => 673,
|
537
|
+
"N-C-N-C:C" => 674,
|
538
|
+
"Br-C-C-C:C" => 675,
|
539
|
+
"N#C-C-C-C" => 676,
|
540
|
+
"C-C=C-C:C" => 677,
|
541
|
+
"C-C-C=C-C" => 678,
|
542
|
+
"C-C-C-C-C-C" => 679,
|
543
|
+
"O-C-C-C-C-C" => 680,
|
544
|
+
"O-C-C-C-C-O" => 681,
|
545
|
+
"O-C-C-C-C-N" => 682,
|
546
|
+
"N-C-C-C-C-C" => 683,
|
547
|
+
"O=C-C-C-C-C" => 684,
|
548
|
+
"O=C-C-C-C-N" => 685,
|
549
|
+
"O=C-C-C-C-O" => 686,
|
550
|
+
"O=C-C-C-C=O" => 687,
|
551
|
+
"C-C-C-C-C-C-C" => 688,
|
552
|
+
"O-C-C-C-C-C-C" => 689,
|
553
|
+
"O-C-C-C-C-C-O" => 690,
|
554
|
+
"O-C-C-C-C-C-N" => 691,
|
555
|
+
"O=C-C-C-C-C-C" => 692,
|
556
|
+
"O=C-C-C-C-C-O" => 693,
|
557
|
+
"O=C-C-C-C-C=O" => 694,
|
558
|
+
"O=C-C-C-C-C-N" => 695,
|
559
|
+
"C-C-C-C-C-C-C-C" => 696,
|
560
|
+
"C-C-C-C-C-C(C)-C" => 697,
|
561
|
+
"O-C-C-C-C-C-C-C" => 698,
|
562
|
+
"O-C-C-C-C-C(C)-C" => 699,
|
563
|
+
"O-C-C-C-C-C-O-C" => 700,
|
564
|
+
"O-C-C-C-C-C(O)-C" => 701,
|
565
|
+
"O-C-C-C-C-C-N-C" => 702,
|
566
|
+
"O-C-C-C-C-C(N)-C" => 703,
|
567
|
+
"O=C-C-C-C-C-C-C" => 704,
|
568
|
+
"O=C-C-C-C-C(O)-C" => 705,
|
569
|
+
"O=C-C-C-C-C(=O)-C" => 706,
|
570
|
+
"O=C-C-C-C-C(N)-C" => 707,
|
571
|
+
"C-C(C)-C-C" => 708,
|
572
|
+
"C-C(C)-C-C-C" => 709,
|
573
|
+
"C-C-C(C)-C-C" => 710,
|
574
|
+
"C-C(C)(C)-C-C" => 711,
|
575
|
+
"C-C(C)-C(C)-C" => 712,
|
576
|
+
}
|
577
|
+
|
578
|
+
# Section 7: Complex SMARTS patterns
|
18
579
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
580
|
+
Section7 = {
|
581
|
+
"Cc1ccc(C)cc1" => 713,
|
582
|
+
"Cc1ccc(O)cc1" => 714,
|
583
|
+
"Cc1ccc(S)cc1" => 715,
|
584
|
+
"Cc1ccc(N)cc1" => 716,
|
585
|
+
"Cc1ccc(Cl)cc1" => 717,
|
586
|
+
"Cc1ccc(Br)cc1" => 718,
|
587
|
+
"Oc1ccc(O)cc1" => 719,
|
588
|
+
"Oc1ccc(S)cc1" => 720,
|
589
|
+
"Oc1ccc(N)cc1" => 721,
|
590
|
+
"Oc1ccc(Cl)cc1" => 722,
|
591
|
+
"Oc1ccc(Br)cc1" => 723,
|
592
|
+
"Sc1ccc(S)cc1" => 724,
|
593
|
+
"Sc1ccc(N)cc1" => 725,
|
594
|
+
"Sc1ccc(Cl)cc1" => 726,
|
595
|
+
"Sc1ccc(Br)cc1" => 727,
|
596
|
+
"Nc1ccc(N)cc1" => 728,
|
597
|
+
"Nc1ccc(Cl)cc1" => 729,
|
598
|
+
"Nc1ccc(Br)cc1" => 730,
|
599
|
+
"Clc1ccc(Cl)cc1" => 731,
|
600
|
+
"Clc1ccc(Br)cc1" => 732,
|
601
|
+
"Brc1ccc(Br)cc1" => 733,
|
602
|
+
"Cc1cc(C)ccc1" => 734,
|
603
|
+
"Cc1cc(O)ccc1" => 735,
|
604
|
+
"Cc1cc(S)ccc1" => 736,
|
605
|
+
"Cc1cc(N)ccc1" => 737,
|
606
|
+
"Cc1cc(Cl)ccc1" => 738,
|
607
|
+
"Cc1cc(Br)ccc1" => 739,
|
608
|
+
"Oc1cc(O)ccc1" => 740,
|
609
|
+
"Oc1cc(S)ccc1" => 741,
|
610
|
+
"Oc1cc(N)ccc1" => 742,
|
611
|
+
"Oc1cc(Cl)ccc1" => 743,
|
612
|
+
"Oc1cc(Br)ccc1" => 744,
|
613
|
+
"Sc1cc(S)ccc1" => 745,
|
614
|
+
"Sc1cc(N)ccc1" => 746,
|
615
|
+
"Sc1cc(Cl)ccc1" => 747,
|
616
|
+
"Sc1cc(Br)ccc1" => 748,
|
617
|
+
"Nc1cc(N)ccc1" => 749,
|
618
|
+
"Nc1cc(Cl)ccc1" => 750,
|
619
|
+
"Nc1cc(Br)ccc1" => 751,
|
620
|
+
"Clc1cc(Cl)ccc1" => 752,
|
621
|
+
"Clc1cc(Br)ccc1" => 753,
|
622
|
+
"Brc1cc(Br)ccc1" => 754,
|
623
|
+
"Cc1c(C)cccc1" => 755,
|
624
|
+
"Cc1c(O)cccc1" => 756,
|
625
|
+
"Cc1c(S)cccc1" => 757,
|
626
|
+
"Cc1c(N)cccc1" => 758,
|
627
|
+
"Cc1c(Cl)cccc1" => 759,
|
628
|
+
"Cc1c(Br)cccc1" => 760,
|
629
|
+
"Oc1c(O)cccc1" => 761,
|
630
|
+
"Oc1c(S)cccc1" => 762,
|
631
|
+
"Oc1c(N)cccc1" => 763,
|
632
|
+
"Oc1c(Cl)cccc1" => 764,
|
633
|
+
"Oc1c(Br)cccc1" => 765,
|
634
|
+
"Sc1c(S)cccc1" => 766,
|
635
|
+
"Sc1c(N)cccc1" => 767,
|
636
|
+
"Sc1c(Cl)cccc1" => 768,
|
637
|
+
"Sc1c(Br)cccc1" => 769,
|
638
|
+
"Nc1c(N)cccc1" => 770,
|
639
|
+
"Nc1c(Cl)cccc1" => 771,
|
640
|
+
"Nc1c(Br)cccc1" => 772,
|
641
|
+
"Clc1c(Cl)cccc1" => 773,
|
642
|
+
"Clc1c(Br)cccc1" => 774,
|
643
|
+
"Brc1c(Br)cccc1" => 775,
|
644
|
+
"CC1CCC(C)CC1" => 776,
|
645
|
+
"CC1CCC(O)CC1" => 777,
|
646
|
+
"CC1CCC(S)CC1" => 778,
|
647
|
+
"CC1CCC(N)CC1" => 779,
|
648
|
+
"CC1CCC(Cl)CC1" => 780,
|
649
|
+
"CC1CCC(Br)CC1" => 781,
|
650
|
+
"OC1CCC(O)CC1" => 782,
|
651
|
+
"OC1CCC(S)CC1" => 783,
|
652
|
+
"OC1CCC(N)CC1" => 784,
|
653
|
+
"OC1CCC(Cl)CC1" => 785,
|
654
|
+
"OC1CCC(Br)CC1" => 786,
|
655
|
+
"SC1CCC(S)CC1" => 787,
|
656
|
+
"SC1CCC(N)CC1" => 788,
|
657
|
+
"SC1CCC(Cl)CC1" => 789,
|
658
|
+
"SC1CCC(Br)CC1" => 790,
|
659
|
+
"NC1CCC(N)CC1" => 791,
|
660
|
+
"NC1CCC(Cl)CC1" => 792,
|
661
|
+
"NC1CCC(Br)CC1" => 793,
|
662
|
+
"ClC1CCC(Cl)CC1" => 794,
|
663
|
+
"ClC1CCC(Br)CC1" => 795,
|
664
|
+
"BrC1CCC(Br)CC1" => 796,
|
665
|
+
"CC1CC(C)CCC1" => 797,
|
666
|
+
"CC1CC(O)CCC1" => 798,
|
667
|
+
"CC1CC(S)CCC1" => 799,
|
668
|
+
"CC1CC(N)CCC1" => 800,
|
669
|
+
"CC1CC(Cl)CCC1" => 801,
|
670
|
+
"CC1CC(Br)CCC1" => 802,
|
671
|
+
"OC1CC(O)CCC1" => 803,
|
672
|
+
"OC1CC(S)CCC1" => 804,
|
673
|
+
"OC1CC(N)CCC1" => 805,
|
674
|
+
"OC1CC(Cl)CCC1" => 806,
|
675
|
+
"OC1CC(Br)CCC1" => 807,
|
676
|
+
"SC1CC(S)CCC1" => 808,
|
677
|
+
"SC1CC(N)CCC1" => 809,
|
678
|
+
"SC1CC(Cl)CCC1" => 810,
|
679
|
+
"SC1CC(Br)CCC1" => 811,
|
680
|
+
"NC1CC(N)CCC1" => 812,
|
681
|
+
"NC1CC(Cl)CCC1" => 813,
|
682
|
+
"NC1CC(Br)CCC1" => 814,
|
683
|
+
"ClC1CC(Cl)CCC1" => 815,
|
684
|
+
"ClC1CC(Br)CCC1" => 816,
|
685
|
+
"BrC1CC(Br)CCC1" => 817,
|
686
|
+
"CC1C(C)CCCC1" => 818,
|
687
|
+
"CC1C(O)CCCC1" => 819,
|
688
|
+
"CC1C(S)CCCC1" => 820,
|
689
|
+
"CC1C(N)CCCC1" => 821,
|
690
|
+
"CC1C(Cl)CCCC1" => 822,
|
691
|
+
"CC1C(Br)CCCC1" => 823,
|
692
|
+
"OC1C(O)CCCC1" => 824,
|
693
|
+
"OC1C(S)CCCC1" => 825,
|
694
|
+
"OC1C(N)CCCC1" => 826,
|
695
|
+
"OC1C(Cl)CCCC1" => 827,
|
696
|
+
"OC1C(Br)CCCC1" => 828,
|
697
|
+
"SC1C(S)CCCC1" => 829,
|
698
|
+
"SC1C(N)CCCC1" => 830,
|
699
|
+
"SC1C(Cl)CCCC1" => 831,
|
700
|
+
"SC1C(Br)CCCC1" => 832,
|
701
|
+
"NC1C(N)CCCC1" => 833,
|
702
|
+
"NC1C(Cl)CCCC1" => 834,
|
703
|
+
"NC1C(Br)CCCC1" => 835,
|
704
|
+
"ClC1C(Cl)CCCC1" => 836,
|
705
|
+
"ClC1C(Br)CCCC1" => 837,
|
706
|
+
"BrC1C(Br)CCCC1" => 838,
|
707
|
+
"CC1CC(C)CC1" => 839,
|
708
|
+
"CC1CC(O)CC1" => 840,
|
709
|
+
"CC1CC(S)CC1" => 841,
|
710
|
+
"CC1CC(N)CC1" => 842,
|
711
|
+
"CC1CC(Cl)CC1" => 843,
|
712
|
+
"CC1CC(Br)CC1" => 844,
|
713
|
+
"OC1CC(O)CC1" => 845,
|
714
|
+
"OC1CC(S)CC1" => 846,
|
715
|
+
"OC1CC(N)CC1" => 847,
|
716
|
+
"OC1CC(Cl)CC1" => 848,
|
717
|
+
"OC1CC(Br)CC1" => 849,
|
718
|
+
"SC1CC(S)CC1" => 850,
|
719
|
+
"SC1CC(N)CC1" => 851,
|
720
|
+
"SC1CC(Cl)CC1" => 852,
|
721
|
+
"SC1CC(Br)CC1" => 853,
|
722
|
+
"NC1CC(N)CC1" => 854,
|
723
|
+
"NC1CC(Cl)CC1" => 855,
|
724
|
+
"NC1CC(Br)CC1" => 856,
|
725
|
+
"ClC1CC(Cl)CC1" => 857,
|
726
|
+
"ClC1CC(Br)CC1" => 858,
|
727
|
+
"BrC1CC(Br)CC1" => 859,
|
728
|
+
"CC1C(C)CCC1" => 860,
|
729
|
+
"CC1C(O)CCC1" => 861,
|
730
|
+
"CC1C(S)CCC1" => 862,
|
731
|
+
"CC1C(N)CCC1" => 863,
|
732
|
+
"CC1C(Cl)CCC1" => 864,
|
733
|
+
"CC1C(Br)CCC1" => 865,
|
734
|
+
"OC1C(O)CCC1" => 866,
|
735
|
+
"OC1C(S)CCC1" => 867,
|
736
|
+
"OC1C(N)CCC1" => 868,
|
737
|
+
"OC1C(Cl)CCC1" => 869,
|
738
|
+
"OC1C(Br)CCC1" => 870,
|
739
|
+
"SC1C(S)CCC1" => 871,
|
740
|
+
"SC1C(N)CCC1" => 872,
|
741
|
+
"SC1C(Cl)CCC1" => 873,
|
742
|
+
"SC1C(Br)CCC1" => 874,
|
743
|
+
"NC1C(N)CCC1" => 875,
|
744
|
+
"NC1C(Cl)CCC1" => 876,
|
745
|
+
"NC1C(Br)CCC1" => 877,
|
746
|
+
"ClC1C(Cl)CCC1" => 878,
|
747
|
+
"ClC1C(Br)CCC1" => 879,
|
748
|
+
"BrC1C(Br)CCC1" => 880,
|
749
|
+
}
|
750
|
+
|
751
|
+
# Human-readable description of PubChem substructure-key bits 0..262,
# indexed by bit number (PubChemSubsKey[bit] describes that bit).
#
# Layout of the table:
#   0..48    element-count thresholds for elements with several thresholds
#   49..114  ">= 1 <element>" presence keys
#   115..254 ring-count keys, grouped by ring size (3..10)
#   255..262 aromatic / hetero-aromatic ring-count keys
#
# The table is generated from compact descriptions instead of being spelled
# out entry by entry, and is frozen so it cannot be modified by accident.
PubChemSubsKey = lambda {
  keys = []

  # Bits 0..48: [element symbol, minimum atom counts], in bit order.
  [
    ["H",  [4, 8, 16, 32]],
    ["Li", [1, 2]],
    ["B",  [1, 2, 4]],
    ["C",  [2, 4, 8, 16, 32]],
    ["N",  [1, 2, 4, 8]],
    ["O",  [1, 2, 4, 8, 16]],
    ["F",  [1, 2, 4]],
    ["Na", [1, 2]],
    ["Si", [1, 2]],
    ["P",  [1, 2, 4]],
    ["S",  [1, 2, 4, 8]],
    ["Cl", [1, 2, 4, 8]],
    ["K",  [1, 2]],
    ["Br", [1, 2, 4]],
    ["I",  [1, 2, 4]]
  ].each do |element, minimums|
    minimums.each { |minimum| keys << ">= #{minimum} #{element}" }
  end

  # Bits 49..114: elements that only have a ">= 1" key, in bit order.
  %w[
    Be Mg Al Ca Sc Ti V Cr Mn Fe Co Ni Cu Zn Ga Ge As Se Kr Rb Sr Y Zr Nb Mo
    Ru Rh Pd Ag Cd In Sn Sb Te Xe Cs Ba Lu Hf Ta W Re Os Ir Pt Au Hg Tl Pb Bi
    La Ce Pr Nd Pm Sm Eu Gd Tb Dy Ho Er Tm Yb Tc U
  ].each { |element| keys << ">= 1 #{element}" }

  # Bits 115..254: for every ring size, each count threshold is expanded
  # into the same seven ring categories, in this order.
  ring_kinds = [
    "any ring",
    "saturated carbon-only ring",
    "saturated nitrogen-containing ring",
    "saturated heteroatom-containing ring",
    "unsaturated or aromatic carbon-only ring",
    "unsaturated or aromatic nitrogen-containing ring",
    "unsaturated or aromatic heteroatom-containing ring"
  ]
  # [ring size, highest count threshold present in the table]
  [[3, 2], [4, 2], [5, 5], [6, 5], [7, 2], [8, 2], [9, 1], [10, 1]].each do |ring_size, max_count|
    1.upto(max_count) do |count|
      ring_kinds.each { |kind| keys << ">= #{count} #{kind} size #{ring_size}" }
    end
  end

  # Bits 255..262: aromatic ring counts ("ring" for 1, "rings" otherwise).
  1.upto(4) do |count|
    noun = count == 1 ? "ring" : "rings"
    keys << ">= #{count} aromatic #{noun}"
    keys << ">= #{count} hetero-aromatic #{noun}"
  end

  keys.freeze
}.call
|
1016
|
+
|
1017
|
+
# Maps a ring size (3..10) to the fingerprint bit index at which the keys
# for that ring size begin, i.e. the index of ">= 1 any ring size N" in the
# PubChem substructure-key table. Frozen because it is a lookup constant.
RingSizeBaseNum = {
  3  => 115,
  4  => 129,
  5  => 143,
  6  => 178,
  7  => 213,
  8  => 227,
  9  => 241,
  10 => 248
}.freeze
|
1027
|
+
module Molecule
  # Computes a PubChem-style substructure fingerprint for this molecule.
  #
  # Returns an Integer used as a bit field: bit N is set when key N matched.
  #
  # Sections evaluated:
  #   1    element-count thresholds, via HierarchicElementCounts
  #   3    bonded element pairs, via Section3
  #   6, 7 SMARTS patterns, via Section6 / Section7 and Chem::OpenBabel
  # Sections 2 (ring counts) and 4 (atom neighbourhoods) are not implemented
  # yet and contribute no bits.
  #
  # Relies on #composition and #edges being provided by the including class.
  def generate_pubchem_subskey
    fp = 0

    # Section 1: each table entry is a (minimum atom count, bit) pair.
    self.composition.each do |elem, num|
      thresholds = HierarchicElementCounts[elem]
      next unless thresholds # element has no count keys; nothing to set
      thresholds.each do |n_atoms, bit|
        fp |= (1 << bit) if num >= n_atoms
      end
    end

    # Section 2 (ring counts per ring size): TODO, not implemented.

    # Section 3: Section3 is keyed by the two element symbols of a bond,
    # sorted and joined with "-"; pairs without an entry set no bit.
    pairs = self.edges.collect do |_bond, atom1, atom2|
      [atom1.element.to_s, atom2.element.to_s].sort.join("-")
    end
    pairs.uniq.each do |pair|
      bit = Section3[pair]
      fp |= (1 << bit) if bit
    end

    # Section 4 (atom neighbourhoods): TODO, not implemented.

    # Sections 6 and 7: every SMARTS pattern that matches sets its bit.
    [Section6, Section7].each do |section|
      section.each do |smarts, bit|
        pat = Chem::OpenBabel::parse_smarts(smarts)
        fp |= (1 << bit) if pat.match(self)
      end
    end

    fp
  end

  # Extract PubChem substructural keys
  # see ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.txt
  #
  # Decodes the Base64 value stored under "PUBCHEM_CACTVS_SUBSKEYS" in
  # #sdf_data and returns it as an Integer bit field.
  #
  # The first four decoded bytes are skipped (the specification describes
  # them as a length prefix). In the remaining bytes the most significant
  # bit of byte 0 is fingerprint bit 0, the next one is bit 1, and so on.
  # Decoding is done byte by byte so that trailing bytes which do not fill
  # a whole 32-bit word are kept rather than silently dropped.
  #
  # Returns 0 when there is no data after the prefix.
  # Raises ArgumentError when the property is missing.
  def pubchem_subskeys
    b64 = self.sdf_data["PUBCHEM_CACTVS_SUBSKEYS"]
    raise ArgumentError, "PUBCHEM_CACTVS_SUBSKEYS is missing from sdf_data" if b64.nil?

    # String#unpack("m") performs the Base64 decoding; "C*" yields bytes.
    bytes = b64.unpack("m").first.unpack("C*")
    payload = bytes[4..-1] || [] # nil when fewer than four bytes decoded

    fp = 0
    payload.each_with_index do |byte, byte_idx|
      next if byte.zero?
      8.times do |n|
        # Bit (7 - n) of this byte is fingerprint bit (byte_idx * 8 + n).
        fp |= (1 << (byte_idx * 8 + n)) if byte[7 - n] == 1
      end
    end
    fp
  end

end
|
99
1094
|
|
100
1095
|
end
|
101
1096
|
|
102
|
-
if $0 == __FILE__
|
103
|
-
smiles="CC23(CCC1c4ccc(O)cc4(CCC1C3(CC(O)C2(O))))"
|
104
|
-
puts "===== CID(s) for SMILES, #{smiles} ====="
|
105
|
-
cid = Chem::PubChem.smiles_search(smiles)
|
106
|
-
p cid
|
107
|
-
puts "===== MOL format data ===="
|
108
|
-
cid.each do |c|
|
109
|
-
puts c.get_sdf
|
110
|
-
end
|
111
|
-
# p Chem::PubChem.get_xml(cid[0])
|
112
|
-
# puts Chem::PubChem.get_xml(cid[0]).sdf2mol.data
|
113
|
-
end
|