chemruby 0.9.3 → 1.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +2 -2
- data/Rakefile +67 -63
- data/ext/extconf.rb +2 -0
- data/ext/subcomp.c +461 -320
- data/ext/utils.c +56 -0
- data/ext/utils.h +13 -0
- data/lib/chem.rb +34 -8
- data/lib/chem/db.rb +8 -0
- data/lib/chem/db/cansmi.rb +1 -1
- data/lib/chem/db/cdx.rb +1 -1
- data/lib/chem/db/cml.rb +52 -0
- data/lib/chem/db/gd.rb +64 -0
- data/lib/chem/db/gspan.rb +2 -2
- data/lib/chem/db/kcf_rpair.rb +34 -0
- data/lib/chem/db/kegg.rb +35 -1
- data/lib/chem/db/mdl.rb +75 -34
- data/lib/chem/db/opsin.rb +24 -0
- data/lib/chem/db/pdb.rb +105 -0
- data/lib/chem/db/pdf.rb +2 -0
- data/lib/chem/db/pubchem.rb +1071 -88
- data/lib/chem/db/rmagick.rb +5 -3
- data/lib/chem/db/sdf.rb +28 -2
- data/lib/chem/db/smiles/smiles.ry +27 -25
- data/lib/chem/db/smiles/smiparser.rb +29 -27
- data/lib/chem/db/types/type_gd.rb +35 -0
- data/lib/chem/db/types/type_gspan.rb +2 -2
- data/lib/chem/db/types/type_kcf.rb +19 -0
- data/lib/chem/db/types/type_kegg.rb +2 -0
- data/lib/chem/db/types/type_mdl.rb +1 -1
- data/lib/chem/db/types/type_png.rb +5 -1
- data/lib/chem/db/types/type_rdf.rb +22 -0
- data/lib/chem/db/types/type_xyz.rb +1 -1
- data/lib/chem/db/vector.rb +19 -3
- data/lib/chem/model.rb +5 -2
- data/lib/chem/utils.rb +17 -1
- data/lib/chem/utils/bitdb.rb +49 -0
- data/lib/chem/utils/cas.rb +28 -0
- data/lib/chem/utils/cdk.rb +403 -0
- data/lib/chem/utils/fingerprint.rb +98 -0
- data/lib/chem/utils/geometry.rb +8 -0
- data/lib/chem/utils/net.rb +303 -0
- data/lib/chem/utils/once.rb +28 -0
- data/lib/chem/utils/openbabel.rb +204 -0
- data/lib/chem/utils/sssr.rb +33 -25
- data/lib/chem/utils/sub.rb +6 -0
- data/lib/chem/utils/transform.rb +9 -8
- data/lib/chem/utils/ullmann.rb +138 -95
- data/lib/graph.rb +5 -6
- data/lib/graph/utils.rb +8 -0
- data/sample/calc_maximum_common_subgraph.rb +27 -0
- data/sample/calc_properties.rb +9 -0
- data/sample/data/atp.mol +69 -0
- data/sample/data/pioglitazone.mol +58 -0
- data/sample/data/rosiglitazone.mol +55 -0
- data/sample/data/troglitazone.mol +70 -0
- data/sample/find_compound_by_keggapi.rb +19 -0
- data/sample/generate_inchi.rb +7 -0
- data/sample/generate_substructurekey.rb +11 -0
- data/sample/images/ex6.rb +17 -0
- data/sample/images/ex7.rb +18 -0
- data/sample/iupac2mol.rb +8 -0
- data/sample/kekule.rb +13 -0
- data/sample/logp.rb +4 -0
- data/sample/mcs.rb +13 -0
- data/sample/mol2pdf.rb +8 -0
- data/sample/pubchem_fetch.rb +8 -0
- data/sample/pubchem_search.rb +12 -0
- data/sample/rosiglitazone.mol +57 -0
- data/sample/smarts.rb +10 -0
- data/sample/structure_match.rb +8 -0
- data/sample/structure_match_color.rb +22 -0
- data/sample/thiazolidinedione.mol +19 -0
- data/sample/troglitazone.mol +232 -0
- data/sample/vicinity.rb +8 -0
- data/test/data/CID_704.sdf +236 -0
- data/test/data/CID_994.sdf +146 -0
- data/test/data/db_EXPT03276.txt +321 -0
- data/test/data/pioglitazone.mol +58 -0
- data/test/data/rosiglitazone.mol +55 -0
- data/test/data/thiazolidinedione.mol +19 -0
- data/test/data/troglitazone.mol +70 -0
- data/test/{test_adj.rb → tc_adj.rb} +0 -0
- data/test/{test_canonical_smiles.rb → tc_canonical_smiles.rb} +0 -0
- data/test/tc_casrn.rb +17 -0
- data/test/tc_cdk.rb +89 -0
- data/test/{test_cdx.rb → tc_cdx.rb} +0 -0
- data/test/{test_chem.rb → tc_chem.rb} +0 -0
- data/test/{test_cluster.rb → tc_cluster.rb} +0 -0
- data/test/{test_db.rb → tc_db.rb} +0 -0
- data/test/tc_develop.rb +38 -0
- data/test/tc_drugbank.rb +13 -0
- data/test/{test_eps.rb → tc_eps.rb} +0 -0
- data/test/tc_gd.rb +8 -0
- data/test/{test_geometry.rb → tc_geometry.rb} +0 -0
- data/test/tc_graph.rb +15 -0
- data/test/{test_gspan.rb → tc_gspan.rb} +0 -0
- data/test/{test_iupac.rb → tc_iupac.rb} +0 -0
- data/test/{test_kcf.rb → tc_kcf.rb} +0 -0
- data/test/{test_kcf_glycan.rb → tc_kcf_glycan.rb} +0 -0
- data/test/{test_kegg.rb → tc_kegg.rb} +13 -0
- data/test/{test_linucs.rb → tc_linucs.rb} +0 -0
- data/test/{test_mdl.rb → tc_mdl.rb} +20 -0
- data/test/{test_mol2.rb → tc_mol2.rb} +1 -1
- data/test/{test_morgan.rb → tc_morgan.rb} +0 -0
- data/test/tc_net.rb +5 -0
- data/test/tc_once.rb +29 -0
- data/test/tc_openbabel.rb +57 -0
- data/test/{test_pdf.rb → tc_pdf.rb} +0 -0
- data/test/{test_prop.rb → tc_prop.rb} +1 -1
- data/test/tc_pubchem.rb +32 -0
- data/test/{test_rmagick.rb → tc_rmagick.rb} +0 -0
- data/test/{test_sbdb.rb → tc_sbdb.rb} +0 -0
- data/test/{test_sdf.rb → tc_sdf.rb} +2 -0
- data/test/{test_smiles.rb → tc_smiles.rb} +46 -30
- data/test/tc_sssr.rb +1 -0
- data/test/{test_sub.rb → tc_sub.rb} +0 -0
- data/test/tc_subcomp.rb +59 -0
- data/test/{test_traverse.rb → tc_traverse.rb} +0 -0
- data/test/{test_writer.rb → tc_writer.rb} +0 -0
- data/test/{test_xyz.rb → tc_xyz.rb} +0 -0
- data/test/ts_current.rb +11 -0
- data/test/ts_image.rb +6 -0
- data/test/ts_main.rb +12 -0
- metadata +259 -194
- data/lib/chem/utils/graph_db.rb +0 -146
- data/test/test_sssr.rb +0 -18
- data/test/test_subcomp.rb +0 -37
@@ -0,0 +1,24 @@
|
|
1
|
+
|
2
|
+
module Chem

  # Convenience constructor: builds an OpsinMolecule from an IUPAC name.
  #
  # iupac_name:: the chemical name handed to the OPSIN parser
  #
  # Returns a new Chem::OpsinMolecule.
  def self.opsin_parse(iupac_name)
    OpsinMolecule.new(iupac_name)
  end

  # A molecule whose structure is derived from an IUPAC name.
  #
  # The name is passed to the Java class
  # uk.ac.cam.ch.wwmm.opsin.NameToStructure through Rjb, the resulting CML is
  # serialised to a String, and that String is parsed by Chem::CMLMolecule.
  # Node and edge access is delegated to the parsed CML molecule.
  class OpsinMolecule
    include Molecule

    # iupac_name:: the chemical name to convert
    def initialize(iupac_name)
      # Loaded on first use rather than at file load time.
      # NOTE(review): presumably 'rcdk' is what makes Rjb available - confirm.
      require 'rcdk'
      @iupac_name = iupac_name
      parser = Rjb::import('uk.ac.cam.ch.wwmm.opsin.NameToStructure').new
      cml_element = parser.parseToCML(iupac_name)
      @cml = cml_element.toXML.to_s
      @mol = Chem::CMLMolecule.new(@cml)
    end

    # Atoms of the underlying CML molecule.
    def nodes
      @mol.nodes
    end

    # Bonds of the underlying CML molecule.
    def edges
      @mol.edges
    end

  end
end
|
data/lib/chem/db/pdb.rb
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
$: << "/Users/tanaka/proj/chemruby/lib"
|
2
|
+
$: << "/Users/tanaka/proj/chemruby/ext"
|
3
|
+
$: << "/Users/tanaka/temp/bioruby/lib"
|
4
|
+
|
5
|
+
require 'bio'
|
6
|
+
require 'chem'
|
7
|
+
|
8
|
+
module Chem
|
9
|
+
module PDB
|
10
|
+
|
11
|
+
class PDBBond
|
12
|
+
include Bond
|
13
|
+
end
|
14
|
+
|
15
|
+
class PDBMolecule
|
16
|
+
include Chem::Molecule
|
17
|
+
|
18
|
+
def initialize name
|
19
|
+
@name = name
|
20
|
+
@nodes = []
|
21
|
+
@edges = []
|
22
|
+
end
|
23
|
+
|
24
|
+
# Set connection using het_dictionary
|
25
|
+
# Adds bonds to @edges using the connectivity listed in a het dictionary.
#
# het_dic:: an enumerable of dictionary entries. Each entry must respond to
#           +entry_id+ and +record+; +record["CONECT"]+ is iterated, and each
#           of its items must respond to +name+ and +other_atoms+.
#
# The entry whose +entry_id+ equals this molecule's @name is looked up. For
# every CONECT item whose atom name matches one of @nodes, one
# [PDBBond, from_atom, to_atom] triple is pushed per listed partner atom that
# is also present in @nodes. Atom names are compared after +strip+.
#
# Returns the iterated CONECT collection, or nil when the dictionary has no
# entry (or no "CONECT" value) for this molecule; in that case @edges is left
# untouched instead of failing with NoMethodError on nil.
#
# NOTE(review): if the dictionary lists each bond from both of its atoms,
# every bond is pushed twice - confirm against the dictionary format.
def set_connection(het_dic)
  atom_hash = @nodes.inject({}) { |ret, atom| ret[atom.name.strip] = atom; ret }

  con = het_dic.find { |entry| entry.entry_id == @name }
  return unless con

  conects = con.record["CONECT"]
  return unless conects

  conects.each do |b|
    from = atom_hash[b.name.strip]
    # Skip records for atoms that are not part of this molecule.
    next unless from

    b.other_atoms.each do |to_atom|
      to = atom_hash[to_atom.strip]
      @edges.push([PDBBond.new, from, to]) if to
    end
  end
end
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
module Bio
|
46
|
+
|
47
|
+
class PDB
|
48
|
+
|
49
|
+
# Groups the HETATM records of this entry into molecules.
#
# Records are read from @hash["HETATM"]; each distinct
# [resName, chainID] pair gets its own Chem::PDB::PDBMolecule (constructed
# with the residue name), and every record is appended to that molecule's
# +nodes+.
#
# Returns a Hash mapping [resName, chainID] => PDBMolecule. When
# @hash["HETATM"] is nil the result is an empty Hash rather than a
# NoMethodError.
# NOTE(review): assumes @hash yields nil for an entry without HETATM
# records - confirm against Bio::PDB.
def mols
  mols = {}
  (@hash["HETATM"] || []).each do |atom|
    key = [atom.resName, atom.chainID]
    mol = (mols[key] ||= Chem::PDB::PDBMolecule.new(atom.resName))
    mol.nodes.push(atom)
  end
  mols
end
|
57
|
+
|
58
|
+
# represents one entry of het_dictionary.txt
|
59
|
+
class ChemicalComponent
|
60
|
+
end
|
61
|
+
|
62
|
+
class Record::HETATM
|
63
|
+
include Chem::Atom
|
64
|
+
include Chem::Transform::ThreeDimension
|
65
|
+
def pos ; @pos ||= Vector[@x, @y, @z] ; end
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
|
72
|
+
if __FILE__ == $0
|
73
|
+
dir = "/Users/tanaka/data/"
|
74
|
+
|
75
|
+
enzyme = Bio::FlatFile.auto(dir + "/pdb/1j4r.ent")
|
76
|
+
|
77
|
+
mols = {}
|
78
|
+
enzyme.each do |entry|
|
79
|
+
entry.mols.each do |key, mol|
|
80
|
+
p mol.nodes.length
|
81
|
+
dic = Bio::FlatFile.auto(dir + "het_dictionary.txt")
|
82
|
+
mol.set_connection(dic)
|
83
|
+
mol.save("#{key.join('_')}.png")
|
84
|
+
end
|
85
|
+
exit
|
86
|
+
entry.record("HETATM").each do |atom|
|
87
|
+
(mols[atom.resName] ||= []).push atom
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
# p mols.keys
|
92
|
+
end
|
93
|
+
|
94
|
+
#c001 = dic.find{|entry| entry.entry_id == "001"}
|
95
|
+
|
96
|
+
#p c001.hello#.record["CONECT"]
|
97
|
+
|
98
|
+
#p mols["001"]
|
99
|
+
|
100
|
+
__END__
|
101
|
+
|
102
|
+
|
103
|
+
pdb.each do |entry|
|
104
|
+
p entry.entry_id
|
105
|
+
end
|
data/lib/chem/db/pdf.rb
CHANGED
@@ -101,6 +101,8 @@ EOL
|
|
101
101
|
|
102
102
|
def text(str, x, y, params = {})
|
103
103
|
@vect << "BT"
|
104
|
+
color = params[:color].nil? ? "0 0 0" : params[:color].join(" ")
|
105
|
+
@vect << "#{color} rg"
|
104
106
|
@vect << "/F1 #{@params[:font]} Tf"
|
105
107
|
@vect << "1 0 0 1 #{x - @params[:font] * 0.4} #{y - @params[:font] * 0.4} Tm"
|
106
108
|
@vect << "(#{str}) Tj"
|
data/lib/chem/db/pubchem.rb
CHANGED
@@ -1,113 +1,1096 @@
|
|
1
1
|
#
|
2
2
|
# chem/db/pubchem.rb - PubChem database class
|
3
3
|
#
|
4
|
-
# Copyright (C) 2005
|
5
|
-
# TANAKA Nobuya <tanaka@kuicr.kyoto-u.ac.jp>
|
6
|
-
#
|
4
|
+
# Copyright (C) 2005-2007 TANAKA Nobuya <nobuya.tanaka@gmail.com>
|
7
5
|
#
|
8
6
|
|
9
|
-
require '
|
10
|
-
|
7
|
+
require 'chem'
|
8
|
+
# ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.txt
|
11
9
|
|
12
10
|
module Chem
|
11
|
+
# Section 1.
|
12
|
+
HierarchicElementCounts = {
|
13
|
+
:H => [
|
14
|
+
[4, 0],
|
15
|
+
[8, 1],
|
16
|
+
[16, 2],
|
17
|
+
[32, 3]],
|
18
|
+
:Li => [
|
19
|
+
[ 1, 4],
|
20
|
+
[ 2, 5]],
|
21
|
+
:B => [
|
22
|
+
[ 1, 6],
|
23
|
+
[ 2, 7],
|
24
|
+
[ 4, 8]],
|
25
|
+
:C => [
|
26
|
+
[ 2, 9],
|
27
|
+
[ 4, 10],
|
28
|
+
[ 8, 11],
|
29
|
+
[ 16, 12],
|
30
|
+
[ 32, 13]],
|
31
|
+
:N => [
|
32
|
+
[ 1, 14],
|
33
|
+
[ 2, 15],
|
34
|
+
[ 4, 16],
|
35
|
+
[ 8, 17]],
|
36
|
+
:O => [
|
37
|
+
[ 1, 18],
|
38
|
+
[ 2, 19],
|
39
|
+
[ 4, 20],
|
40
|
+
[ 8, 21],
|
41
|
+
[ 16, 22]],
|
42
|
+
:F => [
|
43
|
+
[ 1, 23],
|
44
|
+
[ 2, 24],
|
45
|
+
[ 4, 25]],
|
46
|
+
:Na => [
|
47
|
+
[ 1, 26],
|
48
|
+
[ 2, 27]],
|
49
|
+
:Si => [
|
50
|
+
[ 1, 28],
|
51
|
+
[ 2, 29]],
|
52
|
+
:P => [
|
53
|
+
[ 1, 30],
|
54
|
+
[ 2, 31],
|
55
|
+
[ 4, 32]],
|
56
|
+
:S => [
|
57
|
+
[ 1, 33],
|
58
|
+
[ 2, 34],
|
59
|
+
[ 4, 35],
|
60
|
+
[ 8, 36]],
|
61
|
+
:Cl => [
|
62
|
+
[ 1, 37],
|
63
|
+
[ 2, 38],
|
64
|
+
[ 4, 39],
|
65
|
+
[ 8, 40]],
|
66
|
+
:K => [
|
67
|
+
[ 1, 41],
|
68
|
+
[ 2, 42]],
|
69
|
+
:Br => [
|
70
|
+
[ 1, 43],
|
71
|
+
[ 2, 44],
|
72
|
+
[ 4, 45]],
|
73
|
+
:I => [
|
74
|
+
[ 1, 46],
|
75
|
+
[ 2, 47],
|
76
|
+
[ 4, 48]],
|
77
|
+
:Be => [[ 1, 49 ]],
|
78
|
+
:Mg => [[ 1, 50]],
|
79
|
+
:Al => [[ 1, 51]],
|
80
|
+
:Ca => [[ 1, 52]],
|
81
|
+
:Sc => [[ 1, 53]],
|
82
|
+
:Ti => [[ 1, 54]],
|
83
|
+
:V => [[ 1, 55]],
|
84
|
+
:Cr => [[ 1, 56]],
|
85
|
+
:Mn => [[ 1, 57]],
|
86
|
+
:Fe => [[ 1, 58]],
|
87
|
+
:Co => [[ 1, 59]],
|
88
|
+
:Ni => [[ 1, 60]],
|
89
|
+
:Cu => [[ 1, 61]],
|
90
|
+
:Zn => [[ 1, 62]],
|
91
|
+
:Ga => [[ 1, 63]],
|
92
|
+
:Ge => [[ 1, 64]],
|
93
|
+
:As => [[ 1, 65]],
|
94
|
+
:Se => [[ 1, 66]],
|
95
|
+
:Kr => [[ 1, 67]],
|
96
|
+
:Rb => [[ 1, 68]],
|
97
|
+
:Sr => [[ 1, 69]],
|
98
|
+
:Y => [[ 1, 70]],
|
99
|
+
:Zr => [[ 1, 71]],
|
100
|
+
:Nb => [[ 1, 72]],
|
101
|
+
:Mo => [[ 1, 73]],
|
102
|
+
:Ru => [[ 1, 74]],
|
103
|
+
:Rh => [[ 1, 75]],
|
104
|
+
:Pd => [[ 1, 76]],
|
105
|
+
:Ag => [[ 1, 77]],
|
106
|
+
:Cd => [[ 1, 78]],
|
107
|
+
:In => [[ 1, 79]],
|
108
|
+
:Sn => [[ 1, 80]],
|
109
|
+
:Sb => [[ 1, 81]],
|
110
|
+
:Te => [[ 1, 82]],
|
111
|
+
:Xe => [[ 1, 83]],
|
112
|
+
:Cs => [[ 1, 84]],
|
113
|
+
:Ba => [[ 1, 85]],
|
114
|
+
:Lu => [[ 1, 86]],
|
115
|
+
:Hf => [[ 1, 87]],
|
116
|
+
:Ta => [[ 1, 88]],
|
117
|
+
:W => [[ 1, 89]],
|
118
|
+
:Re => [[ 1, 90]],
|
119
|
+
:Os => [[ 1, 91]],
|
120
|
+
:Ir => [[ 1, 92]],
|
121
|
+
:Pt => [[ 1, 93]],
|
122
|
+
:Au => [[ 1, 94]],
|
123
|
+
:Hg => [[ 1, 95]],
|
124
|
+
:Tl => [[ 1, 96]],
|
125
|
+
:Pb => [[ 1, 97]],
|
126
|
+
:Bi => [[ 1, 98]],
|
127
|
+
:La => [[ 1, 99]],
|
128
|
+
:Ce => [[ 1, 100]],
|
129
|
+
:Pr => [[ 1, 101]],
|
130
|
+
:Nd => [[ 1, 102]],
|
131
|
+
:Pm => [[ 1, 103]],
|
132
|
+
:Sm => [[ 1, 104]],
|
133
|
+
:Eu => [[ 1, 105]],
|
134
|
+
:Gd => [[ 1, 106]],
|
135
|
+
:Tb => [[ 1, 107]],
|
136
|
+
:Dy => [[ 1, 108]],
|
137
|
+
:Ho => [[ 1, 109]],
|
138
|
+
:Er => [[ 1, 110]],
|
139
|
+
:Tm => [[ 1, 111]],
|
140
|
+
:Yb => [[ 1, 112]],
|
141
|
+
:Tc => [[ 1, 113]],
|
142
|
+
:U => [[ 1, 114]],
|
143
|
+
}
|
13
144
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
145
|
+
# Section 2
|
146
|
+
|
147
|
+
# Section 3
|
148
|
+
Section3 = {
|
149
|
+
'H-Li' => 263,
|
150
|
+
'Li-Li' => 264,
|
151
|
+
'B-Li' => 265,
|
152
|
+
'C-Li' => 266,
|
153
|
+
'Li-O' => 267,
|
154
|
+
'F-Li' => 268,
|
155
|
+
'Li-P' => 269,
|
156
|
+
'Li-S' => 270,
|
157
|
+
'Cl-Li' => 271,
|
158
|
+
'B-H' => 272,
|
159
|
+
'B-B' => 273,
|
160
|
+
'B-C' => 274,
|
161
|
+
'B-N' => 275,
|
162
|
+
'B-O' => 276,
|
163
|
+
'B-F' => 277,
|
164
|
+
'B-Si' => 278,
|
165
|
+
'B-P' => 279,
|
166
|
+
'B-S' => 280,
|
167
|
+
'B-Cl' => 281,
|
168
|
+
'B-Br' => 282,
|
169
|
+
'C-H' => 283,
|
170
|
+
'C-C' => 284,
|
171
|
+
'C-N' => 285,
|
172
|
+
'C-O' => 286,
|
173
|
+
'C-F' => 287,
|
174
|
+
'C-Na' => 288,
|
175
|
+
'C-Mg' => 289,
|
176
|
+
'Al-C' => 290,
|
177
|
+
'C-Si' => 291,
|
178
|
+
'C-P' => 292,
|
179
|
+
'C-S' => 293,
|
180
|
+
'C-Cl' => 294,
|
181
|
+
'As-C' => 295,
|
182
|
+
'C-Se' => 296,
|
183
|
+
'Br-C' => 297,
|
184
|
+
'C-I' => 298,
|
185
|
+
'H-N' => 299,
|
186
|
+
'N-N' => 300,
|
187
|
+
'N-O' => 301,
|
188
|
+
'F-N' => 302,
|
189
|
+
'N-Si' => 303,
|
190
|
+
'N-P' => 304,
|
191
|
+
'N-S' => 305,
|
192
|
+
'Cl-N' => 306,
|
193
|
+
'Br-N' => 307,
|
194
|
+
'H-O' => 308,
|
195
|
+
'O-O' => 309,
|
196
|
+
'Mg-O' => 310,
|
197
|
+
'Na-O' => 311,
|
198
|
+
'Al-O' => 312,
|
199
|
+
'O-Si' => 313,
|
200
|
+
'O-P' => 314,
|
201
|
+
'K-O' => 315,
|
202
|
+
'F-P' => 316,
|
203
|
+
'F-S' => 317,
|
204
|
+
'Al-H' => 318,
|
205
|
+
'Al-Cl' => 319,
|
206
|
+
'H-Si' => 320,
|
207
|
+
'Si-Si' => 321,
|
208
|
+
'Cl-Si' => 322,
|
209
|
+
'H-P' => 323,
|
210
|
+
'P-P' => 324,
|
211
|
+
'As-H' => 325,
|
212
|
+
'As-As' => 326,
|
213
|
+
}
|
214
|
+
|
215
|
+
# Section 4 Simple atom nearest neighbors
|
216
|
+
|
217
|
+
Section4 = {
|
218
|
+
:C => [
|
219
|
+
["-Br", "-C", 327],
|
220
|
+
["-Br", "-C", "-C", 328],
|
221
|
+
["-Br", "-H", 329],
|
222
|
+
["-Br", "~C", 330],
|
223
|
+
["-Br", "~N", 331],
|
224
|
+
["-C", "-C", 332],
|
225
|
+
["-C", "-C", "-C", 333],
|
226
|
+
["-C", "-C", "-C", "-C", 334],
|
227
|
+
["-C", "-C", "-C", "-H", 335],
|
228
|
+
["-C", "-C", "-C", "-N", 336],
|
229
|
+
["-C", "-C", "-C", "-O", 337],
|
230
|
+
["-C", "-C", "-H", "-N", 338],
|
231
|
+
["-C", "-C", "-H", "-O", 339],
|
232
|
+
["-C", "-C", "-N", 340],
|
233
|
+
["-C", "-C", "-O", 341],
|
234
|
+
["-C", "-Cl", 342],
|
235
|
+
["-C", "-Cl", "-H", 343],
|
236
|
+
["-C", "-H", 344],
|
237
|
+
["-C", "-H", "-N", 345],
|
238
|
+
["-C", "-H", "-O", 346],
|
239
|
+
["-C", "-H", "-O", "-O", 347],
|
240
|
+
["-C", "-H", "-P", 348],
|
241
|
+
["-C", "-H", "-S", 349],
|
242
|
+
["-C", "-I", 350],
|
243
|
+
["-C", "-N", 351],
|
244
|
+
["-C", "-O", 352],
|
245
|
+
["-C", "-S", 353],
|
246
|
+
["-C", "-Si", 354],
|
247
|
+
["-C", "~C", 355],
|
248
|
+
["-C", "~C", "~C", 356],
|
249
|
+
["-C", "~C", "~N", 357],
|
250
|
+
["-C", "~N", 358],
|
251
|
+
["-C", "~N", "~N", 359],
|
252
|
+
["-Cl", "-Cl", 360],
|
253
|
+
["-Cl", "-H", 361],
|
254
|
+
["-Cl", "~C", 362],
|
255
|
+
["-F", "-F", 363],
|
256
|
+
["-F", "~C", 364],
|
257
|
+
["-H", "-N", 365],
|
258
|
+
["-H", "-O", 366],
|
259
|
+
["-H", "-O", "-O", 367],
|
260
|
+
["-H", "-S", 368],
|
261
|
+
["-H", "-Si", 369],
|
262
|
+
["-H", "~C", 370],
|
263
|
+
["-H", "~C", "~C", 371],
|
264
|
+
["-H", "~C", "~N", 372],
|
265
|
+
["-H", "~N", 373],
|
266
|
+
["-H", "-H", "-H", 374],
|
267
|
+
["-N", "-N", 375],
|
268
|
+
["-N", "~C", 376],
|
269
|
+
["-N", "~C", "~C", 377],
|
270
|
+
["-N", "~C", "~N", 378],
|
271
|
+
["-N", "~N", 379],
|
272
|
+
["-O", "-O", 380],
|
273
|
+
["-O", "~C", 381],
|
274
|
+
["-O", "~C", "~C", 382],
|
275
|
+
["-S", "~C", 383],
|
276
|
+
["~C", "~C", 384],
|
277
|
+
["~C", "~C", "~C", 385],
|
278
|
+
["~C", "~C", "~N", 386],
|
279
|
+
["~C", "~N", 387],
|
280
|
+
["~C", "~N", "~N", 388],
|
281
|
+
["~N", "~N", 389]],
|
282
|
+
:N => [
|
283
|
+
["-C", "-C", 390],
|
284
|
+
["-C", "-C", "-C", 391],
|
285
|
+
["-C", "-C", "-H", 392],
|
286
|
+
["-C", "-H", 393],
|
287
|
+
["-C", "-H", "-N", 394],
|
288
|
+
["-C", "-O", 395],
|
289
|
+
["-C", "~C", 396],
|
290
|
+
["-C", "~C", "~C", 397],
|
291
|
+
["-H", "-N", 398],
|
292
|
+
["-H", "~C", 399],
|
293
|
+
["-H", "~C", "~C", 400],
|
294
|
+
["-O", "-O", 401],
|
295
|
+
["-O", "~O", 402],
|
296
|
+
["~C", "~C", 403],
|
297
|
+
["~C", "~C", "~C", 404]],
|
298
|
+
:O => [
|
299
|
+
["-C", "-C", 405],
|
300
|
+
["-C", "-H", 406],
|
301
|
+
["-C", "-P", 407],
|
302
|
+
["-H", "-S", 408],
|
303
|
+
["~C", "~C", 409]],
|
304
|
+
:P => [
|
305
|
+
["-C", "-C", 410],
|
306
|
+
["-O", "-O", 411]],
|
307
|
+
:S => [
|
308
|
+
["-C", "-C", 412],
|
309
|
+
["-C", "-H", 413],
|
310
|
+
["-C", "-O", 414]],
|
311
|
+
:Si => [
|
312
|
+
["-C", "-C", 415]
|
313
|
+
]
|
314
|
+
}
|
315
|
+
|
316
|
+
|
317
|
+
|
318
|
+
# Section 5 Detailed atom neighborhoods
|
319
|
+
|
320
|
+
# Section 6 Simple SMARTS patterns
|
321
|
+
|
322
|
+
Section6 = {
|
323
|
+
"C-C-C#C" => 460,
|
324
|
+
"O-C-C=N" => 461,
|
325
|
+
"O-C-C=O" => 462,
|
326
|
+
"N:C-S-[#1]" => 463,
|
327
|
+
"N-C-C=C" => 464,
|
328
|
+
"O=S-C-C" => 465,
|
329
|
+
"N#C-C=C" => 466,
|
330
|
+
"C=N-N-C" => 467,
|
331
|
+
"O=S-C-N" => 468,
|
332
|
+
"S-S-C:C" => 469,
|
333
|
+
"C:C-C=C" => 470,
|
334
|
+
"S:C:C:C" => 471,
|
335
|
+
"C:N:C-C" => 472,
|
336
|
+
"S-C:N:C" => 473,
|
337
|
+
"S:C:C:N" => 474,
|
338
|
+
"S-C=N-C" => 475,
|
339
|
+
"C-O-C=C" => 476,
|
340
|
+
"N-N-C:C" => 477,
|
341
|
+
"S-C=N-[#1]" => 478,
|
342
|
+
"S-C-S-C" => 479,
|
343
|
+
"C:S:C-C" => 480,
|
344
|
+
"O-S-C:C" => 481,
|
345
|
+
"C:N-C:C" => 482,
|
346
|
+
"N-S-C:C" => 483,
|
347
|
+
"N-C:N:C" => 484,
|
348
|
+
"N:C:C:N" => 485,
|
349
|
+
"N-C:N:N" => 486,
|
350
|
+
"N-C=N-C" => 487,
|
351
|
+
"N-C=N-[#1]" => 488,
|
352
|
+
"N-C-S-C" => 489,
|
353
|
+
"C-C-C=C" => 490,
|
354
|
+
"C-N:C-[#1]" => 491,
|
355
|
+
"N-C:O:C" => 492,
|
356
|
+
"O=C-C:C" => 493,
|
357
|
+
"O=C-C:N" => 494,
|
358
|
+
"C-N-C:C" => 495,
|
359
|
+
"N:N-C-[#1]" => 496,
|
360
|
+
"O-C:C:N" => 497,
|
361
|
+
"O-C=C-C" => 498,
|
362
|
+
"N-C:C:N" => 499,
|
363
|
+
"C-S-C:C" => 500,
|
364
|
+
"Cl-C:C-C" => 501,
|
365
|
+
"N-C=C-[#1]" => 502,
|
366
|
+
"Cl-C:C-[#1]" => 503,
|
367
|
+
"N:C:N-C" => 504,
|
368
|
+
"Cl-C:C-O" => 505,
|
369
|
+
"C-C:N:C" => 506,
|
370
|
+
"C-C-S-C" => 507,
|
371
|
+
"S=C-N-C" => 508,
|
372
|
+
"Br-C:C-C" => 509,
|
373
|
+
"[#1]-N-N-[#1]" => 510,
|
374
|
+
"S=C-N-[#1]" => 511,
|
375
|
+
"C-[As]-O-[#1]" => 512,
|
376
|
+
"S:C:C-[#1]" => 513,
|
377
|
+
"O-N-C-C" => 514,
|
378
|
+
"N-N-C-C" => 515,
|
379
|
+
"[#1]-C=C-[#1]" => 516,
|
380
|
+
"N-N-C-N" => 517,
|
381
|
+
"O=C-N-N" => 518,
|
382
|
+
"N=C-N-C" => 519,
|
383
|
+
"C=C-C:C" => 520,
|
384
|
+
"C:N-C-[#1]" => 521,
|
385
|
+
"C-N-N-[#1]" => 522,
|
386
|
+
"N:C:C-C" => 523,
|
387
|
+
"C-C=C-C" => 524,
|
388
|
+
"[As]-C:C-[#1]" => 525,
|
389
|
+
"Cl-C:C-Cl" => 526,
|
390
|
+
"C:C:N-[#1]" => 527,
|
391
|
+
"[#1]-N-C-[#1]" => 528,
|
392
|
+
"Cl-C-C-Cl" => 529,
|
393
|
+
"N:C-C:C" => 530,
|
394
|
+
"S-C:C-C" => 531,
|
395
|
+
"S-C:C-[#1]" => 532,
|
396
|
+
"S-C:C-N" => 533,
|
397
|
+
"S-C:C-O" => 534,
|
398
|
+
"O=C-C-C" => 535,
|
399
|
+
"O=C-C-N" => 536,
|
400
|
+
"O=C-C-O" => 537,
|
401
|
+
"N=C-C-C" => 538,
|
402
|
+
"N=C-C-[#1]" => 539,
|
403
|
+
"C-N-C-[#1]" => 540,
|
404
|
+
"O-C:C-C" => 541,
|
405
|
+
"O-C:C-[#1]" => 542,
|
406
|
+
"O-C:C-N" => 543,
|
407
|
+
"O-C:C-O" => 544,
|
408
|
+
"N-C:C-C" => 545,
|
409
|
+
"N-C:C-[#1]" => 546,
|
410
|
+
"N-C:C-N" => 547,
|
411
|
+
"O-C-C:C" => 548,
|
412
|
+
"N-C-C:C" => 549,
|
413
|
+
"Cl-C-C-C" => 550,
|
414
|
+
"Cl-C-C-O" => 551,
|
415
|
+
"C:C-C:C" => 552,
|
416
|
+
"O=C-C=C" => 553,
|
417
|
+
"Br-C-C-C" => 554,
|
418
|
+
"N=C-C=C" => 555,
|
419
|
+
"C=C-C-C" => 556,
|
420
|
+
"N:C-O-[#1]" => 557,
|
421
|
+
"O=N-C:C" => 558,
|
422
|
+
"O-C-N-[#1]" => 559,
|
423
|
+
"N-C-N-C" => 560,
|
424
|
+
"Cl-C-C=O" => 561,
|
425
|
+
"Br-C-C=O" => 562,
|
426
|
+
"O-C-O-C" => 563,
|
427
|
+
"C=C-C=C" => 564,
|
428
|
+
"C:C-O-C" => 565,
|
429
|
+
"O-C-C-N" => 566,
|
430
|
+
"O-C-C-O" => 567,
|
431
|
+
"N#C-C-C" => 568,
|
432
|
+
"N-C-C-N" => 569,
|
433
|
+
"C:C-C-C" => 570,
|
434
|
+
"[#1]-C-O-[#1]" => 571,
|
435
|
+
"N:C:N:C" => 572,
|
436
|
+
"O-C-C=C" => 573,
|
437
|
+
"O-C-C:C-C" => 574,
|
438
|
+
"O-C-C:C-O" => 575,
|
439
|
+
"N=C-C:C-[#1]" => 576,
|
440
|
+
"C:C-N-C:C" => 577,
|
441
|
+
"C-C:C-C:C" => 578,
|
442
|
+
"O=C-C-C-C" => 579,
|
443
|
+
"O=C-C-C-N" => 580,
|
444
|
+
"O=C-C-C-O" => 581,
|
445
|
+
"C-C-C-C-C" => 582,
|
446
|
+
"Cl-C:C-O-C" => 583,
|
447
|
+
"C:C-C=C-C" => 584,
|
448
|
+
"C-C:C-N-C" => 585,
|
449
|
+
"C-S-C-C-C" => 586,
|
450
|
+
"N-C:C-O-[#1]" => 587,
|
451
|
+
"O=C-C-C=O" => 588,
|
452
|
+
"C-C:C-O-C" => 589,
|
453
|
+
"C-C:C-O-[#1]" => 590,
|
454
|
+
"Cl-C-C-C-C" => 591,
|
455
|
+
"N-C-C-C-C" => 592,
|
456
|
+
"N-C-C-C-N" => 593,
|
457
|
+
"C-O-C-C=C" => 594,
|
458
|
+
"C:C-C-C-C" => 595,
|
459
|
+
"N=C-N-C-C" => 596,
|
460
|
+
"O=C-C-C:C" => 597,
|
461
|
+
"Cl-C:C:C-C" => 598,
|
462
|
+
"[#1]-C-C=C-[#1]" => 599,
|
463
|
+
"N-C:C:C-C" => 600,
|
464
|
+
"N-C:C:C-N" => 601,
|
465
|
+
"O=C-C-N-C" => 602,
|
466
|
+
"C-C:C:C-C" => 603,
|
467
|
+
"C-O-C-C:C" => 604,
|
468
|
+
"O=C-C-O-C" => 605,
|
469
|
+
"O-C:C-C-C" => 606,
|
470
|
+
"N-C-C-C:C" => 607,
|
471
|
+
"C-C-C-C:C" => 608,
|
472
|
+
"Cl-C-C-N-C" => 609,
|
473
|
+
"C-O-C-O-C" => 610,
|
474
|
+
"N-C-C-N-C" => 611,
|
475
|
+
"N-C-O-C-C" => 612,
|
476
|
+
"C-N-C-C-C" => 613,
|
477
|
+
"C-C-O-C-C" => 614,
|
478
|
+
"N-C-C-O-C" => 615,
|
479
|
+
"C:C:N:N:C" => 616,
|
480
|
+
"C-C-C-O-[#1]" => 617,
|
481
|
+
"C:C-C-C:C" => 618,
|
482
|
+
"O-C-C=C-C" => 619,
|
483
|
+
"C:C-O-C-C" => 620,
|
484
|
+
"N-C:C:C:N" => 621,
|
485
|
+
"O=C-O-C:C" => 622,
|
486
|
+
"O=C-C:C-C" => 623,
|
487
|
+
"O=C-C:C-N" => 624,
|
488
|
+
"O=C-C:C-O" => 625,
|
489
|
+
"C-O-C:C-C" => 626,
|
490
|
+
"O=[As]-C:C:C" => 627,
|
491
|
+
"C-N-C-C:C" => 628,
|
492
|
+
"S-C:C:C-N" => 629,
|
493
|
+
"O-C:C-O-C" => 630,
|
494
|
+
"O-C:C-O-[#1]" => 631,
|
495
|
+
"C-C-O-C:C" => 632,
|
496
|
+
"N-C-C:C-C" => 633,
|
497
|
+
"C-C-C:C-C" => 634,
|
498
|
+
"N-N-C-N-[#1]" => 635,
|
499
|
+
"C-N-C-N-C" => 636,
|
500
|
+
"O-C-C-C-C" => 637,
|
501
|
+
"O-C-C-C-N" => 638,
|
502
|
+
"O-C-C-C-O" => 639,
|
503
|
+
"C=C-C-C-C" => 640,
|
504
|
+
"O-C-C-C=C" => 641,
|
505
|
+
"O-C-C-C=O" => 642,
|
506
|
+
"[#1]-C-C-N-[#1]" => 643,
|
507
|
+
"C-C=N-N-C" => 644,
|
508
|
+
"O=C-N-C-C" => 645,
|
509
|
+
"O=C-N-C-[#1]" => 646,
|
510
|
+
"O=C-N-C-N" => 647,
|
511
|
+
"O=N-C:C-N" => 648,
|
512
|
+
"O=N-C:C-O" => 649,
|
513
|
+
"O=C-N-C=O" => 650,
|
514
|
+
"O-C:C:C-C" => 651,
|
515
|
+
"O-C:C:C-N" => 652,
|
516
|
+
"O-C:C:C-O" => 653,
|
517
|
+
"N-C-N-C-C" => 654,
|
518
|
+
"O-C-C-C:C" => 655,
|
519
|
+
"C-C-N-C-C" => 656,
|
520
|
+
"C-N-C:C-C" => 657,
|
521
|
+
"C-C-S-C-C" => 658,
|
522
|
+
"O-C-C-N-C" => 659,
|
523
|
+
"C-C=C-C-C" => 660,
|
524
|
+
"O-C-O-C-C" => 661,
|
525
|
+
"O-C-C-O-C" => 662,
|
526
|
+
"O-C-C-O-[#1]" => 663,
|
527
|
+
"C-C=C-C=C" => 664,
|
528
|
+
"N-C:C-C-C" => 665,
|
529
|
+
"C=C-C-O-C" => 666,
|
530
|
+
"C=C-C-O-[#1]" => 667,
|
531
|
+
"C-C:C-C-C" => 668,
|
532
|
+
"Cl-C:C-C=O" => 669,
|
533
|
+
"Br-C:C:C-C" => 670,
|
534
|
+
"O=C-C=C-C" => 671,
|
535
|
+
"O=C-C=C-[#1]" => 672,
|
536
|
+
"O=C-C=C-N" => 673,
|
537
|
+
"N-C-N-C:C" => 674,
|
538
|
+
"Br-C-C-C:C" => 675,
|
539
|
+
"N#C-C-C-C" => 676,
|
540
|
+
"C-C=C-C:C" => 677,
|
541
|
+
"C-C-C=C-C" => 678,
|
542
|
+
"C-C-C-C-C-C" => 679,
|
543
|
+
"O-C-C-C-C-C" => 680,
|
544
|
+
"O-C-C-C-C-O" => 681,
|
545
|
+
"O-C-C-C-C-N" => 682,
|
546
|
+
"N-C-C-C-C-C" => 683,
|
547
|
+
"O=C-C-C-C-C" => 684,
|
548
|
+
"O=C-C-C-C-N" => 685,
|
549
|
+
"O=C-C-C-C-O" => 686,
|
550
|
+
"O=C-C-C-C=O" => 687,
|
551
|
+
"C-C-C-C-C-C-C" => 688,
|
552
|
+
"O-C-C-C-C-C-C" => 689,
|
553
|
+
"O-C-C-C-C-C-O" => 690,
|
554
|
+
"O-C-C-C-C-C-N" => 691,
|
555
|
+
"O=C-C-C-C-C-C" => 692,
|
556
|
+
"O=C-C-C-C-C-O" => 693,
|
557
|
+
"O=C-C-C-C-C=O" => 694,
|
558
|
+
"O=C-C-C-C-C-N" => 695,
|
559
|
+
"C-C-C-C-C-C-C-C" => 696,
|
560
|
+
"C-C-C-C-C-C(C)-C" => 697,
|
561
|
+
"O-C-C-C-C-C-C-C" => 698,
|
562
|
+
"O-C-C-C-C-C(C)-C" => 699,
|
563
|
+
"O-C-C-C-C-C-O-C" => 700,
|
564
|
+
"O-C-C-C-C-C(O)-C" => 701,
|
565
|
+
"O-C-C-C-C-C-N-C" => 702,
|
566
|
+
"O-C-C-C-C-C(N)-C" => 703,
|
567
|
+
"O=C-C-C-C-C-C-C" => 704,
|
568
|
+
"O=C-C-C-C-C(O)-C" => 705,
|
569
|
+
"O=C-C-C-C-C(=O)-C" => 706,
|
570
|
+
"O=C-C-C-C-C(N)-C" => 707,
|
571
|
+
"C-C(C)-C-C" => 708,
|
572
|
+
"C-C(C)-C-C-C" => 709,
|
573
|
+
"C-C-C(C)-C-C" => 710,
|
574
|
+
"C-C(C)(C)-C-C" => 711,
|
575
|
+
"C-C(C)-C(C)-C" => 712,
|
576
|
+
}
|
577
|
+
|
578
|
+
# Section 7: Complex SMARTS patterns
|
18
579
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
580
|
+
# Section 7 of the PubChem substructure fingerprint: complex SMARTS patterns.
#
# Maps a ring pattern string to its fingerprint bit position (713..880).
# The table is 8 ring templates x 21 substituent pairs (C, O, S, N, Cl, Br):
#   para/meta/ortho on an aromatic six-ring,
#   1,4 / 1,3 / 1,2 on a saturated six-ring,
#   1,3 / 1,2 on a saturated five-ring.
Section7 = {
  # Xc1ccc(Y)cc1
  "Cc1ccc(C)cc1" => 713,
  "Cc1ccc(O)cc1" => 714,
  "Cc1ccc(S)cc1" => 715,
  "Cc1ccc(N)cc1" => 716,
  "Cc1ccc(Cl)cc1" => 717,
  "Cc1ccc(Br)cc1" => 718,
  "Oc1ccc(O)cc1" => 719,
  "Oc1ccc(S)cc1" => 720,
  "Oc1ccc(N)cc1" => 721,
  "Oc1ccc(Cl)cc1" => 722,
  "Oc1ccc(Br)cc1" => 723,
  "Sc1ccc(S)cc1" => 724,
  "Sc1ccc(N)cc1" => 725,
  "Sc1ccc(Cl)cc1" => 726,
  "Sc1ccc(Br)cc1" => 727,
  "Nc1ccc(N)cc1" => 728,
  "Nc1ccc(Cl)cc1" => 729,
  "Nc1ccc(Br)cc1" => 730,
  "Clc1ccc(Cl)cc1" => 731,
  "Clc1ccc(Br)cc1" => 732,
  "Brc1ccc(Br)cc1" => 733,
  # Xc1cc(Y)ccc1
  "Cc1cc(C)ccc1" => 734,
  "Cc1cc(O)ccc1" => 735,
  "Cc1cc(S)ccc1" => 736,
  "Cc1cc(N)ccc1" => 737,
  "Cc1cc(Cl)ccc1" => 738,
  "Cc1cc(Br)ccc1" => 739,
  "Oc1cc(O)ccc1" => 740,
  "Oc1cc(S)ccc1" => 741,
  "Oc1cc(N)ccc1" => 742,
  "Oc1cc(Cl)ccc1" => 743,
  "Oc1cc(Br)ccc1" => 744,
  "Sc1cc(S)ccc1" => 745,
  "Sc1cc(N)ccc1" => 746,
  "Sc1cc(Cl)ccc1" => 747,
  "Sc1cc(Br)ccc1" => 748,
  "Nc1cc(N)ccc1" => 749,
  "Nc1cc(Cl)ccc1" => 750,
  "Nc1cc(Br)ccc1" => 751,
  "Clc1cc(Cl)ccc1" => 752,
  "Clc1cc(Br)ccc1" => 753,
  "Brc1cc(Br)ccc1" => 754,
  # Xc1c(Y)cccc1
  "Cc1c(C)cccc1" => 755,
  "Cc1c(O)cccc1" => 756,
  "Cc1c(S)cccc1" => 757,
  "Cc1c(N)cccc1" => 758,
  "Cc1c(Cl)cccc1" => 759,
  "Cc1c(Br)cccc1" => 760,
  "Oc1c(O)cccc1" => 761,
  "Oc1c(S)cccc1" => 762,
  "Oc1c(N)cccc1" => 763,
  "Oc1c(Cl)cccc1" => 764,
  "Oc1c(Br)cccc1" => 765,
  "Sc1c(S)cccc1" => 766,
  "Sc1c(N)cccc1" => 767,
  "Sc1c(Cl)cccc1" => 768,
  "Sc1c(Br)cccc1" => 769,
  "Nc1c(N)cccc1" => 770,
  "Nc1c(Cl)cccc1" => 771,
  "Nc1c(Br)cccc1" => 772,
  "Clc1c(Cl)cccc1" => 773,
  "Clc1c(Br)cccc1" => 774,
  "Brc1c(Br)cccc1" => 775,
  # XC1CCC(Y)CC1
  "CC1CCC(C)CC1" => 776,
  "CC1CCC(O)CC1" => 777,
  "CC1CCC(S)CC1" => 778,
  "CC1CCC(N)CC1" => 779,
  "CC1CCC(Cl)CC1" => 780,
  "CC1CCC(Br)CC1" => 781,
  "OC1CCC(O)CC1" => 782,
  "OC1CCC(S)CC1" => 783,
  "OC1CCC(N)CC1" => 784,
  "OC1CCC(Cl)CC1" => 785,
  "OC1CCC(Br)CC1" => 786,
  "SC1CCC(S)CC1" => 787,
  "SC1CCC(N)CC1" => 788,
  "SC1CCC(Cl)CC1" => 789,
  "SC1CCC(Br)CC1" => 790,
  "NC1CCC(N)CC1" => 791,
  "NC1CCC(Cl)CC1" => 792,
  "NC1CCC(Br)CC1" => 793,
  "ClC1CCC(Cl)CC1" => 794,
  "ClC1CCC(Br)CC1" => 795,
  "BrC1CCC(Br)CC1" => 796,
  # XC1CC(Y)CCC1
  "CC1CC(C)CCC1" => 797,
  "CC1CC(O)CCC1" => 798,
  "CC1CC(S)CCC1" => 799,
  "CC1CC(N)CCC1" => 800,
  "CC1CC(Cl)CCC1" => 801,
  "CC1CC(Br)CCC1" => 802,
  "OC1CC(O)CCC1" => 803,
  "OC1CC(S)CCC1" => 804,
  "OC1CC(N)CCC1" => 805,
  "OC1CC(Cl)CCC1" => 806,
  "OC1CC(Br)CCC1" => 807,
  "SC1CC(S)CCC1" => 808,
  "SC1CC(N)CCC1" => 809,
  "SC1CC(Cl)CCC1" => 810,
  "SC1CC(Br)CCC1" => 811,
  "NC1CC(N)CCC1" => 812,
  "NC1CC(Cl)CCC1" => 813,
  "NC1CC(Br)CCC1" => 814,
  "ClC1CC(Cl)CCC1" => 815,
  "ClC1CC(Br)CCC1" => 816,
  "BrC1CC(Br)CCC1" => 817,
  # XC1C(Y)CCCC1
  "CC1C(C)CCCC1" => 818,
  "CC1C(O)CCCC1" => 819,
  "CC1C(S)CCCC1" => 820,
  "CC1C(N)CCCC1" => 821,
  "CC1C(Cl)CCCC1" => 822,
  "CC1C(Br)CCCC1" => 823,
  "OC1C(O)CCCC1" => 824,
  "OC1C(S)CCCC1" => 825,
  "OC1C(N)CCCC1" => 826,
  "OC1C(Cl)CCCC1" => 827,
  "OC1C(Br)CCCC1" => 828,
  "SC1C(S)CCCC1" => 829,
  "SC1C(N)CCCC1" => 830,
  "SC1C(Cl)CCCC1" => 831,
  "SC1C(Br)CCCC1" => 832,
  "NC1C(N)CCCC1" => 833,
  "NC1C(Cl)CCCC1" => 834,
  "NC1C(Br)CCCC1" => 835,
  "ClC1C(Cl)CCCC1" => 836,
  "ClC1C(Br)CCCC1" => 837,
  "BrC1C(Br)CCCC1" => 838,
  # XC1CC(Y)CC1
  "CC1CC(C)CC1" => 839,
  "CC1CC(O)CC1" => 840,
  "CC1CC(S)CC1" => 841,
  "CC1CC(N)CC1" => 842,
  "CC1CC(Cl)CC1" => 843,
  "CC1CC(Br)CC1" => 844,
  "OC1CC(O)CC1" => 845,
  "OC1CC(S)CC1" => 846,
  "OC1CC(N)CC1" => 847,
  "OC1CC(Cl)CC1" => 848,
  "OC1CC(Br)CC1" => 849,
  "SC1CC(S)CC1" => 850,
  "SC1CC(N)CC1" => 851,
  "SC1CC(Cl)CC1" => 852,
  "SC1CC(Br)CC1" => 853,
  "NC1CC(N)CC1" => 854,
  "NC1CC(Cl)CC1" => 855,
  "NC1CC(Br)CC1" => 856,
  "ClC1CC(Cl)CC1" => 857,
  "ClC1CC(Br)CC1" => 858,
  "BrC1CC(Br)CC1" => 859,
  # XC1C(Y)CCC1
  "CC1C(C)CCC1" => 860,
  "CC1C(O)CCC1" => 861,
  "CC1C(S)CCC1" => 862,
  "CC1C(N)CCC1" => 863,
  "CC1C(Cl)CCC1" => 864,
  "CC1C(Br)CCC1" => 865,
  "OC1C(O)CCC1" => 866,
  "OC1C(S)CCC1" => 867,
  "OC1C(N)CCC1" => 868,
  "OC1C(Cl)CCC1" => 869,
  "OC1C(Br)CCC1" => 870,
  "SC1C(S)CCC1" => 871,
  "SC1C(N)CCC1" => 872,
  "SC1C(Cl)CCC1" => 873,
  "SC1C(Br)CCC1" => 874,
  "NC1C(N)CCC1" => 875,
  "NC1C(Cl)CCC1" => 876,
  "NC1C(Br)CCC1" => 877,
  "ClC1C(Cl)CCC1" => 878,
  "ClC1C(Br)CCC1" => 879,
  "BrC1C(Br)CCC1" => 880,
}
|
750
|
+
|
751
|
+
# Human-readable description of every key in the first two fingerprint
# sections, in order: entries 0-114 are element-count keys, entries 115-254
# are ring-count keys grouped by ring size, and entries 255-262 are the
# aromatic / hetero-aromatic ring counts.
# NOTE(review): the index of each entry appears to be its fingerprint bit
# position (the RingSizeBaseNum offsets line up with the first entry of each
# ring size) -- confirm against the PubChem fingerprint specification.
PubChemSubsKey = [
  # Elements that are keyed at several count thresholds.
  [
    ["H",  [4, 8, 16, 32]],
    ["Li", [1, 2]],
    ["B",  [1, 2, 4]],
    ["C",  [2, 4, 8, 16, 32]],
    ["N",  [1, 2, 4, 8]],
    ["O",  [1, 2, 4, 8, 16]],
    ["F",  [1, 2, 4]],
    ["Na", [1, 2]],
    ["Si", [1, 2]],
    ["P",  [1, 2, 4]],
    ["S",  [1, 2, 4, 8]],
    ["Cl", [1, 2, 4, 8]],
    ["K",  [1, 2]],
    ["Br", [1, 2, 4]],
    ["I",  [1, 2, 4]]
  ].map { |symbol, thresholds|
    thresholds.map { |count| ">= #{count} #{symbol}" }
  },

  # Elements that only have a presence (">= 1") key.
  %w[
    Be Mg Al Ca Sc Ti V Cr Mn Fe
    Co Ni Cu Zn Ga Ge As Se Kr Rb
    Sr Y Zr Nb Mo Ru Rh Pd Ag Cd
    In Sn Sb Te Xe Cs Ba Lu Hf Ta
    W Re Os Ir Pt Au Hg Tl Pb Bi
    La Ce Pr Nd Pm Sm Eu Gd Tb Dy
    Ho Er Tm Yb Tc U
  ].map { |symbol| ">= 1 #{symbol}" },

  # Ring keys: [ring size, highest ring count that has keys]. For every count
  # from 1 up to that maximum there are seven keys, one per ring kind.
  [[3, 2], [4, 2], [5, 5], [6, 5], [7, 2], [8, 2], [9, 1], [10, 1]].map { |ring_size, max_count|
    (1..max_count).map { |count|
      [
        "any",
        "saturated carbon-only",
        "saturated nitrogen-containing",
        "saturated heteroatom-containing",
        "unsaturated or aromatic carbon-only",
        "unsaturated or aromatic nitrogen-containing",
        "unsaturated or aromatic heteroatom-containing"
      ].map { |ring_kind| ">= #{count} #{ring_kind} ring size #{ring_size}" }
    }
  },

  # Aromatic ring counts; the noun is singular only for a count of one.
  (1..4).map { |count|
    noun = (count == 1) ? "ring" : "rings"
    [">= #{count} aromatic #{noun}", ">= #{count} hetero-aromatic #{noun}"]
  }
].flatten
|
1016
|
+
|
1017
|
+
# Maps a ring size (3..10) to a base number. Each value equals the index in
# PubChemSubsKey of the ">= 1 any ring size N" entry for that ring size.
RingSizeBaseNum = Hash[
  [3,   4,   5,   6,   7,   8,   9,   10].zip(
  [115, 129, 143, 178, 213, 227, 241, 248])
]
|
1027
|
+
module Molecule

  # Computes a substructure fingerprint for this molecule as an Integer bit
  # set, using the lookup tables defined elsewhere in this file
  # (HierarchicElementCounts, Section3, Section6, Section7).
  #
  # Only sections 1, 3, 6 and 7 contribute bits. Sections 2 (ring counts) and
  # 4 (atom neighbourhoods) were unfinished scaffolding in the previous
  # version -- a grouping of find_sssr rings that was never used and a debug
  # print -- and are left as TODOs here so that calling this method no longer
  # writes to stdout. There is no Section 5 code at all.
  #
  # Returns the fingerprint as an Integer (bit i set <=> key i present).
  def generate_pubchem_subskey
    fp = 0

    # Section 1: one bit per (element, minimum count) threshold that is met.
    composition.each do |elem, num|
      thresholds = HierarchicElementCounts[elem]
      next unless thresholds # element has no count keys; nothing to set
      thresholds.each do |n_atoms, bit|
        fp |= (1 << bit) if num >= n_atoms
      end
    end

    # Section 2: TODO ring-count keys are not implemented. RingSizeBaseNum
    # holds a per-ring-size base number that looks intended for this section.

    # Section 3: one bit per distinct pair of bonded elements. The two symbols
    # are sorted so that "C-N" and "N-C" share a key.
    pairs = edges.collect { |bond, atom1, atom2|
      [atom1.element.to_s, atom2.element.to_s].sort.join("-")
    }.uniq
    pairs.each do |pair|
      bit = Section3[pair]
      fp |= (1 << bit) if bit
    end

    # Section 4: TODO atom-neighbourhood keys are not implemented.

    # Sections 6 and 7: one bit per SMARTS pattern that matches this molecule.
    [Section6, Section7].each do |section|
      section.each do |smarts, bit|
        pat = Chem::OpenBabel::parse_smarts(smarts)
        fp |= (1 << bit) if pat.match(self)
      end
    end

    fp
  end

  # Extract PubChem substructural keys
  # see ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.txt
  #
  # Decodes the Base64 "PUBCHEM_CACTVS_SUBSKEYS" field of sdf_data into an
  # Integer whose bit i is fingerprint key i. The first four decoded bytes
  # (a length prefix) are skipped; within each following byte the most
  # significant bit is the lowest-numbered key.
  #
  # The payload is read byte by byte rather than as 32-bit words: the encoded
  # payload is not necessarily a multiple of four bytes long, and unpacking
  # whole words silently discarded the trailing bytes (and with them the
  # highest-numbered keys).
  #
  # Returns 0 when the decoded data holds no payload bytes.
  def pubchem_subskeys
    require 'base64'
    b64 = sdf_data["PUBCHEM_CACTVS_SUBSKEYS"]
    payload = Base64.decode64(b64).unpack("C*")[4..-1] || []

    fp = 0
    payload.each_with_index do |byte, byte_idx|
      8.times do |n|
        # Bit (7 - n) of the byte, counted from the LSB, is key byte_idx * 8 + n.
        fp |= (1 << (byte_idx * 8 + n)) if byte[7 - n] == 1
      end
    end
    fp
  end

end
|
99
1094
|
|
100
1095
|
end
|
101
1096
|
|
102
|
-
# Demo, executed only when this file is run directly: looks up the CIDs for
# one hard-coded SMILES string via Chem::PubChem.smiles_search, prints them,
# then prints the SDF text returned by get_sdf for every hit.
if $0 == __FILE__
  query = "CC23(CCC1c4ccc(O)cc4(CCC1C3(CC(O)C2(O))))"
  puts "===== CID(s) for SMILES, #{query} ====="
  hits = Chem::PubChem.smiles_search(query)
  p hits
  puts "===== MOL format data ===="
  hits.each { |hit| puts hit.get_sdf }
  # Disabled experiments with the XML download:
  #   p Chem::PubChem.get_xml(hits[0])
  #   puts Chem::PubChem.get_xml(hits[0]).sdf2mol.data
end
|