rdworks 0.25.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdworks/__init__.py +35 -0
- rdworks/autograph/__init__.py +4 -0
- rdworks/autograph/autograph.py +184 -0
- rdworks/autograph/centroid.py +90 -0
- rdworks/autograph/dynamictreecut.py +135 -0
- rdworks/autograph/nmrclust.py +123 -0
- rdworks/autograph/rckmeans.py +74 -0
- rdworks/bitqt/__init__.py +1 -0
- rdworks/bitqt/bitqt.py +355 -0
- rdworks/conf.py +374 -0
- rdworks/descriptor.py +36 -0
- rdworks/display.py +206 -0
- rdworks/ionized.py +170 -0
- rdworks/matchedseries.py +260 -0
- rdworks/mol.py +1522 -0
- rdworks/mollibr.py +887 -0
- rdworks/pka.py +38 -0
- rdworks/predefined/Asinex_fragment.xml +20 -0
- rdworks/predefined/Astex_RO3.xml +16 -0
- rdworks/predefined/Baell2010_PAINS/Baell2010A.xml +52 -0
- rdworks/predefined/Baell2010_PAINS/Baell2010B.xml +169 -0
- rdworks/predefined/Baell2010_PAINS/Baell2010C.xml +1231 -0
- rdworks/predefined/Baell2010_PAINS/PAINS-less-than-015-hits.xml +2048 -0
- rdworks/predefined/Baell2010_PAINS/PAINS-less-than-150-hits.xml +278 -0
- rdworks/predefined/Baell2010_PAINS/PAINS-more-than-150-hits.xml +83 -0
- rdworks/predefined/Baell2010_PAINS/makexml.py +70 -0
- rdworks/predefined/Brenk2008_Dundee/makexml.py +21 -0
- rdworks/predefined/CNS.xml +18 -0
- rdworks/predefined/ChEMBL_Walters/BMS.xml +543 -0
- rdworks/predefined/ChEMBL_Walters/Dundee.xml +318 -0
- rdworks/predefined/ChEMBL_Walters/Glaxo.xml +168 -0
- rdworks/predefined/ChEMBL_Walters/Inpharmatica.xml +276 -0
- rdworks/predefined/ChEMBL_Walters/LINT.xml +174 -0
- rdworks/predefined/ChEMBL_Walters/MLSMR.xml +351 -0
- rdworks/predefined/ChEMBL_Walters/PAINS.xml +1446 -0
- rdworks/predefined/ChEMBL_Walters/SureChEMBL.xml +501 -0
- rdworks/predefined/ChEMBL_Walters/makexml.py +40 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999.xml +168 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999Acid.xml +102 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999Base.xml +6 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999ElPh.xml +6 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999NuPh.xml +6 -0
- rdworks/predefined/Hann1999_Glaxo/makexml.py +83 -0
- rdworks/predefined/Kazius2005/Kazius2005.xml +114 -0
- rdworks/predefined/Kazius2005/makexml.py +66 -0
- rdworks/predefined/ZINC_druglike.xml +24 -0
- rdworks/predefined/ZINC_fragment.xml +14 -0
- rdworks/predefined/ZINC_leadlike.xml +15 -0
- rdworks/predefined/fragment.xml +7 -0
- rdworks/predefined/ionized/simple_smarts_pattern.csv +57 -0
- rdworks/predefined/ionized/smarts_pattern.csv +107 -0
- rdworks/predefined/misc/makexml.py +119 -0
- rdworks/predefined/misc/reactive-part-2.xml +104 -0
- rdworks/predefined/misc/reactive-part-3.xml +74 -0
- rdworks/predefined/misc/reactive.xml +321 -0
- rdworks/readin.py +312 -0
- rdworks/rgroup.py +2173 -0
- rdworks/scaffold.py +520 -0
- rdworks/std.py +143 -0
- rdworks/stereoisomers.py +127 -0
- rdworks/tautomers.py +20 -0
- rdworks/units.py +63 -0
- rdworks/utils.py +495 -0
- rdworks/xml.py +260 -0
- rdworks-0.25.7.dist-info/METADATA +37 -0
- rdworks-0.25.7.dist-info/RECORD +69 -0
- rdworks-0.25.7.dist-info/WHEEL +5 -0
- rdworks-0.25.7.dist-info/licenses/LICENSE +21 -0
- rdworks-0.25.7.dist-info/top_level.txt +1 -0
@@ -0,0 +1,168 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<Hann1999>
|
3
|
+
<substructure name="R1 reactive alkyl halides">
|
4
|
+
<SMARTS>[Br,Cl,I][CX4;CH,CH2]</SMARTS>
|
5
|
+
</substructure>
|
6
|
+
<substructure name="R2 acid halides">
|
7
|
+
<SMARTS>[S,C](=[O,S])[F,Br,Cl,I]</SMARTS>
|
8
|
+
</substructure>
|
9
|
+
<substructure name="R3 carbazides">
|
10
|
+
<SMARTS>O=CN=[N+]=[N-]</SMARTS>
|
11
|
+
</substructure>
|
12
|
+
<substructure name="R4 sulphate esters">
|
13
|
+
<SMARTS>COS(=O)O[C,c]</SMARTS>
|
14
|
+
</substructure>
|
15
|
+
<substructure name="R5 sulphonates">
|
16
|
+
<SMARTS>COS(=O)(=O)[C,c]</SMARTS>
|
17
|
+
</substructure>
|
18
|
+
<substructure name="R6 acid anhydrides">
|
19
|
+
<SMARTS>C(=O)OC(=O)</SMARTS>
|
20
|
+
</substructure>
|
21
|
+
<substructure name="R7 peroxides">
|
22
|
+
<SMARTS>OO</SMARTS>
|
23
|
+
</substructure>
|
24
|
+
<substructure name="R8 pentafluophenyl esters">
|
25
|
+
<SMARTS>C(=O)Oc1c(F)c(F)c(F)c(F)c1(F)</SMARTS>
|
26
|
+
</substructure>
|
27
|
+
<substructure name="R9 paranitrophenyl esters">
|
28
|
+
<SMARTS>C(=O)Oc1ccc(N(N=O)=O)cc1</SMARTS>
|
29
|
+
</substructure>
|
30
|
+
<substructure name="R10 esters of HOBT">
|
31
|
+
<SMARTS>C(=O)Onnn</SMARTS>
|
32
|
+
</substructure>
|
33
|
+
<substructure name="R11 isocynates and isothiocyanates">
|
34
|
+
<SMARTS>N=C=[S,O]</SMARTS>
|
35
|
+
</substructure>
|
36
|
+
<substructure name="R12 triflates">
|
37
|
+
<SMARTS>OS(=O)(=O)C(F)(F)F</SMARTS>
|
38
|
+
</substructure>
|
39
|
+
<substructure name="R13 lawesson's reagent and derivates">
|
40
|
+
<SMARTS>P(=S)(S)S</SMARTS>
|
41
|
+
</substructure>
|
42
|
+
<substructure name="R14 phosphoramides">
|
43
|
+
<SMARTS>NP(=O)(N)N</SMARTS>
|
44
|
+
</substructure>
|
45
|
+
<substructure name="R15 aromatic azides">
|
46
|
+
<SMARTS>cN=[N+]=[N-]</SMARTS>
|
47
|
+
</substructure>
|
48
|
+
<substructure name="R16 beta carbonyl quaternary nitrogen">
|
49
|
+
<SMARTS>C(=O)C[N+,n+]</SMARTS>
|
50
|
+
</substructure>
|
51
|
+
<substructure name="R17 acylhydrazide">
|
52
|
+
<SMARTS>[N;R0][N;R0]C(=O)</SMARTS>
|
53
|
+
</substructure>
|
54
|
+
<substructure name="R18 quaternary C,Cl,I,P or S">
|
55
|
+
<SMARTS>[C+,Cl+,I+,P+,S+]</SMARTS>
|
56
|
+
</substructure>
|
57
|
+
<substructure name="R19 phosphoranes">
|
58
|
+
<SMARTS>C=P</SMARTS>
|
59
|
+
</substructure>
|
60
|
+
<substructure name="R20 chloramidines">
|
61
|
+
<SMARTS>[Cl]C([C&R0])=N</SMARTS>
|
62
|
+
</substructure>
|
63
|
+
<substructure name="R21 nitroso">
|
64
|
+
<SMARTS>[N&D2](=O)</SMARTS>
|
65
|
+
</substructure>
|
66
|
+
<substructure name="R22 P/S halides">
|
67
|
+
<SMARTS>[P,S][Cl,Br,F,I]</SMARTS>
|
68
|
+
</substructure>
|
69
|
+
<substructure name="R23 carbodiimide">
|
70
|
+
<SMARTS>N=C=N</SMARTS>
|
71
|
+
</substructure>
|
72
|
+
<substructure name="R24 isonitrile">
|
73
|
+
<SMARTS>[N+]#[C-]</SMARTS>
|
74
|
+
</substructure>
|
75
|
+
<substructure name="R25 triacyloximes">
|
76
|
+
<SMARTS>C(=O)N(C(C=O))OC(=O)</SMARTS>
|
77
|
+
</substructure>
|
78
|
+
<substructure name="R26 cyanohydrins">
|
79
|
+
<SMARTS>N#CC[OH]</SMARTS>
|
80
|
+
</substructure>
|
81
|
+
<substructure name="R27 acyl cyanides">
|
82
|
+
<SMARTS>N#CC(=O)</SMARTS>
|
83
|
+
</substructure>
|
84
|
+
<substructure name="R28 sulfonyl cyanides">
|
85
|
+
<SMARTS>S(=O)(=O)C#N</SMARTS>
|
86
|
+
</substructure>
|
87
|
+
<substructure name="R29 cyanophosphonates">
|
88
|
+
<SMARTS>P(OCC)(OCC)(=O)C#N</SMARTS>
|
89
|
+
</substructure>
|
90
|
+
<substructure name="R30 azocyanamides">
|
91
|
+
<SMARTS>[N;R0]=[N;R0]C#N</SMARTS>
|
92
|
+
</substructure>
|
93
|
+
<substructure name="R31 azoalkanals">
|
94
|
+
<SMARTS>[N;R0]=[N;R0]CC=O</SMARTS>
|
95
|
+
</substructure>
|
96
|
+
<substructure name="I1 aliphatic methylene chains 7 or more long">
|
97
|
+
<SMARTS>[CD2;R0][CD2;R0][CD2;R0][CD2;R0][CD2;R0][CD2;R0][CD2;R0]</SMARTS>
|
98
|
+
</substructure>
|
99
|
+
<substructure name="I2 compounds with 4 or more acidic substructures">
|
100
|
+
<SMARTS>[C,S,P](=O)[OH].[C,S,P](=O)[OH].[C,S,P](=O)[OH].[C,S,P](=O)[OH]</SMARTS>
|
101
|
+
</substructure>
|
102
|
+
<substructure name="I3 crown ethers">
|
103
|
+
<SMARTS>[O;R1][C;R1][C;R1][O;R1][C;R1][C;R1][O;R1]</SMARTS>
|
104
|
+
</substructure>
|
105
|
+
<substructure name="I4 disulphides">
|
106
|
+
<SMARTS>SS</SMARTS>
|
107
|
+
</substructure>
|
108
|
+
<substructure name="I5 thiols">
|
109
|
+
<SMARTS>[SH]</SMARTS>
|
110
|
+
</substructure>
|
111
|
+
<substructure name="I6 epoxides, thioepoxides, aziridines">
|
112
|
+
<SMARTS>C1[O,S,N]C1</SMARTS>
|
113
|
+
</substructure>
|
114
|
+
<substructure name="I7 2,4,5 trihydroxyphenyl">
|
115
|
+
<SMARTS>c([OH])c([OH])c([OH])</SMARTS>
|
116
|
+
</substructure>
|
117
|
+
<substructure name="I8 2,3,4 trihydroxyphenyl">
|
118
|
+
<SMARTS>c([OH])c([OH])cc([OH])</SMARTS>
|
119
|
+
</substructure>
|
120
|
+
<substructure name="I9 hydrazothiourea">
|
121
|
+
<SMARTS>N=NC(=S)N</SMARTS>
|
122
|
+
</substructure>
|
123
|
+
<substructure name="I10 thiocyanate">
|
124
|
+
<SMARTS>SC#N</SMARTS>
|
125
|
+
</substructure>
|
126
|
+
<substructure name="I11 benzylic quaternary nitrogen">
|
127
|
+
<SMARTS>cC[N+]</SMARTS>
|
128
|
+
</substructure>
|
129
|
+
<substructure name="I12 thioesters">
|
130
|
+
<SMARTS>C[O,S;R0][C;R0](=S)</SMARTS>
|
131
|
+
</substructure>
|
132
|
+
<substructure name="I13 cyanamides">
|
133
|
+
<SMARTS>N[CH2]C#N</SMARTS>
|
134
|
+
</substructure>
|
135
|
+
<substructure name="I14 four membered lactones">
|
136
|
+
<SMARTS>C1(=O)OCC1</SMARTS>
|
137
|
+
</substructure>
|
138
|
+
<substructure name="I15 di and triphosphates">
|
139
|
+
<SMARTS>P(=O)([OH])OP(=O)[OH]</SMARTS>
|
140
|
+
</substructure>
|
141
|
+
<substructure name="I16 betalactams">
|
142
|
+
<SMARTS>N1CCC1=O</SMARTS>
|
143
|
+
</substructure>
|
144
|
+
<substructure name="NP1 quinones">
|
145
|
+
<SMARTS>O=C1[#6]~[#6]C(=O)[#6]~[#6]1</SMARTS>
|
146
|
+
</substructure>
|
147
|
+
<substructure name="NP2 polyenes">
|
148
|
+
<SMARTS>C=CC=CC=CC=C</SMARTS>
|
149
|
+
</substructure>
|
150
|
+
<substructure name="NP3 saponin derivates">
|
151
|
+
<SMARTS>O1CCCCC1OC2CCC3CCCCC3C2</SMARTS>
|
152
|
+
</substructure>
|
153
|
+
<substructure name="NP4 cytochalasin derivates">
|
154
|
+
<SMARTS>O=C1NCC2CCCCC21</SMARTS>
|
155
|
+
</substructure>
|
156
|
+
<substructure name="NP5 cycloheximide derivates">
|
157
|
+
<SMARTS>O=C1CCCC(N1)=O</SMARTS>
|
158
|
+
</substructure>
|
159
|
+
<substructure name="NP6 monensin derivates">
|
160
|
+
<SMARTS>O1CCCCC1C2CCCO2</SMARTS>
|
161
|
+
</substructure>
|
162
|
+
<substructure name="NP7 cyanidin derivates">
|
163
|
+
<SMARTS>[OH]c1cc([OH])cc2=[O+]C(=C([OH])Cc21)c3cc([OH])c([OH])cc3</SMARTS>
|
164
|
+
</substructure>
|
165
|
+
<substructure name="NP8 squalestatin derivates">
|
166
|
+
<SMARTS>C12OCCC(O1)CC2</SMARTS>
|
167
|
+
</substructure>
|
168
|
+
</Hann1999>
|
@@ -0,0 +1,102 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<Hann1999>
|
3
|
+
<substructure name="A1">
|
4
|
+
<SMARTS>[OH1][P,C,S](=O)</SMARTS>
|
5
|
+
</substructure>
|
6
|
+
<substructure name="A2">
|
7
|
+
<SMARTS>[NH1]([P,S]=O)([P,S]=O)</SMARTS>
|
8
|
+
</substructure>
|
9
|
+
<substructure name="A3">
|
10
|
+
<SMARTS>[nH]1cnoc1=O</SMARTS>
|
11
|
+
</substructure>
|
12
|
+
<substructure name="A4">
|
13
|
+
<SMARTS>[OH1]C1=NC=NO1</SMARTS>
|
14
|
+
</substructure>
|
15
|
+
<substructure name="A5">
|
16
|
+
<SMARTS>[NH1]1C=NOS1=O</SMARTS>
|
17
|
+
</substructure>
|
18
|
+
<substructure name="A6">
|
19
|
+
<SMARTS>[OH1]C1=NC(=O)CC1=O</SMARTS>
|
20
|
+
</substructure>
|
21
|
+
<substructure name="A7">
|
22
|
+
<SMARTS>[OH1]C1NC(=O)C(=O)C1</SMARTS>
|
23
|
+
</substructure>
|
24
|
+
<substructure name="A8">
|
25
|
+
<SMARTS>[nH1]1ncoc1=O</SMARTS>
|
26
|
+
</substructure>
|
27
|
+
<substructure name="A9">
|
28
|
+
<SMARTS>[OH1]C1=NN=CO1</SMARTS>
|
29
|
+
</substructure>
|
30
|
+
<substructure name="A10">
|
31
|
+
<SMARTS>[nH1]1[nH]cnc1=O</SMARTS>
|
32
|
+
</substructure>
|
33
|
+
<substructure name="A11">
|
34
|
+
<SMARTS>[OH1]C1=N[NH1]C=N1</SMARTS>
|
35
|
+
</substructure>
|
36
|
+
<substructure name="A12">
|
37
|
+
<SMARTS>[OH1]C1=NOC=C1</SMARTS>
|
38
|
+
</substructure>
|
39
|
+
<substructure name="A13">
|
40
|
+
<SMARTS>[nH1]1occc1=O</SMARTS>
|
41
|
+
</substructure>
|
42
|
+
<substructure name="A14">
|
43
|
+
<SMARTS>[OH1]c1oncc1</SMARTS>
|
44
|
+
</substructure>
|
45
|
+
<substructure name="A15">
|
46
|
+
<SMARTS>[nH1]1ccc(=O)o1</SMARTS>
|
47
|
+
</substructure>
|
48
|
+
<substructure name="A16 tetrazole">
|
49
|
+
<SMARTS>[nH1]nnn</SMARTS>
|
50
|
+
</substructure>
|
51
|
+
<substructure name="A17">
|
52
|
+
<SMARTS>[nH1](n)nn</SMARTS>
|
53
|
+
</substructure>
|
54
|
+
<substructure name="A18">
|
55
|
+
<SMARTS>[OH1]C1=NC(=O)NO1</SMARTS>
|
56
|
+
</substructure>
|
57
|
+
<substructure name="A19">
|
58
|
+
<SMARTS>[OH1]C1=NC(=O)ON1</SMARTS>
|
59
|
+
</substructure>
|
60
|
+
<substructure name="A20">
|
61
|
+
<SMARTS>[nH1]1cnnc1C(F)(F)F</SMARTS>
|
62
|
+
</substructure>
|
63
|
+
<substructure name="A21">
|
64
|
+
<SMARTS>[nH1]1cnc(n1)C(F)(F)F</SMARTS>
|
65
|
+
</substructure>
|
66
|
+
<substructure name="A22">
|
67
|
+
<SMARTS>[nH1]1C(=O)CC(=O)O1</SMARTS>
|
68
|
+
</substructure>
|
69
|
+
<substructure name="A23">
|
70
|
+
<SMARTS>[OH1]C1=CC(=O)NO1</SMARTS>
|
71
|
+
</substructure>
|
72
|
+
<substructure name="A24">
|
73
|
+
<SMARTS>[OH1]C1=CC(=O)ON1</SMARTS>
|
74
|
+
</substructure>
|
75
|
+
<substructure name="A25 benzosulphimide">
|
76
|
+
<SMARTS>[NH1]1C(=O)c2ccccc2S1(=O)=O</SMARTS>
|
77
|
+
</substructure>
|
78
|
+
<substructure name="A26">
|
79
|
+
<SMARTS>[OH1]C1=NS(=O)(=O)c2ccccc21</SMARTS>
|
80
|
+
</substructure>
|
81
|
+
<substructure name="A27">
|
82
|
+
<SMARTS>[OH1]C1=NC(=O)c2cccc21</SMARTS>
|
83
|
+
</substructure>
|
84
|
+
<substructure name="A28">
|
85
|
+
<SMARTS>[OH1]C1=COC=CC1=O</SMARTS>
|
86
|
+
</substructure>
|
87
|
+
<substructure name="A29">
|
88
|
+
<SMARTS>[OH1]C1=NSN=C1</SMARTS>
|
89
|
+
</substructure>
|
90
|
+
<substructure name="A30 hydroxamic acid">
|
91
|
+
<SMARTS>[OH1]NC(=O)</SMARTS>
|
92
|
+
</substructure>
|
93
|
+
<substructure name="A31 trifluoromethyl sulphonamide">
|
94
|
+
<SMARTS>[NH]S(=O)(=O)C(F)(F)F</SMARTS>
|
95
|
+
</substructure>
|
96
|
+
<substructure name="A32 aryl sulphonamide">
|
97
|
+
<SMARTS>[NH](c)S(=O)=O</SMARTS>
|
98
|
+
</substructure>
|
99
|
+
<substructure name="A33 phenol">
|
100
|
+
<SMARTS>[OH1]c1c[c,n]ccc1</SMARTS>
|
101
|
+
</substructure>
|
102
|
+
</Hann1999>
|
@@ -0,0 +1,6 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<Hann1999>
|
3
|
+
<substructure name="Base">
|
4
|
+
<SMARTS>[$([NH2][CX4]),$([NH]([CX4])[CX4]),$([NX3]([CX4])([CX4])[CX4]),$([$([C,c](=N)N);!$([C,c](=N)N[C,S](=O))]),$([nH0;!$(n-C);!$(n(:c)(:c):a)]1ccccc1),$([$([NH2]!:c),$([NH1]([CX4])!:c),$([NH0]([CX4])([CX4])!:c)]);!$([N,n;+1])]</SMARTS>
|
5
|
+
</substructure>
|
6
|
+
</Hann1999>
|
@@ -0,0 +1,6 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<Hann1999>
|
3
|
+
<substructure name="Electrophile">
|
4
|
+
<SMARTS>[$([C;H1](=[O,S])[C,c]),$([C,P;H1](=[O,S])[O,S]),$([C](=O)([C,c,O,S])[C,c,O,S]),$(C(=[O,S])(N)Oc),$(C1(=O)NS(=O)(=O)[C,c]=,:[C,c]1),$([$(P(=O)[O,S]);!$(P[OH1])]),$([$(c(=O)(~c)~c);!$([$(c1(=O)ccn([C,c])cc1),$(c1(=O)n([C,c])cccc1)])]),$(C(=O)-N-C=O);!$(C(=O)[OH1]);!$(C(=O)[SH1])]</SMARTS>
|
5
|
+
</substructure>
|
6
|
+
</Hann1999>
|
@@ -0,0 +1,6 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<Hann1999>
|
3
|
+
<substructure name="Nucleophile">
|
4
|
+
<SMARTS>[$([NH2][CX4]),$([NH]([CX4])[CX4]),$([OH1][C,c,N;!$(C=O)]),$([$([NH2]!:c),$([NH1]([CX4])!:c),$([NH0]([CX4])([CX4])!:c)]);!$(N=[O,C,N,S]);!$(N-[C,c,N]=[C,c,N,n,O,S]);!$([OH1]C=C);!$([OH1]NC=[O,S])]</SMARTS>
|
5
|
+
</substructure>
|
6
|
+
</Hann1999>
|
@@ -0,0 +1,83 @@
|
|
1
|
+
from __future__ import print_function
|
2
|
+
from xml.etree.ElementTree import Element, SubElement, tostring
|
3
|
+
from xml.dom import minidom
|
4
|
+
from rdkit import Chem
|
5
|
+
import sys
|
6
|
+
|
7
|
+
# Tag used for every filter entry.  The XML files shipped next to this script
# (Hann1999.xml, Hann1999Acid.xml) contain <substructure> elements, so the
# generator emits that tag instead of the stale 'group' tag.
ENTRY_TAG = 'substructure'

# (input text file, output XML file) pairs.  The previous version of this
# script contained a third, byte-for-byte copy of the "acid" conversion that
# simply rewrote Hann1999Acid.xml a second time; it has been dropped.
# NOTE(review): Hann1999Base.xml, Hann1999ElPh.xml and Hann1999NuPh.xml are
# not produced by this script -- confirm how they are generated.
CONVERSIONS = [
    ("hann1999filter.txt", "Hann1999.xml"),
    ("hann1999acid.txt", "Hann1999Acid.xml"),
]


def parse_entry(line):
    """Split one input line into a ``(name, smarts)`` pair.

    The last whitespace-separated token is the SMARTS pattern; everything
    before it, re-joined with single spaces, is the entry name.

    Returns ``None`` for lines that start with ``#`` and for lines with fewer
    than two tokens (blank lines, or a name without a pattern).
    """
    if line.startswith("#"):
        return None
    tokens = line.split()
    if len(tokens) < 2:
        return None
    return " ".join(tokens[:-1]), tokens[-1]


def is_valid_smarts(smarts):
    """Return True when RDKit can parse *smarts*.

    ``Chem.MolFromSmarts`` reports a parse failure by returning ``None``
    rather than by raising, so the result has to be checked explicitly; an
    exception is treated as a failure as well.
    """
    try:
        return Chem.MolFromSmarts(smarts) is not None
    except Exception:
        return False


def build_tree(lines, is_valid=is_valid_smarts):
    """Build the ``<Hann1999>`` element tree from an iterable of text lines.

    lines    -- iterable of raw input lines (an open file works).
    is_valid -- callable taking a SMARTS string and returning a truth value;
                entries it rejects are reported on stdout and skipped.

    Returns the root ``Element``.
    """
    root = Element('Hann1999')
    for line in lines:
        parsed = parse_entry(line)
        if parsed is None:
            continue
        name, smarts = parsed
        if not is_valid(smarts):
            print("NOT VALID:", line.rstrip())
            continue
        entry = SubElement(root, ENTRY_TAG)
        entry.set('name', name)
        SubElement(entry, 'SMARTS').text = smarts
    return root


def convert(source, target):
    """Read filter definitions from *source* and write them to *target* as XML."""
    with open(source, "r") as f:
        root = build_tree(f)
    # Serialize before opening the output so that a failure while reading or
    # building the tree does not leave a truncated XML file behind.
    pretty = minidom.parseString(tostring(root, 'utf-8')).toprettyxml(indent=" ")
    with open(target, "w") as g:
        g.write(pretty)


if __name__ == "__main__":
    for source, target in CONVERSIONS:
        convert(source, target)
|
@@ -0,0 +1,114 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<Kazius2005>
|
3
|
+
<substructure name="(1) Aromatic nitro">
|
4
|
+
<SMARTS>a[$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8]</SMARTS>
|
5
|
+
</substructure>
|
6
|
+
<substructure name="(2) Aromatic amine">
|
7
|
+
<SMARTS>a[NX3;H2]</SMARTS>
|
8
|
+
</substructure>
|
9
|
+
<substructure name="(3) 3-membered heterocycle">
|
10
|
+
<SMARTS>[N,O,S]1[C][C]1</SMARTS>
|
11
|
+
</substructure>
|
12
|
+
<substructure name="(4) unsubstituted heteroatom bonded heteroatom">
|
13
|
+
<SMARTS>[NX2;H2,OX1;H1][N,O]</SMARTS>
|
14
|
+
</substructure>
|
15
|
+
<substructure name="(5) Specific aromatic amine">
|
16
|
+
<SMARTS>a[NH2]</SMARTS>
|
17
|
+
</substructure>
|
18
|
+
<substructure name="(6) aromatic nitroso">
|
19
|
+
<SMARTS>a[N;X2]=O</SMARTS>
|
20
|
+
</substructure>
|
21
|
+
<substructure name="(7) alkyl nitrite">
|
22
|
+
<SMARTS>CO[N;X2]=O</SMARTS>
|
23
|
+
</substructure>
|
24
|
+
<substructure name="(8) nitrosamine">
|
25
|
+
<SMARTS>N[N;X2]=O</SMARTS>
|
26
|
+
</substructure>
|
27
|
+
<substructure name="(9) epoxide">
|
28
|
+
<SMARTS>O1[c,C]-[c,C]1</SMARTS>
|
29
|
+
</substructure>
|
30
|
+
<substructure name="(10) aziridine">
|
31
|
+
<SMARTS>C1NC1</SMARTS>
|
32
|
+
</substructure>
|
33
|
+
<substructure name="(11) azide">
|
34
|
+
<SMARTS>N=[N+]=[N-]</SMARTS>
|
35
|
+
</substructure>
|
36
|
+
<substructure name="(12) diazo">
|
37
|
+
<SMARTS>C=[N+]=[N-]</SMARTS>
|
38
|
+
</substructure>
|
39
|
+
<substructure name="(13) triazene">
|
40
|
+
<SMARTS>N=N-N</SMARTS>
|
41
|
+
</substructure>
|
42
|
+
<substructure name="(14) unsubstituted heteroatom-bonded heteroatom">
|
43
|
+
<SMARTS>[OH,NH2][N,O]</SMARTS>
|
44
|
+
</substructure>
|
45
|
+
<substructure name="(15) aromatic hydroxylamine">
|
46
|
+
<SMARTS>[OH]Na</SMARTS>
|
47
|
+
</substructure>
|
48
|
+
<substructure name="(16) aliphatic halide">
|
49
|
+
<SMARTS>[Cl,Br,I]C</SMARTS>
|
50
|
+
</substructure>
|
51
|
+
<substructure name="(17) carboxylic acid halide">
|
52
|
+
<SMARTS>[Cl,Br,I]C=O</SMARTS>
|
53
|
+
</substructure>
|
54
|
+
<substructure name="(18) nitrogen or sulphur mustard">
|
55
|
+
<SMARTS>[N,S]!@[C;X4]!@[CH2][Cl,Br,I]</SMARTS>
|
56
|
+
</substructure>
|
57
|
+
<substructure name="(19) bay-region in Polycyclic Aromatic Hydrocarbons">
|
58
|
+
<SMARTS>[cH]1[cH]ccc2c1c3c(cc2)cc[cH][cH]3</SMARTS>
|
59
|
+
</substructure>
|
60
|
+
<substructure name="(20) K-region in Polycyclic Aromatic Hydrocarbons">
|
61
|
+
<SMARTS>[cH]1cccc2c1[cH][cH]c3c2ccc[cH]3</SMARTS>
|
62
|
+
</substructure>
|
63
|
+
<substructure name="(21) sulphonate-bonded carbon (alkyl alkane sulphonate or dialkyl sulphate)">
|
64
|
+
<SMARTS>[$([C,c]OS(=O)(=O)O!@[c,C]),$([c,C]S(=O)(=O)O!@[c,C])]</SMARTS>
|
65
|
+
</substructure>
|
66
|
+
<substructure name="(22) aliphatic N-nitro">
|
67
|
+
<SMARTS>O=N(~O)N</SMARTS>
|
68
|
+
</substructure>
|
69
|
+
<substructure name="(23) aB unsaturated aldehyde (including a-carbonyl aldehyde)">
|
70
|
+
<SMARTS>[$(O=[CH]C=C),$(O=[CH]C=O)]</SMARTS>
|
71
|
+
</substructure>
|
72
|
+
<substructure name="(24) diazonium">
|
73
|
+
<SMARTS>[N;v4]#N</SMARTS>
|
74
|
+
</substructure>
|
75
|
+
<substructure name="(25) beta-propiolactone">
|
76
|
+
<SMARTS>O=C1CCO1</SMARTS>
|
77
|
+
</substructure>
|
78
|
+
<substructure name="(26) ab unsaturated alkoxy substructure">
|
79
|
+
<SMARTS>[CH]=[CH]O</SMARTS>
|
80
|
+
</substructure>
|
81
|
+
<substructure name="(27) 1-aryl-2-monoalkyl hydrazine">
|
82
|
+
<SMARTS>[NH;!R][NH;R]a</SMARTS>
|
83
|
+
</substructure>
|
84
|
+
<substructure name="(28) ester derivative of aromatic hydroxylamine (including original specific toxicophore)">
|
85
|
+
<SMARTS>aN([$([OH]),$(O*=O)])[$([#1]),$(C(=O)[CH3]),$([CH3]),$([OH]),$(O*=O)]</SMARTS>
|
86
|
+
</substructure>
|
87
|
+
<substructure name="(29) polycyclic planar system 1">
|
88
|
+
<SMARTS>a13~a~a~a~a2~a1~a(~a~a~a~3)~a~a~a~2</SMARTS>
|
89
|
+
</substructure>
|
90
|
+
<substructure name="(30) polycyclic planar system 2">
|
91
|
+
<SMARTS>a1~a~a~a2~a~1~a~a3~a(~a~2)~a~a~a~3</SMARTS>
|
92
|
+
</substructure>
|
93
|
+
<substructure name="(31) polycyclic planar system 3">
|
94
|
+
<SMARTS>a1~a~a~a2~a~1~a~a~a3~a~2~a~a~a~3</SMARTS>
|
95
|
+
</substructure>
|
96
|
+
<substructure name="(32) polycyclic planar system 4">
|
97
|
+
<SMARTS>a1~a~a~a~a2~a~1~a3~a(~a~2)~a~a~a~a~3</SMARTS>
|
98
|
+
</substructure>
|
99
|
+
<substructure name="(33) polycyclic planar system 5">
|
100
|
+
<SMARTS>a1~a~a~a~a2~a~1~a~a3~a(~a~2)~a~a~a~3</SMARTS>
|
101
|
+
</substructure>
|
102
|
+
<substructure name="(34) polycyclic planar system 6">
|
103
|
+
<SMARTS>a1~a~a~a~a2~a~1~a~a3~a(~a~2)~a~a~a~a~3</SMARTS>
|
104
|
+
</substructure>
|
105
|
+
<substructure name="(35) polycyclic planar system 7">
|
106
|
+
<SMARTS>a1~a~a~a~a2~a~1~a~a~a3~a~2~a~a~a~3</SMARTS>
|
107
|
+
</substructure>
|
108
|
+
<substructure name="(36) polycyclic planar system 8">
|
109
|
+
<SMARTS>a1~a~a~a~a2~a~1~a~a~a3~a~2~a~a~a~a~3</SMARTS>
|
110
|
+
</substructure>
|
111
|
+
<substructure name="(37) polycyclic planar system 9">
|
112
|
+
<SMARTS>a13~a~a~a~a2~a1~a(~a~a~a~3)~a~a~2</SMARTS>
|
113
|
+
</substructure>
|
114
|
+
</Kazius2005>
|
@@ -0,0 +1,66 @@
|
|
1
|
+
from xml.etree.ElementTree import Element,SubElement,Comment,tostring,parse
|
2
|
+
from xml.dom import minidom
|
3
|
+
from rdkit import Chem
|
4
|
+
|
5
|
+
"""
|
6
|
+
Jeroen Kazius, Ross McGuire, and Roberta Bursi, 2004.
|
7
|
+
Derivation and Validation of Toxicophores for Mutagenicity Prediction.
|
8
|
+
J. Med. Chem., 2005, 48 (1), pp 312–320 DOI: 10.1021/jm040835a
|
9
|
+
"""
|
10
|
+
|
11
|
+
# Tag used for every toxicophore entry.  The shipped Kazius2005.xml contains
# <substructure> elements, so the generator emits that tag instead of the
# stale 'group' tag.
ENTRY_TAG = 'substructure'

# (name, SMARTS) pairs in output order.  Entries whose SMARTS RDKit cannot
# parse are skipped and do not consume a number.
# NOTE(review): the shipped Kazius2005.xml has no "Specific aromatic nitro" or
# "aromatic methylamine" entry, which suggests those two patterns are the ones
# being rejected -- confirm and repair the SMARTS if they are wanted.
TOXICOPHORES = [
    ("Aromatic nitro", "a[$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8]"),
    ("Aromatic amine", "a[NX3;H2]"),
    ("3-membered heterocycle", "[N,O,S]1[C][C]1"),
    ("unsubstituted heteroatom bonded heteroatom", "[NX2;H2,OX1;H1][N,O]"),
    ("Specific aromatic nitro", "$(O=N(~O)a);!$(O=N(O)c[$(aS(=O)=O),$(aaS(=O)=O),$(aaaS(=O)=O),$(aC((F)F)F),$(aaC((F)F)F),$(aaaC((F)F)F)])"),
    ("Specific aromatic amine", "a[NH2]"),
    ("aromatic nitroso", "a[N;X2]=O"),
    ("alkyl nitrite", "CO[N;X2]=O"),
    ("nitrosamine", "N[N;X2]=O"),
    ("epoxide", "O1[c,C]-[c,C]1"),
    ("aziridine", "C1NC1"),
    ("azide", "N=[N+]=[N-]"),
    ("diazo", "C=[N+]=[N-]"),
    ("triazene", "N=N-N"),
    ("unsubstituted heteroatom-bonded heteroatom", "[OH,NH2][N,O]"),
    ("aromatic hydroxylamine", "[OH]Na"),
    ("aliphatic halide", "[Cl,Br,I]C"),
    ("carboxylic acid halide", "[Cl,Br,I]C=O"),
    ("nitrogen or sulphur mustard", "[N,S]!@[C;X4]!@[CH2][Cl,Br,I]"),
    ("bay-region in Polycyclic Aromatic Hydrocarbons", "[cH]1[cH]ccc2c1c3c(cc2)cc[cH][cH]3"),
    ("K-region in Polycyclic Aromatic Hydrocarbons", "[cH]1cccc2c1[cH][cH]c3c2ccc[cH]3"),
    ("sulphonate-bonded carbon (alkyl alkane sulphonate or dialkyl sulphate)", "[$([C,c]OS(=O)(=O)O!@[c,C]),$([c,C]S(=O)(=O)O!@[c,C])]"),
    ("aliphatic N-nitro", "O=N(~O)N"),
    ("aB unsaturated aldehyde (including a-carbonyl aldehyde)", "[$(O=[CH]C=C),$(O=[CH]C=O)]"),
    ("diazonium", "[N;v4]#N"),
    ("beta-propiolactone", "O=C1CCO1"),
    ("ab unsaturated alkoxy group", "[CH]=[CH]O"),
    ("1-aryl-2-monoalkyl hydrazine", "[NH;!R][NH;R]a"),
    ("aromatic methylamine", "[CH3][NH]a;![CH3][NH]a[$(a[$(C((F)F)F),$(S=O),$(C(=O)O)]),$(aa[$(C((F)F)F),$(S=O),$(C(=O)O)]),$(aaa[$(C((F)F)F),$(S=O),$(C(=O)O)])]"),
    ("ester derivative of aromatic hydroxylamine (including original specific toxicophore)", "aN([$([OH]),$(O*=O)])[$([#1]),$(C(=O)[CH3]),$([CH3]),$([OH]),$(O*=O)]"),
    ("polycyclic planar system 1", "a13~a~a~a~a2~a1~a(~a~a~a~3)~a~a~a~2"),
    ("polycyclic planar system 2", "a1~a~a~a2~a~1~a~a3~a(~a~2)~a~a~a~3"),
    ("polycyclic planar system 3", "a1~a~a~a2~a~1~a~a~a3~a~2~a~a~a~3"),
    ("polycyclic planar system 4", "a1~a~a~a~a2~a~1~a3~a(~a~2)~a~a~a~a~3"),
    ("polycyclic planar system 5", "a1~a~a~a~a2~a~1~a~a3~a(~a~2)~a~a~a~3"),
    ("polycyclic planar system 6", "a1~a~a~a~a2~a~1~a~a3~a(~a~2)~a~a~a~a~3"),
    ("polycyclic planar system 7", "a1~a~a~a~a2~a~1~a~a~a3~a~2~a~a~a~3"),
    ("polycyclic planar system 8", "a1~a~a~a~a2~a~1~a~a~a3~a~2~a~a~a~a~3"),
    ("polycyclic planar system 9", "a13~a~a~a~a2~a1~a(~a~a~a~3)~a~a~2"),
]


def is_valid_smarts(smarts):
    """Return True when RDKit can parse *smarts* (MolFromSmarts gives a non-None result)."""
    return Chem.MolFromSmarts(smarts) is not None


def build_tree(entries, is_valid=is_valid_smarts):
    """Build the ``<Kazius2005>`` element tree.

    entries  -- iterable of ``(name, smarts)`` pairs.
    is_valid -- callable taking a SMARTS string and returning a truth value;
                rejected entries are reported on stdout and skipped.

    Accepted entries are numbered consecutively from 1 and the number is
    prefixed to the name as ``"(N) name"``.  Returns the root ``Element``.
    """
    root = Element('Kazius2005')
    count = 0
    for name, smarts in entries:
        if not is_valid(smarts):
            # Previously dropped without any notice; say which entry is lost.
            print("NOT VALID: " + name)
            continue
        count += 1
        entry = SubElement(root, ENTRY_TAG)
        entry.set('name', '(' + str(count) + ') ' + name)
        node = SubElement(entry, "SMARTS")
        node.text = smarts
    return root


def write_xml(root, path):
    """Pretty-print *root* and write it to *path*."""
    pretty = minidom.parseString(tostring(root, 'utf-8')).toprettyxml(indent=" ")
    # Context manager so the file is closed even if the write fails.
    with open(path, 'wt') as g:
        g.write(pretty)


if __name__ == "__main__":
    write_xml(build_tree(TOXICOPHORES), 'Kazius2005.xml')
|
@@ -0,0 +1,24 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<ZINC_DrugLike combine='AND'>
|
3
|
+
<!--definition from zinc.docking.org -->
|
4
|
+
<!--Lipinski, J Pharmacol Toxicol Methods. 2000 Jul-Aug;44(1):235-49.-->
|
5
|
+
<descriptor name="MolWt">
|
6
|
+
<min>150</min>
|
7
|
+
<max>500</max>
|
8
|
+
</descriptor>
|
9
|
+
<descriptor name="LogP">
|
10
|
+
<max>5.0</max>
|
11
|
+
</descriptor>
|
12
|
+
<descriptor name="RotBonds">
|
13
|
+
<max>7</max>
|
14
|
+
</descriptor>
|
15
|
+
<descriptor name="TPSA">
|
16
|
+
<max>150</max>
|
17
|
+
</descriptor>
|
18
|
+
<descriptor name="LipinskiHBD">
|
19
|
+
<max>5</max>
|
20
|
+
</descriptor>
|
21
|
+
<descriptor name="LipinskiHBA">
|
22
|
+
<max>10</max>
|
23
|
+
</descriptor>
|
24
|
+
</ZINC_DrugLike>
|
@@ -0,0 +1,14 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<ZINC_Fragment combine='AND'>
|
3
|
+
<!--definition from zinc.docking.org -->
|
4
|
+
<!--Carr et al. (2005) Drug Discov Today. 10(14):987-->
|
5
|
+
<descriptor name="MolWt">
|
6
|
+
<max>250</max>
|
7
|
+
</descriptor>
|
8
|
+
<descriptor name="LogP">
|
9
|
+
<max>3.5</max>
|
10
|
+
</descriptor>
|
11
|
+
<descriptor name="RotBonds">
|
12
|
+
<max>5</max>
|
13
|
+
</descriptor>
|
14
|
+
</ZINC_Fragment>
|
@@ -0,0 +1,15 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<ZINC_LeadLike combine='AND'>
|
3
|
+
<!--definition from zinc.docking.org -->
|
4
|
+
<!--Teague et al. (1999) Angew Chem Int Ed Engl. 38(24):3743-3748.-->
|
5
|
+
<descriptor name="MolWt">
|
6
|
+
<min>250</min>
|
7
|
+
<max>350</max>
|
8
|
+
</descriptor>
|
9
|
+
<descriptor name="LogP">
|
10
|
+
<max>3.5</max>
|
11
|
+
</descriptor>
|
12
|
+
<descriptor name="RotBonds">
|
13
|
+
<max>7</max>
|
14
|
+
</descriptor>
|
15
|
+
</ZINC_LeadLike>
|