rdworks 0.25.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdworks/__init__.py +35 -0
- rdworks/autograph/__init__.py +4 -0
- rdworks/autograph/autograph.py +184 -0
- rdworks/autograph/centroid.py +90 -0
- rdworks/autograph/dynamictreecut.py +135 -0
- rdworks/autograph/nmrclust.py +123 -0
- rdworks/autograph/rckmeans.py +74 -0
- rdworks/bitqt/__init__.py +1 -0
- rdworks/bitqt/bitqt.py +355 -0
- rdworks/conf.py +374 -0
- rdworks/descriptor.py +36 -0
- rdworks/display.py +206 -0
- rdworks/ionized.py +170 -0
- rdworks/matchedseries.py +260 -0
- rdworks/mol.py +1522 -0
- rdworks/mollibr.py +887 -0
- rdworks/pka.py +38 -0
- rdworks/predefined/Asinex_fragment.xml +20 -0
- rdworks/predefined/Astex_RO3.xml +16 -0
- rdworks/predefined/Baell2010_PAINS/Baell2010A.xml +52 -0
- rdworks/predefined/Baell2010_PAINS/Baell2010B.xml +169 -0
- rdworks/predefined/Baell2010_PAINS/Baell2010C.xml +1231 -0
- rdworks/predefined/Baell2010_PAINS/PAINS-less-than-015-hits.xml +2048 -0
- rdworks/predefined/Baell2010_PAINS/PAINS-less-than-150-hits.xml +278 -0
- rdworks/predefined/Baell2010_PAINS/PAINS-more-than-150-hits.xml +83 -0
- rdworks/predefined/Baell2010_PAINS/makexml.py +70 -0
- rdworks/predefined/Brenk2008_Dundee/makexml.py +21 -0
- rdworks/predefined/CNS.xml +18 -0
- rdworks/predefined/ChEMBL_Walters/BMS.xml +543 -0
- rdworks/predefined/ChEMBL_Walters/Dundee.xml +318 -0
- rdworks/predefined/ChEMBL_Walters/Glaxo.xml +168 -0
- rdworks/predefined/ChEMBL_Walters/Inpharmatica.xml +276 -0
- rdworks/predefined/ChEMBL_Walters/LINT.xml +174 -0
- rdworks/predefined/ChEMBL_Walters/MLSMR.xml +351 -0
- rdworks/predefined/ChEMBL_Walters/PAINS.xml +1446 -0
- rdworks/predefined/ChEMBL_Walters/SureChEMBL.xml +501 -0
- rdworks/predefined/ChEMBL_Walters/makexml.py +40 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999.xml +168 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999Acid.xml +102 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999Base.xml +6 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999ElPh.xml +6 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999NuPh.xml +6 -0
- rdworks/predefined/Hann1999_Glaxo/makexml.py +83 -0
- rdworks/predefined/Kazius2005/Kazius2005.xml +114 -0
- rdworks/predefined/Kazius2005/makexml.py +66 -0
- rdworks/predefined/ZINC_druglike.xml +24 -0
- rdworks/predefined/ZINC_fragment.xml +14 -0
- rdworks/predefined/ZINC_leadlike.xml +15 -0
- rdworks/predefined/fragment.xml +7 -0
- rdworks/predefined/ionized/simple_smarts_pattern.csv +57 -0
- rdworks/predefined/ionized/smarts_pattern.csv +107 -0
- rdworks/predefined/misc/makexml.py +119 -0
- rdworks/predefined/misc/reactive-part-2.xml +104 -0
- rdworks/predefined/misc/reactive-part-3.xml +74 -0
- rdworks/predefined/misc/reactive.xml +321 -0
- rdworks/readin.py +312 -0
- rdworks/rgroup.py +2173 -0
- rdworks/scaffold.py +520 -0
- rdworks/std.py +143 -0
- rdworks/stereoisomers.py +127 -0
- rdworks/tautomers.py +20 -0
- rdworks/units.py +63 -0
- rdworks/utils.py +495 -0
- rdworks/xml.py +260 -0
- rdworks-0.25.7.dist-info/METADATA +37 -0
- rdworks-0.25.7.dist-info/RECORD +69 -0
- rdworks-0.25.7.dist-info/WHEEL +5 -0
- rdworks-0.25.7.dist-info/licenses/LICENSE +21 -0
- rdworks-0.25.7.dist-info/top_level.txt +1 -0
@@ -0,0 +1,321 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<reactive>
|
3
|
+
<substructure name="(1) 4-Nitrophenyl Ester">
|
4
|
+
<SMARTS>[O-][N+](=O)c1ccc(OC=O)cc1</SMARTS>
|
5
|
+
</substructure>
|
6
|
+
<substructure name="(2) Acid Chloride">
|
7
|
+
<SMARTS>C(=O)Cl</SMARTS>
|
8
|
+
</substructure>
|
9
|
+
<substructure name="(3) Acid Bromide">
|
10
|
+
<SMARTS>C(=O)Br</SMARTS>
|
11
|
+
</substructure>
|
12
|
+
<substructure name="(4) Acid Iodide">
|
13
|
+
<SMARTS>C(=O)I</SMARTS>
|
14
|
+
</substructure>
|
15
|
+
<substructure name="(5) Acid Fluoride">
|
16
|
+
<SMARTS>C(=O)F</SMARTS>
|
17
|
+
</substructure>
|
18
|
+
<substructure name="(6) Acyl Cyanide">
|
19
|
+
<SMARTS>N#CC(=O)</SMARTS>
|
20
|
+
</substructure>
|
21
|
+
<substructure name="(7) Acyl hydrazine">
|
22
|
+
<SMARTS>NNC=O</SMARTS>
|
23
|
+
</substructure>
|
24
|
+
<substructure name="(8) Anhydride">
|
25
|
+
<SMARTS>C(=O)OC(=O)</SMARTS>
|
26
|
+
</substructure>
|
27
|
+
<substructure name="(9) Allyl Bromide">
|
28
|
+
<SMARTS>BrCC=C</SMARTS>
|
29
|
+
</substructure>
|
30
|
+
<substructure name="(10) Allyl Chloride">
|
31
|
+
<SMARTS>ClCC=C</SMARTS>
|
32
|
+
</substructure>
|
33
|
+
<substructure name="(11) Allyl Fluoride">
|
34
|
+
<SMARTS>FCC=C</SMARTS>
|
35
|
+
</substructure>
|
36
|
+
<substructure name="(12) Allyl iodide">
|
37
|
+
<SMARTS>ICC=C</SMARTS>
|
38
|
+
</substructure>
|
39
|
+
<substructure name="(13) Alpha_HaloCarbonyl">
|
40
|
+
<SMARTS>[F,Cl,Br,I]CC=O</SMARTS>
|
41
|
+
</substructure>
|
42
|
+
<substructure name="(14) Beta_HaloCarbonyl">
|
43
|
+
<SMARTS>[F,Cl,Br,I]CCC=O</SMARTS>
|
44
|
+
</substructure>
|
45
|
+
<substructure name="(15) Azide">
|
46
|
+
<SMARTS>N=[N+]=[N-]</SMARTS>
|
47
|
+
</substructure>
|
48
|
+
<substructure name="(16) Aziridine">
|
49
|
+
<SMARTS>C1CN1</SMARTS>
|
50
|
+
</substructure>
|
51
|
+
<substructure name="(17) Azo">
|
52
|
+
<SMARTS>[N;X2]=[N;X2]</SMARTS>
|
53
|
+
</substructure>
|
54
|
+
<substructure name="(18) Benzyl Bromide">
|
55
|
+
<SMARTS>[H]C([H])(Br)c</SMARTS>
|
56
|
+
</substructure>
|
57
|
+
<substructure name="(19) Benzyl Chloride">
|
58
|
+
<SMARTS>[H]C([H])(Cl)c</SMARTS>
|
59
|
+
</substructure>
|
60
|
+
<substructure name="(20) Benzyl Iodide">
|
61
|
+
<SMARTS>[H]C([H])(I)c</SMARTS>
|
62
|
+
</substructure>
|
63
|
+
<substructure name="(21) Beta ammonium carbonyl">
|
64
|
+
<SMARTS>C[N+](C)(C)CCC=O</SMARTS>
|
65
|
+
</substructure>
|
66
|
+
<substructure name="(22) Carbazide">
|
67
|
+
<SMARTS>O=*N=[N+]=[N-]</SMARTS>
|
68
|
+
</substructure>
|
69
|
+
<substructure name="(23) Carbodimide">
|
70
|
+
<SMARTS>N=C=N</SMARTS>
|
71
|
+
</substructure>
|
72
|
+
<substructure name="(24) Chloramine">
|
73
|
+
<SMARTS>[N;X3](Cl)</SMARTS>
|
74
|
+
</substructure>
|
75
|
+
<substructure name="(25) Chloro Silane">
|
76
|
+
<SMARTS>Cl[Si]</SMARTS>
|
77
|
+
</substructure>
|
78
|
+
<substructure name="(26) Cyanohydrin">
|
79
|
+
<SMARTS>N#CC[OH]</SMARTS>
|
80
|
+
</substructure>
|
81
|
+
<substructure name="(27) cyanamides">
|
82
|
+
<SMARTS>N[CH2]C#N</SMARTS>
|
83
|
+
</substructure>
|
84
|
+
<substructure name="(28) Cyanate">
|
85
|
+
<SMARTS>O=C=N</SMARTS>
|
86
|
+
</substructure>
|
87
|
+
<substructure name="(29) diazo">
|
88
|
+
<SMARTS>cN=Nc</SMARTS>
|
89
|
+
</substructure>
|
90
|
+
<substructure name="(30) Diazonium">
|
91
|
+
<SMARTS>[N+]#N</SMARTS>
|
92
|
+
</substructure>
|
93
|
+
<substructure name="(31) Dichloramine">
|
94
|
+
<SMARTS>[N;X3](Cl)Cl</SMARTS>
|
95
|
+
</substructure>
|
96
|
+
<substructure name="(32) Disulphide">
|
97
|
+
<SMARTS>SS</SMARTS>
|
98
|
+
</substructure>
|
99
|
+
<substructure name="(33) Epoxide">
|
100
|
+
<SMARTS>C1CO1</SMARTS>
|
101
|
+
</substructure>
|
102
|
+
<substructure name="(34) HaloAmine">
|
103
|
+
<SMARTS>[F,Cl,Br,I]N</SMARTS>
|
104
|
+
</substructure>
|
105
|
+
<substructure name="(35) Beta_HaloAmine">
|
106
|
+
<SMARTS>[F,Cl,Br,I]CCN</SMARTS>
|
107
|
+
</substructure>
|
108
|
+
<substructure name="(36) HaloMethylEther">
|
109
|
+
<SMARTS>[F,Cl,Br,I]C[OH0;X2]</SMARTS>
|
110
|
+
</substructure>
|
111
|
+
<substructure name="(37) HaloMethylThioEther">
|
112
|
+
<SMARTS>[F,Cl,Br,I]C[SH0;X2]</SMARTS>
|
113
|
+
</substructure>
|
114
|
+
<substructure name="(38) HydroxyBenzoylTriazole">
|
115
|
+
<SMARTS>C(=O)Onnn</SMARTS>
|
116
|
+
</substructure>
|
117
|
+
<substructure name="(39) Imidoyl Chloride">
|
118
|
+
<SMARTS>ClC=N</SMARTS>
|
119
|
+
</substructure>
|
120
|
+
<substructure name="(40) Imidoyl Bromide">
|
121
|
+
<SMARTS>BrC=N</SMARTS>
|
122
|
+
</substructure>
|
123
|
+
<substructure name="(41) Iodoso">
|
124
|
+
<SMARTS>I(=O)</SMARTS>
|
125
|
+
</substructure>
|
126
|
+
<substructure name="(42) Iodoxy">
|
127
|
+
<SMARTS>O=I=O</SMARTS>
|
128
|
+
</substructure>
|
129
|
+
<substructure name="(43) Isocyanate">
|
130
|
+
<SMARTS>N=C=O</SMARTS>
|
131
|
+
</substructure>
|
132
|
+
<substructure name="(44) Isothiocyanate">
|
133
|
+
<SMARTS>N=C=S</SMARTS>
|
134
|
+
</substructure>
|
135
|
+
<substructure name="(45) isonitriles">
|
136
|
+
<SMARTS>[N+]#[C-]</SMARTS>
|
137
|
+
</substructure>
|
138
|
+
<substructure name="(46) Ketene">
|
139
|
+
<SMARTS>C=C=O</SMARTS>
|
140
|
+
</substructure>
|
141
|
+
<substructure name="(47) Lawesson's_reagents">
|
142
|
+
<SMARTS>P(=S)(S)S</SMARTS>
|
143
|
+
</substructure>
|
144
|
+
<substructure name="(48) Nitroso">
|
145
|
+
<SMARTS>[N;X2]=O</SMARTS>
|
146
|
+
</substructure>
|
147
|
+
<substructure name="(49) Oxaziridine">
|
148
|
+
<SMARTS>C1NO1</SMARTS>
|
149
|
+
</substructure>
|
150
|
+
<substructure name="(50) Pentafluorophenyl Ester">
|
151
|
+
<SMARTS>Fc1c(F)c(F)c(OC=O)c(F)c1F</SMARTS>
|
152
|
+
</substructure>
|
153
|
+
<substructure name="(51) Peroxide">
|
154
|
+
<SMARTS>OO</SMARTS>
|
155
|
+
</substructure>
|
156
|
+
<substructure name="(52) Phosphine Chloride">
|
157
|
+
<SMARTS>PCl</SMARTS>
|
158
|
+
</substructure>
|
159
|
+
<substructure name="(53) Phosphine Bromide">
|
160
|
+
<SMARTS>PBr</SMARTS>
|
161
|
+
</substructure>
|
162
|
+
<substructure name="(54) Phosphine Fluoride">
|
163
|
+
<SMARTS>PF</SMARTS>
|
164
|
+
</substructure>
|
165
|
+
<substructure name="(55) Phosphine Iodide">
|
166
|
+
<SMARTS>PI</SMARTS>
|
167
|
+
</substructure>
|
168
|
+
<substructure name="(56) Cationic Br">
|
169
|
+
<SMARTS>[Br+]</SMARTS>
|
170
|
+
</substructure>
|
171
|
+
<substructure name="(57) Cationic Cl">
|
172
|
+
<SMARTS>[Cl+]</SMARTS>
|
173
|
+
</substructure>
|
174
|
+
<substructure name="(58) Cationic I">
|
175
|
+
<SMARTS>[I+]</SMARTS>
|
176
|
+
</substructure>
|
177
|
+
<substructure name="(59) Cationic O">
|
178
|
+
<SMARTS>[O+,o+]</SMARTS>
|
179
|
+
</substructure>
|
180
|
+
<substructure name="(60) Cationic P">
|
181
|
+
<SMARTS>[P+]</SMARTS>
|
182
|
+
</substructure>
|
183
|
+
<substructure name="(61) Cationic S">
|
184
|
+
<SMARTS>[S+]</SMARTS>
|
185
|
+
</substructure>
|
186
|
+
<substructure name="(62) Sulphonyl Chloride">
|
187
|
+
<SMARTS>S(=O)(=O)[Cl]</SMARTS>
|
188
|
+
</substructure>
|
189
|
+
<substructure name="(63) Sulphonyl Bromide">
|
190
|
+
<SMARTS>S(=O)(=O)[Br]</SMARTS>
|
191
|
+
</substructure>
|
192
|
+
<substructure name="(64) Sulphonyl Fluoride">
|
193
|
+
<SMARTS>S(=O)(=O)[F]</SMARTS>
|
194
|
+
</substructure>
|
195
|
+
<substructure name="(65) Sulphonate Ester">
|
196
|
+
<SMARTS>COS(c)(=O)=O</SMARTS>
|
197
|
+
</substructure>
|
198
|
+
<substructure name="(66) Sulphonyl Cyanide">
|
199
|
+
<SMARTS>S(=O)(=O)C#N</SMARTS>
|
200
|
+
</substructure>
|
201
|
+
<substructure name="(67) Thioacyl Chloride">
|
202
|
+
<SMARTS>C(=S)Cl</SMARTS>
|
203
|
+
</substructure>
|
204
|
+
<substructure name="(68) Thioacyl Bromide">
|
205
|
+
<SMARTS>C(=S)Br</SMARTS>
|
206
|
+
</substructure>
|
207
|
+
<substructure name="(69) Thio Halides">
|
208
|
+
<SMARTS>[S][Cl,Br,F,I]</SMARTS>
|
209
|
+
</substructure>
|
210
|
+
<substructure name="(70) Thiocyanate">
|
211
|
+
<SMARTS>SC#N</SMARTS>
|
212
|
+
</substructure>
|
213
|
+
<substructure name="(71) Triflate">
|
214
|
+
<SMARTS>OS(=O)(=O)C(F)(F)F</SMARTS>
|
215
|
+
</substructure>
|
216
|
+
<substructure name="(72) Vinylous Acid Chloride">
|
217
|
+
<SMARTS>ClC=CC=O</SMARTS>
|
218
|
+
</substructure>
|
219
|
+
<substructure name="(73) sulfonyl-alide">
|
220
|
+
<SMARTS>[F,Cl,Br,I]-[SX4](=[OX1])(=[OX1])-[#6]</SMARTS>
|
221
|
+
</substructure>
|
222
|
+
<substructure name="(74) acyl-halide">
|
223
|
+
<SMARTS>[F,Cl,Br,I]-[CX3](=[OX1])-[#6]</SMARTS>
|
224
|
+
</substructure>
|
225
|
+
<substructure name="(75) alkyl-halide">
|
226
|
+
<SMARTS>[F,Cl,Br,I]-[CX4H2]-[#6]</SMARTS>
|
227
|
+
</substructure>
|
228
|
+
<substructure name="(76) halopyrimidine">
|
229
|
+
<SMARTS>[F,Cl,Br,I]-c1ncccn1</SMARTS>
|
230
|
+
</substructure>
|
231
|
+
<substructure name="(77) anhydride">
|
232
|
+
<SMARTS>[#6]-[CX3](=[OX1])-[OX2]-[CX3](=O)-[#6]</SMARTS>
|
233
|
+
</substructure>
|
234
|
+
<substructure name="(78) 1,2-dicarbonyl">
|
235
|
+
<SMARTS>[#6]-[CX3](=[OX1])-[CX3](=[OX1])-[#6]</SMARTS>
|
236
|
+
</substructure>
|
237
|
+
<substructure name="(79) perhalo-ketone">
|
238
|
+
<SMARTS>[F,Cl,Br,I]-[CX4]([F,Cl,Br,I])([F,Cl,Br,I])-[CX3](=[OX1])-[CX4H2]-[#6]</SMARTS>
|
239
|
+
</substructure>
|
240
|
+
<substructure name="(80) aliphatic ketone">
|
241
|
+
<SMARTS>[CH3]-[CX3](=[OX1])([CH2])-[#6]</SMARTS>
|
242
|
+
</substructure>
|
243
|
+
<substructure name="(81) epoxide">
|
244
|
+
<SMARTS>[#6]-[CX4]1-[OX2]-[CX4]1</SMARTS>
|
245
|
+
</substructure>
|
246
|
+
<substructure name="(82) aziridine">
|
247
|
+
<SMARTS>[#6]-[CX4]1-[NX3](-[#6])-[CX4]1-[#6]</SMARTS>
|
248
|
+
</substructure>
|
249
|
+
<substructure name="(83) aliphatic ester">
|
250
|
+
<SMARTS>[#6]-[CX4H2]-[CX3](=[OX1])-[OX2]-[#6]</SMARTS>
|
251
|
+
</substructure>
|
252
|
+
<substructure name="(84) aliphatic thioester">
|
253
|
+
<SMARTS>[#6]-[CX4H2]-[CX3](=[OX1])-[SX2]-[#6]</SMARTS>
|
254
|
+
</substructure>
|
255
|
+
<substructure name="(85) sulphonate ester">
|
256
|
+
<SMARTS>[#6]-[CX4H2]-[SX4](=[OX1])(=[OX1])-[OX2]-[#6]</SMARTS>
|
257
|
+
</substructure>
|
258
|
+
<substructure name="(86) phosphonate ester">
|
259
|
+
<SMARTS>[#6]-[CX4H2]-[PX4](=[OX1])(-O)-[OX2]-[#6]</SMARTS>
|
260
|
+
</substructure>
|
261
|
+
<substructure name="(87) imine">
|
262
|
+
<SMARTS>[#6]-[CX4H2]-[CX3](=[NX2]-[#6])-[#6]</SMARTS>
|
263
|
+
</substructure>
|
264
|
+
<substructure name="(88) aldehyde">
|
265
|
+
<SMARTS>[CX3H1](=[OX1])-[#6]</SMARTS>
|
266
|
+
</substructure>
|
267
|
+
<substructure name="(89) Michael acceptor">
|
268
|
+
<SMARTS>[*]-[C](-[#1])=[C](-[#1,C])-[CX3](=O)-[#6]</SMARTS>
|
269
|
+
</substructure>
|
270
|
+
<substructure name="(90) b-heterosubstitued carbonyl">
|
271
|
+
<SMARTS>[*]-[CX4H1](-[F,Cl,Br,I])-[CX4H2]-[CX3](=O)-[#6]</SMARTS>
|
272
|
+
</substructure>
|
273
|
+
<substructure name="(91) heteroatom-heteroatom single bonds">
|
274
|
+
<SMARTS>[#6]-[#7,#8,#16;X2]-[#7,#8,#16;X2]-[#6]</SMARTS>
|
275
|
+
</substructure>
|
276
|
+
<substructure name="(92) vinyl sulfone">
|
277
|
+
<SMARTS>[$([#16X4](=[OX1])(=[OX1])-[C](-[#1,C])=[C](-[#1,C])),$([#16X4+2]([OX1-])([OX1-])-[C](-[#1,C])=[C](-[#1,C]))]</SMARTS>
|
278
|
+
</substructure>
|
279
|
+
<substructure name="(93) alpha-Haloketones">
|
280
|
+
<SMARTS>[*]-C(=[OX1])-[NH]-C(=[CH1]-*)-C(=[OX1])-[CH2]-[F,Cl,Br,I]</SMARTS>
|
281
|
+
</substructure>
|
282
|
+
<substructure name="(94) boronic acids boronic esters">
|
283
|
+
<SMARTS>[*]-C(=[OX1])-[NH]-C(=[CH1]-*)-[BX3](-[OX2]-[*,#1])-[OX2]-[*,#1]</SMARTS>
|
284
|
+
</substructure>
|
285
|
+
<substructure name="(95) aldehydes">
|
286
|
+
<SMARTS>[*]-C(=[OX1])-[NH]-C(=[CH1]-*)-[CH1]=[OX1]</SMARTS>
|
287
|
+
</substructure>
|
288
|
+
<substructure name="(96) TFMKs">
|
289
|
+
<SMARTS>[*]-C(=[OX1])-[NH]-C(=[CH1]-*)-C(=[OX1])-C(-F)(-F)F</SMARTS>
|
290
|
+
</substructure>
|
291
|
+
<substructure name="(97) 1,2-dicarbonyls">
|
292
|
+
<SMARTS>[*]-C(=[OX1])-[NH]-C(=[CH1]-*)-C(=[OX1])-C(=[OX1])-*</SMARTS>
|
293
|
+
</substructure>
|
294
|
+
<substructure name="(98) saccharinoids">
|
295
|
+
<SMARTS>[*]-C(=O)N-C(=C-*)c1cc2c(cc1)S(N(C2=O)*)(=O)=O</SMARTS>
|
296
|
+
</substructure>
|
297
|
+
<substructure name="(99) phosphoryl-serine phosphoryl-threonine">
|
298
|
+
<SMARTS>[*]-C(=[OX1])-[NH]-C(=C-O-[P](=[OX1])(-[O-])-[O-])-C(=[OX1])-[NH]-*</SMARTS>
|
299
|
+
</substructure>
|
300
|
+
<substructure name="(100) phosphoryl-tyrosine">
|
301
|
+
<SMARTS>[*]-C(=[OX1])-[NH]-C(=C-c1ccc(-O-[PX4](=[OX1])(-[O-])-[O-])cc1)-C(=[OX1])-[NH]-*</SMARTS>
|
302
|
+
</substructure>
|
303
|
+
<substructure name="(101) phosphate esters as &quot;in vitro prodrugs&quot;">
|
304
|
+
<SMARTS>[*]-C(=[OX1])-[NH]-C(=C-c1ccc(-O-[PX4](=[OX1])(-[O-])-[OX2]-*)cc1)-C(=[OX1])-[NH]-*</SMARTS>
|
305
|
+
</substructure>
|
306
|
+
<substructure name="(102) various metal substitutes for phosphorous">
|
307
|
+
<SMARTS>[*]-C(=[OX1])-[NH]-C(=C-c1ccc(-O-[*X4-3;!#6;!#7;!#8;!#9;!#15;!#16;!#17;!#34;!#35;!#53](=[OX1])(-[O-])-[O])cc1)-C(=[OX1])-[NH]-*</SMARTS>
|
308
|
+
</substructure>
|
309
|
+
<substructure name="(103) Hydroxamates">
|
310
|
+
<SMARTS>[*]-C(=[OX1])-[NH]-C(=[CH1]-*)-C(=[OX1])-[NH]-[O-,OH]</SMARTS>
|
311
|
+
</substructure>
|
312
|
+
<substructure name="(104) thiols">
|
313
|
+
<SMARTS>[*]-C(=[OX1])-[NH]-C(=[CH1]-[SH])-C(=[OX1])-[NH]-*</SMARTS>
|
314
|
+
</substructure>
|
315
|
+
<substructure name="(105) double trouble">
|
316
|
+
<SMARTS>[*]-C(=[OX1])-[NH]-C(=[CH1]-[SH])-C(=[OX1])-[NH]-[OH]</SMARTS>
|
317
|
+
</substructure>
|
318
|
+
<substructure name="(106) oximes">
|
319
|
+
<SMARTS>[*]-C(=[OX1])-[NH]-C(=[CH1]-*)-C(=N-[OH])-*</SMARTS>
|
320
|
+
</substructure>
|
321
|
+
</reactive>
|
rdworks/readin.py
ADDED
@@ -0,0 +1,312 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
|
3
|
+
from rdkit import Chem
|
4
|
+
from rdkit.Chem import AllChem, rdmolfiles, Draw
|
5
|
+
|
6
|
+
from rdworks.mol import Mol
|
7
|
+
from rdworks.mollibr import MolLibr
|
8
|
+
from rdworks.conf import Conf
|
9
|
+
from rdworks.utils import compute, precheck_path, guess_mol_id
|
10
|
+
|
11
|
+
import pandas as pd
|
12
|
+
import re
|
13
|
+
import gzip
|
14
|
+
|
15
|
+
|
16
|
+
# Pattern used by read_sdf()/read_mae() to recognise conformer names that follow
# the rdworks naming convention "<name>.<digits>/<digits>" (e.g. xxxx.yy/zzz).
# It is applied with `.match()`, i.e. anchored at the start of the name only.
# NOTE(review): the `.` between the character class and the first digit group
# is unescaped, so it matches any character, not only a literal dot; names
# without a dot (e.g. "abc1/2") are therefore also accepted. Confirm whether
# `\.` (and a full match) was intended before tightening it.
conf_name_convention = re.compile(r'[a-zA-Z0-9-_.!@#$%^&*()+=]+.[0-9]+/[0-9]+')
|
17
|
+
|
18
|
+
|
19
|
+
def read_csv(path:str | Path, smiles:str, name:str, std:bool=False, **kwargs) -> MolLibr:
    """Returns a library of molecules reading from a .csv file.

    Every column other than `smiles` and `name` is stored in the `props`
    dictionary of the corresponding molecule.

    Args:
        path (str | Path): filename or path to a .csv file.
        smiles (str): column for SMILES.
        name (str): column for name.
        std (bool, optional): whether to standardize the input. Defaults to False.
        **kwargs: passed through unchanged to `compute()`.

    Raises:
        ValueError: if `smiles` or `name` column is not found in the csv file.

    Returns:
        MolLibr: a library of molecules.
    """
    path = precheck_path(path)
    df = pd.read_csv(path)

    # Validate with explicit checks: `assert` statements are removed when
    # Python runs with -O, which would silently skip the validation.
    if smiles not in df.columns:
        raise ValueError(f"Cannot find SMILES column (`smiles=`) {smiles}")
    if name not in df.columns:
        raise ValueError(f"Cannot find NAME column (`name=`) {name}")

    # One (SMILES, name, std) argument tuple per CSV row.
    largs = [
        (row_smiles, row_name, std)
        for row_smiles, row_name in zip(list(df[smiles]), list(df[name]))
    ]
    libr = MolLibr(compute(Mol, largs, desc='Reading CSV', **kwargs))

    # Read the remaining columns as properties.
    # `to_dict('records')` gives one {column -> value} dictionary per row.
    # Rows and molecules are paired by position.
    # NOTE(review): this assumes compute() returns one molecule per row, in
    # row order - confirm against rdworks.utils.compute.
    skipped = {smiles, name}
    for mol, row_dict in zip(libr, df.to_dict('records')):
        mol.props.update({k: v for k, v in row_dict.items() if k not in skipped})

    return libr
|
59
|
+
|
60
|
+
|
61
|
+
def merge_csv(libr: MolLibr, path:str | Path, on:str='name') -> MolLibr:
    """Returns a copy of MolLibr merged with properties from a .csv file.

    Rows of the csv file are matched to molecules by comparing the value in
    the `on` column with `mol.name`. All other columns of a matching row are
    merged into `mol.props`. Molecules without a matching row are left as is.
    If a value occurs more than once in the `on` column, the last row wins.

    Args:
        libr (MolLibr): library to be merged.
        path (str | Path): filename or path to a .csv file.
        on (str, optional): column for name. Defaults to 'name'.

    Raises:
        ValueError: if `on` column is not found in the csv file.

    Returns:
        MolLibr: a copy of library of molecules.
    """
    path = precheck_path(path)
    df = pd.read_csv(path)

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if on not in df.columns:
        raise ValueError(f"Cannot find ON column (`on=`) {on}")

    # Lookup table: value of the `on` column -> {other column -> value}.
    # `to_dict('records')` gives one {column -> value} dictionary per row.
    data = {
        row_dict[on]: {k: v for k, v in row_dict.items() if k != on}
        for row_dict in df.to_dict('records')
    }

    merged_libr = libr.copy()

    for mol in merged_libr:
        extra = data.get(mol.name)
        if extra is not None: # mol.props can be partly updated from csv
            mol.props.update(extra)

    return merged_libr
|
96
|
+
|
97
|
+
|
98
|
+
def read_dataframe(df:pd.DataFrame, smiles:str, name:str, std:bool=False) -> MolLibr:
    """Returns rdworks.MolLibr object from a pandas DataFrame.

    Only the `smiles` and `name` columns are used; other columns are not
    copied into the molecules.

    Args:
        df (pd.DataFrame): pandas.DataFrame.
        smiles (str): column for SMILES.
        name (str): column for name.
        std (bool, optional): whether to standardize the input. Defaults to False.

    Raises:
        TypeError: if `df` is not pandas DataFrame.
        ValueError: if `smiles` or `name` column is not found.

    Returns:
        MolLibr: a library of molecules.
    """
    if not isinstance(df, pd.DataFrame):
        raise TypeError("Expects a pandas.DataFrame object")

    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if smiles not in df.columns:
        raise ValueError(f"Cannot find SMILES column (`smiles=`) {smiles}")
    if name not in df.columns:
        raise ValueError(f"Cannot find NAME column (`name=`) {name}")

    return MolLibr(list(df[smiles]), list(df[name]), std=std)
|
126
|
+
|
127
|
+
|
128
|
+
|
129
|
+
def read_smi(path:str | Path, std:bool = False, **kwargs) -> MolLibr:
    """Read a SMILES file and create a molecular library.

    Each non-blank line is split on whitespace. The first field is taken as
    the SMILES and the second field as the name. Further fields are ignored.
    Files ending in `.gz` are read through gzip and decoded as UTF-8.

    Args:
        path (str | Path): path to the SMILES file.
        std (bool, optional): whether to standardize. Defaults to False.
        **kwargs: passed through unchanged to `compute()`.

    Raises:
        FileNotFoundError: when path does not exist.

    Returns:
        MolLibr: a library of molecules.
    """
    # Normalise first: `path` may be a plain string, which has no `.suffix`.
    src = Path(path)
    if not src.exists():
        raise FileNotFoundError(f"Path {path} does not exist.")

    if src.suffix == '.gz':
        with gzip.open(src, "rb") as gz:
            rows = [ line.decode('utf-8').strip().split() for line in gz ]
    else:
        with open(src, "r") as smi:
            rows = [ line.strip().split() for line in smi ]

    # Blank lines are skipped; they would otherwise yield the argument
    # tuple `(std,)`, i.e. a molecule built from the std flag alone.
    # NOTE(review): a line holding only a SMILES (no name) still produces
    # `(smiles, std)`, so `std` lands in the second positional argument of
    # Mol - confirm how such lines should be named.
    largs = [ tuple(fields[:2] + [std]) for fields in rows if fields ]
    return MolLibr(compute(Mol, largs, desc='Reading SMILES', **kwargs))
|
151
|
+
|
152
|
+
|
153
|
+
|
154
|
+
def _map_sdf(rdmol:Chem.Mol, name:str, std:bool, props:dict | None) -> Mol:
    """A map function for `read_sdf()` to return a rdworks.Mol object reading from a SDF entry.

    Args:
        rdmol (Chem.Mol): input molecule.
        name (str): name of the molecule.
        std (bool): whether to standardize the input molecule.
        props (dict | None): dictionary of molecule properties, or None when
            `read_sdf()` was asked not to read properties.

    Returns:
        Mol: rdworks.Mol object.
    """
    obj = Mol(rdmol, name, std)
    # Keep the props container created by Mol when there is nothing to
    # attach; assigning None would break later `mol.props.update(...)` calls.
    if props is not None:
        obj.props = props
    return obj
|
169
|
+
|
170
|
+
|
171
|
+
def read_sdf(path:str | Path, std:bool=False, confs:bool=False, props:bool=True, **kwargs) -> MolLibr:
    """Returns a library of molecules reading from a SDF file.

    Entries that RDKit cannot parse are skipped. Files ending in `.gz` are
    read through gzip.

    Naming (only when `props` is True): the `_Name` of each entry is used if
    every entry has one and all of them are unique. Otherwise the property
    key suggested by `guess_mol_id()` is used, and entries for which no name
    can be obtained are called `_<i>_` (1-based position in the file).
    When `props` is False, names are None and properties are empty.

    With `confs=True` consecutive entries that share the same canonical
    SMILES are collected as conformers of one molecule; `std` is not applied
    in that mode.

    Args:
        path (str | Path): filename or path to the .sdf file.
        std (bool, optional): whether to standardize the input. Defaults to False.
        confs (bool, optional): whether to read 3D conformers and keep hydrogens. Defaults to False.
        props (bool, optional): whether to read SDF properties. Defaults to True.
        **kwargs: passed through unchanged to `compute()` (2D mode only).

    Returns:
        MolLibr: a library of molecules.
    """
    path = precheck_path(path)
    remove_hs = not confs # hydrogens are kept only when reading conformers

    if path.suffix == '.gz':
        with gzip.open(path, 'rb') as gz:
            with Chem.ForwardSDMolSupplier(gz, sanitize=True, removeHs=remove_hs) as gzsdf:
                lrdmols = [ m for m in gzsdf if m is not None ]
    else:
        # str(): the supplier takes a file name
        with Chem.SDMolSupplier(str(path), sanitize=True, removeHs=remove_hs) as sdf:
            lrdmols = [ m for m in sdf if m is not None ]

    if props:
        lprops = [ m.GetPropsAsDict() for m in lrdmols ]
        lnames = [ m.GetProp('_Name') if m.HasProp('_Name') else None for m in lrdmols ]
        # Explicit check (not `assert`, which is stripped under `python -O`).
        if None in lnames or len(set(lnames)) != len(lrdmols):
            # `_Name` is missing or not unique: rebuild the whole list.
            # Resetting it here matters - appending to the old list would
            # leave the duplicated names in front of the generated ones.
            (k, _, _) = guess_mol_id(lprops)
            lnames = []
            for i, m in enumerate(lrdmols, start=1):
                name = f'_{i}_'
                if k is not None:
                    try:
                        name = m.GetProp(k)
                    except KeyError:
                        pass # this entry lacks property `k`; keep `_<i>_`
                lnames.append(name)
    else:
        # separate dict per entry so that they can be updated independently
        lprops = [ {} for _ in lrdmols ]
        lnames = [ None ] * len(lrdmols)

    if not confs:
        # reading 2D SDF
        largs = [
            (rdmol, name, std, entry_props)
            for rdmol, name, entry_props in zip(lrdmols, lnames, lprops)
        ]
        return MolLibr(compute(_map_sdf, largs, desc='Reading SDF', **kwargs))

    # reading 3D SDF (conformers)
    obj = MolLibr()
    last_smiles = None
    new_mol = None
    for rdmol, name, entry_props in zip(lrdmols, lnames, lprops):
        # rdworks name convention (e.g. xxxx.yy/zzz): the part before the
        # first '/' names the isomer. maxsplit=1 tolerates further '/'.
        if name is not None and conf_name_convention.match(name):
            isomer_name = name.split('/', 1)[0]
        else:
            isomer_name = name
        smiles = Chem.MolToSmiles(rdmol) # canonicalized SMILES
        if last_smiles is None or last_smiles != smiles:
            if new_mol:
                obj.libr.append(new_mol.rename())
            # start a new molecule
            rdmol_2d = Chem.RemoveHs(rdmol)
            AllChem.Compute2DCoords(rdmol_2d)
            new_mol = Mol(rdmol_2d, isomer_name, std=False) # atom indices remain unchanged.
        new_conf = Conf(rdmol)
        new_conf.props.update(entry_props)
        new_mol.confs.append(new_conf)
        last_smiles = smiles
    if new_mol: # handle the last molecule
        obj.libr.append(new_mol.rename())

    return obj
|
249
|
+
|
250
|
+
|
251
|
+
|
252
|
+
def read_mae(path:str | Path, std:bool=False, confs:bool=True, **kwargs) -> MolLibr:
    """Returns a library of molecules reading from a Schrodinger Maestro file.

    Entries that RDKit cannot parse are skipped. Files with the `.maegz`
    suffix are read through gzip. Entries without a `_Name` are called
    `_<i>_` (1-based position among the parsed entries).

    With `confs=True` consecutive entries that share the same canonical
    SMILES are collected as conformers of one molecule; `std` is not applied
    in that mode.

    Args:
        path (str | Path): filename or path to the .mae or .maegz file.
        std (bool, optional): whether to standardize the input. Defaults to False.
        confs (bool, optional): whether to read 3D conformers. Defaults to True.
        **kwargs: passed through unchanged to `compute()` (2D mode only).

    Returns:
        MolLibr: a library of molecules.
    """
    path = precheck_path(path)
    remove_hs = not confs # hydrogens are kept only when reading conformers

    if path.suffix == '.maegz':
        with gzip.open(path, 'rb') as gz:
            with rdmolfiles.MaeMolSupplier(gz, sanitize=True, removeHs=remove_hs) as maegz:
                lrdmols = [ m for m in maegz if m is not None ]
    else:
        # str(): the supplier takes a file name
        with rdmolfiles.MaeMolSupplier(str(path), sanitize=True, removeHs=remove_hs) as mae:
            lrdmols = [ m for m in mae if m is not None ]

    # An entry without a title would make GetProp('_Name') raise KeyError.
    lnames = [
        m.GetProp('_Name') if m.HasProp('_Name') else f'_{i}_'
        for i, m in enumerate(lrdmols, start=1)
    ]

    if not confs: # reading 2D structures
        largs = [(rdmol, name, std) for rdmol, name in zip(lrdmols, lnames)]
        return MolLibr(compute(Mol, largs, desc='Reading Mae', **kwargs))

    # reading 3D structures (conformers)
    obj = MolLibr()
    last_smiles = None
    new_mol = None
    for rdmol, name in zip(lrdmols, lnames):
        # rdworks name convention (e.g. xxxx.yy/zzz): the part before the
        # first '/' names the isomer. maxsplit=1 tolerates further '/'.
        if conf_name_convention.match(name):
            isomer_name = name.split('/', 1)[0]
        else:
            isomer_name = name
        smiles = Chem.MolToSmiles(rdmol) # canonicalized SMILES
        if last_smiles is None or last_smiles != smiles:
            if new_mol:
                obj.libr.append(new_mol.rename())
            # start a new molecule
            rdmol_2d = Chem.RemoveHs(rdmol)
            AllChem.Compute2DCoords(rdmol_2d)
            new_mol = Mol(rdmol_2d, isomer_name, std=False) # atom indices remain unchanged.

        new_mol.confs.append(Conf(rdmol))

        last_smiles = smiles
    if new_mol: # handle the last molecule
        obj.libr.append(new_mol.rename())

    return obj
|