rdworks 0.25.8__py3-none-any.whl → 0.35.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdworks/__init__.py +19 -20
- rdworks/conf.py +308 -117
- rdworks/display.py +244 -83
- rdworks/mol.py +620 -489
- rdworks/mollibr.py +336 -180
- rdworks/readin.py +2 -4
- rdworks/scaffold.py +1 -1
- rdworks/std.py +64 -24
- rdworks/torsion.py +477 -0
- rdworks/units.py +7 -58
- rdworks/utils.py +141 -258
- rdworks/xtb/__init__.py +0 -0
- rdworks/xtb/wrapper.py +304 -0
- {rdworks-0.25.8.dist-info → rdworks-0.35.1.dist-info}/METADATA +6 -9
- {rdworks-0.25.8.dist-info → rdworks-0.35.1.dist-info}/RECORD +18 -15
- {rdworks-0.25.8.dist-info → rdworks-0.35.1.dist-info}/WHEEL +1 -1
- {rdworks-0.25.8.dist-info → rdworks-0.35.1.dist-info}/licenses/LICENSE +0 -0
- {rdworks-0.25.8.dist-info → rdworks-0.35.1.dist-info}/top_level.txt +0 -0
rdworks/mollibr.py
CHANGED
@@ -4,36 +4,37 @@ import pandas as pd
|
|
4
4
|
import gzip
|
5
5
|
|
6
6
|
from pathlib import Path
|
7
|
-
from
|
7
|
+
from collections.abc import Iterable
|
8
8
|
from collections import defaultdict
|
9
|
+
from typing import Self, Iterator
|
9
10
|
from concurrent.futures import ProcessPoolExecutor
|
10
11
|
from tqdm import tqdm
|
11
12
|
|
12
|
-
from rdkit import Chem, DataStructs
|
13
|
-
from rdkit.Chem import Draw
|
13
|
+
from rdkit import Chem, DataStructs, Geometry
|
14
|
+
from rdkit.Chem import Draw, AllChem, rdFMCS, rdDepictor
|
14
15
|
from rdkit.ML.Cluster import Butina
|
15
16
|
from rdkit.SimDivFilters.rdSimDivPickers import MaxMinPicker
|
17
|
+
from PIL import Image
|
16
18
|
|
17
|
-
from rdworks
|
18
|
-
from rdworks.
|
19
|
+
from rdworks import Conf, Mol
|
20
|
+
from rdworks.display import render_matrix_grid
|
19
21
|
from rdworks.xml import list_predefined_xml
|
20
22
|
|
21
23
|
|
22
24
|
class MolLibr:
|
23
25
|
def __init__(self,
|
24
|
-
molecules:
|
25
|
-
names:
|
26
|
+
molecules: Iterable | None = None,
|
27
|
+
names: Iterable | None = None,
|
26
28
|
std:bool=False,
|
27
29
|
max_workers:int=4,
|
28
|
-
chunksize:int=
|
30
|
+
chunksize:int=10,
|
29
31
|
progress:bool=False) -> None:
|
30
32
|
"""Create a rdworks.MolLibr object.
|
31
33
|
|
32
34
|
Args:
|
33
|
-
molecules (
|
35
|
+
molecules (Iterable | None, optional): a list/tuple/set of molecules
|
34
36
|
(rdworks.Mol | SMILES | rdkit.Chem.Mol). Defaults to None.
|
35
|
-
names (
|
36
|
-
Defaults to None.
|
37
|
+
names (Iterable | None, optional): a list/tuple/set of names. Defaults to None.
|
37
38
|
std (bool, optional): whether to standardize molecules. Defaults to False.
|
38
39
|
max_workers (int, optional): max workers for parallel calculation. Defaults to 4.
|
39
40
|
chunksize (int, optional): chunksize for parallel calculation. Defaults to 100.
|
@@ -51,29 +52,34 @@ class MolLibr:
|
|
51
52
|
self.threshold = None
|
52
53
|
self.clusters = None
|
53
54
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
if isinstance(
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
55
|
+
assert isinstance(molecules, Iterable) or molecules is None, "molecules must be iterable or None"
|
56
|
+
assert isinstance(names, Iterable) or names is None, "names must be iterable or None"
|
57
|
+
|
58
|
+
if isinstance(molecules, Iterable):
|
59
|
+
if isinstance(names, Iterable):
|
60
|
+
assert len(molecules) == len(names), "molecules and names must be the same counts"
|
61
|
+
|
62
|
+
if names is None:
|
63
|
+
names = [''] * len(molecules)
|
64
|
+
|
65
|
+
for molecular_input, name in zip(molecules, names):
|
66
|
+
if isinstance(molecular_input, Mol):
|
67
|
+
_mol = molecular_input
|
68
|
+
|
69
|
+
elif isinstance(molecular_input, Chem.Mol) or isinstance(molecular_input, str):
|
70
|
+
_mol = Mol(molecular_input, name=name, std=std)
|
71
|
+
|
72
|
+
elif isinstance(molecular_input, Conf):
|
73
|
+
_mol = Mol(molecular_input.rdmol,
|
74
|
+
name=molecular_input.name,
|
75
|
+
std=std).props.update(molecular_input.props)
|
76
|
+
|
77
|
+
self.libr.append(_mol)
|
78
|
+
|
79
|
+
if not any(names):
|
80
|
+
self.rename(prefix='entry')
|
76
81
|
|
82
|
+
|
77
83
|
def copy(self) -> Self:
|
78
84
|
"""Returns a copy of self.
|
79
85
|
|
@@ -106,178 +112,192 @@ class MolLibr:
|
|
106
112
|
"""Next molecule.
|
107
113
|
|
108
114
|
Returns:
|
109
|
-
Mol: next molecule
|
115
|
+
Mol: next molecule.
|
110
116
|
"""
|
111
117
|
return next(self.libr)
|
112
118
|
|
113
119
|
|
114
|
-
def __eq__(self, other:Self) -> bool:
|
120
|
+
def __eq__(self, other: Self) -> bool:
|
115
121
|
"""Operator `==`.
|
116
122
|
|
117
123
|
Args:
|
118
124
|
other (rdworks.MolLibr): other rdworks.MolLibr object.
|
119
125
|
|
120
126
|
Returns:
|
121
|
-
|
127
|
+
Bool: True if other MolLibr object is identical with self.
|
122
128
|
"""
|
123
129
|
if isinstance(other, MolLibr):
|
124
130
|
return len(frozenset(self.libr) - frozenset(other.libr)) == 0
|
125
|
-
|
126
|
-
|
131
|
+
|
132
|
+
return False
|
127
133
|
|
128
134
|
|
129
|
-
def __getitem__(self, index: int | slice) -> Mol:
|
135
|
+
def __getitem__(self, index: int | slice) -> Mol | Self:
|
130
136
|
"""Operator `[]`.
|
131
137
|
|
132
138
|
Args:
|
133
139
|
index (Union[int, slice]): index or slice of indexes.
|
134
140
|
|
135
|
-
Raises:
|
136
|
-
ValueError: if library is empty or index is out of range.
|
137
|
-
|
138
141
|
Returns:
|
139
|
-
Mol
|
142
|
+
Mol or MolLibr specified by single index or slice.
|
140
143
|
"""
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
+
assert self.count() != 0, "library is empty"
|
145
|
+
if isinstance(index, slice):
|
146
|
+
return MolLibr(self.libr[index])
|
147
|
+
else:
|
144
148
|
return self.libr[index]
|
145
|
-
except:
|
146
|
-
raise ValueError(f"index should be 0..{self.count()-1}")
|
147
149
|
|
148
150
|
|
149
|
-
def
|
150
|
-
"""
|
151
|
+
def __setitem__(self, index: int, molecule: Mol) -> Self:
|
152
|
+
"""Set item.
|
151
153
|
|
152
154
|
Args:
|
153
|
-
|
155
|
+
index (int): index
|
156
|
+
molecule (Mol): molecule to replace
|
157
|
+
|
158
|
+
Returns:
|
159
|
+
Modified self.
|
160
|
+
"""
|
161
|
+
self.libr[index] = molecule
|
162
|
+
|
163
|
+
return self
|
154
164
|
|
155
|
-
|
156
|
-
|
165
|
+
|
166
|
+
def __add__(self, other: Mol | Self) -> Self:
|
167
|
+
"""Operator `+`.
|
168
|
+
|
169
|
+
Returns a new object, leaving the original objects unchanged (conventional behavior).
|
170
|
+
|
171
|
+
Args:
|
172
|
+
other (object): other Mol or MolLibr object.
|
157
173
|
|
158
174
|
Returns:
|
159
|
-
|
175
|
+
A new MolLibr object.
|
160
176
|
"""
|
177
|
+
assert isinstance(other, Mol | MolLibr), "'+' operator expects Mol or MolLibr object"
|
178
|
+
|
179
|
+
new_object = self.copy()
|
180
|
+
|
161
181
|
if isinstance(other, Mol):
|
162
|
-
|
163
|
-
|
164
|
-
return obj
|
182
|
+
new_object.libr.append(other)
|
183
|
+
|
165
184
|
elif isinstance(other, MolLibr):
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
else:
|
170
|
-
raise TypeError("'+' operator expects rdworks.Mol or rdworks.MolLibr object")
|
185
|
+
new_object.libr.extend(other.libr)
|
186
|
+
|
187
|
+
return new_object
|
171
188
|
|
172
189
|
|
173
190
|
def __iadd__(self, other: Mol | Self) -> Self:
|
174
|
-
"""Operator `+=`.
|
191
|
+
"""Operator `+=`.
|
175
192
|
|
176
193
|
Args:
|
177
|
-
other (object): other
|
178
|
-
|
179
|
-
Raises:
|
180
|
-
TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
|
194
|
+
other (object): other Mol or MolLibr object.
|
181
195
|
|
182
196
|
Returns:
|
183
|
-
|
197
|
+
modified self.
|
184
198
|
"""
|
199
|
+
assert isinstance(other, Mol | MolLibr), "'+=' operator expects Mol or MolLibr object"
|
200
|
+
|
185
201
|
if isinstance(other, Mol):
|
186
202
|
self.libr.append(other)
|
203
|
+
|
187
204
|
elif isinstance(other, MolLibr):
|
188
205
|
self.libr.extend(other.libr)
|
189
|
-
|
190
|
-
raise TypeError("'+=' operator expects Mol or MolLibr object")
|
206
|
+
|
191
207
|
return self
|
192
208
|
|
193
209
|
|
194
210
|
def __sub__(self, other: Mol | Self) -> Self:
|
195
|
-
"""Operator `-`.
|
211
|
+
"""Operator `-`.
|
196
212
|
|
197
|
-
|
198
|
-
other (Union[Mol,Self]): other rdworks.Mol or rdworks.MolLibr object.
|
213
|
+
Returns a new object, leaving the original objects unchanged (conventional behavior).
|
199
214
|
|
200
|
-
|
201
|
-
|
215
|
+
Args:
|
216
|
+
other (Mol | MolLibr): other rdworks.Mol or rdworks.MolLibr object.
|
202
217
|
|
203
218
|
Returns:
|
204
|
-
|
219
|
+
A new MolLibr object.
|
205
220
|
"""
|
221
|
+
assert isinstance(other, Mol | MolLibr), "'-' operator expects Mol or MolLibr object"
|
222
|
+
|
206
223
|
if isinstance(other, Mol):
|
207
224
|
difference = frozenset(self.libr) - frozenset([other])
|
225
|
+
|
208
226
|
elif isinstance(other, MolLibr):
|
209
227
|
difference = frozenset(self.libr) - frozenset(other.libr)
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
return
|
228
|
+
|
229
|
+
new_object = self.copy()
|
230
|
+
new_object.libr = list(difference)
|
231
|
+
|
232
|
+
return new_object
|
215
233
|
|
216
234
|
|
217
235
|
def __isub__(self, other: Mol | Self) -> Self:
|
218
|
-
"""Operator `-=`.
|
236
|
+
"""Operator `-=`.
|
219
237
|
|
220
238
|
Args:
|
221
|
-
other (
|
222
|
-
|
223
|
-
Raises:
|
224
|
-
TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
|
239
|
+
other (Mol | MolLibr): other molecule or library.
|
225
240
|
|
226
241
|
Returns:
|
227
|
-
|
242
|
+
Modified self.
|
228
243
|
"""
|
244
|
+
assert isinstance(other, Mol | MolLibr), "'-=' operator expects Mol or MolLibr object"
|
245
|
+
|
229
246
|
if isinstance(other, Mol):
|
230
247
|
difference = frozenset(self.libr) - frozenset([other])
|
248
|
+
|
231
249
|
elif isinstance(other, MolLibr):
|
232
250
|
difference = frozenset(self.libr) - frozenset(other.libr)
|
233
|
-
|
234
|
-
raise TypeError("'-=' operator expects rdworks.Mol or rdworks.MolLibr object")
|
251
|
+
|
235
252
|
self.libr = list(difference)
|
253
|
+
|
236
254
|
return self
|
237
255
|
|
238
256
|
|
239
257
|
def __and__(self, other: Mol | Self) -> Self:
|
240
|
-
"""Operator `&`.
|
258
|
+
"""Operator `&`.
|
241
259
|
|
242
|
-
|
243
|
-
other (Union[Mol,Self]): other molecule or library.
|
260
|
+
Returns a new object, leaving the original objects unchanged (conventional behavior).
|
244
261
|
|
245
|
-
|
246
|
-
|
262
|
+
Args:
|
263
|
+
other (Mol | MolLibr): other molecule or library.
|
247
264
|
|
248
265
|
Returns:
|
249
|
-
|
266
|
+
A new MolLibr object.
|
250
267
|
"""
|
268
|
+
assert isinstance(other, Mol | MolLibr), "'&' operator expects Mol or MolLibr object"
|
269
|
+
|
251
270
|
if isinstance(other, Mol):
|
252
271
|
intersection = frozenset(self.libr) & frozenset([other])
|
272
|
+
|
253
273
|
elif isinstance(other, MolLibr):
|
254
274
|
intersection = frozenset(self.libr) & frozenset(other.libr)
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
return
|
275
|
+
|
276
|
+
new_object = self.copy()
|
277
|
+
new_object.libr = list(intersection)
|
278
|
+
|
279
|
+
return new_object
|
260
280
|
|
261
281
|
|
262
282
|
def __iand__(self, other: Mol | Self) -> Self:
|
263
|
-
"""Operator `&=`.
|
283
|
+
"""Operator `&=`.
|
264
284
|
|
265
285
|
Args:
|
266
|
-
other (
|
267
|
-
|
268
|
-
Raises:
|
269
|
-
TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
|
286
|
+
other (Mol | Self): other molecule or library.
|
270
287
|
|
271
288
|
Returns:
|
272
|
-
|
289
|
+
Modified self.
|
273
290
|
"""
|
291
|
+
assert isinstance(other, Mol | MolLibr), "'&=' operator expects Mol or MolLibr object"
|
292
|
+
|
274
293
|
if isinstance(other, Mol):
|
275
294
|
intersection = frozenset(self.libr) & frozenset([other])
|
295
|
+
|
276
296
|
elif isinstance(other, MolLibr):
|
277
297
|
intersection = frozenset(self.libr) & frozenset(other.libr)
|
278
|
-
|
279
|
-
raise TypeError("'&=' operator expects rdworks.Mol or rdworks.MolLibr object")
|
298
|
+
|
280
299
|
self.libr = list(intersection)
|
300
|
+
|
281
301
|
return self
|
282
302
|
|
283
303
|
|
@@ -332,9 +352,9 @@ class MolLibr:
|
|
332
352
|
"""Change settings for parallel computing.
|
333
353
|
|
334
354
|
Args:
|
335
|
-
max_workers (
|
336
|
-
chunksize (
|
337
|
-
progress (
|
355
|
+
max_workers (int, optional): max number of workers. Defaults to 4.
|
356
|
+
chunksize (int, optional): chunksize of splitted workload. Defaults to 10.
|
357
|
+
progress (bool, optional): whether to show progress bar. Defaults to False.
|
338
358
|
|
339
359
|
Returns:
|
340
360
|
Self: rdworks.MolLibr object.
|
@@ -342,10 +362,11 @@ class MolLibr:
|
|
342
362
|
self.max_workers = kwargs.get('max_workers', self.max_workers)
|
343
363
|
self.chunksize = kwargs.get('chunksize', self.chunksize)
|
344
364
|
self.progress = kwargs.get('progress', self.progress)
|
365
|
+
|
345
366
|
return self
|
346
367
|
|
347
368
|
|
348
|
-
def rename(self, prefix:
|
369
|
+
def rename(self, prefix: str | None = None, sep: str='.', start: int=1) -> Self:
|
349
370
|
"""Rename molecules with serial numbers in-place and their conformers.
|
350
371
|
|
351
372
|
Molecules will be named by a format, `{prefix}{sep}{serial_number}` and
|
@@ -391,10 +412,11 @@ class MolLibr:
|
|
391
412
|
# rename conformers
|
392
413
|
for mol in self.libr:
|
393
414
|
mol.rename()
|
415
|
+
|
394
416
|
return self
|
395
417
|
|
396
418
|
|
397
|
-
def overlap(self, other:Self) -> Self:
|
419
|
+
def overlap(self, other: Self) -> Self:
|
398
420
|
"""Returns a common subset with `other` library.
|
399
421
|
|
400
422
|
Args:
|
@@ -406,7 +428,7 @@ class MolLibr:
|
|
406
428
|
return self.__and__(other)
|
407
429
|
|
408
430
|
|
409
|
-
def similar(self, query:Mol, threshold:float=0.2, **kwargs) -> Self:
|
431
|
+
def similar(self, query: Mol, threshold: float = 0.2, **kwargs) -> Self:
|
410
432
|
"""Returns a copy of subset that are similar to `query`.
|
411
433
|
|
412
434
|
Args:
|
@@ -419,7 +441,8 @@ class MolLibr:
|
|
419
441
|
Returns:
|
420
442
|
Self: a copy of self.
|
421
443
|
"""
|
422
|
-
obj = copy
|
444
|
+
obj = self.copy().compute(**kwargs)
|
445
|
+
|
423
446
|
if isinstance(query, Mol):
|
424
447
|
largs = [(query, threshold),] * obj.count()
|
425
448
|
else:
|
@@ -432,6 +455,7 @@ class MolLibr:
|
|
432
455
|
else:
|
433
456
|
mask = list(executor.map(MolLibr._mask_similar, obj.libr, largs, chunksize=obj.chunksize))
|
434
457
|
obj.libr = list(itertools.compress(obj.libr, mask))
|
458
|
+
|
435
459
|
return obj
|
436
460
|
|
437
461
|
|
@@ -445,7 +469,8 @@ class MolLibr:
|
|
445
469
|
Returns:
|
446
470
|
Self: a copy of self.
|
447
471
|
"""
|
448
|
-
obj = copy
|
472
|
+
obj = self.copy()
|
473
|
+
|
449
474
|
U = {} # unique SMILES
|
450
475
|
mask = []
|
451
476
|
for mol in obj.libr:
|
@@ -464,6 +489,7 @@ class MolLibr:
|
|
464
489
|
if len(mol.props['aka']) > 0:
|
465
490
|
print(f" {mol.name}({len(mol.props['aka'])}) - {','.join(mol.props['aka'])}")
|
466
491
|
print(f"de-duplicated to {obj.count()} molecules")
|
492
|
+
|
467
493
|
return obj
|
468
494
|
|
469
495
|
|
@@ -489,10 +515,11 @@ class MolLibr:
|
|
489
515
|
self.libr = list(
|
490
516
|
executor.map(MolLibr._map_qed, self.libr, lprops, chunksize=self.chunksize)
|
491
517
|
)
|
518
|
+
|
492
519
|
return self
|
493
520
|
|
494
521
|
|
495
|
-
def drop(self, terms:str | Path | None = None, invert:bool=False, **kwargs) -> Self:
|
522
|
+
def drop(self, terms: str | Path | None = None, invert: bool = False, **kwargs) -> Self:
|
496
523
|
"""Drops matched molecules and returns a copy of library with remaining molecules.
|
497
524
|
|
498
525
|
Args:
|
@@ -505,7 +532,9 @@ class MolLibr:
|
|
505
532
|
if not terms:
|
506
533
|
print(list_predefined_xml())
|
507
534
|
return self
|
508
|
-
|
535
|
+
|
536
|
+
obj = self.copy().compute(**kwargs)
|
537
|
+
|
509
538
|
lterms = [ terms ] * obj.count()
|
510
539
|
with ProcessPoolExecutor(max_workers=obj.max_workers) as executor:
|
511
540
|
if obj.progress:
|
@@ -519,10 +548,101 @@ class MolLibr:
|
|
519
548
|
if invert:
|
520
549
|
mask = [not b for b in mask]
|
521
550
|
obj.libr = list(itertools.compress(obj.libr, mask))
|
551
|
+
|
522
552
|
return obj
|
523
553
|
|
524
554
|
|
525
|
-
|
555
|
+
@staticmethod
|
556
|
+
def _mcs_coord_map(subject:Mol, r:Chem.Mol) -> dict:
|
557
|
+
s = subject.rdmol
|
558
|
+
lcs = rdFMCS.FindMCS([r, s])
|
559
|
+
# reference matching indices
|
560
|
+
r_indices = r.GetSubstructMatch(lcs.queryMol)
|
561
|
+
# subject matching indices
|
562
|
+
s_indices = s.GetSubstructMatch(lcs.queryMol)
|
563
|
+
# reference matching coordinates (2D)
|
564
|
+
r_xy = []
|
565
|
+
for i in r_indices:
|
566
|
+
pt = r.GetConformer().GetAtomPosition(i)
|
567
|
+
r_xy.append(Geometry.Point2D(pt.x, pt.y))
|
568
|
+
coord_map = { i : xy for i, xy in zip(s_indices, r_xy) }
|
569
|
+
|
570
|
+
return coord_map
|
571
|
+
|
572
|
+
|
573
|
+
def align_drawing(self,
|
574
|
+
ref: int = 0,
|
575
|
+
mcs: bool = True,
|
576
|
+
scaffold: str = "",
|
577
|
+
coordgen:bool = True,
|
578
|
+
**kwargs,
|
579
|
+
) -> Self:
|
580
|
+
"""Align 2D drawings by using MCS or scaffold SMILES.
|
581
|
+
|
582
|
+
Args:
|
583
|
+
ref (int, optional): index to the reference. Defaults to 0.
|
584
|
+
mcs (bool, optional): whether to use MCS(maximum common substructure). Defaults to True.
|
585
|
+
scaffold (str, optional): whether to use scaffold (SMILES). Defaults to "".
|
586
|
+
|
587
|
+
Returns:
|
588
|
+
Self: self
|
589
|
+
"""
|
590
|
+
|
591
|
+
obj = self.copy().compute(**kwargs)
|
592
|
+
|
593
|
+
if scaffold:
|
594
|
+
# scaffold (SMILES) of the reference 2D drawing
|
595
|
+
ref_2d_rdmol = Chem.MolFromSmiles(scaffold)
|
596
|
+
else:
|
597
|
+
# maximum common substructure to the reference 2D drawing
|
598
|
+
assert ref >=0 and ref < obj.count(), f"ref should be [0,{obj.count()-1}]"
|
599
|
+
ref_2d_rdmol = obj.libr[ref].rdmol
|
600
|
+
|
601
|
+
rdDepictor.SetPreferCoordGen(coordgen)
|
602
|
+
rdDepictor.Compute2DCoords(ref_2d_rdmol)
|
603
|
+
# AllChem.Compute2DCoords(ref_2d_rdmol)
|
604
|
+
|
605
|
+
with ProcessPoolExecutor(max_workers=obj.max_workers) as executor:
|
606
|
+
if obj.progress:
|
607
|
+
coord_maps = list(tqdm(
|
608
|
+
executor.map(MolLibr._mcs_coord_map,
|
609
|
+
obj.libr, # subject
|
610
|
+
itertools.repeat(ref_2d_rdmol), # infinite iterator
|
611
|
+
chunksize=obj.chunksize),
|
612
|
+
desc="align drawingp",
|
613
|
+
total=obj.count()))
|
614
|
+
else:
|
615
|
+
coord_maps = list(
|
616
|
+
executor.map(MolLibr._mcs_coord_map,
|
617
|
+
obj.libr, # subject
|
618
|
+
itertools.repeat(ref_2d_rdmol), # infinite iterator
|
619
|
+
chunksize=obj.chunksize))
|
620
|
+
|
621
|
+
for mol, coord_map in zip(obj.libr, coord_maps):
|
622
|
+
rdDepictor.Compute2DCoords(mol.rdmol, coordMap=coord_map)
|
623
|
+
# AllChem.Compute2DCoords(mol.rdmol, coordMap=coord_map)
|
624
|
+
|
625
|
+
|
626
|
+
# for idx, mol in enumerate(obj.libr):
|
627
|
+
# if mcs and idx == ref:
|
628
|
+
# continue
|
629
|
+
|
630
|
+
# # largest common substructure
|
631
|
+
# lcs = rdFMCS.FindMCS([ref_2d_rdmol, mol.rdmol])
|
632
|
+
|
633
|
+
# # matching indices
|
634
|
+
# ref_xy_coords = []
|
635
|
+
# for i in ref_2d_rdmol.GetSubstructMatch(lcs.queryMol):
|
636
|
+
# pt = ref_2d_rdmol.GetConformer().GetAtomPosition(i)
|
637
|
+
# ref_xy_coords.append(Geometry.Point2D(pt.x, pt.y))
|
638
|
+
# sub_indices = mol.rdmol.GetSubstructMatch(lcs.queryMol)
|
639
|
+
# coord_map = { i : xy for i, xy in zip(sub_indices, ref_xy_coords) }
|
640
|
+
# AllChem.Compute2DCoords(mol.rdmol, coordMap=coord_map)
|
641
|
+
|
642
|
+
return obj
|
643
|
+
|
644
|
+
|
645
|
+
def pick(self, n: int, **kwargs) -> Self:
|
526
646
|
"""Picks n diverse molecules.
|
527
647
|
|
528
648
|
Args:
|
@@ -531,7 +651,7 @@ class MolLibr:
|
|
531
651
|
Returns:
|
532
652
|
Self: a copy of self.
|
533
653
|
"""
|
534
|
-
obj = copy
|
654
|
+
obj = self.copy()
|
535
655
|
raise NotImplementedError
|
536
656
|
return obj
|
537
657
|
|
@@ -552,7 +672,11 @@ class MolLibr:
|
|
552
672
|
return len(self.libr)
|
553
673
|
|
554
674
|
|
555
|
-
def cluster(self,
|
675
|
+
def cluster(self,
|
676
|
+
threshold: float = 0.3,
|
677
|
+
ordered: bool = True,
|
678
|
+
drop_singleton: bool = True,
|
679
|
+
) -> list:
|
556
680
|
"""Clusters molecules using fingerprint.
|
557
681
|
|
558
682
|
Args:
|
@@ -592,10 +716,11 @@ class MolLibr:
|
|
592
716
|
|
593
717
|
|
594
718
|
def to_sdf(self,
|
595
|
-
path:str | Path,
|
596
|
-
confs:bool=False,
|
597
|
-
props:bool=True,
|
598
|
-
separate:bool=False
|
719
|
+
path: str | Path,
|
720
|
+
confs: bool = False,
|
721
|
+
props: bool = True,
|
722
|
+
separate: bool = False,
|
723
|
+
) -> None:
|
599
724
|
"""Writes to .sdf or .sdf.gz file.
|
600
725
|
|
601
726
|
Chem.SDWriter is supposed to write all non-private molecular properties.
|
@@ -638,7 +763,7 @@ class MolLibr:
|
|
638
763
|
f.write(mol.to_sdf(confs, props))
|
639
764
|
|
640
765
|
|
641
|
-
def to_smi(self, path:str | Path) -> None:
|
766
|
+
def to_smi(self, path: str | Path) -> None:
|
642
767
|
"""Writes to .smi file.
|
643
768
|
|
644
769
|
Args:
|
@@ -656,55 +781,82 @@ class MolLibr:
|
|
656
781
|
smi.write(f'{mol.smiles} {mol.name}\n')
|
657
782
|
|
658
783
|
|
659
|
-
def
|
660
|
-
|
784
|
+
def to_svg(self,
|
785
|
+
mols_per_row: int = 5,
|
786
|
+
width: int = 200,
|
787
|
+
height: int = 200,
|
788
|
+
atom_index: bool = False,
|
789
|
+
redraw: bool = False,
|
790
|
+
coordgen: bool = False) -> str:
|
791
|
+
"""Writes to a .svg strings for Jupyter notebook.
|
661
792
|
|
662
793
|
Args:
|
794
|
+
path (str | Path): output filename or path.
|
795
|
+
mols_per_row (int, optional): number of molecules per row. Defaults to 5.
|
663
796
|
width (int, optional): width. Defaults to 200.
|
664
797
|
height (int, optional): height. Defaults to 200.
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
Returns:
|
669
|
-
str: SVG strings for Jupyter notebook.
|
798
|
+
atom_index (bool, optional): whether to show atom index. Defaults to False.
|
799
|
+
redraw (bool, optional): whether to redraw. Defaults to False.
|
800
|
+
coordgen (bool, optional): whether to use coordgen. Defaults to False.
|
670
801
|
"""
|
671
802
|
|
672
|
-
|
673
|
-
for mol in self.libr:
|
674
|
-
for a in mol.rdmol.GetAtoms():
|
675
|
-
a.SetProp("atomNote", str(a.GetIdx()+1))
|
676
|
-
rdmols = [mol.rdmol for mol in self.libr]
|
803
|
+
rdmols = [mol.rdmol for mol in self.libr]
|
677
804
|
legends = [mol.name for mol in self.libr]
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
805
|
+
|
806
|
+
svg_string = render_matrix_grid(rdmols,
|
807
|
+
legends,
|
808
|
+
mols_per_row = mols_per_row,
|
809
|
+
width = width,
|
810
|
+
height = height,
|
811
|
+
atom_index = atom_index,
|
812
|
+
redraw = redraw,
|
813
|
+
coordgen = coordgen,
|
814
|
+
svg = True,
|
815
|
+
)
|
683
816
|
|
817
|
+
return svg_string
|
818
|
+
|
684
819
|
|
685
|
-
|
820
|
+
|
821
|
+
def to_png(self,
|
822
|
+
filename: str | Path | None = None,
|
823
|
+
mols_per_row: int = 5,
|
824
|
+
width: int = 200,
|
825
|
+
height: int = 200,
|
826
|
+
atom_index: bool = False,
|
827
|
+
redraw: bool = False,
|
828
|
+
coordgen: bool = False,
|
829
|
+
) -> Image.Image | None:
|
686
830
|
"""Writes to a .png file.
|
687
831
|
|
688
832
|
Args:
|
689
|
-
|
833
|
+
mols_per_row (int, optional): number of molecules per row. Defaults to 5.
|
690
834
|
width (int, optional): width. Defaults to 200.
|
691
835
|
height (int, optional): height. Defaults to 200.
|
692
|
-
|
693
|
-
|
836
|
+
atom_index (bool, optional): whether to show atom index. Defaults to False.
|
837
|
+
redraw (bool, optional): whether to redraw. Defaults to False.
|
838
|
+
coordgen (bool, optional): whether to use coordgen. Defaults to False.
|
694
839
|
"""
|
695
|
-
|
696
|
-
path = path.as_posix() # convert to string
|
697
|
-
if index:
|
698
|
-
for mol in self.libr:
|
699
|
-
for a in mol.rdmol.GetAtoms():
|
700
|
-
a.SetProp("atomNote", str(a.GetIdx()+1))
|
701
|
-
rdmols = [mol.rdmol for mol in self.libr]
|
840
|
+
rdmols = [mol.rdmol for mol in self.libr]
|
702
841
|
legends = [mol.name for mol in self.libr]
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
842
|
+
|
843
|
+
img = render_matrix_grid(rdmols,
|
844
|
+
legends,
|
845
|
+
mols_per_row = mols_per_row,
|
846
|
+
width = width,
|
847
|
+
height = height,
|
848
|
+
atom_index = atom_index,
|
849
|
+
redraw = redraw,
|
850
|
+
coordgen = coordgen,
|
851
|
+
svg = False,
|
852
|
+
)
|
853
|
+
|
854
|
+
if filename is None:
|
855
|
+
return img
|
856
|
+
else:
|
857
|
+
if isinstance(filename, Path):
|
858
|
+
filename = filename.as_posix()
|
859
|
+
img.save(filename)
|
708
860
|
|
709
861
|
|
710
862
|
def to_html(self) -> str:
|
@@ -721,9 +873,10 @@ class MolLibr:
|
|
721
873
|
|
722
874
|
|
723
875
|
def to_dataframe(self,
|
724
|
-
|
725
|
-
|
726
|
-
|
876
|
+
name: str = 'name',
|
877
|
+
smiles: str = 'smiles',
|
878
|
+
confs: bool = False,
|
879
|
+
) -> pd.DataFrame:
|
727
880
|
"""Returns a Pandas DataFrame.
|
728
881
|
|
729
882
|
Args:
|
@@ -770,44 +923,46 @@ class MolLibr:
|
|
770
923
|
data[k].append(mol.props[k])
|
771
924
|
else:
|
772
925
|
data[k].append(None)
|
926
|
+
|
773
927
|
return pd.DataFrame(data)
|
774
928
|
|
775
929
|
|
776
930
|
def to_csv(self,
|
777
|
-
|
778
|
-
|
779
|
-
|
931
|
+
path: str | Path,
|
932
|
+
confs: bool = False,
|
933
|
+
decimals:int = 3,
|
934
|
+
) -> None:
|
780
935
|
"""Writes to a .csv file.
|
781
936
|
|
782
937
|
Args:
|
783
938
|
path (str | Path): output filename or path.
|
784
939
|
confs (bool, optional): whether to include conformer properties. Defaults to False.
|
785
|
-
|
940
|
+
decimals (int, optional): decimal places for float numbers. Defaults to 3.
|
786
941
|
"""
|
787
942
|
df = self.to_dataframe(confs=confs)
|
788
|
-
df.to_csv(path, index=False, float_format=f'%.{
|
943
|
+
df.to_csv(path, index=False, float_format=f'%.{decimals}f')
|
789
944
|
|
790
945
|
|
791
946
|
@staticmethod
|
792
|
-
def
|
793
|
-
"""A mask function to return True if molecule is
|
947
|
+
def _mask_nnp_ready(mol: Mol, model: str) -> bool:
|
948
|
+
"""A mask function to return True if molecule is NNP ready.
|
794
949
|
|
795
950
|
Args:
|
796
951
|
mol (Mol): rdworks.Mol object.
|
797
|
-
model (str): name of
|
952
|
+
model (str): name of NNP model.
|
798
953
|
|
799
954
|
Returns:
|
800
|
-
bool: True if molecule is
|
955
|
+
bool: True if molecule is NNP ready.
|
801
956
|
"""
|
802
|
-
return mol.
|
957
|
+
return mol.nnp_ready(model)
|
803
958
|
|
804
959
|
|
805
|
-
def
|
806
|
-
"""Returns a copy of subset of library that is
|
960
|
+
def nnp_ready(self, model: str, **kwargs) -> Self:
|
961
|
+
"""Returns a copy of subset of library that is ready to given neural network potential.
|
807
962
|
|
808
963
|
Examples:
|
809
964
|
>>> libr = rdworks.MolLibr(drug_smiles, drug_names)
|
810
|
-
>>> ani2x_compatible_subset = libr.
|
965
|
+
>>> ani2x_compatible_subset = libr.nnp_ready('ANI-2x', progress=False)
|
811
966
|
|
812
967
|
Args:
|
813
968
|
model (str): name of model.
|
@@ -815,22 +970,23 @@ class MolLibr:
|
|
815
970
|
Returns:
|
816
971
|
Self: subset of library.
|
817
972
|
"""
|
818
|
-
obj = copy
|
973
|
+
obj = self.copy().compute(**kwargs)
|
819
974
|
lmodel = [model,] * self.count()
|
820
975
|
with ProcessPoolExecutor(max_workers=obj.max_workers) as executor:
|
821
976
|
if obj.progress:
|
822
977
|
mask = list(tqdm(
|
823
|
-
executor.map(self.
|
824
|
-
desc="
|
978
|
+
executor.map(self._mask_nnp_ready, obj.libr, lmodel, chunksize=obj.chunksize),
|
979
|
+
desc="NNP ready",
|
825
980
|
total=obj.count()))
|
826
981
|
else:
|
827
982
|
mask = list(
|
828
|
-
executor.map(self.
|
983
|
+
executor.map(self._mask_nnp_ready, obj.libr, lmodel, chunksize=obj.chunksize))
|
829
984
|
obj.libr = list(itertools.compress(obj.libr, mask))
|
985
|
+
|
830
986
|
return obj
|
831
987
|
|
832
988
|
|
833
|
-
def to_nnbatches(self, batchsize:int=1000) -> list:
|
989
|
+
def to_nnbatches(self, batchsize: int = 1000) -> list:
|
834
990
|
"""Split workload flexibily into a numer of batches.
|
835
991
|
|
836
992
|
- Each batch has up to `batchsize` number of atoms.
|