rdworks 0.25.7__py3-none-any.whl → 0.35.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdworks/__init__.py +19 -20
- rdworks/conf.py +308 -117
- rdworks/display.py +244 -83
- rdworks/mol.py +621 -493
- rdworks/mollibr.py +336 -182
- rdworks/readin.py +2 -4
- rdworks/scaffold.py +1 -1
- rdworks/std.py +64 -24
- rdworks/torsion.py +477 -0
- rdworks/units.py +7 -58
- rdworks/utils.py +141 -258
- rdworks/xtb/__init__.py +0 -0
- rdworks/xtb/wrapper.py +304 -0
- {rdworks-0.25.7.dist-info → rdworks-0.35.1.dist-info}/METADATA +7 -10
- {rdworks-0.25.7.dist-info → rdworks-0.35.1.dist-info}/RECORD +18 -15
- {rdworks-0.25.7.dist-info → rdworks-0.35.1.dist-info}/WHEEL +1 -1
- {rdworks-0.25.7.dist-info → rdworks-0.35.1.dist-info}/licenses/LICENSE +0 -0
- {rdworks-0.25.7.dist-info → rdworks-0.35.1.dist-info}/top_level.txt +0 -0
rdworks/mollibr.py
CHANGED
@@ -4,38 +4,37 @@ import pandas as pd
|
|
4
4
|
import gzip
|
5
5
|
|
6
6
|
from pathlib import Path
|
7
|
-
from
|
7
|
+
from collections.abc import Iterable
|
8
8
|
from collections import defaultdict
|
9
|
+
from typing import Self, Iterator
|
9
10
|
from concurrent.futures import ProcessPoolExecutor
|
10
11
|
from tqdm import tqdm
|
11
12
|
|
12
|
-
from rdkit import Chem, DataStructs
|
13
|
-
from rdkit.Chem import Draw
|
13
|
+
from rdkit import Chem, DataStructs, Geometry
|
14
|
+
from rdkit.Chem import Draw, AllChem, rdFMCS, rdDepictor
|
14
15
|
from rdkit.ML.Cluster import Butina
|
15
16
|
from rdkit.SimDivFilters.rdSimDivPickers import MaxMinPicker
|
17
|
+
from PIL import Image
|
16
18
|
|
17
|
-
from rdworks
|
18
|
-
from rdworks.
|
19
|
-
|
19
|
+
from rdworks import Conf, Mol
|
20
|
+
from rdworks.display import render_matrix_grid
|
20
21
|
from rdworks.xml import list_predefined_xml
|
21
|
-
from rdworks.utils import precheck_path, guess_mol_id
|
22
22
|
|
23
23
|
|
24
24
|
class MolLibr:
|
25
25
|
def __init__(self,
|
26
|
-
molecules:
|
27
|
-
names:
|
26
|
+
molecules: Iterable | None = None,
|
27
|
+
names: Iterable | None = None,
|
28
28
|
std:bool=False,
|
29
29
|
max_workers:int=4,
|
30
|
-
chunksize:int=
|
30
|
+
chunksize:int=10,
|
31
31
|
progress:bool=False) -> None:
|
32
32
|
"""Create a rdworks.MolLibr object.
|
33
33
|
|
34
34
|
Args:
|
35
|
-
molecules (
|
35
|
+
molecules (Iterable | None, optional): a list/tuple/set of molecules
|
36
36
|
(rdworks.Mol | SMILES | rdkit.Chem.Mol). Defaults to None.
|
37
|
-
names (
|
38
|
-
Defaults to None.
|
37
|
+
names (Iterable | None, optional): a list/tuple/set of names. Defaults to None.
|
39
38
|
std (bool, optional): whether to standardize molecules. Defaults to False.
|
40
39
|
max_workers (int, optional): max workers for parallel calculation. Defaults to 4.
|
41
40
|
chunksize (int, optional): chunksize for parallel calculation. Defaults to 10.
|
@@ -53,29 +52,34 @@ class MolLibr:
|
|
53
52
|
self.threshold = None
|
54
53
|
self.clusters = None
|
55
54
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
if isinstance(
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
55
|
+
assert isinstance(molecules, Iterable) or molecules is None, "molecules must be iterable or None"
|
56
|
+
assert isinstance(names, Iterable) or names is None, "names must be iterable or None"
|
57
|
+
|
58
|
+
if isinstance(molecules, Iterable):
|
59
|
+
if isinstance(names, Iterable):
|
60
|
+
assert len(molecules) == len(names), "molecules and names must be the same counts"
|
61
|
+
|
62
|
+
if names is None:
|
63
|
+
names = [''] * len(molecules)
|
64
|
+
|
65
|
+
for molecular_input, name in zip(molecules, names):
|
66
|
+
if isinstance(molecular_input, Mol):
|
67
|
+
_mol = molecular_input
|
68
|
+
|
69
|
+
elif isinstance(molecular_input, Chem.Mol) or isinstance(molecular_input, str):
|
70
|
+
_mol = Mol(molecular_input, name=name, std=std)
|
71
|
+
|
72
|
+
elif isinstance(molecular_input, Conf):
|
73
|
+
_mol = Mol(molecular_input.rdmol,
|
74
|
+
name=molecular_input.name,
|
75
|
+
std=std).props.update(molecular_input.props)
|
76
|
+
|
77
|
+
self.libr.append(_mol)
|
78
|
+
|
79
|
+
if not any(names):
|
80
|
+
self.rename(prefix='entry')
|
78
81
|
|
82
|
+
|
79
83
|
def copy(self) -> Self:
|
80
84
|
"""Returns a copy of self.
|
81
85
|
|
@@ -108,178 +112,192 @@ class MolLibr:
|
|
108
112
|
"""Next molecule.
|
109
113
|
|
110
114
|
Returns:
|
111
|
-
Mol: next molecule
|
115
|
+
Mol: next molecule.
|
112
116
|
"""
|
113
117
|
return next(self.libr)
|
114
118
|
|
115
119
|
|
116
|
-
def __eq__(self, other:Self) -> bool:
|
120
|
+
def __eq__(self, other: Self) -> bool:
|
117
121
|
"""Operator `==`.
|
118
122
|
|
119
123
|
Args:
|
120
124
|
other (rdworks.MolLibr): other rdworks.MolLibr object.
|
121
125
|
|
122
126
|
Returns:
|
123
|
-
|
127
|
+
Bool: True if other MolLibr object is identical with self.
|
124
128
|
"""
|
125
129
|
if isinstance(other, MolLibr):
|
126
130
|
return len(frozenset(self.libr) - frozenset(other.libr)) == 0
|
127
|
-
|
128
|
-
|
131
|
+
|
132
|
+
return False
|
129
133
|
|
130
134
|
|
131
|
-
def __getitem__(self, index: int | slice) -> Mol:
|
135
|
+
def __getitem__(self, index: int | slice) -> Mol | Self:
|
132
136
|
"""Operator `[]`.
|
133
137
|
|
134
138
|
Args:
|
135
139
|
index (Union[int, slice]): index or slice of indexes.
|
136
140
|
|
137
|
-
Raises:
|
138
|
-
ValueError: if library is empty or index is out of range.
|
139
|
-
|
140
141
|
Returns:
|
141
|
-
Mol
|
142
|
+
Mol or MolLibr specified by single index or slice.
|
142
143
|
"""
|
143
|
-
|
144
|
-
|
145
|
-
|
144
|
+
assert self.count() != 0, "library is empty"
|
145
|
+
if isinstance(index, slice):
|
146
|
+
return MolLibr(self.libr[index])
|
147
|
+
else:
|
146
148
|
return self.libr[index]
|
147
|
-
except:
|
148
|
-
raise ValueError(f"index should be 0..{self.count()-1}")
|
149
149
|
|
150
150
|
|
151
|
-
def
|
152
|
-
"""
|
151
|
+
def __setitem__(self, index: int, molecule: Mol) -> Self:
|
152
|
+
"""Set item.
|
153
153
|
|
154
154
|
Args:
|
155
|
-
|
155
|
+
index (int): index
|
156
|
+
molecule (Mol): molecule to replace
|
157
|
+
|
158
|
+
Returns:
|
159
|
+
Modified self.
|
160
|
+
"""
|
161
|
+
self.libr[index] = molecule
|
156
162
|
|
157
|
-
|
158
|
-
|
163
|
+
return self
|
164
|
+
|
165
|
+
|
166
|
+
def __add__(self, other: Mol | Self) -> Self:
|
167
|
+
"""Operator `+`.
|
168
|
+
|
169
|
+
Returns a new object, leaving the original objects unchanged (conventional behavior).
|
170
|
+
|
171
|
+
Args:
|
172
|
+
other (object): other Mol or MolLibr object.
|
159
173
|
|
160
174
|
Returns:
|
161
|
-
|
175
|
+
A new MolLibr object.
|
162
176
|
"""
|
177
|
+
assert isinstance(other, Mol | MolLibr), "'+' operator expects Mol or MolLibr object"
|
178
|
+
|
179
|
+
new_object = self.copy()
|
180
|
+
|
163
181
|
if isinstance(other, Mol):
|
164
|
-
|
165
|
-
|
166
|
-
return obj
|
182
|
+
new_object.libr.append(other)
|
183
|
+
|
167
184
|
elif isinstance(other, MolLibr):
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
else:
|
172
|
-
raise TypeError("'+' operator expects rdworks.Mol or rdworks.MolLibr object")
|
185
|
+
new_object.libr.extend(other.libr)
|
186
|
+
|
187
|
+
return new_object
|
173
188
|
|
174
189
|
|
175
190
|
def __iadd__(self, other: Mol | Self) -> Self:
|
176
|
-
"""Operator `+=`.
|
191
|
+
"""Operator `+=`.
|
177
192
|
|
178
193
|
Args:
|
179
|
-
other (object): other
|
180
|
-
|
181
|
-
Raises:
|
182
|
-
TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
|
194
|
+
other (object): other Mol or MolLibr object.
|
183
195
|
|
184
196
|
Returns:
|
185
|
-
|
197
|
+
modified self.
|
186
198
|
"""
|
199
|
+
assert isinstance(other, Mol | MolLibr), "'+=' operator expects Mol or MolLibr object"
|
200
|
+
|
187
201
|
if isinstance(other, Mol):
|
188
202
|
self.libr.append(other)
|
203
|
+
|
189
204
|
elif isinstance(other, MolLibr):
|
190
205
|
self.libr.extend(other.libr)
|
191
|
-
|
192
|
-
raise TypeError("'+=' operator expects Mol or MolLibr object")
|
206
|
+
|
193
207
|
return self
|
194
208
|
|
195
209
|
|
196
210
|
def __sub__(self, other: Mol | Self) -> Self:
|
197
|
-
"""Operator `-`.
|
211
|
+
"""Operator `-`.
|
198
212
|
|
199
|
-
|
200
|
-
other (Union[Mol,Self]): other rdworks.Mol or rdworks.MolLibr object.
|
213
|
+
Returns a new object, leaving the original objects unchanged (conventional behavior).
|
201
214
|
|
202
|
-
|
203
|
-
|
215
|
+
Args:
|
216
|
+
other (Mol | MolLibr): other rdworks.Mol or rdworks.MolLibr object.
|
204
217
|
|
205
218
|
Returns:
|
206
|
-
|
219
|
+
A new MolLibr object.
|
207
220
|
"""
|
221
|
+
assert isinstance(other, Mol | MolLibr), "'-' operator expects Mol or MolLibr object"
|
222
|
+
|
208
223
|
if isinstance(other, Mol):
|
209
224
|
difference = frozenset(self.libr) - frozenset([other])
|
225
|
+
|
210
226
|
elif isinstance(other, MolLibr):
|
211
227
|
difference = frozenset(self.libr) - frozenset(other.libr)
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
return
|
228
|
+
|
229
|
+
new_object = self.copy()
|
230
|
+
new_object.libr = list(difference)
|
231
|
+
|
232
|
+
return new_object
|
217
233
|
|
218
234
|
|
219
235
|
def __isub__(self, other: Mol | Self) -> Self:
|
220
|
-
"""Operator `-=`.
|
236
|
+
"""Operator `-=`.
|
221
237
|
|
222
238
|
Args:
|
223
|
-
other (
|
224
|
-
|
225
|
-
Raises:
|
226
|
-
TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
|
239
|
+
other (Mol | MolLibr): other molecule or library.
|
227
240
|
|
228
241
|
Returns:
|
229
|
-
|
242
|
+
Modified self.
|
230
243
|
"""
|
244
|
+
assert isinstance(other, Mol | MolLibr), "'-=' operator expects Mol or MolLibr object"
|
245
|
+
|
231
246
|
if isinstance(other, Mol):
|
232
247
|
difference = frozenset(self.libr) - frozenset([other])
|
248
|
+
|
233
249
|
elif isinstance(other, MolLibr):
|
234
250
|
difference = frozenset(self.libr) - frozenset(other.libr)
|
235
|
-
|
236
|
-
raise TypeError("'-=' operator expects rdworks.Mol or rdworks.MolLibr object")
|
251
|
+
|
237
252
|
self.libr = list(difference)
|
253
|
+
|
238
254
|
return self
|
239
255
|
|
240
256
|
|
241
257
|
def __and__(self, other: Mol | Self) -> Self:
|
242
|
-
"""Operator `&`.
|
258
|
+
"""Operator `&`.
|
243
259
|
|
244
|
-
|
245
|
-
other (Union[Mol,Self]): other molecule or library.
|
260
|
+
Returns a new object, leaving the original objects unchanged (conventional behavior).
|
246
261
|
|
247
|
-
|
248
|
-
|
262
|
+
Args:
|
263
|
+
other (Mol | MolLibr): other molecule or library.
|
249
264
|
|
250
265
|
Returns:
|
251
|
-
|
266
|
+
A new MolLibr object.
|
252
267
|
"""
|
268
|
+
assert isinstance(other, Mol | MolLibr), "'&' operator expects Mol or MolLibr object"
|
269
|
+
|
253
270
|
if isinstance(other, Mol):
|
254
271
|
intersection = frozenset(self.libr) & frozenset([other])
|
272
|
+
|
255
273
|
elif isinstance(other, MolLibr):
|
256
274
|
intersection = frozenset(self.libr) & frozenset(other.libr)
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
return
|
275
|
+
|
276
|
+
new_object = self.copy()
|
277
|
+
new_object.libr = list(intersection)
|
278
|
+
|
279
|
+
return new_object
|
262
280
|
|
263
281
|
|
264
282
|
def __iand__(self, other: Mol | Self) -> Self:
|
265
|
-
"""Operator `&=`.
|
283
|
+
"""Operator `&=`.
|
266
284
|
|
267
285
|
Args:
|
268
|
-
other (
|
269
|
-
|
270
|
-
Raises:
|
271
|
-
TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
|
286
|
+
other (Mol | Self): other molecule or library.
|
272
287
|
|
273
288
|
Returns:
|
274
|
-
|
289
|
+
Modified self.
|
275
290
|
"""
|
291
|
+
assert isinstance(other, Mol | MolLibr), "'&=' operator expects Mol or MolLibr object"
|
292
|
+
|
276
293
|
if isinstance(other, Mol):
|
277
294
|
intersection = frozenset(self.libr) & frozenset([other])
|
295
|
+
|
278
296
|
elif isinstance(other, MolLibr):
|
279
297
|
intersection = frozenset(self.libr) & frozenset(other.libr)
|
280
|
-
|
281
|
-
raise TypeError("'&=' operator expects rdworks.Mol or rdworks.MolLibr object")
|
298
|
+
|
282
299
|
self.libr = list(intersection)
|
300
|
+
|
283
301
|
return self
|
284
302
|
|
285
303
|
|
@@ -334,9 +352,9 @@ class MolLibr:
|
|
334
352
|
"""Change settings for parallel computing.
|
335
353
|
|
336
354
|
Args:
|
337
|
-
max_workers (
|
338
|
-
chunksize (
|
339
|
-
progress (
|
355
|
+
max_workers (int, optional): max number of workers. Defaults to 4.
|
356
|
+
chunksize (int, optional): chunksize of split workload. Defaults to 10.
|
357
|
+
progress (bool, optional): whether to show progress bar. Defaults to False.
|
340
358
|
|
341
359
|
Returns:
|
342
360
|
Self: rdworks.MolLibr object.
|
@@ -344,10 +362,11 @@ class MolLibr:
|
|
344
362
|
self.max_workers = kwargs.get('max_workers', self.max_workers)
|
345
363
|
self.chunksize = kwargs.get('chunksize', self.chunksize)
|
346
364
|
self.progress = kwargs.get('progress', self.progress)
|
365
|
+
|
347
366
|
return self
|
348
367
|
|
349
368
|
|
350
|
-
def rename(self, prefix:
|
369
|
+
def rename(self, prefix: str | None = None, sep: str='.', start: int=1) -> Self:
|
351
370
|
"""Rename molecules with serial numbers in-place and their conformers.
|
352
371
|
|
353
372
|
Molecules will be named by a format, `{prefix}{sep}{serial_number}` and
|
@@ -393,10 +412,11 @@ class MolLibr:
|
|
393
412
|
# rename conformers
|
394
413
|
for mol in self.libr:
|
395
414
|
mol.rename()
|
415
|
+
|
396
416
|
return self
|
397
417
|
|
398
418
|
|
399
|
-
def overlap(self, other:Self) -> Self:
|
419
|
+
def overlap(self, other: Self) -> Self:
|
400
420
|
"""Returns a common subset with `other` library.
|
401
421
|
|
402
422
|
Args:
|
@@ -408,7 +428,7 @@ class MolLibr:
|
|
408
428
|
return self.__and__(other)
|
409
429
|
|
410
430
|
|
411
|
-
def similar(self, query:Mol, threshold:float=0.2, **kwargs) -> Self:
|
431
|
+
def similar(self, query: Mol, threshold: float = 0.2, **kwargs) -> Self:
|
412
432
|
"""Returns a copy of subset that are similar to `query`.
|
413
433
|
|
414
434
|
Args:
|
@@ -421,7 +441,8 @@ class MolLibr:
|
|
421
441
|
Returns:
|
422
442
|
Self: a copy of self.
|
423
443
|
"""
|
424
|
-
obj = copy
|
444
|
+
obj = self.copy().compute(**kwargs)
|
445
|
+
|
425
446
|
if isinstance(query, Mol):
|
426
447
|
largs = [(query, threshold),] * obj.count()
|
427
448
|
else:
|
@@ -434,6 +455,7 @@ class MolLibr:
|
|
434
455
|
else:
|
435
456
|
mask = list(executor.map(MolLibr._mask_similar, obj.libr, largs, chunksize=obj.chunksize))
|
436
457
|
obj.libr = list(itertools.compress(obj.libr, mask))
|
458
|
+
|
437
459
|
return obj
|
438
460
|
|
439
461
|
|
@@ -447,7 +469,8 @@ class MolLibr:
|
|
447
469
|
Returns:
|
448
470
|
Self: a copy of self.
|
449
471
|
"""
|
450
|
-
obj = copy
|
472
|
+
obj = self.copy()
|
473
|
+
|
451
474
|
U = {} # unique SMILES
|
452
475
|
mask = []
|
453
476
|
for mol in obj.libr:
|
@@ -466,6 +489,7 @@ class MolLibr:
|
|
466
489
|
if len(mol.props['aka']) > 0:
|
467
490
|
print(f" {mol.name}({len(mol.props['aka'])}) - {','.join(mol.props['aka'])}")
|
468
491
|
print(f"de-duplicated to {obj.count()} molecules")
|
492
|
+
|
469
493
|
return obj
|
470
494
|
|
471
495
|
|
@@ -491,10 +515,11 @@ class MolLibr:
|
|
491
515
|
self.libr = list(
|
492
516
|
executor.map(MolLibr._map_qed, self.libr, lprops, chunksize=self.chunksize)
|
493
517
|
)
|
518
|
+
|
494
519
|
return self
|
495
520
|
|
496
521
|
|
497
|
-
def drop(self, terms:str | Path | None = None, invert:bool=False, **kwargs) -> Self:
|
522
|
+
def drop(self, terms: str | Path | None = None, invert: bool = False, **kwargs) -> Self:
|
498
523
|
"""Drops matched molecules and returns a copy of library with remaining molecules.
|
499
524
|
|
500
525
|
Args:
|
@@ -507,7 +532,9 @@ class MolLibr:
|
|
507
532
|
if not terms:
|
508
533
|
print(list_predefined_xml())
|
509
534
|
return self
|
510
|
-
|
535
|
+
|
536
|
+
obj = self.copy().compute(**kwargs)
|
537
|
+
|
511
538
|
lterms = [ terms ] * obj.count()
|
512
539
|
with ProcessPoolExecutor(max_workers=obj.max_workers) as executor:
|
513
540
|
if obj.progress:
|
@@ -521,10 +548,101 @@ class MolLibr:
|
|
521
548
|
if invert:
|
522
549
|
mask = [not b for b in mask]
|
523
550
|
obj.libr = list(itertools.compress(obj.libr, mask))
|
551
|
+
|
524
552
|
return obj
|
525
553
|
|
526
554
|
|
527
|
-
|
555
|
+
@staticmethod
|
556
|
+
def _mcs_coord_map(subject:Mol, r:Chem.Mol) -> dict:
|
557
|
+
s = subject.rdmol
|
558
|
+
lcs = rdFMCS.FindMCS([r, s])
|
559
|
+
# reference matching indices
|
560
|
+
r_indices = r.GetSubstructMatch(lcs.queryMol)
|
561
|
+
# subject matching indices
|
562
|
+
s_indices = s.GetSubstructMatch(lcs.queryMol)
|
563
|
+
# reference matching coordinates (2D)
|
564
|
+
r_xy = []
|
565
|
+
for i in r_indices:
|
566
|
+
pt = r.GetConformer().GetAtomPosition(i)
|
567
|
+
r_xy.append(Geometry.Point2D(pt.x, pt.y))
|
568
|
+
coord_map = { i : xy for i, xy in zip(s_indices, r_xy) }
|
569
|
+
|
570
|
+
return coord_map
|
571
|
+
|
572
|
+
|
573
|
+
def align_drawing(self,
|
574
|
+
ref: int = 0,
|
575
|
+
mcs: bool = True,
|
576
|
+
scaffold: str = "",
|
577
|
+
coordgen:bool = True,
|
578
|
+
**kwargs,
|
579
|
+
) -> Self:
|
580
|
+
"""Align 2D drawings by using MCS or scaffold SMILES.
|
581
|
+
|
582
|
+
Args:
|
583
|
+
ref (int, optional): index to the reference. Defaults to 0.
|
584
|
+
mcs (bool, optional): whether to use MCS(maximum common substructure). Defaults to True.
|
585
|
+
scaffold (str, optional): scaffold SMILES to use as the reference 2D drawing; if empty, the molecule at index `ref` is used. Defaults to "".
|
586
|
+
|
587
|
+
Returns:
|
588
|
+
Self: a copy of self with aligned 2D coordinates.
|
589
|
+
"""
|
590
|
+
|
591
|
+
obj = self.copy().compute(**kwargs)
|
592
|
+
|
593
|
+
if scaffold:
|
594
|
+
# scaffold (SMILES) of the reference 2D drawing
|
595
|
+
ref_2d_rdmol = Chem.MolFromSmiles(scaffold)
|
596
|
+
else:
|
597
|
+
# maximum common substructure to the reference 2D drawing
|
598
|
+
assert ref >=0 and ref < obj.count(), f"ref should be [0,{obj.count()-1}]"
|
599
|
+
ref_2d_rdmol = obj.libr[ref].rdmol
|
600
|
+
|
601
|
+
rdDepictor.SetPreferCoordGen(coordgen)
|
602
|
+
rdDepictor.Compute2DCoords(ref_2d_rdmol)
|
603
|
+
# AllChem.Compute2DCoords(ref_2d_rdmol)
|
604
|
+
|
605
|
+
with ProcessPoolExecutor(max_workers=obj.max_workers) as executor:
|
606
|
+
if obj.progress:
|
607
|
+
coord_maps = list(tqdm(
|
608
|
+
executor.map(MolLibr._mcs_coord_map,
|
609
|
+
obj.libr, # subject
|
610
|
+
itertools.repeat(ref_2d_rdmol), # infinite iterator
|
611
|
+
chunksize=obj.chunksize),
|
612
|
+
desc="align drawingp",
|
613
|
+
total=obj.count()))
|
614
|
+
else:
|
615
|
+
coord_maps = list(
|
616
|
+
executor.map(MolLibr._mcs_coord_map,
|
617
|
+
obj.libr, # subject
|
618
|
+
itertools.repeat(ref_2d_rdmol), # infinite iterator
|
619
|
+
chunksize=obj.chunksize))
|
620
|
+
|
621
|
+
for mol, coord_map in zip(obj.libr, coord_maps):
|
622
|
+
rdDepictor.Compute2DCoords(mol.rdmol, coordMap=coord_map)
|
623
|
+
# AllChem.Compute2DCoords(mol.rdmol, coordMap=coord_map)
|
624
|
+
|
625
|
+
|
626
|
+
# for idx, mol in enumerate(obj.libr):
|
627
|
+
# if mcs and idx == ref:
|
628
|
+
# continue
|
629
|
+
|
630
|
+
# # largest common substructure
|
631
|
+
# lcs = rdFMCS.FindMCS([ref_2d_rdmol, mol.rdmol])
|
632
|
+
|
633
|
+
# # matching indices
|
634
|
+
# ref_xy_coords = []
|
635
|
+
# for i in ref_2d_rdmol.GetSubstructMatch(lcs.queryMol):
|
636
|
+
# pt = ref_2d_rdmol.GetConformer().GetAtomPosition(i)
|
637
|
+
# ref_xy_coords.append(Geometry.Point2D(pt.x, pt.y))
|
638
|
+
# sub_indices = mol.rdmol.GetSubstructMatch(lcs.queryMol)
|
639
|
+
# coord_map = { i : xy for i, xy in zip(sub_indices, ref_xy_coords) }
|
640
|
+
# AllChem.Compute2DCoords(mol.rdmol, coordMap=coord_map)
|
641
|
+
|
642
|
+
return obj
|
643
|
+
|
644
|
+
|
645
|
+
def pick(self, n: int, **kwargs) -> Self:
|
528
646
|
"""Picks n diverse molecules.
|
529
647
|
|
530
648
|
Args:
|
@@ -533,7 +651,7 @@ class MolLibr:
|
|
533
651
|
Returns:
|
534
652
|
Self: a copy of self.
|
535
653
|
"""
|
536
|
-
obj = copy
|
654
|
+
obj = self.copy()
|
537
655
|
raise NotImplementedError
|
538
656
|
return obj
|
539
657
|
|
@@ -554,7 +672,11 @@ class MolLibr:
|
|
554
672
|
return len(self.libr)
|
555
673
|
|
556
674
|
|
557
|
-
def cluster(self,
|
675
|
+
def cluster(self,
|
676
|
+
threshold: float = 0.3,
|
677
|
+
ordered: bool = True,
|
678
|
+
drop_singleton: bool = True,
|
679
|
+
) -> list:
|
558
680
|
"""Clusters molecules using fingerprint.
|
559
681
|
|
560
682
|
Args:
|
@@ -594,10 +716,11 @@ class MolLibr:
|
|
594
716
|
|
595
717
|
|
596
718
|
def to_sdf(self,
|
597
|
-
path:str | Path,
|
598
|
-
confs:bool=False,
|
599
|
-
props:bool=True,
|
600
|
-
separate:bool=False
|
719
|
+
path: str | Path,
|
720
|
+
confs: bool = False,
|
721
|
+
props: bool = True,
|
722
|
+
separate: bool = False,
|
723
|
+
) -> None:
|
601
724
|
"""Writes to .sdf or .sdf.gz file.
|
602
725
|
|
603
726
|
Chem.SDWriter is supposed to write all non-private molecular properties.
|
@@ -640,7 +763,7 @@ class MolLibr:
|
|
640
763
|
f.write(mol.to_sdf(confs, props))
|
641
764
|
|
642
765
|
|
643
|
-
def to_smi(self, path:str | Path) -> None:
|
766
|
+
def to_smi(self, path: str | Path) -> None:
|
644
767
|
"""Writes to .smi file.
|
645
768
|
|
646
769
|
Args:
|
@@ -658,55 +781,82 @@ class MolLibr:
|
|
658
781
|
smi.write(f'{mol.smiles} {mol.name}\n')
|
659
782
|
|
660
783
|
|
661
|
-
def
|
662
|
-
|
784
|
+
def to_svg(self,
|
785
|
+
mols_per_row: int = 5,
|
786
|
+
width: int = 200,
|
787
|
+
height: int = 200,
|
788
|
+
atom_index: bool = False,
|
789
|
+
redraw: bool = False,
|
790
|
+
coordgen: bool = False) -> str:
|
791
|
+
"""Returns an SVG string for Jupyter notebook.
|
663
792
|
|
664
793
|
Args:
|
794
|
+
path (str | Path): output filename or path.
|
795
|
+
mols_per_row (int, optional): number of molecules per row. Defaults to 5.
|
665
796
|
width (int, optional): width. Defaults to 200.
|
666
797
|
height (int, optional): height. Defaults to 200.
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
Returns:
|
671
|
-
str: SVG strings for Jupyter notebook.
|
798
|
+
atom_index (bool, optional): whether to show atom index. Defaults to False.
|
799
|
+
redraw (bool, optional): whether to redraw. Defaults to False.
|
800
|
+
coordgen (bool, optional): whether to use coordgen. Defaults to False.
|
672
801
|
"""
|
673
802
|
|
674
|
-
|
675
|
-
for mol in self.libr:
|
676
|
-
for a in mol.rdmol.GetAtoms():
|
677
|
-
a.SetProp("atomNote", str(a.GetIdx()+1))
|
678
|
-
rdmols = [mol.rdmol for mol in self.libr]
|
803
|
+
rdmols = [mol.rdmol for mol in self.libr]
|
679
804
|
legends = [mol.name for mol in self.libr]
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
805
|
+
|
806
|
+
svg_string = render_matrix_grid(rdmols,
|
807
|
+
legends,
|
808
|
+
mols_per_row = mols_per_row,
|
809
|
+
width = width,
|
810
|
+
height = height,
|
811
|
+
atom_index = atom_index,
|
812
|
+
redraw = redraw,
|
813
|
+
coordgen = coordgen,
|
814
|
+
svg = True,
|
815
|
+
)
|
685
816
|
|
817
|
+
return svg_string
|
818
|
+
|
686
819
|
|
687
|
-
|
820
|
+
|
821
|
+
def to_png(self,
|
822
|
+
filename: str | Path | None = None,
|
823
|
+
mols_per_row: int = 5,
|
824
|
+
width: int = 200,
|
825
|
+
height: int = 200,
|
826
|
+
atom_index: bool = False,
|
827
|
+
redraw: bool = False,
|
828
|
+
coordgen: bool = False,
|
829
|
+
) -> Image.Image | None:
|
688
830
|
"""Writes to a .png file, or returns the image if `filename` is None.
|
689
831
|
|
690
832
|
Args:
|
691
|
-
|
833
|
+
mols_per_row (int, optional): number of molecules per row. Defaults to 5.
|
692
834
|
width (int, optional): width. Defaults to 200.
|
693
835
|
height (int, optional): height. Defaults to 200.
|
694
|
-
|
695
|
-
|
836
|
+
atom_index (bool, optional): whether to show atom index. Defaults to False.
|
837
|
+
redraw (bool, optional): whether to redraw. Defaults to False.
|
838
|
+
coordgen (bool, optional): whether to use coordgen. Defaults to False.
|
696
839
|
"""
|
697
|
-
|
698
|
-
path = path.as_posix() # convert to string
|
699
|
-
if index:
|
700
|
-
for mol in self.libr:
|
701
|
-
for a in mol.rdmol.GetAtoms():
|
702
|
-
a.SetProp("atomNote", str(a.GetIdx()+1))
|
703
|
-
rdmols = [mol.rdmol for mol in self.libr]
|
840
|
+
rdmols = [mol.rdmol for mol in self.libr]
|
704
841
|
legends = [mol.name for mol in self.libr]
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
842
|
+
|
843
|
+
img = render_matrix_grid(rdmols,
|
844
|
+
legends,
|
845
|
+
mols_per_row = mols_per_row,
|
846
|
+
width = width,
|
847
|
+
height = height,
|
848
|
+
atom_index = atom_index,
|
849
|
+
redraw = redraw,
|
850
|
+
coordgen = coordgen,
|
851
|
+
svg = False,
|
852
|
+
)
|
853
|
+
|
854
|
+
if filename is None:
|
855
|
+
return img
|
856
|
+
else:
|
857
|
+
if isinstance(filename, Path):
|
858
|
+
filename = filename.as_posix()
|
859
|
+
img.save(filename)
|
710
860
|
|
711
861
|
|
712
862
|
def to_html(self) -> str:
|
@@ -723,9 +873,10 @@ class MolLibr:
|
|
723
873
|
|
724
874
|
|
725
875
|
def to_dataframe(self,
|
726
|
-
|
727
|
-
|
728
|
-
|
876
|
+
name: str = 'name',
|
877
|
+
smiles: str = 'smiles',
|
878
|
+
confs: bool = False,
|
879
|
+
) -> pd.DataFrame:
|
729
880
|
"""Returns a Pandas DataFrame.
|
730
881
|
|
731
882
|
Args:
|
@@ -772,44 +923,46 @@ class MolLibr:
|
|
772
923
|
data[k].append(mol.props[k])
|
773
924
|
else:
|
774
925
|
data[k].append(None)
|
926
|
+
|
775
927
|
return pd.DataFrame(data)
|
776
928
|
|
777
929
|
|
778
930
|
def to_csv(self,
|
779
|
-
|
780
|
-
|
781
|
-
|
931
|
+
path: str | Path,
|
932
|
+
confs: bool = False,
|
933
|
+
decimals:int = 3,
|
934
|
+
) -> None:
|
782
935
|
"""Writes to a .csv file.
|
783
936
|
|
784
937
|
Args:
|
785
938
|
path (str | Path): output filename or path.
|
786
939
|
confs (bool, optional): whether to include conformer properties. Defaults to False.
|
787
|
-
|
940
|
+
decimals (int, optional): decimal places for float numbers. Defaults to 3.
|
788
941
|
"""
|
789
942
|
df = self.to_dataframe(confs=confs)
|
790
|
-
df.to_csv(path, index=False, float_format=f'%.{
|
943
|
+
df.to_csv(path, index=False, float_format=f'%.{decimals}f')
|
791
944
|
|
792
945
|
|
793
946
|
@staticmethod
|
794
|
-
def
|
795
|
-
"""A mask function to return True if molecule is
|
947
|
+
def _mask_nnp_ready(mol: Mol, model: str) -> bool:
|
948
|
+
"""A mask function to return True if molecule is NNP ready.
|
796
949
|
|
797
950
|
Args:
|
798
951
|
mol (Mol): rdworks.Mol object.
|
799
|
-
model (str): name of
|
952
|
+
model (str): name of NNP model.
|
800
953
|
|
801
954
|
Returns:
|
802
|
-
bool: True if molecule is
|
955
|
+
bool: True if molecule is NNP ready.
|
803
956
|
"""
|
804
|
-
return mol.
|
957
|
+
return mol.nnp_ready(model)
|
805
958
|
|
806
959
|
|
807
|
-
def
|
808
|
-
"""Returns a copy of subset of library that is
|
960
|
+
def nnp_ready(self, model: str, **kwargs) -> Self:
|
961
|
+
"""Returns a copy of the subset of the library that is ready for the given neural network potential.
|
809
962
|
|
810
963
|
Examples:
|
811
964
|
>>> libr = rdworks.MolLibr(drug_smiles, drug_names)
|
812
|
-
>>> ani2x_compatible_subset = libr.
|
965
|
+
>>> ani2x_compatible_subset = libr.nnp_ready('ANI-2x', progress=False)
|
813
966
|
|
814
967
|
Args:
|
815
968
|
model (str): name of model.
|
@@ -817,22 +970,23 @@ class MolLibr:
|
|
817
970
|
Returns:
|
818
971
|
Self: subset of library.
|
819
972
|
"""
|
820
|
-
obj = copy
|
973
|
+
obj = self.copy().compute(**kwargs)
|
821
974
|
lmodel = [model,] * self.count()
|
822
975
|
with ProcessPoolExecutor(max_workers=obj.max_workers) as executor:
|
823
976
|
if obj.progress:
|
824
977
|
mask = list(tqdm(
|
825
|
-
executor.map(self.
|
826
|
-
desc="
|
978
|
+
executor.map(self._mask_nnp_ready, obj.libr, lmodel, chunksize=obj.chunksize),
|
979
|
+
desc="NNP ready",
|
827
980
|
total=obj.count()))
|
828
981
|
else:
|
829
982
|
mask = list(
|
830
|
-
executor.map(self.
|
983
|
+
executor.map(self._mask_nnp_ready, obj.libr, lmodel, chunksize=obj.chunksize))
|
831
984
|
obj.libr = list(itertools.compress(obj.libr, mask))
|
985
|
+
|
832
986
|
return obj
|
833
987
|
|
834
988
|
|
835
|
-
def to_nnbatches(self, batchsize:int=1000) -> list:
|
989
|
+
def to_nnbatches(self, batchsize: int = 1000) -> list:
|
836
990
|
"""Split workload flexibly into a number of batches.
|
837
991
|
|
838
992
|
- Each batch has up to `batchsize` number of atoms.
|