rdworks 0.25.8__py3-none-any.whl → 0.35.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rdworks/mollibr.py CHANGED
@@ -4,36 +4,37 @@ import pandas as pd
4
4
  import gzip
5
5
 
6
6
  from pathlib import Path
7
- from typing import Optional, Union, Self, Iterator
7
+ from collections.abc import Iterable
8
8
  from collections import defaultdict
9
+ from typing import Self, Iterator
9
10
  from concurrent.futures import ProcessPoolExecutor
10
11
  from tqdm import tqdm
11
12
 
12
- from rdkit import Chem, DataStructs
13
- from rdkit.Chem import Draw
13
+ from rdkit import Chem, DataStructs, Geometry
14
+ from rdkit.Chem import Draw, AllChem, rdFMCS, rdDepictor
14
15
  from rdkit.ML.Cluster import Butina
15
16
  from rdkit.SimDivFilters.rdSimDivPickers import MaxMinPicker
17
+ from PIL import Image
16
18
 
17
- from rdworks.conf import Conf
18
- from rdworks.mol import Mol
19
+ from rdworks import Conf, Mol
20
+ from rdworks.display import render_matrix_grid
19
21
  from rdworks.xml import list_predefined_xml
20
22
 
21
23
 
22
24
  class MolLibr:
23
25
  def __init__(self,
24
- molecules: list | tuple | set | None = None,
25
- names: list | tuple | set | None = None,
26
+ molecules: Iterable | None = None,
27
+ names: Iterable | None = None,
26
28
  std:bool=False,
27
29
  max_workers:int=4,
28
- chunksize:int=100,
30
+ chunksize:int=10,
29
31
  progress:bool=False) -> None:
30
32
  """Create a rdworks.MolLibr object.
31
33
 
32
34
  Args:
33
- molecules (Optional[Union[list,tuple,set]], optional): a list/tuple/set of molecules
35
+ molecules (Iterable | None, optional): a list/tuple/set of molecules
34
36
  (rdworks.Mol | SMILES | rdkit.Chem.Mol). Defaults to None.
35
- names (Optional[Union[list,tuple,set]], optional): a list/tuple/set of names.
36
- Defaults to None.
37
+ names (Iterable | None, optional): a list/tuple/set of names. Defaults to None.
37
38
  std (bool, optional): whether to standardize molecules. Defaults to False.
38
39
  max_workers (int, optional): max workers for parallel calculation. Defaults to 4.
39
40
  chunksize (int, optional): chunksize for parallel calculation. Defaults to 10.
@@ -51,29 +52,34 @@ class MolLibr:
51
52
  self.threshold = None
52
53
  self.clusters = None
53
54
 
54
- if molecules and isinstance(molecules, (list, tuple, set)):
55
- if names and isinstance(names, (list, tuple, set)):
56
- if len(names) != len(molecules):
57
- raise ValueError('MolLibr() counts of molecules and names are different')
58
- if isinstance(molecules[0], Mol):
59
- self.libr = molecules
60
- elif isinstance(molecules[0], Conf):
61
- self.libr = [Mol(conf.rdmol, name=conf.name).props.update(conf.props) for conf in molecules]
62
- elif isinstance(molecules[0], str): # SMILES string
63
- if names:
64
- self.libr = [Mol(smi, name=name, std=std) for (smi, name) in zip(molecules, names)]
65
- else:
66
- self.libr = [Mol(smi, std=std) for smi in molecules]
67
- self.rename(prefix='entry') # default name
68
- elif isinstance(molecules[0], Chem.Mol):
69
- if names:
70
- self.libr = [Mol(rdmol, name=name, std=std) for (rdmol, name) in zip(molecules, names)]
71
- else:
72
- self.libr = [Mol(rdmol, std=std) for rdmol in molecules]
73
- self.rename(prefix='entry') # default name
74
- else:
75
- raise TypeError('MolLibr() takes a list|tuple|set of Mol|SMILES|Chem.Mol')
55
+ assert isinstance(molecules, Iterable) or molecules is None, "molecules must be iterable or None"
56
+ assert isinstance(names, Iterable) or names is None, "names must be iterable or None"
57
+
58
+ if isinstance(molecules, Iterable):
59
+ if isinstance(names, Iterable):
60
+ assert len(molecules) == len(names), "molecules and names must be the same counts"
61
+
62
+ if names is None:
63
+ names = [''] * len(molecules)
64
+
65
+ for molecular_input, name in zip(molecules, names):
66
+ if isinstance(molecular_input, Mol):
67
+ _mol = molecular_input
68
+
69
+ elif isinstance(molecular_input, Chem.Mol) or isinstance(molecular_input, str):
70
+ _mol = Mol(molecular_input, name=name, std=std)
71
+
72
+ elif isinstance(molecular_input, Conf):
73
+ _mol = Mol(molecular_input.rdmol,
74
+ name=molecular_input.name,
75
+ std=std).props.update(molecular_input.props)
76
+
77
+ self.libr.append(_mol)
78
+
79
+ if not any(names):
80
+ self.rename(prefix='entry')
76
81
 
82
+
77
83
  def copy(self) -> Self:
78
84
  """Returns a copy of self.
79
85
 
@@ -106,178 +112,192 @@ class MolLibr:
106
112
  """Next molecule.
107
113
 
108
114
  Returns:
109
- Mol: next molecule (rdworks.Mol) object.
115
+ Mol: next molecule.
110
116
  """
111
117
  return next(self.libr)
112
118
 
113
119
 
114
def __eq__(self, other: Self) -> bool:
    """Operator `==`.

    Two libraries compare equal when they hold the same set of molecules.
    Order and duplicate entries are ignored because the comparison is made
    on frozensets of the molecules.

    Args:
        other (MolLibr): other MolLibr object.

    Returns:
        bool: True if `other` is a MolLibr holding the same set of molecules
            as self; False otherwise, including when `other` is not a MolLibr.
    """
    if not isinstance(other, MolLibr):
        return False

    # A one-sided difference only proves that self is a subset of other,
    # which made `a == b` and `b == a` disagree. Set equality checks both
    # directions.
    return frozenset(self.libr) == frozenset(other.libr)
127
133
 
128
134
 
129
def __getitem__(self, index: int | slice) -> Mol | Self:
    """Operator `[]`.

    Args:
        index (int | slice): index or slice of indexes.

    Returns:
        Mol | MolLibr: the molecule at `index`, or a new MolLibr holding the
            sliced molecules. The Mol objects in a sliced library are shared
            with self, not copied.

    Raises:
        AssertionError: if the library is empty.
        IndexError: if an integer `index` is out of range.
    """
    assert self.count() != 0, "library is empty"

    if isinstance(index, slice):
        # Build an empty library and attach the slice afterwards. Passing Mol
        # objects to MolLibr(...) without names takes the constructor's
        # default-name path, which calls rename(prefix='entry') and would
        # rename the molecules shared with this library as a side effect.
        subset = MolLibr()
        subset.libr = self.libr[index]
        return subset

    return self.libr[index]
147
149
 
148
150
 
149
- def __add__(self, other:object) -> Self:
150
- """Operator `+`. Returns a copy of extended library.
151
def __setitem__(self, index: int, molecule: Mol) -> Self:
    """Replace the molecule stored at `index`.

    Args:
        index (int): position in the library to overwrite.
        molecule (Mol): molecule to store at that position.

    Returns:
        Self: the modified library. Note that Python discards this value for
            the `libr[i] = mol` syntax; it is only visible when the method is
            called explicitly.
    """
    entries = self.libr
    entries[index] = molecule

    return self
154
164
 
155
- Raises:
156
- TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
165
+
166
def __add__(self, other: Mol | Self) -> Self:
    """Operator `+`.

    Neither operand is modified; the result is built on a copy of self.

    Args:
        other (Mol | MolLibr): molecule to append, or library whose molecules
            are appended.

    Returns:
        Self: a new MolLibr object.

    Raises:
        AssertionError: if `other` is neither a Mol nor a MolLibr.
    """
    assert isinstance(other, Mol | MolLibr), "'+' operator expects Mol or MolLibr object"

    combined = self.copy()

    if isinstance(other, MolLibr):
        combined.libr.extend(other.libr)
    elif isinstance(other, Mol):
        combined.libr.append(other)

    return combined
171
188
 
172
189
 
173
190
  def __iadd__(self, other: Mol | Self) -> Self:
174
- """Operator `+=`. Updates self by adding other molecule or library
191
+ """Operator `+=`.
175
192
 
176
193
  Args:
177
- other (object): other rdworks.Mol or rdworks.MolLibr object.
178
-
179
- Raises:
180
- TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
194
+ other (object): other Mol or MolLibr object.
181
195
 
182
196
  Returns:
183
- Self: rdworks.MolLibr object.
197
+ modified self.
184
198
  """
199
+ assert isinstance(other, Mol | MolLibr), "'+=' operator expects Mol or MolLibr object"
200
+
185
201
  if isinstance(other, Mol):
186
202
  self.libr.append(other)
203
+
187
204
  elif isinstance(other, MolLibr):
188
205
  self.libr.extend(other.libr)
189
- else:
190
- raise TypeError("'+=' operator expects Mol or MolLibr object")
206
+
191
207
  return self
192
208
 
193
209
 
194
210
  def __sub__(self, other: Mol | Self) -> Self:
195
- """Operator `-`. Returns a copy of subtractive subset.
211
+ """Operator `-`.
196
212
 
197
- Args:
198
- other (Union[Mol,Self]): other rdworks.Mol or rdworks.MolLibr object.
213
+ Returns a new object, leaving the original objects unchanged (conventional behavior).
199
214
 
200
- Raises:
201
- TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
215
+ Args:
216
+ other (Mol | MolLibr): other rdworks.Mol or rdworks.MolLibr object.
202
217
 
203
218
  Returns:
204
- Self: a copy of subtractive subset.
219
+ A new MolLibr object.
205
220
  """
221
+ assert isinstance(other, Mol | MolLibr), "'-' operator expects Mol or MolLibr object"
222
+
206
223
  if isinstance(other, Mol):
207
224
  difference = frozenset(self.libr) - frozenset([other])
225
+
208
226
  elif isinstance(other, MolLibr):
209
227
  difference = frozenset(self.libr) - frozenset(other.libr)
210
- else:
211
- raise TypeError("'-' operator expects rdworks.Mol or rdworks.MolLibr object")
212
- obj = copy.deepcopy(self)
213
- obj.libr = list(difference)
214
- return obj
228
+
229
+ new_object = self.copy()
230
+ new_object.libr = list(difference)
231
+
232
+ return new_object
215
233
 
216
234
 
217
235
  def __isub__(self, other: Mol | Self) -> Self:
218
- """Operator `-=`. Updates self by subtracting other molecule or library.
236
+ """Operator `-=`.
219
237
 
220
238
  Args:
221
- other (Union[Mol,Self]): other molecule or library.
222
-
223
- Raises:
224
- TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
239
+ other (Mol | MolLibr): other molecule or library.
225
240
 
226
241
  Returns:
227
- Self: rdworks.MolLibr object.
242
+ Modified self.
228
243
  """
244
+ assert isinstance(other, Mol | MolLibr), "'-=' operator expects Mol or MolLibr object"
245
+
229
246
  if isinstance(other, Mol):
230
247
  difference = frozenset(self.libr) - frozenset([other])
248
+
231
249
  elif isinstance(other, MolLibr):
232
250
  difference = frozenset(self.libr) - frozenset(other.libr)
233
- else:
234
- raise TypeError("'-=' operator expects rdworks.Mol or rdworks.MolLibr object")
251
+
235
252
  self.libr = list(difference)
253
+
236
254
  return self
237
255
 
238
256
 
239
257
  def __and__(self, other: Mol | Self) -> Self:
240
- """Operator `&`. Returns a copy of common subset.
258
+ """Operator `&`.
241
259
 
242
- Args:
243
- other (Union[Mol,Self]): other molecule or library.
260
+ Returns a new object, leaving the original objects unchanged (conventional behavior).
244
261
 
245
- Raises:
246
- TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
262
+ Args:
263
+ other (Mol | MolLibr): other molecule or library.
247
264
 
248
265
  Returns:
249
- Self: a copy of rdworks.MolLibr object.
266
+ A new MolLibr object.
250
267
  """
268
+ assert isinstance(other, Mol | MolLibr), "'&' operator expects Mol or MolLibr object"
269
+
251
270
  if isinstance(other, Mol):
252
271
  intersection = frozenset(self.libr) & frozenset([other])
272
+
253
273
  elif isinstance(other, MolLibr):
254
274
  intersection = frozenset(self.libr) & frozenset(other.libr)
255
- else:
256
- raise TypeError("'&' operator or overlap() expects rdworks.Mol or rdworks.MolLibr object")
257
- obj = copy.deepcopy(self)
258
- obj.libr = list(intersection)
259
- return obj
275
+
276
+ new_object = self.copy()
277
+ new_object.libr = list(intersection)
278
+
279
+ return new_object
260
280
 
261
281
 
262
282
  def __iand__(self, other: Mol | Self) -> Self:
263
- """Operator `&=`. Re-assigns self with common subset.
283
+ """Operator `&=`.
264
284
 
265
285
  Args:
266
- other (Union[Mol,Self]): other molecule or library.
267
-
268
- Raises:
269
- TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
286
+ other (Mol | Self): other molecule or library.
270
287
 
271
288
  Returns:
272
- Self: rdworks.MolLibr object.
289
+ Modified self.
273
290
  """
291
+ assert isinstance(other, Mol | MolLibr), "'&=' operator expects Mol or MolLibr object"
292
+
274
293
  if isinstance(other, Mol):
275
294
  intersection = frozenset(self.libr) & frozenset([other])
295
+
276
296
  elif isinstance(other, MolLibr):
277
297
  intersection = frozenset(self.libr) & frozenset(other.libr)
278
- else:
279
- raise TypeError("'&=' operator expects rdworks.Mol or rdworks.MolLibr object")
298
+
280
299
  self.libr = list(intersection)
300
+
281
301
  return self
282
302
 
283
303
 
@@ -332,9 +352,9 @@ class MolLibr:
332
352
  """Change settings for parallel computing.
333
353
 
334
354
  Args:
335
- max_workers (Optional[int], optional): max number of workers. Defaults to None.
336
- chunksize (Optional[int], optional): chunksize of splitted workload. Defaults to None.
337
- progress (Optional[bool], optional): whether to show progress bar. Defaults to None.
355
+ max_workers (int, optional): max number of workers. Defaults to 4.
356
+ chunksize (int, optional): chunk size of the split workload. Defaults to 10.
357
+ progress (bool, optional): whether to show progress bar. Defaults to False.
338
358
 
339
359
  Returns:
340
360
  Self: rdworks.MolLibr object.
@@ -342,10 +362,11 @@ class MolLibr:
342
362
  self.max_workers = kwargs.get('max_workers', self.max_workers)
343
363
  self.chunksize = kwargs.get('chunksize', self.chunksize)
344
364
  self.progress = kwargs.get('progress', self.progress)
365
+
345
366
  return self
346
367
 
347
368
 
348
- def rename(self, prefix:Optional[str]=None, sep:str='.', start:int=1) -> Self:
369
+ def rename(self, prefix: str | None = None, sep: str='.', start: int=1) -> Self:
349
370
  """Rename molecules with serial numbers in-place and their conformers.
350
371
 
351
372
  Molecules will be named by a format, `{prefix}{sep}{serial_number}` and
@@ -391,10 +412,11 @@ class MolLibr:
391
412
  # rename conformers
392
413
  for mol in self.libr:
393
414
  mol.rename()
415
+
394
416
  return self
395
417
 
396
418
 
397
- def overlap(self, other:Self) -> Self:
419
+ def overlap(self, other: Self) -> Self:
398
420
  """Returns a common subset with `other` library.
399
421
 
400
422
  Args:
@@ -406,7 +428,7 @@ class MolLibr:
406
428
  return self.__and__(other)
407
429
 
408
430
 
409
- def similar(self, query:Mol, threshold:float=0.2, **kwargs) -> Self:
431
+ def similar(self, query: Mol, threshold: float = 0.2, **kwargs) -> Self:
410
432
  """Returns a copy of subset that are similar to `query`.
411
433
 
412
434
  Args:
@@ -419,7 +441,8 @@ class MolLibr:
419
441
  Returns:
420
442
  Self: a copy of self.
421
443
  """
422
- obj = copy.deepcopy(self).compute(**kwargs)
444
+ obj = self.copy().compute(**kwargs)
445
+
423
446
  if isinstance(query, Mol):
424
447
  largs = [(query, threshold),] * obj.count()
425
448
  else:
@@ -432,6 +455,7 @@ class MolLibr:
432
455
  else:
433
456
  mask = list(executor.map(MolLibr._mask_similar, obj.libr, largs, chunksize=obj.chunksize))
434
457
  obj.libr = list(itertools.compress(obj.libr, mask))
458
+
435
459
  return obj
436
460
 
437
461
 
@@ -445,7 +469,8 @@ class MolLibr:
445
469
  Returns:
446
470
  Self: a copy of self.
447
471
  """
448
- obj = copy.deepcopy(self)
472
+ obj = self.copy()
473
+
449
474
  U = {} # unique SMILES
450
475
  mask = []
451
476
  for mol in obj.libr:
@@ -464,6 +489,7 @@ class MolLibr:
464
489
  if len(mol.props['aka']) > 0:
465
490
  print(f" {mol.name}({len(mol.props['aka'])}) - {','.join(mol.props['aka'])}")
466
491
  print(f"de-duplicated to {obj.count()} molecules")
492
+
467
493
  return obj
468
494
 
469
495
 
@@ -489,10 +515,11 @@ class MolLibr:
489
515
  self.libr = list(
490
516
  executor.map(MolLibr._map_qed, self.libr, lprops, chunksize=self.chunksize)
491
517
  )
518
+
492
519
  return self
493
520
 
494
521
 
495
- def drop(self, terms:str | Path | None = None, invert:bool=False, **kwargs) -> Self:
522
+ def drop(self, terms: str | Path | None = None, invert: bool = False, **kwargs) -> Self:
496
523
  """Drops matched molecules and returns a copy of library with remaining molecules.
497
524
 
498
525
  Args:
@@ -505,7 +532,9 @@ class MolLibr:
505
532
  if not terms:
506
533
  print(list_predefined_xml())
507
534
  return self
508
- obj = copy.deepcopy(self).compute(**kwargs)
535
+
536
+ obj = self.copy().compute(**kwargs)
537
+
509
538
  lterms = [ terms ] * obj.count()
510
539
  with ProcessPoolExecutor(max_workers=obj.max_workers) as executor:
511
540
  if obj.progress:
@@ -519,10 +548,101 @@ class MolLibr:
519
548
  if invert:
520
549
  mask = [not b for b in mask]
521
550
  obj.libr = list(itertools.compress(obj.libr, mask))
551
+
522
552
  return obj
523
553
 
524
554
 
525
- def pick(self, n:int, **kwargs) -> Self:
555
@staticmethod
def _mcs_coord_map(subject: Mol, r: Chem.Mol) -> dict:
    """Map atoms of `subject` onto the coordinates of the reference `r`.

    The maximum common substructure (MCS) of the two molecules is matched
    against both, and each matched subject atom is paired with the x/y
    position of the corresponding reference atom.

    Args:
        subject (Mol): molecule to be aligned; its `rdmol` is used.
        r (Chem.Mol): reference molecule. It must already carry a conformer,
            since atom positions are read from `r.GetConformer()`.

    Returns:
        dict: subject atom index -> Geometry.Point2D of the matching
            reference atom. Empty when no common substructure is found.
    """
    s = subject.rdmol
    lcs = rdFMCS.FindMCS([r, s])

    # Nothing in common: there is no query to match, so return an empty map
    # and let the caller lay the molecule out without constraints.
    if lcs.numAtoms == 0:
        return {}

    r_indices = r.GetSubstructMatch(lcs.queryMol)
    s_indices = s.GetSubstructMatch(lcs.queryMol)

    # The reference conformer does not change inside the loop; fetch it once.
    conformer = r.GetConformer()
    coord_map = {}
    for s_idx, r_idx in zip(s_indices, r_indices):
        pt = conformer.GetAtomPosition(r_idx)
        coord_map[s_idx] = Geometry.Point2D(pt.x, pt.y)

    return coord_map
571
+
572
+
573
def align_drawing(self,
                  ref: int = 0,
                  mcs: bool = True,
                  scaffold: str = "",
                  coordgen: bool = True,
                  **kwargs,
                  ) -> Self:
    """Align 2D drawings to a reference molecule or a scaffold SMILES.

    Every molecule is laid out so that the atoms it shares with the
    reference (their maximum common substructure) take the reference's
    2D coordinates.

    Args:
        ref (int, optional): index of the reference molecule. Only used when
            `scaffold` is empty. Defaults to 0.
        mcs (bool, optional): currently unused; kept for interface
            compatibility. Defaults to True.
        scaffold (str, optional): SMILES of a scaffold to use as the
            reference instead of `ref`. Defaults to "".
        coordgen (bool, optional): passed to rdDepictor.SetPreferCoordGen().
            This is a process-wide RDKit setting and is not restored
            afterwards. Defaults to True.
        **kwargs: forwarded to `compute()` (max_workers, chunksize, progress).

    Returns:
        Self: a copy of self with aligned 2D coordinates.

    Raises:
        ValueError: if `scaffold` is not a parsable SMILES.
        AssertionError: if `scaffold` is empty and `ref` is out of range.
    """
    obj = self.copy().compute(**kwargs)

    if scaffold:
        # scaffold (SMILES) of the reference 2D drawing
        ref_2d_rdmol = Chem.MolFromSmiles(scaffold)
        if ref_2d_rdmol is None:
            # MolFromSmiles returns None on a parse failure; fail here with a
            # clear message instead of inside the depictor.
            raise ValueError(f"invalid scaffold SMILES: {scaffold!r}")
    else:
        # a library member serves as the reference 2D drawing
        assert ref >=0 and ref < obj.count(), f"ref should be [0,{obj.count()-1}]"
        ref_2d_rdmol = obj.libr[ref].rdmol

    rdDepictor.SetPreferCoordGen(coordgen)
    rdDepictor.Compute2DCoords(ref_2d_rdmol)

    with ProcessPoolExecutor(max_workers=obj.max_workers) as executor:
        results = executor.map(MolLibr._mcs_coord_map,
                               obj.libr,  # subject
                               itertools.repeat(ref_2d_rdmol),  # same reference for all
                               chunksize=obj.chunksize)
        if obj.progress:
            results = tqdm(results, desc="align drawing", total=obj.count())
        # Consume the results while the executor is still alive.
        coord_maps = list(results)

    for mol, coord_map in zip(obj.libr, coord_maps):
        rdDepictor.Compute2DCoords(mol.rdmol, coordMap=coord_map)

    return obj
643
+
644
+
645
+ def pick(self, n: int, **kwargs) -> Self:
526
646
  """Picks n diverse molecules.
527
647
 
528
648
  Args:
@@ -531,7 +651,7 @@ class MolLibr:
531
651
  Returns:
532
652
  Self: a copy of self.
533
653
  """
534
- obj = copy.deepcopy(self)
654
+ obj = self.copy()
535
655
  raise NotImplementedError
536
656
  return obj
537
657
 
@@ -552,7 +672,11 @@ class MolLibr:
552
672
  return len(self.libr)
553
673
 
554
674
 
555
- def cluster(self, threshold:float=0.3, ordered:bool=True, drop_singleton:bool=True) -> list:
675
+ def cluster(self,
676
+ threshold: float = 0.3,
677
+ ordered: bool = True,
678
+ drop_singleton: bool = True,
679
+ ) -> list:
556
680
  """Clusters molecules using fingerprint.
557
681
 
558
682
  Args:
@@ -592,10 +716,11 @@ class MolLibr:
592
716
 
593
717
 
594
718
  def to_sdf(self,
595
- path:str | Path,
596
- confs:bool=False,
597
- props:bool=True,
598
- separate:bool=False) -> None:
719
+ path: str | Path,
720
+ confs: bool = False,
721
+ props: bool = True,
722
+ separate: bool = False,
723
+ ) -> None:
599
724
  """Writes to .sdf or .sdf.gz file.
600
725
 
601
726
  Chem.SDWriter is supposed to write all non-private molecular properties.
@@ -638,7 +763,7 @@ class MolLibr:
638
763
  f.write(mol.to_sdf(confs, props))
639
764
 
640
765
 
641
- def to_smi(self, path:str | Path) -> None:
766
+ def to_smi(self, path: str | Path) -> None:
642
767
  """Writes to .smi file.
643
768
 
644
769
  Args:
@@ -656,55 +781,82 @@ class MolLibr:
656
781
  smi.write(f'{mol.smiles} {mol.name}\n')
657
782
 
658
783
 
659
def to_svg(self,
           mols_per_row: int = 5,
           width: int = 200,
           height: int = 200,
           atom_index: bool = False,
           redraw: bool = False,
           coordgen: bool = False) -> str:
    """Render the library as a grid and return it as SVG text.

    Molecule names are used as legends. All drawing options are forwarded
    unchanged to `render_matrix_grid`.

    Args:
        mols_per_row (int, optional): number of molecules per row. Defaults to 5.
        width (int, optional): width. Defaults to 200.
        height (int, optional): height. Defaults to 200.
        atom_index (bool, optional): whether to show atom index. Defaults to False.
        redraw (bool, optional): whether to redraw. Defaults to False.
        coordgen (bool, optional): whether to use coordgen. Defaults to False.

    Returns:
        str: SVG text, e.g. for display in a Jupyter notebook.
    """
    rdmols = []
    legends = []
    for mol in self.libr:
        rdmols.append(mol.rdmol)
        legends.append(mol.name)

    return render_matrix_grid(
        rdmols,
        legends,
        mols_per_row=mols_per_row,
        width=width,
        height=height,
        atom_index=atom_index,
        redraw=redraw,
        coordgen=coordgen,
        svg=True,
    )
818
+
684
819
 
685
def to_png(self,
           filename: str | Path | None = None,
           mols_per_row: int = 5,
           width: int = 200,
           height: int = 200,
           atom_index: bool = False,
           redraw: bool = False,
           coordgen: bool = False,
           ) -> Image.Image | None:
    """Render the library as a grid image and return it or save it.

    Molecule names are used as legends. All drawing options are forwarded
    unchanged to `render_matrix_grid`.

    Args:
        filename (str | Path | None, optional): output filename or path.
            When None, the image is returned instead of saved. Defaults to None.
        mols_per_row (int, optional): number of molecules per row. Defaults to 5.
        width (int, optional): width. Defaults to 200.
        height (int, optional): height. Defaults to 200.
        atom_index (bool, optional): whether to show atom index. Defaults to False.
        redraw (bool, optional): whether to redraw. Defaults to False.
        coordgen (bool, optional): whether to use coordgen. Defaults to False.

    Returns:
        Image.Image | None: the rendered image when `filename` is None;
            otherwise None after the image has been saved to `filename`.
    """
    grid_options = dict(
        mols_per_row=mols_per_row,
        width=width,
        height=height,
        atom_index=atom_index,
        redraw=redraw,
        coordgen=coordgen,
        svg=False,
    )
    img = render_matrix_grid([mol.rdmol for mol in self.libr],
                             [mol.name for mol in self.libr],
                             **grid_options)

    if filename is None:
        return img

    # Path objects are handed to save() as POSIX-style strings.
    target = filename.as_posix() if isinstance(filename, Path) else filename
    img.save(target)
708
860
 
709
861
 
710
862
  def to_html(self) -> str:
@@ -721,9 +873,10 @@ class MolLibr:
721
873
 
722
874
 
723
875
  def to_dataframe(self,
724
- name:str='name',
725
- smiles:str='smiles',
726
- confs:bool=False) -> pd.DataFrame:
876
+ name: str = 'name',
877
+ smiles: str = 'smiles',
878
+ confs: bool = False,
879
+ ) -> pd.DataFrame:
727
880
  """Returns a Pandas DataFrame.
728
881
 
729
882
  Args:
@@ -770,44 +923,46 @@ class MolLibr:
770
923
  data[k].append(mol.props[k])
771
924
  else:
772
925
  data[k].append(None)
926
+
773
927
  return pd.DataFrame(data)
774
928
 
775
929
 
776
930
  def to_csv(self,
777
- path:str | Path,
778
- confs:bool=False,
779
- decimal_places:int=3) -> None:
931
+ path: str | Path,
932
+ confs: bool = False,
933
+ decimals:int = 3,
934
+ ) -> None:
780
935
  """Writes to a .csv file.
781
936
 
782
937
  Args:
783
938
  path (str | Path): output filename or path.
784
939
  confs (bool, optional): whether to include conformer properties. Defaults to False.
785
- decimal_places (int, optional): decimal places for float numbers. Defaults to 3.
940
+ decimals (int, optional): decimal places for float numbers. Defaults to 3.
786
941
  """
787
942
  df = self.to_dataframe(confs=confs)
788
- df.to_csv(path, index=False, float_format=f'%.{decimal_places}f')
943
+ df.to_csv(path, index=False, float_format=f'%.{decimals}f')
789
944
 
790
945
 
791
946
  @staticmethod
792
- def _mask_nn_applicable(mol:Mol, model:str) -> bool:
793
- """A mask function to return True if molecule is NN applicable.
947
+ def _mask_nnp_ready(mol: Mol, model: str) -> bool:
948
+ """A mask function to return True if molecule is NNP ready.
794
949
 
795
950
  Args:
796
951
  mol (Mol): rdworks.Mol object.
797
- model (str): name of NN model.
952
+ model (str): name of NNP model.
798
953
 
799
954
  Returns:
800
- bool: True if molecule is NN applicable.
955
+ bool: True if molecule is NNP ready.
801
956
  """
802
- return mol.is_nn_applicable(model)
957
+ return mol.nnp_ready(model)
803
958
 
804
959
 
805
- def nn_applicable(self, model:str, **kwargs) -> Self:
806
- """Returns a copy of subset of library that is applicable to given neural network `model`.
960
+ def nnp_ready(self, model: str, **kwargs) -> Self:
961
+ """Returns a copy of subset of library that is ready to given neural network potential.
807
962
 
808
963
  Examples:
809
964
  >>> libr = rdworks.MolLibr(drug_smiles, drug_names)
810
- >>> ani2x_compatible_subset = libr.nn_applicable('ANI-2x', progress=False)
965
+ >>> ani2x_compatible_subset = libr.nnp_ready('ANI-2x', progress=False)
811
966
 
812
967
  Args:
813
968
  model (str): name of model.
@@ -815,22 +970,23 @@ class MolLibr:
815
970
  Returns:
816
971
  Self: subset of library.
817
972
  """
818
- obj = copy.deepcopy(self).compute(**kwargs)
973
+ obj = self.copy().compute(**kwargs)
819
974
  lmodel = [model,] * self.count()
820
975
  with ProcessPoolExecutor(max_workers=obj.max_workers) as executor:
821
976
  if obj.progress:
822
977
  mask = list(tqdm(
823
- executor.map(self.mask_nn_applicable, obj.libr, lmodel, chunksize=obj.chunksize),
824
- desc="NN applicable",
978
+ executor.map(self._mask_nnp_ready, obj.libr, lmodel, chunksize=obj.chunksize),
979
+ desc="NNP ready",
825
980
  total=obj.count()))
826
981
  else:
827
982
  mask = list(
828
- executor.map(self._mask_nn_applicable, obj.libr, lmodel, chunksize=obj.chunksize))
983
+ executor.map(self._mask_nnp_ready, obj.libr, lmodel, chunksize=obj.chunksize))
829
984
  obj.libr = list(itertools.compress(obj.libr, mask))
985
+
830
986
  return obj
831
987
 
832
988
 
833
- def to_nnbatches(self, batchsize:int=1000) -> list:
989
+ def to_nnbatches(self, batchsize: int = 1000) -> list:
834
990
  """Split workload flexibily into a numer of batches.
835
991
 
836
992
  - Each batch has up to `batchsize` number of atoms.