rdworks 0.25.7__py3-none-any.whl → 0.35.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rdworks/mollibr.py CHANGED
@@ -4,38 +4,37 @@ import pandas as pd
4
4
  import gzip
5
5
 
6
6
  from pathlib import Path
7
- from typing import Optional, Union, Self, Iterator
7
+ from collections.abc import Iterable
8
8
  from collections import defaultdict
9
+ from typing import Self, Iterator
9
10
  from concurrent.futures import ProcessPoolExecutor
10
11
  from tqdm import tqdm
11
12
 
12
- from rdkit import Chem, DataStructs
13
- from rdkit.Chem import Draw
13
+ from rdkit import Chem, DataStructs, Geometry
14
+ from rdkit.Chem import Draw, AllChem, rdFMCS, rdDepictor
14
15
  from rdkit.ML.Cluster import Butina
15
16
  from rdkit.SimDivFilters.rdSimDivPickers import MaxMinPicker
17
+ from PIL import Image
16
18
 
17
- from rdworks.conf import Conf
18
- from rdworks.mol import Mol
19
-
19
+ from rdworks import Conf, Mol
20
+ from rdworks.display import render_matrix_grid
20
21
  from rdworks.xml import list_predefined_xml
21
- from rdworks.utils import precheck_path, guess_mol_id
22
22
 
23
23
 
24
24
  class MolLibr:
25
25
  def __init__(self,
26
- molecules: list | tuple | set | None = None,
27
- names: list | tuple | set | None = None,
26
+ molecules: Iterable | None = None,
27
+ names: Iterable | None = None,
28
28
  std:bool=False,
29
29
  max_workers:int=4,
30
- chunksize:int=100,
30
+ chunksize:int=10,
31
31
  progress:bool=False) -> None:
32
32
  """Create a rdworks.MolLibr object.
33
33
 
34
34
  Args:
35
- molecules (Optional[Union[list,tuple,set]], optional): a list/tuple/set of molecules
35
+ molecules (Iterable | None, optional): a list/tuple/set of molecules
36
36
  (rdworks.Mol | SMILES | rdkit.Chem.Mol). Defaults to None.
37
- names (Optional[Union[list,tuple,set]], optional): a list/tuple/set of names.
38
- Defaults to None.
37
+ names (Iterable | None, optional): a list/tuple/set of names. Defaults to None.
39
38
  std (bool, optional): whether to standardize molecules. Defaults to False.
40
39
  max_workers (int, optional): max workers for parallel calculation. Defaults to 4.
41
40
  chunksize (int, optional): chunksize for parallel calculation. Defaults to 100.
@@ -53,29 +52,34 @@ class MolLibr:
53
52
  self.threshold = None
54
53
  self.clusters = None
55
54
 
56
- if molecules and isinstance(molecules, (list, tuple, set)):
57
- if names and isinstance(names, (list, tuple, set)):
58
- if len(names) != len(molecules):
59
- raise ValueError('MolLibr() counts of molecules and names are different')
60
- if isinstance(molecules[0], Mol):
61
- self.libr = molecules
62
- elif isinstance(molecules[0], Conf):
63
- self.libr = [Mol(conf.rdmol, name=conf.name).props.update(conf.props) for conf in molecules]
64
- elif isinstance(molecules[0], str): # SMILES string
65
- if names:
66
- self.libr = [Mol(smi, name=name, std=std) for (smi, name) in zip(molecules, names)]
67
- else:
68
- self.libr = [Mol(smi, std=std) for smi in molecules]
69
- self.rename(prefix='entry') # default name
70
- elif isinstance(molecules[0], Chem.Mol):
71
- if names:
72
- self.libr = [Mol(rdmol, name=name, std=std) for (rdmol, name) in zip(molecules, names)]
73
- else:
74
- self.libr = [Mol(rdmol, std=std) for rdmol in molecules]
75
- self.rename(prefix='entry') # default name
76
- else:
77
- raise TypeError('MolLibr() takes a list|tuple|set of Mol|SMILES|Chem.Mol')
55
+ assert isinstance(molecules, Iterable) or molecules is None, "molecules must be iterable or None"
56
+ assert isinstance(names, Iterable) or names is None, "names must be iterable or None"
57
+
58
+ if isinstance(molecules, Iterable):
59
+ if isinstance(names, Iterable):
60
+ assert len(molecules) == len(names), "molecules and names must be the same counts"
61
+
62
+ if names is None:
63
+ names = [''] * len(molecules)
64
+
65
+ for molecular_input, name in zip(molecules, names):
66
+ if isinstance(molecular_input, Mol):
67
+ _mol = molecular_input
68
+
69
+ elif isinstance(molecular_input, Chem.Mol) or isinstance(molecular_input, str):
70
+ _mol = Mol(molecular_input, name=name, std=std)
71
+
72
+ elif isinstance(molecular_input, Conf):
73
+ _mol = Mol(molecular_input.rdmol,
74
+ name=molecular_input.name,
75
+ std=std).props.update(molecular_input.props)
76
+
77
+ self.libr.append(_mol)
78
+
79
+ if not any(names):
80
+ self.rename(prefix='entry')
78
81
 
82
+
79
83
  def copy(self) -> Self:
80
84
  """Returns a copy of self.
81
85
 
@@ -108,178 +112,192 @@ class MolLibr:
108
112
  """Next molecule.
109
113
 
110
114
  Returns:
111
- Mol: next molecule (rdworks.Mol) object.
115
+ Mol: next molecule.
112
116
  """
113
117
  return next(self.libr)
114
118
 
115
119
 
116
- def __eq__(self, other:Self) -> bool:
120
+ def __eq__(self, other: Self) -> bool:
117
121
  """Operator `==`.
118
122
 
119
123
  Args:
120
124
  other (rdworks.MolLibr): other rdworks.MolLibr object.
121
125
 
122
126
  Returns:
123
- bool: True if other rdworks.MolLibr object is identical with self.
127
+ Bool: True if other MolLibr object is identical with self.
124
128
  """
125
129
  if isinstance(other, MolLibr):
126
130
  return len(frozenset(self.libr) - frozenset(other.libr)) == 0
127
- else:
128
- return False
131
+
132
+ return False
129
133
 
130
134
 
131
- def __getitem__(self, index: int | slice) -> Mol:
135
+ def __getitem__(self, index: int | slice) -> Mol | Self:
132
136
  """Operator `[]`.
133
137
 
134
138
  Args:
135
139
  index (Union[int, slice]): index or slice of indexes.
136
140
 
137
- Raises:
138
- ValueError: if library is empty or index is out of range.
139
-
140
141
  Returns:
141
- Mol: rdworks.Mol object
142
+ Mol or MolLibr specified by single index or slice.
142
143
  """
143
- if self.count() == 0:
144
- raise ValueError(f"library is empty")
145
- try:
144
+ assert self.count() != 0, "library is empty"
145
+ if isinstance(index, slice):
146
+ return MolLibr(self.libr[index])
147
+ else:
146
148
  return self.libr[index]
147
- except:
148
- raise ValueError(f"index should be 0..{self.count()-1}")
149
149
 
150
150
 
151
- def __add__(self, other:object) -> Self:
152
- """Operator `+`. Returns a copy of extended library.
151
+ def __setitem__(self, index: int, molecule: Mol) -> Self:
152
+ """Set item.
153
153
 
154
154
  Args:
155
- other (object): other rdworks.Mol or rdworks.MolLibr object.
155
+ index (int): index
156
+ molecule (Mol): molecule to replace
157
+
158
+ Returns:
159
+ Modified self.
160
+ """
161
+ self.libr[index] = molecule
156
162
 
157
- Raises:
158
- TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
163
+ return self
164
+
165
+
166
+ def __add__(self, other: Mol | Self) -> Self:
167
+ """Operator `+`.
168
+
169
+ Returns a new object, leaving the original objects unchanged (conventional behavior).
170
+
171
+ Args:
172
+ other (object): other Mol or MolLibr object.
159
173
 
160
174
  Returns:
161
- Self: rdworks.MolLibr object.
175
+ A new MolLibr object.
162
176
  """
177
+ assert isinstance(other, Mol | MolLibr), "'+' operator expects Mol or MolLibr object"
178
+
179
+ new_object = self.copy()
180
+
163
181
  if isinstance(other, Mol):
164
- obj = copy.deepcopy(self)
165
- obj.libr.append(other)
166
- return obj
182
+ new_object.libr.append(other)
183
+
167
184
  elif isinstance(other, MolLibr):
168
- obj = copy.deepcopy(self)
169
- obj.libr.extend(other.libr)
170
- return obj
171
- else:
172
- raise TypeError("'+' operator expects rdworks.Mol or rdworks.MolLibr object")
185
+ new_object.libr.extend(other.libr)
186
+
187
+ return new_object
173
188
 
174
189
 
175
190
  def __iadd__(self, other: Mol | Self) -> Self:
176
- """Operator `+=`. Updates self by adding other molecule or library
191
+ """Operator `+=`.
177
192
 
178
193
  Args:
179
- other (object): other rdworks.Mol or rdworks.MolLibr object.
180
-
181
- Raises:
182
- TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
194
+ other (object): other Mol or MolLibr object.
183
195
 
184
196
  Returns:
185
- Self: rdworks.MolLibr object.
197
+ modified self.
186
198
  """
199
+ assert isinstance(other, Mol | MolLibr), "'+=' operator expects Mol or MolLibr object"
200
+
187
201
  if isinstance(other, Mol):
188
202
  self.libr.append(other)
203
+
189
204
  elif isinstance(other, MolLibr):
190
205
  self.libr.extend(other.libr)
191
- else:
192
- raise TypeError("'+=' operator expects Mol or MolLibr object")
206
+
193
207
  return self
194
208
 
195
209
 
196
210
  def __sub__(self, other: Mol | Self) -> Self:
197
- """Operator `-`. Returns a copy of subtractive subset.
211
+ """Operator `-`.
198
212
 
199
- Args:
200
- other (Union[Mol,Self]): other rdworks.Mol or rdworks.MolLibr object.
213
+ Returns a new object, leaving the original objects unchanged (conventional behavior).
201
214
 
202
- Raises:
203
- TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
215
+ Args:
216
+ other (Mol | MolLibr): other rdworks.Mol or rdworks.MolLibr object.
204
217
 
205
218
  Returns:
206
- Self: a copy of subtractive subset.
219
+ A new MolLibr object.
207
220
  """
221
+ assert isinstance(other, Mol | MolLibr), "'-' operator expects Mol or MolLibr object"
222
+
208
223
  if isinstance(other, Mol):
209
224
  difference = frozenset(self.libr) - frozenset([other])
225
+
210
226
  elif isinstance(other, MolLibr):
211
227
  difference = frozenset(self.libr) - frozenset(other.libr)
212
- else:
213
- raise TypeError("'-' operator expects rdworks.Mol or rdworks.MolLibr object")
214
- obj = copy.deepcopy(self)
215
- obj.libr = list(difference)
216
- return obj
228
+
229
+ new_object = self.copy()
230
+ new_object.libr = list(difference)
231
+
232
+ return new_object
217
233
 
218
234
 
219
235
  def __isub__(self, other: Mol | Self) -> Self:
220
- """Operator `-=`. Updates self by subtracting other molecule or library.
236
+ """Operator `-=`.
221
237
 
222
238
  Args:
223
- other (Union[Mol,Self]): other molecule or library.
224
-
225
- Raises:
226
- TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
239
+ other (Mol | MolLibr): other molecule or library.
227
240
 
228
241
  Returns:
229
- Self: rdworks.MolLibr object.
242
+ Modified self.
230
243
  """
244
+ assert isinstance(other, Mol | MolLibr), "'-=' operator expects Mol or MolLibr object"
245
+
231
246
  if isinstance(other, Mol):
232
247
  difference = frozenset(self.libr) - frozenset([other])
248
+
233
249
  elif isinstance(other, MolLibr):
234
250
  difference = frozenset(self.libr) - frozenset(other.libr)
235
- else:
236
- raise TypeError("'-=' operator expects rdworks.Mol or rdworks.MolLibr object")
251
+
237
252
  self.libr = list(difference)
253
+
238
254
  return self
239
255
 
240
256
 
241
257
  def __and__(self, other: Mol | Self) -> Self:
242
- """Operator `&`. Returns a copy of common subset.
258
+ """Operator `&`.
243
259
 
244
- Args:
245
- other (Union[Mol,Self]): other molecule or library.
260
+ Returns a new object, leaving the original objects unchanged (conventional behavior).
246
261
 
247
- Raises:
248
- TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
262
+ Args:
263
+ other (Mol | MolLibr): other molecule or library.
249
264
 
250
265
  Returns:
251
- Self: a copy of rdworks.MolLibr object.
266
+ A new MolLibr object.
252
267
  """
268
+ assert isinstance(other, Mol | MolLibr), "'&' operator expects Mol or MolLibr object"
269
+
253
270
  if isinstance(other, Mol):
254
271
  intersection = frozenset(self.libr) & frozenset([other])
272
+
255
273
  elif isinstance(other, MolLibr):
256
274
  intersection = frozenset(self.libr) & frozenset(other.libr)
257
- else:
258
- raise TypeError("'&' operator or overlap() expects rdworks.Mol or rdworks.MolLibr object")
259
- obj = copy.deepcopy(self)
260
- obj.libr = list(intersection)
261
- return obj
275
+
276
+ new_object = self.copy()
277
+ new_object.libr = list(intersection)
278
+
279
+ return new_object
262
280
 
263
281
 
264
282
  def __iand__(self, other: Mol | Self) -> Self:
265
- """Operator `&=`. Re-assigns self with common subset.
283
+ """Operator `&=`.
266
284
 
267
285
  Args:
268
- other (Union[Mol,Self]): other molecule or library.
269
-
270
- Raises:
271
- TypeError: if `other` is not rdworks.Mol or rdworks.MolLibr.
286
+ other (Mol | Self): other molecule or library.
272
287
 
273
288
  Returns:
274
- Self: rdworks.MolLibr object.
289
+ Modified self.
275
290
  """
291
+ assert isinstance(other, Mol | MolLibr), "'&=' operator expects Mol or MolLibr object"
292
+
276
293
  if isinstance(other, Mol):
277
294
  intersection = frozenset(self.libr) & frozenset([other])
295
+
278
296
  elif isinstance(other, MolLibr):
279
297
  intersection = frozenset(self.libr) & frozenset(other.libr)
280
- else:
281
- raise TypeError("'&=' operator expects rdworks.Mol or rdworks.MolLibr object")
298
+
282
299
  self.libr = list(intersection)
300
+
283
301
  return self
284
302
 
285
303
 
@@ -334,9 +352,9 @@ class MolLibr:
334
352
  """Change settings for parallel computing.
335
353
 
336
354
  Args:
337
- max_workers (Optional[int], optional): max number of workers. Defaults to None.
338
- chunksize (Optional[int], optional): chunksize of splitted workload. Defaults to None.
339
- progress (Optional[bool], optional): whether to show progress bar. Defaults to None.
355
+ max_workers (int, optional): max number of workers. Defaults to 4.
356
+ chunksize (int, optional): chunksize of splitted workload. Defaults to 10.
357
+ progress (bool, optional): whether to show progress bar. Defaults to False.
340
358
 
341
359
  Returns:
342
360
  Self: rdworks.MolLibr object.
@@ -344,10 +362,11 @@ class MolLibr:
344
362
  self.max_workers = kwargs.get('max_workers', self.max_workers)
345
363
  self.chunksize = kwargs.get('chunksize', self.chunksize)
346
364
  self.progress = kwargs.get('progress', self.progress)
365
+
347
366
  return self
348
367
 
349
368
 
350
- def rename(self, prefix:Optional[str]=None, sep:str='.', start:int=1) -> Self:
369
+ def rename(self, prefix: str | None = None, sep: str='.', start: int=1) -> Self:
351
370
  """Rename molecules with serial numbers in-place and their conformers.
352
371
 
353
372
  Molecules will be named by a format, `{prefix}{sep}{serial_number}` and
@@ -393,10 +412,11 @@ class MolLibr:
393
412
  # rename conformers
394
413
  for mol in self.libr:
395
414
  mol.rename()
415
+
396
416
  return self
397
417
 
398
418
 
399
- def overlap(self, other:Self) -> Self:
419
+ def overlap(self, other: Self) -> Self:
400
420
  """Returns a common subset with `other` library.
401
421
 
402
422
  Args:
@@ -408,7 +428,7 @@ class MolLibr:
408
428
  return self.__and__(other)
409
429
 
410
430
 
411
- def similar(self, query:Mol, threshold:float=0.2, **kwargs) -> Self:
431
+ def similar(self, query: Mol, threshold: float = 0.2, **kwargs) -> Self:
412
432
  """Returns a copy of subset that are similar to `query`.
413
433
 
414
434
  Args:
@@ -421,7 +441,8 @@ class MolLibr:
421
441
  Returns:
422
442
  Self: a copy of self.
423
443
  """
424
- obj = copy.deepcopy(self).compute(**kwargs)
444
+ obj = self.copy().compute(**kwargs)
445
+
425
446
  if isinstance(query, Mol):
426
447
  largs = [(query, threshold),] * obj.count()
427
448
  else:
@@ -434,6 +455,7 @@ class MolLibr:
434
455
  else:
435
456
  mask = list(executor.map(MolLibr._mask_similar, obj.libr, largs, chunksize=obj.chunksize))
436
457
  obj.libr = list(itertools.compress(obj.libr, mask))
458
+
437
459
  return obj
438
460
 
439
461
 
@@ -447,7 +469,8 @@ class MolLibr:
447
469
  Returns:
448
470
  Self: a copy of self.
449
471
  """
450
- obj = copy.deepcopy(self)
472
+ obj = self.copy()
473
+
451
474
  U = {} # unique SMILES
452
475
  mask = []
453
476
  for mol in obj.libr:
@@ -466,6 +489,7 @@ class MolLibr:
466
489
  if len(mol.props['aka']) > 0:
467
490
  print(f" {mol.name}({len(mol.props['aka'])}) - {','.join(mol.props['aka'])}")
468
491
  print(f"de-duplicated to {obj.count()} molecules")
492
+
469
493
  return obj
470
494
 
471
495
 
@@ -491,10 +515,11 @@ class MolLibr:
491
515
  self.libr = list(
492
516
  executor.map(MolLibr._map_qed, self.libr, lprops, chunksize=self.chunksize)
493
517
  )
518
+
494
519
  return self
495
520
 
496
521
 
497
- def drop(self, terms:str | Path | None = None, invert:bool=False, **kwargs) -> Self:
522
+ def drop(self, terms: str | Path | None = None, invert: bool = False, **kwargs) -> Self:
498
523
  """Drops matched molecules and returns a copy of library with remaining molecules.
499
524
 
500
525
  Args:
@@ -507,7 +532,9 @@ class MolLibr:
507
532
  if not terms:
508
533
  print(list_predefined_xml())
509
534
  return self
510
- obj = copy.deepcopy(self).compute(**kwargs)
535
+
536
+ obj = self.copy().compute(**kwargs)
537
+
511
538
  lterms = [ terms ] * obj.count()
512
539
  with ProcessPoolExecutor(max_workers=obj.max_workers) as executor:
513
540
  if obj.progress:
@@ -521,10 +548,101 @@ class MolLibr:
521
548
  if invert:
522
549
  mask = [not b for b in mask]
523
550
  obj.libr = list(itertools.compress(obj.libr, mask))
551
+
524
552
  return obj
525
553
 
526
554
 
527
- def pick(self, n:int, **kwargs) -> Self:
555
+ @staticmethod
556
+ def _mcs_coord_map(subject:Mol, r:Chem.Mol) -> dict:
557
+ s = subject.rdmol
558
+ lcs = rdFMCS.FindMCS([r, s])
559
+ # reference matching indices
560
+ r_indices = r.GetSubstructMatch(lcs.queryMol)
561
+ # subject matching indices
562
+ s_indices = s.GetSubstructMatch(lcs.queryMol)
563
+ # reference matching coordinates (2D)
564
+ r_xy = []
565
+ for i in r_indices:
566
+ pt = r.GetConformer().GetAtomPosition(i)
567
+ r_xy.append(Geometry.Point2D(pt.x, pt.y))
568
+ coord_map = { i : xy for i, xy in zip(s_indices, r_xy) }
569
+
570
+ return coord_map
571
+
572
+
573
+ def align_drawing(self,
574
+ ref: int = 0,
575
+ mcs: bool = True,
576
+ scaffold: str = "",
577
+ coordgen:bool = True,
578
+ **kwargs,
579
+ ) -> Self:
580
+ """Align 2D drawings by using MCS or scaffold SMILES.
581
+
582
+ Args:
583
+ ref (int, optional): index to the reference. Defaults to 0.
584
+ mcs (bool, optional): whether to use MCS(maximum common substructure). Defaults to True.
585
+ scaffold (str, optional): whether to use scaffold (SMILES). Defaults to "".
586
+
587
+ Returns:
588
+ Self: self
589
+ """
590
+
591
+ obj = self.copy().compute(**kwargs)
592
+
593
+ if scaffold:
594
+ # scaffold (SMILES) of the reference 2D drawing
595
+ ref_2d_rdmol = Chem.MolFromSmiles(scaffold)
596
+ else:
597
+ # maximum common substructure to the reference 2D drawing
598
+ assert ref >=0 and ref < obj.count(), f"ref should be [0,{obj.count()-1}]"
599
+ ref_2d_rdmol = obj.libr[ref].rdmol
600
+
601
+ rdDepictor.SetPreferCoordGen(coordgen)
602
+ rdDepictor.Compute2DCoords(ref_2d_rdmol)
603
+ # AllChem.Compute2DCoords(ref_2d_rdmol)
604
+
605
+ with ProcessPoolExecutor(max_workers=obj.max_workers) as executor:
606
+ if obj.progress:
607
+ coord_maps = list(tqdm(
608
+ executor.map(MolLibr._mcs_coord_map,
609
+ obj.libr, # subject
610
+ itertools.repeat(ref_2d_rdmol), # infinite iterator
611
+ chunksize=obj.chunksize),
612
+ desc="align drawingp",
613
+ total=obj.count()))
614
+ else:
615
+ coord_maps = list(
616
+ executor.map(MolLibr._mcs_coord_map,
617
+ obj.libr, # subject
618
+ itertools.repeat(ref_2d_rdmol), # infinite iterator
619
+ chunksize=obj.chunksize))
620
+
621
+ for mol, coord_map in zip(obj.libr, coord_maps):
622
+ rdDepictor.Compute2DCoords(mol.rdmol, coordMap=coord_map)
623
+ # AllChem.Compute2DCoords(mol.rdmol, coordMap=coord_map)
624
+
625
+
626
+ # for idx, mol in enumerate(obj.libr):
627
+ # if mcs and idx == ref:
628
+ # continue
629
+
630
+ # # largest common substructure
631
+ # lcs = rdFMCS.FindMCS([ref_2d_rdmol, mol.rdmol])
632
+
633
+ # # matching indices
634
+ # ref_xy_coords = []
635
+ # for i in ref_2d_rdmol.GetSubstructMatch(lcs.queryMol):
636
+ # pt = ref_2d_rdmol.GetConformer().GetAtomPosition(i)
637
+ # ref_xy_coords.append(Geometry.Point2D(pt.x, pt.y))
638
+ # sub_indices = mol.rdmol.GetSubstructMatch(lcs.queryMol)
639
+ # coord_map = { i : xy for i, xy in zip(sub_indices, ref_xy_coords) }
640
+ # AllChem.Compute2DCoords(mol.rdmol, coordMap=coord_map)
641
+
642
+ return obj
643
+
644
+
645
+ def pick(self, n: int, **kwargs) -> Self:
528
646
  """Picks n diverse molecules.
529
647
 
530
648
  Args:
@@ -533,7 +651,7 @@ class MolLibr:
533
651
  Returns:
534
652
  Self: a copy of self.
535
653
  """
536
- obj = copy.deepcopy(self)
654
+ obj = self.copy()
537
655
  raise NotImplementedError
538
656
  return obj
539
657
 
@@ -554,7 +672,11 @@ class MolLibr:
554
672
  return len(self.libr)
555
673
 
556
674
 
557
- def cluster(self, threshold:float=0.3, ordered:bool=True, drop_singleton:bool=True) -> list:
675
+ def cluster(self,
676
+ threshold: float = 0.3,
677
+ ordered: bool = True,
678
+ drop_singleton: bool = True,
679
+ ) -> list:
558
680
  """Clusters molecules using fingerprint.
559
681
 
560
682
  Args:
@@ -594,10 +716,11 @@ class MolLibr:
594
716
 
595
717
 
596
718
  def to_sdf(self,
597
- path:str | Path,
598
- confs:bool=False,
599
- props:bool=True,
600
- separate:bool=False) -> None:
719
+ path: str | Path,
720
+ confs: bool = False,
721
+ props: bool = True,
722
+ separate: bool = False,
723
+ ) -> None:
601
724
  """Writes to .sdf or .sdf.gz file.
602
725
 
603
726
  Chem.SDWriter is supposed to write all non-private molecular properties.
@@ -640,7 +763,7 @@ class MolLibr:
640
763
  f.write(mol.to_sdf(confs, props))
641
764
 
642
765
 
643
- def to_smi(self, path:str | Path) -> None:
766
+ def to_smi(self, path: str | Path) -> None:
644
767
  """Writes to .smi file.
645
768
 
646
769
  Args:
@@ -658,55 +781,82 @@ class MolLibr:
658
781
  smi.write(f'{mol.smiles} {mol.name}\n')
659
782
 
660
783
 
661
- def to_image(self, width:int=200, height:int=200, index:bool=False, mols_per_row:int=5) -> str:
662
- """Returns SVG strings for Jupyter notebook.
784
+ def to_svg(self,
785
+ mols_per_row: int = 5,
786
+ width: int = 200,
787
+ height: int = 200,
788
+ atom_index: bool = False,
789
+ redraw: bool = False,
790
+ coordgen: bool = False) -> str:
791
+ """Writes to a .svg strings for Jupyter notebook.
663
792
 
664
793
  Args:
794
+ path (str | Path): output filename or path.
795
+ mols_per_row (int, optional): number of molecules per row. Defaults to 5.
665
796
  width (int, optional): width. Defaults to 200.
666
797
  height (int, optional): height. Defaults to 200.
667
- index (bool, optional): whether to show atom index. Defaults to False.
668
- mols_per_row (int, optional): number of molecules per row. Defaults to 5.
669
-
670
- Returns:
671
- str: SVG strings for Jupyter notebook.
798
+ atom_index (bool, optional): whether to show atom index. Defaults to False.
799
+ redraw (bool, optional): whether to redraw. Defaults to False.
800
+ coordgen (bool, optional): whether to use coordgen. Defaults to False.
672
801
  """
673
802
 
674
- if index:
675
- for mol in self.libr:
676
- for a in mol.rdmol.GetAtoms():
677
- a.SetProp("atomNote", str(a.GetIdx()+1))
678
- rdmols = [mol.rdmol for mol in self.libr]
803
+ rdmols = [mol.rdmol for mol in self.libr]
679
804
  legends = [mol.name for mol in self.libr]
680
- return Draw.MolsToGridImage(rdmols,
681
- legends=legends,
682
- molsPerRow=min(mols_per_row, len(rdmols)),
683
- subImgSize=(width,height),
684
- useSVG=True)
805
+
806
+ svg_string = render_matrix_grid(rdmols,
807
+ legends,
808
+ mols_per_row = mols_per_row,
809
+ width = width,
810
+ height = height,
811
+ atom_index = atom_index,
812
+ redraw = redraw,
813
+ coordgen = coordgen,
814
+ svg = True,
815
+ )
685
816
 
817
+ return svg_string
818
+
686
819
 
687
- def to_png(self, path:str | Path, width:int=200, height:int=200, index:bool=False, mols_per_row:int=5) -> None:
820
+
821
+ def to_png(self,
822
+ filename: str | Path | None = None,
823
+ mols_per_row: int = 5,
824
+ width: int = 200,
825
+ height: int = 200,
826
+ atom_index: bool = False,
827
+ redraw: bool = False,
828
+ coordgen: bool = False,
829
+ ) -> Image.Image | None:
688
830
  """Writes to a .png file.
689
831
 
690
832
  Args:
691
- path (str | Path): output filename or path.
833
+ mols_per_row (int, optional): number of molecules per row. Defaults to 5.
692
834
  width (int, optional): width. Defaults to 200.
693
835
  height (int, optional): height. Defaults to 200.
694
- index (bool, optional): whether to show atom index. Defaults to False.
695
- mols_per_row (int, optional): number of molecules per row. Defaults to 5.
836
+ atom_index (bool, optional): whether to show atom index. Defaults to False.
837
+ redraw (bool, optional): whether to redraw. Defaults to False.
838
+ coordgen (bool, optional): whether to use coordgen. Defaults to False.
696
839
  """
697
- if isinstance(path, Path):
698
- path = path.as_posix() # convert to string
699
- if index:
700
- for mol in self.libr:
701
- for a in mol.rdmol.GetAtoms():
702
- a.SetProp("atomNote", str(a.GetIdx()+1))
703
- rdmols = [mol.rdmol for mol in self.libr]
840
+ rdmols = [mol.rdmol for mol in self.libr]
704
841
  legends = [mol.name for mol in self.libr]
705
- Draw.MolsToGridImage(rdmols,
706
- legends=legends,
707
- molsPerRow=min(mols_per_row,len(rdmols)),
708
- subImgSize=(width,height),
709
- useSVG=False).save(path)
842
+
843
+ img = render_matrix_grid(rdmols,
844
+ legends,
845
+ mols_per_row = mols_per_row,
846
+ width = width,
847
+ height = height,
848
+ atom_index = atom_index,
849
+ redraw = redraw,
850
+ coordgen = coordgen,
851
+ svg = False,
852
+ )
853
+
854
+ if filename is None:
855
+ return img
856
+ else:
857
+ if isinstance(filename, Path):
858
+ filename = filename.as_posix()
859
+ img.save(filename)
710
860
 
711
861
 
712
862
  def to_html(self) -> str:
@@ -723,9 +873,10 @@ class MolLibr:
723
873
 
724
874
 
725
875
  def to_dataframe(self,
726
- name:str='name',
727
- smiles:str='smiles',
728
- confs:bool=False) -> pd.DataFrame:
876
+ name: str = 'name',
877
+ smiles: str = 'smiles',
878
+ confs: bool = False,
879
+ ) -> pd.DataFrame:
729
880
  """Returns a Pandas DataFrame.
730
881
 
731
882
  Args:
@@ -772,44 +923,46 @@ class MolLibr:
772
923
  data[k].append(mol.props[k])
773
924
  else:
774
925
  data[k].append(None)
926
+
775
927
  return pd.DataFrame(data)
776
928
 
777
929
 
778
930
  def to_csv(self,
779
- path:str | Path,
780
- confs:bool=False,
781
- decimal_places:int=3) -> None:
931
+ path: str | Path,
932
+ confs: bool = False,
933
+ decimals:int = 3,
934
+ ) -> None:
782
935
  """Writes to a .csv file.
783
936
 
784
937
  Args:
785
938
  path (str | Path): output filename or path.
786
939
  confs (bool, optional): whether to include conformer properties. Defaults to False.
787
- decimal_places (int, optional): decimal places for float numbers. Defaults to 3.
940
+ decimals (int, optional): decimal places for float numbers. Defaults to 3.
788
941
  """
789
942
  df = self.to_dataframe(confs=confs)
790
- df.to_csv(path, index=False, float_format=f'%.{decimal_places}f')
943
+ df.to_csv(path, index=False, float_format=f'%.{decimals}f')
791
944
 
792
945
 
793
946
  @staticmethod
794
- def _mask_nn_applicable(mol:Mol, model:str) -> bool:
795
- """A mask function to return True if molecule is NN applicable.
947
+ def _mask_nnp_ready(mol: Mol, model: str) -> bool:
948
+ """A mask function to return True if molecule is NNP ready.
796
949
 
797
950
  Args:
798
951
  mol (Mol): rdworks.Mol object.
799
- model (str): name of NN model.
952
+ model (str): name of NNP model.
800
953
 
801
954
  Returns:
802
- bool: True if molecule is NN applicable.
955
+ bool: True if molecule is NNP ready.
803
956
  """
804
- return mol.is_nn_applicable(model)
957
+ return mol.nnp_ready(model)
805
958
 
806
959
 
807
- def nn_applicable(self, model:str, **kwargs) -> Self:
808
- """Returns a copy of subset of library that is applicable to given neural network `model`.
960
+ def nnp_ready(self, model: str, **kwargs) -> Self:
961
+ """Returns a copy of subset of library that is ready to given neural network potential.
809
962
 
810
963
  Examples:
811
964
  >>> libr = rdworks.MolLibr(drug_smiles, drug_names)
812
- >>> ani2x_compatible_subset = libr.nn_applicable('ANI-2x', progress=False)
965
+ >>> ani2x_compatible_subset = libr.nnp_ready('ANI-2x', progress=False)
813
966
 
814
967
  Args:
815
968
  model (str): name of model.
@@ -817,22 +970,23 @@ class MolLibr:
817
970
  Returns:
818
971
  Self: subset of library.
819
972
  """
820
- obj = copy.deepcopy(self).compute(**kwargs)
973
+ obj = self.copy().compute(**kwargs)
821
974
  lmodel = [model,] * self.count()
822
975
  with ProcessPoolExecutor(max_workers=obj.max_workers) as executor:
823
976
  if obj.progress:
824
977
  mask = list(tqdm(
825
- executor.map(self.mask_nn_applicable, obj.libr, lmodel, chunksize=obj.chunksize),
826
- desc="NN applicable",
978
+ executor.map(self._mask_nnp_ready, obj.libr, lmodel, chunksize=obj.chunksize),
979
+ desc="NNP ready",
827
980
  total=obj.count()))
828
981
  else:
829
982
  mask = list(
830
- executor.map(self._mask_nn_applicable, obj.libr, lmodel, chunksize=obj.chunksize))
983
+ executor.map(self._mask_nnp_ready, obj.libr, lmodel, chunksize=obj.chunksize))
831
984
  obj.libr = list(itertools.compress(obj.libr, mask))
985
+
832
986
  return obj
833
987
 
834
988
 
835
- def to_nnbatches(self, batchsize:int=1000) -> list:
989
+ def to_nnbatches(self, batchsize: int = 1000) -> list:
836
990
  """Split workload flexibily into a numer of batches.
837
991
 
838
992
  - Each batch has up to `batchsize` number of atoms.