rdworks 0.25.8__py3-none-any.whl → 0.35.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rdworks/mol.py CHANGED
@@ -1,14 +1,13 @@
1
- import io
2
1
  import copy
3
- import pathlib
4
2
  import itertools
5
3
  import json
6
4
  import logging
7
5
  import tempfile
8
6
 
7
+ from io import StringIO, BytesIO
8
+ from pathlib import Path
9
9
  from collections import defaultdict
10
10
  from collections.abc import Callable
11
- from pathlib import Path
12
11
  from typing import Iterator, Self
13
12
 
14
13
  import numpy as np
@@ -22,36 +21,35 @@ import CDPL.Chem
22
21
  import CDPL.ConfGen
23
22
 
24
23
  from rdkit import Chem, DataStructs
25
-
26
24
  from rdkit.Chem import (
27
25
  rdMolDescriptors, AllChem, Descriptors, QED,
28
26
  rdFingerprintGenerator,
29
- Draw, rdDepictor,
27
+ Draw, rdDepictor, inchi,
30
28
  rdDistGeom, rdMolAlign, rdMolTransforms, rdmolops
31
29
  )
32
-
33
30
  from rdkit.Chem.Draw import rdMolDraw2D
34
-
35
31
  from rdkit.ML.Cluster import Butina
32
+ from PIL import Image
36
33
 
37
- from rdworks.std import desalt_smiles, standardize
34
+ from rdworks.conf import Conf
35
+ from rdworks.std import desalt_smiles, standardize, clean_2d
38
36
  from rdworks.xml import list_predefined_xml, get_predefined_xml, parse_xml
39
37
  from rdworks.scaffold import rigid_fragment_indices
40
38
  from rdworks.descriptor import rd_descriptor, rd_descriptor_f
41
- from rdworks.display import svg
42
- from rdworks.utils import convert_tril_to_symm, QT, fix_decimal_places_in_dict
39
+ from rdworks.utils import convert_tril_to_symm, QT, recursive_round
43
40
  from rdworks.units import ev2kcalpermol
44
41
  from rdworks.autograph import NMRCLUST, DynamicTreeCut, RCKmeans, AutoGraph
45
42
  from rdworks.bitqt import BitQT
46
- from rdworks.conf import Conf
43
+ from rdworks.torsion import create_torsion_fragment, get_torsion_atoms
44
+ from rdworks.display import render_svg, render_png
47
45
 
46
+ from scour.scour import scourString
48
47
 
49
48
  main_logger = logging.getLogger()
50
49
 
51
50
 
52
51
  class Mol:
53
- """Container for molecular structure, conformers, and other information.
54
- """
52
+ """Container for molecular structure, conformers, and other information."""
55
53
 
56
54
  MFP2 = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
57
55
 
@@ -59,100 +57,98 @@ class Mol:
59
57
  ETKDG_params.useSmallRingTorsions = True
60
58
  ETKDG_params.maxIterations = 2000
61
59
 
62
-
63
60
  def __init__(self,
64
- molecular_input: str | Chem.Mol,
65
- name:str='',
66
- std:bool=False,
67
- max_workers:int=1,
68
- chunksize:int=4,
69
- progress:bool=False) -> None:
70
- """Create a rdworks.Mol object.
61
+ molecule: str | Chem.Mol | Conf | None = None,
62
+ name: str = '',
63
+ std: bool = False,
64
+ reset_isotope: bool = True,
65
+ remove_H: bool = True,
66
+ max_workers: int = 1,
67
+ chunksize: int = 4,
68
+ progress: bool = False) -> None:
69
+ """Initialize.
71
70
 
72
71
  Examples:
73
- >>> import rdworks
74
- >>> m = rdworks.Mol('c1ccccc1', name='benzene')
72
+ >>> from rdworks import Mol
73
+ >>> m = Mol('c1ccccc1', name='benzene')
75
74
 
76
75
  Args:
77
- molecular_input (str | Chem.Mol): SMILES or rdkit.Chem.Mol object
78
- name (str, optional): name of the molecule. Defaults to ''.
79
- std (bool, optional): whether to standardize the molecule. Defaults to False.
80
-
81
- Raises:
82
- ValueError: Invalid SMILES or rdkit.Chem.Mol object.
83
- TypeError: No SMILES or rdkit.Chem.Mol object is provided.
84
- RuntimeError: Desalting or standardization process failed.
76
+ molecule (str | Chem.Mol | None): SMILES or rdkit.Chem.Mol or None
77
+ name (str): name of the molecule. Defaults to ''.
78
+ std (bool): whether to standardize the molecule. Defaults to False.
79
+ max_workers (int): number of maximum workers for parallelization. Defaults to 1.
80
+ chunksize (int): batch size for parallelization. Defaults to 4.
81
+ progress (bool): whether to show progress bar. Defaults to False.
85
82
  """
86
-
87
- self.rdmol = None # rdkit.Chem.Mol object
88
- self.smiles = None # isomeric SMILES
89
- self.name = None
83
+ assert isinstance(molecule, str | Chem.Mol | Conf) or molecule is None
84
+
85
+ self.rdmol = None # 2D, one and only one Conformer
86
+ self.smiles = '' # isomeric SMILES
87
+ self.confs = [] # container for 3D conformers
88
+ self.name = ''
89
+ self.InChIKey = '' # 27 characters (SHA-256 hash of InChI)
90
+ self.InChI = ''
90
91
  self.props = {}
91
- self.confs = [] # 3D conformers (iterable)
92
92
  self.fp = None
93
93
  self.max_workers = max_workers
94
94
  self.chunksize = chunksize
95
95
  self.progress = progress
96
-
97
- if isinstance(molecular_input, str):
96
+
97
+ if molecule is None:
98
+ return
99
+
100
+ if isinstance(molecule, str): # 1-D SMILES
98
101
  try:
99
- self.rdmol = Chem.MolFromSmiles(molecular_input)
100
- assert self.rdmol
101
- self.smiles = Chem.MolToSmiles(self.rdmol)
102
+ if "." in molecule: # mandatory desalting
103
+ (self.smiles, self.rdmol) = desalt_smiles(molecule)
104
+ else:
105
+ self.rdmol = Chem.MolFromSmiles(molecule)
106
+ self.smiles = Chem.MolToSmiles(self.rdmol)
102
107
  except:
103
- raise ValueError(f'Mol() received invalid SMILES: {molecular_input}')
104
- elif isinstance(molecular_input, Chem.Mol):
108
+ raise ValueError(f'Mol() Error: invalid SMILES {molecule}')
109
+
110
+ elif isinstance(molecule, Chem.Mol): # 2-D or 3-D Chem.Mol
105
111
  try:
106
- self.rdmol = molecular_input
107
- assert self.rdmol
112
+ self.rdmol, _ = clean_2d(molecule, reset_isotope, remove_H)
108
113
  self.smiles = Chem.MolToSmiles(self.rdmol)
114
+ self.confs = [Conf(x) for x in _]
109
115
  except:
110
- raise ValueError('Mol() received invalid rdkit.Chem.Mol object')
111
- else:
112
- raise TypeError('Mol() expects SMILES or rdkit.Chem.Mol object')
113
-
114
- ### desalting
115
- if "." in self.smiles:
116
+ raise ValueError(f'Mol() Error: invalid Chem.Mol object')
117
+
118
+ elif isinstance(molecule, Conf): # 3-D input
116
119
  try:
117
- (self.smiles, self.rdmol) = desalt_smiles(self.smiles)
118
- assert self.smiles
119
- assert self.rdmol
120
+ self.rdmol, _ = clean_2d(molecule.rdmol, reset_isotope, remove_H)
121
+ self.smiles = Chem.MolToSmiles(self.rdmol)
122
+ self.confs = [molecule]
120
123
  except:
121
- raise RuntimeError(f'Mol() error occurred in desalting: {self.smiles}')
122
-
123
- ### standardization
124
- if std:
125
- # standardization changes self.rdmol
126
- try:
124
+ raise ValueError(f'Mol() Error: invalid Conf object')
125
+
126
+ try:
127
+ if std:
127
128
  self.rdmol = standardize(self.rdmol)
128
129
  self.smiles = Chem.MolToSmiles(self.rdmol)
129
- assert self.smiles
130
- assert self.rdmol
131
- except:
132
- raise RuntimeError('Mol() error occurred in standardization')
133
-
134
- ### naming
130
+ except:
131
+ raise RuntimeError('Mol() Error: standardization')
132
+
133
+ assert self.smiles and self.rdmol, "Mol() Error: invalid molecule"
134
+
135
+ rdDepictor.Compute2DCoords(self.rdmol)
136
+
135
137
  try:
136
138
  self.name = str(name)
137
139
  except:
138
140
  self.name = 'untitled'
141
+
139
142
  self.rdmol.SetProp('_Name', self.name) # _Name can't be None
140
-
141
- ### set default properties
143
+ self.InChI = Chem.MolToInchi(self.rdmol)
144
+ self.InChIKey = inchi.InchiToInchiKey(self.InChI)
142
145
  self.props.update({
143
146
  'aka' : [], # <-- to be set by MolLibr.unique()
144
- 'atoms' : self.rdmol.GetNumAtoms(),
145
- # hydrogens not excluded
146
- # m = Chem.MolFromSmiles("c1c[nH]cc1")
147
- # m.GetNumAtoms()
148
- # >> 5
149
- # Chem.AddHs(m).GetNumAtoms()
150
- # >> 10
147
+ 'atoms' : self.rdmol.GetNumAtoms(), # hydrogens not excluded?
151
148
  'charge': rdmolops.GetFormalCharge(self.rdmol),
152
- # number of rotatable bonds
153
149
  "nrb" : Descriptors.NumRotatableBonds(self.rdmol),
154
150
  })
155
-
151
+
156
152
 
157
153
  def __str__(self) -> str:
158
154
  """String representation of the molecule.
@@ -184,21 +180,21 @@ class Mol:
184
180
  return hash(self.smiles)
185
181
 
186
182
 
187
- def __eq__(self, other:object) -> bool:
188
- """True if `other` molecule is identical with the molecule.
183
+ def __eq__(self, other: Self) -> bool:
184
+ """True if `other` Mol is identical with this Mol.
189
185
 
190
- It compares canonicalized SMILES.
186
+ It compares InChIKeys.
191
187
 
192
188
  Examples:
193
189
  >>> m1 == m2
194
190
 
195
191
  Args:
196
- other (object): other rdworks.Mol object.
192
+ other (object): other Mol object.
197
193
 
198
194
  Returns:
199
195
  bool: True if identical.
200
196
  """
201
- return self.smiles == other.smiles
197
+ return self.InChIKey == other.InChIKey
202
198
 
203
199
 
204
200
  def __iter__(self) -> Iterator:
@@ -223,7 +219,7 @@ class Mol:
223
219
  return next(self.confs)
224
220
 
225
221
 
226
- def __getitem__(self, index: int | slice) -> Conf:
222
+ def __getitem__(self, index: int | slice) -> Conf | Self:
227
223
  """Conformer object of conformers of the molecule with given index or slice of indexes.
228
224
 
229
225
  Examples:
@@ -232,31 +228,34 @@ class Mol:
232
228
  Args:
233
229
  index (int | slice): index for conformers.
234
230
 
235
- Raises:
236
- ValueError: conformers are not defined in the molecule or index is out of range.
237
-
238
231
  Returns:
239
- Conf: Conf object matching the index of the molecule.
232
+ Conf or Mol(copy) with conformers specified by index.
240
233
  """
241
- if self.count() == 0:
242
- raise ValueError(f"no conformers")
243
- try:
244
- return self.confs[index]
245
- except:
246
- raise ValueError(f"index should be 0..{self.count()-1}")
234
+ assert self.count() > 0, "no conformers"
235
+
236
+ if isinstance(index, slice):
237
+ new_object = self.copy()
238
+ new_object.confs = new_object.confs[index]
239
+ return new_object
247
240
 
241
+ else:
242
+ return self.confs[index]
248
243
 
244
+
249
245
  def copy(self) -> Self:
250
246
  """Returns a copy of self.
251
247
 
252
248
  Returns:
253
- Self: a copy of self (rdworks.Mol) object.
249
+ a copy of self.
254
250
  """
255
251
  return copy.deepcopy(self)
256
252
 
257
253
 
258
- def rename(self, prefix:str='', sep:str='/', start:int=1) -> Self:
259
- """Rename conformer names and returns self
254
+ def rename(self,
255
+ prefix: str = '',
256
+ sep: str = '/',
257
+ start: int = 1) -> Self:
258
+ """Updates name and conformer names.
260
259
 
261
260
  The first conformer name is {prefix}{sep}{start}
262
261
 
@@ -266,11 +265,12 @@ class Mol:
266
265
  start (int, optional): first serial number. Defaults to 1.
267
266
 
268
267
  Returns:
269
- Self: rdworks.Mol object.
268
+ Self: modified self.
270
269
  """
271
270
  if prefix :
272
271
  self.name = prefix
273
272
  self.rdmol.SetProp('_Name', prefix)
273
+
274
274
  # update conformer names
275
275
  num_digits = len(str(self.count())) # ex. '100' -> 3
276
276
  for (serial, conf) in enumerate(self.confs, start=start):
@@ -278,11 +278,13 @@ class Mol:
278
278
  while len(serial_str) < num_digits:
279
279
  serial_str = '0' + serial_str
280
280
  conf.rename(f'{self.name}{sep}{serial_str}')
281
+
281
282
  return self
282
283
 
283
284
 
284
- def qed(self, properties:list[str]=['QED', 'MolWt', 'LogP', 'TPSA', 'HBD']) -> Self:
285
- """Updates quantitative estimate of drug-likeness (QED).
285
+ def qed(self,
286
+ properties: list[str] = ['QED', 'MolWt', 'LogP', 'TPSA', 'HBD']) -> Self:
287
+ """Updates quantitative estimate of drug-likeness (QED) and other descriptors.
286
288
 
287
289
  Args:
288
290
  properties (list[str], optional): Defaults to ['QED', 'MolWt', 'LogP', 'TPSA', 'HBD'].
@@ -291,53 +293,54 @@ class Mol:
291
293
  KeyError: if property key is unknown.
292
294
 
293
295
  Returns:
294
- Self: rdworks.Mol object.
296
+ Self: modified self.
295
297
  """
296
298
  props_dict = {}
297
299
  for k in properties:
298
300
  try:
299
301
  props_dict[k] = rd_descriptor_f[k](self.rdmol)
300
302
  except:
301
- raise KeyError(f'Mol.qed() received undefined property {k} for {self}')
303
+ raise KeyError(f'qed() Error: unknown property {k}')
302
304
  self.props.update(props_dict)
305
+
303
306
  return self
304
307
 
305
308
 
306
309
  def remove_stereo(self) -> Self:
307
- """Removes stereochemistry and returns a copy of self.
310
+ """Removes stereochemistry.
308
311
 
309
312
  Examples:
310
- >>> m = rdworks.Mol("C/C=C/C=C\\C", "double_bond")
313
+ >>> m = Mol("C/C=C/C=C\\C", "double_bond")
311
314
  >>> m.remove_stereo().smiles == "CC=CC=CC"
312
315
 
313
316
  Returns:
314
- Self: rdworks.Mol object.
317
+ Self: modified self.
315
318
  """
316
- obj = copy.deepcopy(self)
317
319
  # keep the original stereo info. for ring double bond
318
- Chem.RemoveStereochemistry(obj.rdmol)
319
- Chem.AssignStereochemistry(obj.rdmol,
320
+ Chem.RemoveStereochemistry(self.rdmol)
321
+ Chem.AssignStereochemistry(self.rdmol,
320
322
  cleanIt=False,
321
323
  force=False,
322
324
  flagPossibleStereoCenters=False)
323
- obj.smiles = Chem.MolToSmiles(obj.rdmol)
324
- return obj
325
+ self.smiles = Chem.MolToSmiles(self.rdmol)
326
+
327
+ return self
325
328
 
326
329
 
327
330
  def make_confs(self,
328
331
  n:int = 50,
329
- method:str = 'RDKit_ETKDG',
330
- calculator:str | Callable = 'MMFF94') -> Self:
332
+ method:str = 'ETKDG',
333
+ calculator:str | Callable = 'MMFF94',
334
+ ) -> Self:
331
335
  """Generates 3D conformers.
332
336
 
333
337
  Args:
334
338
  n (int, optional): number of conformers to generate. Defaults to 50.
335
339
  method (str, optional): conformer generation method.
336
- Choices are `RDKit_ETKDG`, `CDPL_CONFORGE`.
337
- Defaults to 'RDKit_ETKDG'.
340
+ Choices are `ETKDG`, `CONFORGE`. Defaults to 'ETKDG'.
338
341
 
339
342
  Returns:
340
- Self: rdworks.Mol object
343
+ Self: modified self.
341
344
 
342
345
  Reference:
343
346
  T. Seidel, C. Permann, O. Wieder, S. M. Kohlbacher, T. Langer,
@@ -352,9 +355,9 @@ class Mol:
352
355
 
353
356
  self.confs = []
354
357
 
355
- if method.upper() == 'RDKIT_ETKDG':
358
+ if method.upper() == 'ETKDG':
356
359
  rdmol_H = Chem.AddHs(self.rdmol, addCoords=True) # returns a copy with hydrogens added
357
- conf_ids = rdDistGeom.EmbedMultipleConfs(rdmol_H, n, params=self.ETKDG_params)
360
+ conf_ids = rdDistGeom.EmbedMultipleConfs(rdmol_H, numConfs=n, params=self.ETKDG_params)
358
361
  for rdConformer in rdmol_H.GetConformers():
359
362
  # number of atoms should match with conformer(s)
360
363
  rdmol_conf = Chem.Mol(rdmol_H)
@@ -363,7 +366,7 @@ class Mol:
363
366
  conf = Conf(rdmol_conf)
364
367
  self.confs.append(conf)
365
368
 
366
- elif method.upper() == 'CDPL_CONFORGE':
369
+ elif method.upper() == 'CONFORGE':
367
370
  with tempfile.NamedTemporaryFile() as tmpfile:
368
371
  mol = CDPL.Chem.parseSMILES(self.smiles)
369
372
  # create and initialize an instance of the class ConfGen.ConformerGenerator which
@@ -412,7 +415,7 @@ class Mol:
412
415
 
413
416
  # energy evaluations for ranking
414
417
  for conf in self.confs:
415
- conf.get_potential_energy(calculator) # default: MMFF94
418
+ conf.potential_energy(calculator) # default: MMFF94
416
419
 
417
420
  # set relative energy, E_rel(kcal/mol)
418
421
  sort_by = 'E_tot(kcal/mol)'
@@ -421,50 +424,97 @@ class Mol:
421
424
  for conf in self.confs:
422
425
  conf.props.update({"E_rel(kcal/mol)": conf.props[sort_by] - lowest_energy})
423
426
 
424
- return self.rename()
427
+ self = self.rename()
428
+
429
+ return self
425
430
 
426
431
 
427
- def optimize(self, calculator:str | Callable = 'MMFF94', fmax:float=0.05) -> Self:
428
- """Optimizes 3D conformers
432
+ def optimize_confs(self,
433
+ calculator: str | Callable = 'MMFF94',
434
+ fmax: float = 0.05,
435
+ max_iter: int = 1000,
436
+ ) -> Self:
437
+ """Optimizes 3D geometry of conformers.
429
438
 
430
439
  Args:
431
- calculator (str | Callable): _description_
432
- fmax (float, optional): _description_. Defaults to 0.05.
440
+ calculator (str | Callable): MMFF94 (= MMFF), MMFF94s, UFF, or ASE calculator.
441
+ `MMFF94` or `MMFF` - Intended for general use, including organic molecules and proteins,
442
+ and primarily relies on data from quantum mechanical calculations.
443
+ It's often used in molecular dynamics simulations.
444
+ `MMFF94s` - A "static" variant of MMFF94, with adjusted parameters for out-of-plane
445
+ bending and dihedral torsions to favor planar geometries for specific nitrogen atoms.
446
+ This makes it better suited for geometry optimization studies where a static,
447
+ time-averaged structure is desired. The "s" stands for "static".
448
+ `UFF` - UFF refers to the "Universal Force Field," a force field model used for
449
+ molecular mechanics calculations. It's a tool for geometry optimization,
450
+ energy minimization, and exploring molecular conformations in 3D space.
451
+ UFF is often used to refine conformers generated by other methods,
452
+ such as random conformer generation, to produce more physically plausible
453
+ and stable structures.
454
+ fmax (float, optional): fmax for the calculator convergence. Defaults to 0.05.
455
+ max_iter (int, optional): max iterations for the calculator. Defaults to 1000.
433
456
 
434
457
  Returns:
435
- Self: _description_
458
+ Self: modified self.
436
459
  """
437
- self.confs = [ conf.optimize(calculator, fmax) for conf in self.confs ]
460
+ self.confs = [ conf.optimize(calculator, fmax, max_iter) for conf in self.confs ]
461
+
438
462
  return self
439
463
 
440
464
 
441
- def sort_confs(self) -> Self:
442
- """Sorts conformers by `E_tot(eV)` or `E_tot(kcal/mol)` and sets `E_rel(kcal/mol)`.
465
+ def sort_confs(self, calculator: str | Callable | None = None) -> Self:
466
+ """Sorts by `E_tot(kcal/mol)` or `E_tot(eV)` and sets `E_rel(kcal/mol)`.
443
467
 
468
+ Args:
469
+ calculator (str | Callable | None): MMFF94 (= MMFF), MMFF94s, UFF, or ASE calculator.
470
+ `MMFF94` or `MMFF` - Intended for general use, including organic molecules and proteins,
471
+ and primarily relies on data from quantum mechanical calculations.
472
+ It's often used in molecular dynamics simulations.
473
+ `MMFF94s` - A "static" variant of MMFF94, with adjusted parameters for out-of-plane
474
+ bending and dihedral torsions to favor planar geometries for specific nitrogen atoms.
475
+ This makes it better suited for geometry optimization studies where a static,
476
+ time-averaged structure is desired. The "s" stands for "static".
477
+ `UFF` - UFF refers to the "Universal Force Field," a force field model used for
478
+ molecular mechanics calculations. It's a tool for geometry optimization,
479
+ energy minimization, and exploring molecular conformations in 3D space.
480
+ UFF is often used to refine conformers generated by other methods,
481
+ such as random conformer generation, to produce more physically plausible
482
+ and stable structures.
444
483
  Raises:
445
484
  KeyError: if `E_tot(eV)` or `E_tot(kcal/mol)` is not defined.
446
485
 
447
486
  Returns:
448
- Self: rdworks.Mol object.
487
+ Self: modified self.
449
488
  """
450
- if all(['E_tot(eV)' in c.props for c in self.confs]):
451
- sort_by = 'E_tot(eV)'
452
- conversion = 23.060547830619026 # eV to kcal/mol
453
- elif all(['E_tot(kcal/mol)' in c.props for c in self.confs]):
489
+ if calculator is not None:
490
+ # re-calculate potential energies
491
+ for conf in self.confs:
492
+ PE = conf.potential_energy(calculator=calculator) # sets `E_tot(kcal/mol)`
493
+
494
+ if all(['E_tot(kcal/mol)' in conf.props for conf in self.confs]):
454
495
  sort_by = 'E_tot(kcal/mol)'
455
496
  conversion = 1.0
497
+
498
+ elif all(['E_tot(eV)' in conf.props for conf in self.confs]):
499
+ sort_by = 'E_tot(eV)'
500
+ conversion = ev2kcalpermol # eV to kcal/mol
501
+
456
502
  else:
457
- raise KeyError(f'Mol.sort_confs() requires E_tot(eV) or E_tot(kcal/mol) property')
458
- self.confs = sorted(self.confs, key=lambda c: c.props[sort_by]) # ascending order
503
+ raise KeyError(f'sort_confs() requires `E_tot(eV)` or `E_tot(kcal/mol)` property')
504
+
505
+ # ascending order
506
+ self.confs = sorted(self.confs, key=lambda c: c.props[sort_by])
507
+
459
508
  if self.count() > 0:
460
- E_lowest = self.confs[0].props[sort_by]
509
+ E_min = self.confs[0].props[sort_by]
461
510
  for conf in self.confs:
462
- E_rel = (conf.props[sort_by] - E_lowest)* conversion
511
+ E_rel = conversion * (conf.props[sort_by] - E_min)
463
512
  conf.props.update({"E_rel(kcal/mol)": E_rel})
513
+
464
514
  return self
465
515
 
466
516
 
467
- def align_confs(self, method:str='rigid_fragment') -> Self:
517
+ def align_confs(self, method: str = 'rigid_fragment') -> Self:
468
518
  """Aligns all conformers to the first conformer.
469
519
 
470
520
  Args:
@@ -473,7 +523,7 @@ class Mol:
473
523
  Defaults to `rigid_fragment`.
474
524
 
475
525
  Returns:
476
- Self: rdworks.Mol object.
526
+ Self: modified self.
477
527
  """
478
528
 
479
529
  if self.count() < 2: # nothing to do
@@ -524,7 +574,10 @@ class Mol:
524
574
  return self
525
575
 
526
576
 
527
- def cluster_confs(self, method:str='QT', threshold:float=1.0, sortby:str='size') -> Self:
577
+ def cluster_confs(self,
578
+ method: str = 'QT',
579
+ threshold: float = 1.0,
580
+ sort: str = 'size') -> Self:
528
581
  """Clusters all conformers and sets cluster properties.
529
582
 
530
583
  Following cluster properties will be added: `cluster`, `cluster_mean_energy`,
@@ -543,14 +596,14 @@ class Mol:
543
596
  `AutoGraph`.
544
597
  Defaults to `QT`.
545
598
  threshold (float, optional): RMSD threshold of a cluster. Defaults to 1.0.
546
- sortby (str, optional): sort cluster(s) by mean `energy` or cluster `size`.
599
+ sort (str, optional): sort cluster(s) by mean `energy` or cluster `size`.
547
600
  Defaults to `size`.
548
601
 
549
602
  Raises:
550
603
  NotImplementedError: if unsupported method is requested.
551
604
 
552
605
  Returns:
553
- Self: rdworks.Mol object
606
+ Self: modified self.
554
607
  """
555
608
  if method != 'DQT': # rmsd of x,y,z coordinates (non-H)
556
609
  conf_rdmols_noH = [Chem.RemoveHs(Chem.Mol(conf.rdmol)) for conf in self.confs]
@@ -569,7 +622,7 @@ class Mol:
569
622
  torsions = []
570
623
  for conf in self.confs:
571
624
  t_radians = []
572
- for (i, j, k, l, rot_indices, fix_indices) in torsion_atom_indices:
625
+ for torsion_key, (i, j, k, l) in torsion_atom_indices.items():
573
626
  t_radians.append(
574
627
  rdMolTransforms.GetDihedralRad(conf.rdmol.GetConformer(), i, j, k, l))
575
628
  torsions.append(np.array(t_radians))
@@ -661,14 +714,14 @@ class Mol:
661
714
  'iqr_energy' : iqr_energy,
662
715
  })
663
716
  # sort cluster index
664
- if sortby == 'size':
717
+ if sort == 'size':
665
718
  cluster_list = sorted(cluster_list, key=lambda x: x['size'], reverse=True)
666
719
 
667
- elif sortby == 'energy':
720
+ elif sort == 'energy':
668
721
  cluster_list = sorted(cluster_list, key=lambda x: x['median_energy'], reverse=False)
669
722
 
670
723
  else:
671
- raise NotImplementedError(f'{sortby} is not implemented yet.')
724
+ raise NotImplementedError(f'{sort} is not implemented yet.')
672
725
 
673
726
  for cluster_idx, cluster_dict in enumerate(cluster_list, start=1):
674
727
  for conf_idx in cluster_dict['confs']:
@@ -694,10 +747,10 @@ class Mol:
694
747
 
695
748
 
696
749
  def drop_confs(self,
697
- stereo_flipped:bool=True,
698
- unconverged:bool=True,
750
+ stereo_flipped: bool = True,
751
+ unconverged: bool = True,
699
752
  similar: bool | None = None,
700
- similar_rmsd:float=0.3,
753
+ similar_rmsd: float = 0.3,
701
754
  cluster: bool | None =None,
702
755
  k: int | None = None,
703
756
  window: float | None = None,
@@ -715,39 +768,46 @@ class Mol:
715
768
  k (int, optional): drop all except for `k` lowest energy conformers.
716
769
  window (float, optional): drop all except for conformers within `window` of relative energy.
717
770
 
718
- Returns:
719
- Self: a copy of rdworks.Mol object.
720
-
721
771
  Examples:
722
772
  To drop similar conformers within rmsd of 0.5 A
723
773
  >>> mol.drop_confs(similar=True, similar_rmsd=0.5)
724
774
 
725
775
  To drop conformers beyond 5 kcal/mol
726
776
  >>> mol.drop_confs(window=5.0)
727
-
777
+
778
+ Returns:
779
+ Self: modified self.
728
780
  """
729
- obj = copy.deepcopy(self)
781
+
782
+ reasons = [f'stereo flipped',
783
+ f'unconverged',
784
+ f'similar({similar_rmsd})',
785
+ f'cluster(non-centroid)',
786
+ f'k and/or energy window',
787
+ ]
788
+
789
+ w = max([len(s) for s in reasons])
730
790
 
731
- if stereo_flipped and obj.count() > 0:
732
- mask = [Chem.MolToSmiles(Chem.RemoveHs(_.rdmol)) == obj.smiles for _ in obj.confs]
733
- obj.confs = list(itertools.compress(obj.confs, mask))
791
+ if stereo_flipped and self.count() > 0:
792
+ mask = [Chem.MolToSmiles(Chem.RemoveHs(_.rdmol)) == self.smiles for _ in self.confs]
793
+ self.confs = list(itertools.compress(self.confs, mask))
734
794
  if verbose:
735
- main_logger.info(f'drop_confs stereo_flipped={mask.count(False)} -> {obj.count()}')
795
+ main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[0]:<{w}} -> {self.count()}')
736
796
 
737
- if unconverged and obj.count() > 0:
738
- mask = [_.props['Converged'] if 'Converged' in _.props else True for _ in obj.confs]
739
- obj.confs = list(itertools.compress(obj.confs, mask))
797
+ if unconverged and self.count() > 0:
798
+ mask = [_.props['Converged'] if 'Converged' in _.props else True for _ in self.confs]
799
+ self.confs = list(itertools.compress(self.confs, mask))
740
800
  if verbose:
741
- main_logger.info(f'drop_confs unconverged={mask.count(False)} -> {obj.count()}')
801
+ main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[1]:<{w}} -> {self.count()}')
742
802
 
743
- if similar and obj.count() > 1:
803
+ if similar and self.count() > 1:
744
804
  # it is observed that there are essentially identical conformers
745
805
  # such as 180-degree ring rotation and there is not minor conformational variations
746
806
  # in the RDKit ETKDG generated conformers.
747
- conf_rdmols_noH = [Chem.RemoveHs(Chem.Mol(_.rdmol)) for _ in obj.confs]
807
+ conf_rdmols_noH = [Chem.RemoveHs(Chem.Mol(_.rdmol)) for _ in self.confs]
748
808
  # copies are made for rmsd calculations to prevent coordinates changes
749
809
  lower_triangle_values = []
750
- for i in range(obj.count()): # number of conformers
810
+ for i in range(self.count()): # number of conformers
751
811
  for j in range(i):
752
812
  # rdMolAlign.GetBestRMS takes symmetry into account
753
813
  # removed hydrogens to speed up
@@ -755,10 +815,10 @@ class Mol:
755
815
  lower_triangle_values.append(best_rms)
756
816
  symm_matrix = convert_tril_to_symm(lower_triangle_values)
757
817
  cluster_assignment, centroid_indices = QT(symm_matrix, similar_rmsd)
758
- mask = [conf_idx in centroid_indices for conf_idx, conf in enumerate(obj.confs)]
759
- obj.confs = list(itertools.compress(obj.confs, mask))
818
+ mask = [conf_idx in centroid_indices for conf_idx, conf in enumerate(self.confs)]
819
+ self.confs = list(itertools.compress(self.confs, mask))
760
820
  if verbose:
761
- main_logger.info(f'drop_confs similar({similar_rmsd})={mask.count(False)} -> {obj.count()}')
821
+ main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[2]:<{w}} -> {self.count()}')
762
822
 
763
823
  # note: it will retain the conformers with lower index
764
824
  # so, it should be sorted before dropping
@@ -778,29 +838,29 @@ class Mol:
778
838
  # retained_confs.append(Chem.RemoveHs(conf_i.rdmol)) # store a copy of H-removed rdmol
779
839
  # obj.confs = list(itertools.compress(obj.confs, mask))
780
840
 
781
- if cluster and obj.count() > 1:
841
+ if cluster and self.count() > 1:
782
842
  # drop non-centroid cluster member(s)
783
- mask = [_.props['centroid'] if 'centroid' in _.props else True for _ in obj.confs]
784
- obj.confs = list(itertools.compress(obj.confs, mask))
843
+ mask = [_.props['centroid'] if 'centroid' in _.props else True for _ in self.confs]
844
+ self.confs = list(itertools.compress(self.confs, mask))
785
845
  if verbose:
786
- main_logger.info(f'drop_confs cluster(non-centroid)={mask.count(False)} -> {obj.count()}')
846
+ main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[3]:<{w}} -> {self.count()}')
787
847
 
788
- if (k or window) and obj.count() > 0:
848
+ if (k or window) and self.count() > 0:
789
849
  if k:
790
- mask_k = [i < k for i,_ in enumerate(obj.confs)]
850
+ mask_k = [i < k for i,_ in enumerate(self.confs)]
791
851
  else:
792
- mask_k = [True,] * obj.count()
852
+ mask_k = [True,] * self.count()
793
853
  if window:
794
- mask_window = [_.props['E_rel(kcal/mol)'] < window if 'E_rel(kcal/mol)' in _.props else True for _ in obj.confs]
854
+ mask_window = [_.props['E_rel(kcal/mol)'] < window if 'E_rel(kcal/mol)' in _.props else True for _ in self.confs]
795
855
  else:
796
- mask_window = [True,] * obj.count()
856
+ mask_window = [True,] * self.count()
797
857
  # retain conformer(s) that satisfy both k and window conditions
798
858
  mask = [(x and y) for (x,y) in zip(mask_k, mask_window)]
799
- obj.confs = list(itertools.compress(obj.confs, mask))
859
+ self.confs = list(itertools.compress(self.confs, mask))
800
860
  if verbose:
801
- main_logger.info(f'drop_confs k and/or window={mask.count(False)} -> {obj.count()}')
861
+ main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[4]:<{w}} -> {self.count()}')
802
862
 
803
- return obj
863
+ return self
804
864
 
805
865
 
806
866
  def count(self) -> int:
@@ -812,7 +872,7 @@ class Mol:
812
872
  return len(self.confs)
813
873
 
814
874
 
815
- def is_nn_applicable(self, model:str) -> bool:
875
+ def nnp_ready(self, model: str = 'aimnet2') -> bool:
816
876
  """Check if a particular neural network model is applicable to current molecule.
817
877
 
818
878
  Args:
@@ -830,19 +890,18 @@ class Mol:
830
890
  # H, C, N, O, F, S, Cl
831
891
  atomic_numbers = [1, 6, 7, 8, 9, 16, 17 ]
832
892
 
833
- elif model in ['aimnet', 'aimnet2']:
893
+ elif model.lower() in ['aimnet', 'aimnet2']:
834
894
  # H, B, C, N, O, F, Si, P, S, Cl, As, Se, Br, I
835
895
  atomic_numbers = [1, 5, 6, 7, 8, 9, 14, 15, 16, 17, 33, 34, 35, 53 ]
836
896
 
837
897
  else:
838
- raise ValueError('is_nn_applicable() supports ANI-2x, ANI-2xt, or AIMNET')
898
+ raise ValueError('nnp_ready() supports ANI-2x, ANI-2xt, AIMNet, or AIMNet2')
839
899
 
840
- for a in self.rdmol.GetAtoms():
841
- if a.GetAtomicNum() not in atomic_numbers:
842
- return False
900
+ if all([ a.GetAtomicNum() in atomic_numbers for a in self.rdmol.GetAtoms() ]):
901
+ return True
902
+ else:
903
+ return False
843
904
 
844
- return True
845
-
846
905
 
847
906
  def charge(self) -> int:
848
907
  """Returns molecular formal charge
@@ -859,7 +918,7 @@ class Mol:
859
918
  Returns:
860
919
  list: list of element symbols.
861
920
  """
862
- return [ a.GetSymbol() for a in self.rdmol.GetAtoms() ]
921
+ return [atom.GetSymbol() for atom in self.rdmol.GetAtoms()]
863
922
 
864
923
 
865
924
  def numbers(self) -> list[int]:
@@ -868,111 +927,19 @@ class Mol:
868
927
  Returns:
869
928
  list: list of atomic numbers.
870
929
  """
871
- return [ a.GetAtomicNum() for a in self.rdmol.GetAtoms() ]
930
+ return [atom.GetAtomicNum() for atom in self.rdmol.GetAtoms()]
872
931
 
873
932
 
874
- def torsion_atoms(self, strict:bool=True) -> list[tuple]:
875
- """Determine dihedral angle atoms (a-b-c-d) and rotating group for each rotatable bond (b-c).
933
+ def torsion_atoms(self, strict: bool = True) -> dict[int, tuple]:
934
+ """Determine torsion/dihedral angle atoms (i-j-k-l) and rotating group for each rotatable bond (j-k).
876
935
 
877
936
  Args:
878
937
  strict (bool): whether to exclude amide/imide/ester/acid bonds.
879
938
 
880
939
  Returns:
881
- [ (a, b, c, d, rot_atom_indices, fix_atom_indices),
882
- (a, b, c, d, rot_atom_indices, fix_atom_indices),
883
- ...,
884
- ]
940
+ {torsion_key: (i, j, k, l), ...,}
885
941
  """
886
- # https://github.com/rdkit/rdkit/blob/1bf6ef3d65f5c7b06b56862b3fb9116a3839b229/rdkit/Chem/Lipinski.py#L47%3E
887
- # https://github.com/rdkit/rdkit/blob/de602c88809ea6ceba1e8ed50fd543b6e406e9c4/Code/GraphMol/Descriptors/Lipinski.cpp#L108
888
- if strict :
889
- # excludes amide/imide/ester/acid bonds
890
- rotatable_bond_pattern = Chem.MolFromSmarts(
891
- (
892
- "[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])("
893
- "[CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])&!$([#7,O,S!D1]-!@[CD3]="
894
- "[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])&!$([#7!D1]-!@[CD3]=[N+])]-,:;!@[!$"
895
- "(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])(["
896
- "CH3])[CH3])]"
897
- )
898
- )
899
- else:
900
- rotatable_bond_pattern = Chem.MolFromSmarts('[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]')
901
- rotatable_bonds = self.rdmol.GetSubstructMatches(rotatable_bond_pattern)
902
- torsion_angle_atom_indices = []
903
-
904
- # small rings (n=3 or 4)
905
- small_rings = [ r for r in list(self.rdmol.GetRingInfo().AtomRings()) if len(r) < 5 ]
906
- # ex. = [(1, 37, 35, 34, 3, 2), (29, 28, 30)]
907
-
908
- forbidden_terminal_nuclei = [1, 9, 17, 35, 53] # H,F,Cl,Br,I
909
-
910
- for (b_idx, c_idx) in rotatable_bonds:
911
- # determine a atom ``a`` that define a dihedral angle
912
- a_candidates = []
913
- for neighbor in self.rdmol.GetAtomWithIdx(b_idx).GetNeighbors():
914
- neighbor_idx = neighbor.GetIdx()
915
- if neighbor_idx == c_idx:
916
- continue
917
- neighbor_atomic_num = neighbor.GetAtomicNum()
918
- if neighbor_atomic_num not in forbidden_terminal_nuclei:
919
- a_candidates.append((neighbor_atomic_num, neighbor_idx))
920
-
921
- if not a_candidates:
922
- continue
923
-
924
- (a_atomic_num, a_idx) = sorted(a_candidates, key=lambda x: (x[0], -x[1]), reverse=True)[0]
925
-
926
- # is a-b in a small ring (n=3 or 4)?
927
- is_in_small_ring = False
928
- for small_ring in small_rings:
929
- if (a_idx in small_ring) and (b_idx in small_ring):
930
- is_in_small_ring = True
931
- break
932
-
933
- if is_in_small_ring:
934
- continue
935
-
936
- # determine a atom ``d`` that define a dihedral angle
937
- d_candidates = []
938
- for neighbor in self.rdmol.GetAtomWithIdx(c_idx).GetNeighbors():
939
- neighbor_idx = neighbor.GetIdx()
940
- if (neighbor_idx == b_idx):
941
- continue
942
- neighbor_atomic_num = neighbor.GetAtomicNum()
943
- if neighbor_atomic_num not in forbidden_terminal_nuclei:
944
- d_candidates.append((neighbor_atomic_num, neighbor_idx))
945
-
946
- if not d_candidates:
947
- continue
948
-
949
- (d_atomic_num, d_idx) = sorted(d_candidates, key=lambda x: (x[0], -x[1]), reverse=True)[0]
950
-
951
- # is c-d in a small ring?
952
- is_in_small_ring = False
953
- for small_ring in small_rings:
954
- if (c_idx in small_ring) and (d_idx in small_ring):
955
- is_in_small_ring = True
956
- break
957
-
958
- if is_in_small_ring:
959
- continue
960
-
961
- # determine a group of atoms to be rotated
962
- # https://ctr.fandom.com/wiki/Break_rotatable_bonds_and_report_the_fragments
963
- em = Chem.EditableMol(self.rdmol)
964
- em.RemoveBond(b_idx, c_idx)
965
- fragmented = em.GetMol()
966
- (frag1, frag2) = Chem.GetMolFrags(fragmented, asMols=False) # returns tuple of tuple
967
- hac1 = sum([ 1 for i in frag1 if self.rdmol.GetAtomWithIdx(i).GetAtomicNum() > 1 ])
968
- hac2 = sum([ 1 for i in frag2 if self.rdmol.GetAtomWithIdx(i).GetAtomicNum() > 1 ])
969
-
970
- # smaller fragment will be rotated and must contain at least three heavy atoms
971
- if min(hac1, hac2) >= 3:
972
- (frag_rot, frag_fix) = sorted([(hac1, frag1), (hac2, frag2)])
973
- torsion_angle_atom_indices.append((a_idx, b_idx, c_idx, d_idx, frag_rot[1], frag_fix[1]))
974
-
975
- return torsion_angle_atom_indices
942
+ return {i: d[:4] for i, d in enumerate(get_torsion_atoms(self.rdmol, strict))}
976
943
 
977
944
 
978
945
  def compute(self, **kwargs) -> Self:
@@ -984,147 +951,152 @@ class Mol:
984
951
  progress (bool): whether to show progress bar.
985
952
 
986
953
  Returns:
987
- Self: rdworks.MolLibr object.
954
+ Self: modified self.
988
955
  """
989
956
  self.max_workers = kwargs.get('max_workers', self.max_workers)
990
957
  self.chunksize = kwargs.get('chunksize', self.chunksize)
991
958
  self.progress = kwargs.get('progress', self.progress)
959
+
992
960
  return self
993
961
 
994
962
 
995
- @staticmethod
996
- def _map_optimize_conf(conf:Conf, targs:tuple) -> Conf:
997
- """A map function to apply Conf.optimize() on `conf`.
998
-
999
- The default behavior of map() is to pass the elements of the iterable to the function by reference.
1000
- This means that if the function modifies the elements of the iterable,
1001
- those changes will be reflected in the iterable itself.
1002
-
1003
- Args:
1004
- conf (Conf): subject rdworks.Conf object.
1005
- targs (tuple): tuple of arguments to be passed to Conf.optimize().
1006
-
1007
- Returns:
1008
- Conf: rdworks.Conf object
1009
- """
1010
- return conf.optimize(*targs)
1011
-
1012
-
1013
963
  def torsion_energies(self,
1014
- calculator:str | Callable,
1015
- fmax:float = 0.05,
1016
- interval:float = 15.0,
964
+ calculator: str | Callable,
965
+ torsion_key: int | None = None,
966
+ simplify: bool = True,
967
+ fmax: float = 0.05,
968
+ interval: float = 20.0,
1017
969
  use_converged_only: bool = True,
1018
- optimize_ref: bool = False,
1019
970
  **kwargs,
1020
971
  ) -> Self:
1021
972
  """Calculates potential energy profiles for each torsion angle using ASE optimizer.
1022
973
 
974
+ It uses the first conformer as a reference.
975
+
1023
976
  Args:
1024
977
  calculator (str | Callable): 'MMFF', 'UFF', or ASE calculator.
978
+ torsion_key (int | None): torsion index to calculate. Defaults to None (all).
979
+ simplify (bool, optional): whether to use fragment surrogate. Defaults to True.
1025
980
  fmax (float, optional): fmax of ASE optimizer. Defaults to 0.05.
1026
981
  interval (float, optional): interval of torsion angles in degree. Defaults to 15.0.
1027
982
  use_converged_only (bool, optional): whether to use only converged data. Defaults to True.
1028
983
 
1029
984
  Returns:
1030
- list[dict]: [{'indices':list, 'angle':list, 'E_rel(kcal/mol)':list}, ...]
985
+ Self: modified self.
1031
986
  """
987
+ assert self.count() > 0, "torsion_energies() requires at least one conformer"
988
+
1032
989
  self = self.compute(**kwargs)
1033
990
 
1034
- torsion_atoms_indices = self.torsion_atoms()
1035
-
1036
- ref_conf = self.confs[0].copy() # use the lowest energy conformer as a reference
1037
- if optimize_ref:
1038
- ref_conf = ref_conf.optimize(calculator, fmax)
1039
-
1040
- # mol.confs will be populated with torsion conformers.
1041
- # It is designed for a batch optimization in the future.
1042
- mol = self.copy()
1043
- mol.confs = []
1044
- data = []
1045
-
1046
- for k, (a, b, c, d, rot_indices, fix_indices) in enumerate(torsion_atoms_indices):
1047
- data.append({'angle':[], 'init':[], 'final':[], 'Converged':[]})
1048
- for angle in np.arange(-180.0, 180.0, interval):
1049
- # Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
1050
- x = ref_conf.copy()
1051
- x.props.update({'torsion_index': k, 'angle': float(angle)})
1052
- AllChem.SetDihedralDeg(x.rdmol.GetConformer(), a, b, c, d, angle)
1053
- # All atoms bonded to atom d will move.
1054
- mol.confs.append(x)
1055
-
1056
- # Optimize
1057
- # with ProcessPoolExecutor(max_workers=self.max_workers) as executor:
1058
- # largs = [ (calculator, fmax,) ] * mol.count()
1059
- # if self.progress:
1060
- # lconfs = list(tqdm(
1061
- # executor.map(Mol._map_optimize_conf, mol.confs, largs, chunksize=1),
1062
- # desc="Optimize conformers",
1063
- # total=mol.count()))
1064
- # else:
1065
- # lconfs = list(
1066
- # executor.map(Mol._map_optimize_conf, mol.confs, largs, chunksize=1))
1067
- # mol.confs = lconfs
1068
-
1069
- # Calculate relaxation energies
1070
- for conf in mol.confs:
1071
- conf = conf.optimize(calculator, fmax)
1072
- # conf.optimize() updates coordinates and conf.props:
1073
- # `angle`, `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
1074
- i = conf.props['torsion_index']
1075
- data[i]['angle'].append(conf.props['angle'])
1076
- data[i]['init'].append(conf.props['E_tot_init(kcal/mol)'])
1077
- data[i]['final'].append(conf.props['E_tot(kcal/mol)'])
1078
- data[i]['Converged'].append(conf.props['Converged'])
991
+ if torsion_key is None:
992
+ torsion_atoms_indices = self.torsion_atoms()
993
+ else:
994
+ torsion_atoms_indices = {torsion_key: self.torsion_atoms()[torsion_key]}
995
+
996
+ ref_conf = self.confs[0].copy()
997
+
998
+ data = {}
999
+
1000
+ if simplify:
1001
+ for tk, indices in torsion_atoms_indices.items():
1002
+ frag, frag_ijkl = create_torsion_fragment(ref_conf.rdmol, indices)
1003
+ frag_conf = Conf(frag)
1004
+ data[tk] = {'indices': indices, 'angle':[], 'init':[], 'last':[], 'Converged':[]}
1005
+ for angle in np.arange(-180.0, 180.0, interval):
1006
+ # Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
1007
+ conf = frag_conf.copy()
1008
+ conf.props.update({'torsion_key': tk, 'angle': float(angle)})
1009
+ conf.set_torsion(*frag_ijkl, angle) # atoms bonded to `l` move.
1010
+ conf = conf.optimize(calculator, fmax, **kwargs)
1011
+ # conf.optimize() updates coordinates and conf.props:
1012
+ # `angle`, `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
1013
+ tk = conf.props['torsion_key']
1014
+ data[tk]['angle'].append(conf.props['angle'])
1015
+ data[tk]['init'].append(conf.props['E_tot_init(kcal/mol)'])
1016
+ data[tk]['last'].append(conf.props['E_tot(kcal/mol)'])
1017
+ data[tk]['Converged'].append(conf.props['Converged'])
1018
+ frag_cleaned, _ = clean_2d(frag, reset_isotope=True, remove_H=True)
1019
+ rdDepictor.Compute2DCoords(frag_cleaned)
1020
+ # to serialize the molecule
1021
+ data[tk]['frag'] = Chem.MolToMolBlock(frag_cleaned)
1022
+ data[tk]['frag_indices'] = frag_ijkl
1023
+
1024
+ else:
1025
+ # mol.confs will be populated with torsion conformers.
1026
+ # It is designed for a batch optimization in the future.
1027
+ mol = self.copy()
1028
+ mol.confs = []
1029
+ for tk, indices in torsion_atoms_indices.items():
1030
+ data[tk] = {'indices': indices, 'angle':[], 'init':[], 'last':[], 'Converged':[]}
1031
+ for angle in np.arange(-180.0, 180.0, interval):
1032
+ # Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
1033
+ x = ref_conf.copy()
1034
+ x.props.update({'torsion_key': tk, 'angle': float(angle)})
1035
+ x.set_torsion(*indices, angle) # atoms bonded to `l` move.
1036
+ mol.confs.append(x)
1037
+
1038
+ # Calculate relaxation energies
1039
+ for conf in mol.confs:
1040
+ conf = conf.optimize(calculator, fmax, **kwargs)
1041
+ # conf.optimize() updates coordinates and conf.props:
1042
+ # `angle`, `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
1043
+ tk = conf.props['torsion_key']
1044
+ data[tk]['angle'].append(conf.props['angle'])
1045
+ data[tk]['init'].append(conf.props['E_tot_init(kcal/mol)'])
1046
+ data[tk]['last'].append(conf.props['E_tot(kcal/mol)'])
1047
+ data[tk]['Converged'].append(conf.props['Converged'])
1079
1048
 
1080
1049
  # Post-processing
1081
- torsion_energy_profiles = []
1082
- for indices, datadict in zip(torsion_atoms_indices, data):
1050
+ torsion_energy_profiles = {}
1051
+ for tk, dictdata in data.items():
1083
1052
  if use_converged_only:
1084
- datadict['angle'] = list(itertools.compress(datadict['angle'], datadict['Converged']))
1085
- datadict['init'] = list(itertools.compress(datadict['init'], datadict['Converged']))
1086
- datadict['final'] = list(itertools.compress(datadict['final'], datadict['Converged']))
1087
- relax = np.array(datadict['init']) - np.median(datadict['final'])
1053
+ dictdata['angle'] = list(itertools.compress(dictdata['angle'], dictdata['Converged']))
1054
+ dictdata['init'] = list(itertools.compress(dictdata['init'], dictdata['Converged']))
1055
+ dictdata['last'] = list(itertools.compress(dictdata['last'], dictdata['Converged']))
1056
+ relax = np.array(dictdata['init']) - np.median(dictdata['last'])
1088
1057
  E_rel = relax - np.min(relax)
1089
- torsion_energy_profiles.append({
1090
- 'indices': indices, # (a, b, c, d, rot_indices, fix_indices)
1091
- 'angle': np.array(datadict['angle']).tolist(), # np.ndarray -> list for serialization
1092
- 'E_rel(kcal/mol)': E_rel.tolist(), # np.ndarray -> list for serialization
1093
- })
1058
+ torsion_energy_profiles[tk] = {
1059
+ 'indices' : dictdata['indices'],
1060
+ 'angle' : np.round(np.array(dictdata['angle']), 1).tolist(), # np.ndarray -> list for serialization
1061
+ 'E_rel(kcal/mol)': np.round(E_rel, 2).tolist(), # np.ndarray -> list for serialization
1062
+ 'frag' : dictdata.get('frag', None),
1063
+ 'frag_indices' : dictdata.get('frag_indices', None),
1064
+ }
1065
+
1094
1066
  self.props['torsion'] = torsion_energy_profiles
1095
1067
  self.props['torsion_calculator'] = str(calculator)
1096
1068
 
1097
1069
  return self
1098
1070
 
1099
1071
 
1100
-
1101
-
1102
- def similarity(self, other:object) -> float:
1103
- """Returns Tanimoto similarity with `other` rdworks.Mol object.
1072
+ def similarity(self, other: Self) -> float:
1073
+ """Returns Tanimoto similarity with other Mol object.
1104
1074
 
1105
1075
  Args:
1106
- other (rdworks.Mol): other rdworks.Mol object.
1076
+ other (Mol): other Mol object.
1107
1077
 
1108
1078
  Raises:
1109
- TypeError: if `other` is not rdworks.Mol object type.
1079
+ TypeError: if `other` is not Mol object type.
1110
1080
 
1111
1081
  Returns:
1112
1082
  float: Tanimoto similarity.
1113
1083
  """
1114
- if not isinstance(other, Mol):
1115
- raise TypeError("Mol.is_similar() expects Mol object")
1084
+ assert isinstance(other, Mol), "similarity() Error: invalid Mol object"
1085
+
1116
1086
  if not self.fp:
1117
1087
  self.fp = self.MFP2.GetFingerprint(self.rdmol)
1088
+
1118
1089
  if not other.fp:
1119
1090
  other.fp = other.MFP2.GetFingerprint(other.rdmol)
1091
+
1120
1092
  return DataStructs.TanimotoSimilarity(self.fp, other.fp)
1121
1093
 
1122
1094
 
1123
- def is_similar(self, other:object, threshold:float) -> bool:
1124
- """Check if `other` molecule is similar within `threshold`.
1095
+ def is_similar(self, other: Self, threshold: float) -> bool:
1096
+ """Check if other molecule is similar within Tanimoto similarity threshold.
1125
1097
 
1126
1098
  Args:
1127
- other (rdworks.Mol): other rdworks.Mol object to compare with.
1099
+ other (Mol): other Mol object to compare with.
1128
1100
  threshold (float): Tanimoto similarity threshold.
1129
1101
 
1130
1102
  Returns:
@@ -1132,8 +1104,21 @@ class Mol:
1132
1104
  """
1133
1105
  return self.similarity(other) >= threshold
1134
1106
 
1135
-
1136
- def is_matching(self, terms: str | Path, invert:bool=False) -> bool:
1107
+
1108
+ def has_substr(self, substr: str) -> bool:
1109
+ """Determine if the molecule has the substructure match.
1110
+
1111
+ Args:
1112
+ pattern (str): SMARTS or SMILES.
1113
+
1114
+ Returns:
1115
+ bool: True if matches.
1116
+ """
1117
+ query = Chem.MolFromSmarts(substr)
1118
+ return self.rdmol.HasSubstructMatch(query)
1119
+
1120
+
1121
+ def is_matching(self, terms: str | Path, invert: bool = False) -> bool:
1137
1122
  """Determines if the molecule matches the predefined substructure and/or descriptor ranges.
1138
1123
 
1139
1124
  invert | terms(~ or !) | effect
@@ -1151,14 +1136,15 @@ class Mol:
1151
1136
  Returns:
1152
1137
  bool: True if matches.
1153
1138
  """
1154
- if isinstance(terms, pathlib.PosixPath):
1139
+ if isinstance(terms, Path):
1155
1140
  path = terms.as_posix()
1141
+
1156
1142
  elif isinstance(terms, str):
1157
1143
  if terms.startswith('~') or terms.startswith('!'):
1158
1144
  terms = terms.replace('~','').replace('!','')
1159
1145
  invert = (invert ^ True)
1160
1146
  try:
1161
- path = pathlib.Path(terms) # test if terms points to a xml file
1147
+ path = Path(terms) # test if terms points to a xml file
1162
1148
  assert path.is_file()
1163
1149
  except:
1164
1150
  path = get_predefined_xml(terms)
@@ -1189,8 +1175,10 @@ class Mol:
1189
1175
  if combine.lower() == 'or' and any(mask):
1190
1176
  # early termination if any term is satisfied
1191
1177
  return invert ^ True # XOR(^) inverts only if invert is True
1178
+
1192
1179
  if combine.lower() == 'and' and all(mask):
1193
1180
  return invert ^ True
1181
+
1194
1182
  return invert ^ False
1195
1183
 
1196
1184
 
@@ -1243,6 +1231,7 @@ class Mol:
1243
1231
  continue
1244
1232
  else:
1245
1233
  stereos.append(element.specified == Chem.StereoSpecified.Specified)
1234
+
1246
1235
  # note all([]) returns True
1247
1236
  return all(stereos)
1248
1237
 
@@ -1259,12 +1248,12 @@ class Mol:
1259
1248
  if element.type == Chem.StereoType.Bond_Double:
1260
1249
  if self.rdmol.GetBondWithIdx(element.centeredOn).IsInRing():
1261
1250
  ring_bond_stereo_info.append((element.centeredOn, element.descriptor))
1251
+
1262
1252
  return ring_bond_stereo_info
1263
1253
 
1264
1254
 
1265
1255
  def report_stereo(self) -> None:
1266
- """Print out stereochemistry information.
1267
- """
1256
+ """Report stereochemistry information for debug"""
1268
1257
  num_chiral_centers = rdMolDescriptors.CalcNumAtomStereoCenters(self.rdmol)
1269
1258
  # Returns the total number of atomic stereocenters (specified and unspecified)
1270
1259
  num_unspecified_chiral_centers = rdMolDescriptors.CalcNumUnspecifiedAtomStereoCenters(self.rdmol)
@@ -1289,8 +1278,7 @@ class Mol:
1289
1278
 
1290
1279
 
1291
1280
  def report_props(self) -> None:
1292
- """Print out properties.
1293
- """
1281
+ """Report properties"""
1294
1282
  if self.props:
1295
1283
  print(f"Properties({len(self.props)}):")
1296
1284
  fixed_width = max([len(k) for k in self.props]) + 4
@@ -1302,7 +1290,59 @@ class Mol:
1302
1290
  print(f"Properties: None")
1303
1291
 
1304
1292
 
1305
- def to_sdf(self, confs:bool=False, props:bool=True) -> str:
1293
+ def draw(self,
1294
+ coordgen: bool = False,
1295
+ rotate: bool = False,
1296
+ axis: str = 'z',
1297
+ degree: float = 0.0,
1298
+ ) -> Self:
1299
+ """Draw molecule in 2D.
1300
+
1301
+ Args:
1302
+ coordgen (bool, optional): whether to use `coordgen`. Defaults to False.
1303
+ rotate (bool, optional): whether to rotate drawing. Defaults to False.
1304
+ axis (str, optional): axis for rotation. Defaults to 'z'.
1305
+ degree (float, optional): degree for rotation. Defaults to 0.0.
1306
+
1307
+ Returns:
1308
+ Self.
1309
+ """
1310
+ rdDepictor.SetPreferCoordGen(coordgen)
1311
+ rdDepictor.Compute2DCoords(self.rdmol)
1312
+
1313
+ if rotate:
1314
+ rad = (np.pi/180.0) * degree
1315
+ c = np.cos(rad)
1316
+ s = np.sin(rad)
1317
+ if axis.lower() == 'x':
1318
+ rotmat = np.array([
1319
+ [1., 0., 0., 0.],
1320
+ [0., c, -s, 0.],
1321
+ [0., s, c, 0.],
1322
+ [0., 0., 0., 1.],
1323
+ ])
1324
+ elif axis.lower() == 'y':
1325
+ rotmat = np.array([
1326
+ [ c, 0., s, 0.],
1327
+ [ 0., 1., 0., 0.],
1328
+ [-s, 0., c, 0.],
1329
+ [ 0., 0., 0., 1.],
1330
+ ])
1331
+ elif axis.lower() == 'z':
1332
+ rotmat = np.array([
1333
+ [c, -s, 0., 0.],
1334
+ [s, c, 0., 0.],
1335
+ [0., 0., 1., 0.],
1336
+ [0., 0., 0., 1.],
1337
+ ])
1338
+ rdMolTransforms.TransformConformer(
1339
+ self.rdmol.GetConformer(),
1340
+ rotmat)
1341
+
1342
+ return self
1343
+
1344
+
1345
+ def to_sdf(self, confs: bool = False, props: bool = True) -> str:
1306
1346
  """Returns strings of SDF output.
1307
1347
 
1308
1348
  Args:
@@ -1312,8 +1352,8 @@ class Mol:
1312
1352
  Returns:
1313
1353
  str: strings of SDF output.
1314
1354
  """
1315
- in_memory = io.StringIO()
1316
- with Chem.SDWriter(in_memory) as f:
1355
+ buf = StringIO()
1356
+ with Chem.SDWriter(buf) as f:
1317
1357
  if confs:
1318
1358
  for conf in self.confs:
1319
1359
  rdmol = Chem.Mol(conf.rdmol)
@@ -1333,45 +1373,61 @@ class Mol:
1333
1373
  for k,v in self.props.items():
1334
1374
  rdmol.SetProp(k, str(v))
1335
1375
  f.write(rdmol)
1336
- return in_memory.getvalue()
1337
-
1338
-
1339
- def to_image(self, width:int=300, height:int=300, index:bool=False, svg:bool=True) -> object:
1340
- """Returns PIL(Python Image Library) image object.
1376
+
1377
+ return buf.getvalue()
1378
+
1341
1379
 
1342
- Use .save(output_filename) method to save as an image file.
1380
+ def to_png(self,
1381
+ width: int = 300,
1382
+ height: int = 300,
1383
+ legend: str = '',
1384
+ atom_index: bool = False,
1385
+ highlight_atoms: list[int] | None = None,
1386
+ highlight_bonds: list[int] | None = None,
1387
+ redraw: bool = False,
1388
+ coordgen: bool = False,
1389
+ trim: bool = True,
1390
+ ) -> Image.Image:
1391
+ """Draw 2D molecule in PNG format.
1343
1392
 
1344
1393
  Args:
1345
- width (int, optional): width of image. Defaults to 300.
1346
- height (int, optional): height of image. Defaults to 300.
1347
- index (bool, optional): whether to highlight atom indexes. Defaults to False.
1348
- svg (bool, optional): whether to return in SVG format. Defaults to True.
1349
-
1394
+ width (int, optional): width. Defaults to 300.
1395
+ height (int, optional): height. Defaults to 300.
1396
+ legend (str, optional): legend. Defaults to ''.
1397
+ atom_index (bool, optional): whether to show atom index. Defaults to False.
1398
+ highlight_atoms (list[int] | None, optional): atom(s) to highlight. Defaults to None.
1399
+ highlight_bonds (list[int] | None, optional): bond(s) to highlight. Defaults to None.
1400
+ redraw (bool, optional): whether to redraw. Defaults to False.
1401
+ coordgen (bool, optional): whether to use coordgen. Defaults to False.
1402
+ trim (bool, optional): whether to trim white margins. Default to True.
1403
+
1350
1404
  Returns:
1351
- object: PIL image object.
1405
+ Image.Image: output PIL Image object.
1352
1406
  """
1353
- if index:
1354
- for a in self.rdmol.GetAtoms():
1355
- a.SetProp("atomNote", str(a.GetIdx()+1))
1356
1407
 
1357
- return Draw.MolsToImage(self.rdmol,
1358
- size=(width,height),
1359
- kekulize=True,
1360
- wedgeBonds=True, # draw wedge (stereo)
1361
- fitImage=False,
1362
- options=None,
1363
- canvas=None,
1364
- useSVG=svg)
1365
-
1408
+ return render_png(self.rdmol,
1409
+ width = width,
1410
+ height = height,
1411
+ legend = legend,
1412
+ atom_index = atom_index,
1413
+ highlight_atoms = highlight_atoms,
1414
+ highlight_bonds = highlight_bonds,
1415
+ redraw = redraw,
1416
+ coordgen = coordgen,
1417
+ trim = trim)
1366
1418
 
1367
1419
  def to_svg(self,
1368
- width:int = 400,
1369
- height:int = 400,
1370
- legend:str = '',
1371
- index:bool = False,
1372
- highlight: list[int] | None = None,
1373
- coordgen:bool = False) -> str:
1374
- """Returns depiction strings in SVG format.
1420
+ width: int = 300,
1421
+ height: int = 300,
1422
+ legend: str = '',
1423
+ atom_index: bool = False,
1424
+ highlight_atoms: list[int] | None = None,
1425
+ highlight_bonds: list[int] | None = None,
1426
+ redraw: bool = False,
1427
+ coordgen: bool = False,
1428
+ optimize: bool = True,
1429
+ ) -> str:
1430
+ """Draw 2D molecule in SVG format.
1375
1431
 
1376
1432
  Examples:
1377
1433
  For Jupyternotebook, wrap the output with SVG:
@@ -1380,55 +1436,60 @@ class Mol:
1380
1436
  >>> SVG(libr[0].to_svg())
1381
1437
 
1382
1438
  Args:
1383
- width (int): width (default:400)
1384
- height (int): height (default:400)
1385
- legend (str): legend
1386
- index (bool): True/False whether to display atom index
1387
- highlight (list): list of atom indices to highlight
1439
+ width (int, optional): width. Defaults to 300.
1440
+ height (int, optional): height. Defaults to 300.
1441
+ legend (str, optional): legend. Defaults to ''.
1442
+ atom_index (bool, optional): whether to show atom index. Defaults to False.
1443
+ highlight_atoms (list[int] | None, optional): atom(s) to highlight. Defaults to None.
1444
+ highlight_bonds (list[int] | None, optional): bond(s) to highlight. Defaults to None.
1445
+ redraw (bool, optional): whether to redraw. Defaults to False.
1446
+ coordgen (bool, optional): whether to use coordgen. Defaults to False.
1447
+ optimize (bool, optional): whether to optimize SVG string. Defaults to True.
1388
1448
 
1389
1449
  Returns:
1390
- str: SVG text
1450
+ str: SVG string
1391
1451
  """
1392
- rdDepictor.SetPreferCoordGen(coordgen)
1393
-
1394
- rdmol_2d = Chem.Mol(self.rdmol)
1395
- rdDepictor.Compute2DCoords(rdmol_2d)
1396
- rdDepictor.StraightenDepiction(rdmol_2d)
1397
-
1398
- for atom in rdmol_2d.GetAtoms():
1399
- for key in atom.GetPropsAsDict():
1400
- atom.ClearProp(key)
1401
-
1402
- if index: # index hides polar hydrogens
1403
- for atom in rdmol_2d.GetAtoms():
1404
- atom.SetProp("atomLabel", str(atom.GetIdx()))
1405
- # atom.SetProp("atomNote", str(atom.GetIdx()))
1406
- # atom.SetProp("molAtomMapNumber", str(atom.GetIdx()))
1407
-
1408
- drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
1409
- if highlight:
1410
- drawer.DrawMolecule(rdmol_2d, legend=legend, highlightAtoms=highlight)
1411
- else:
1412
- drawer.DrawMolecule(rdmol_2d, legend=legend)
1413
- drawer.FinishDrawing()
1414
- return drawer.GetDrawingText()
1415
-
1416
-
1417
- def plot_energy(self, df:pd.DataFrame) -> str:
1418
- """Returns Seaborn plot strings for dihedral energy profile in SVG format.
1452
+ return render_svg(self.rdmol,
1453
+ width = width,
1454
+ height = height,
1455
+ legend = legend,
1456
+ atom_index = atom_index,
1457
+ highlight_atoms = highlight_atoms,
1458
+ highlight_bonds = highlight_bonds,
1459
+ redraw = redraw,
1460
+ coordgen = coordgen,
1461
+ optimize = optimize)
1462
+
1419
1463
 
1420
- Input pandas DataFrame must have columns: `angle` and `E_rel(kcal/mol)`
1464
+ def plot_torsion_energies(self,
1465
+ torsion_key: int,
1466
+ svg: bool = False,
1467
+ upper_limit: float = 35.0,
1468
+ zoomin_limit: float = 5.0,
1469
+ **kwargs,
1470
+ ) -> str | None:
1471
+ """Plot torsion energies.
1421
1472
 
1422
1473
  Args:
1423
- df (pd.DataFrame): input dataframe.
1474
+ torsion_key (int): torsion data to plot.
1475
+ svg (bool, optional): whether to return SVG strings. Defaults to False.
1476
+ upper_limit (float, optional): upper limit for E_rel(kcal/mol). Defaults to 35.0.
1477
+ zoomin_limit (float, optional): lower limit for E_rel(kcal/mol). Defaults to 5.0.
1478
+ **kwargs: matplotlib.pyplot.plt.figure options.
1424
1479
 
1425
1480
  Returns:
1426
- str: Seaborn plot in strings.
1481
+ SVG strings or None for Jupyter Notebook.
1427
1482
  """
1483
+ data = self.props['torsion'][torsion_key]
1484
+ df = pd.DataFrame({ax: data[ax] for ax in ['angle', 'E_rel(kcal/mol)']})
1428
1485
 
1429
- # sns.set_theme()
1486
+ plt.figure(**kwargs)
1487
+ plt.clf() # Clear the current figure to prevent overlapping plots
1488
+
1489
+ sns.set_theme()
1430
1490
  sns.color_palette("tab10")
1431
1491
  sns.set_style("whitegrid")
1492
+
1432
1493
  if len(df['angle']) == len(df['angle'].drop_duplicates()):
1433
1494
  g = sns.lineplot(x="angle",
1434
1495
  y="E_rel(kcal/mol)",
@@ -1445,31 +1506,57 @@ class Mol:
1445
1506
  markersize=10)
1446
1507
  g.xaxis.set_major_locator(ticker.MultipleLocator(30))
1447
1508
  g.xaxis.set_major_formatter(ticker.ScalarFormatter())
1448
- if df["E_rel(kcal/mol)"].max() > 35.0:
1509
+ if df["E_rel(kcal/mol)"].max() > upper_limit:
1449
1510
  g.set(title=self.name,
1450
1511
  xlabel='Dihedral Angle (degree)',
1451
1512
  ylabel='Relative Energy (Kcal/mol)',
1452
1513
  xlim=(-190, 190),
1453
- ylim=(-1.5, 35.0))
1454
- elif df["E_rel(kcal/mol)"].max() < 5.0:
1514
+ ylim=(-1.5, upper_limit))
1515
+ elif df["E_rel(kcal/mol)"].max() < zoomin_limit:
1455
1516
  g.set(title=self.name,
1456
1517
  xlabel='Dihedral Angle (degree)',
1457
1518
  ylabel='Relative Energy (Kcal/mol)',
1458
1519
  xlim=(-190, 190),
1459
- ylim=(-1.5, 5.0))
1520
+ ylim=(-1.5, zoomin_limit))
1460
1521
  else:
1461
1522
  g.set(title=self.name,
1462
1523
  xlabel='Dihedral Angle (degree)',
1463
1524
  ylabel='Relative Energy (Kcal/mol)',
1464
1525
  xlim=(-190, 190),)
1465
1526
  g.tick_params(axis='x', rotation=30)
1466
- in_memory = io.StringIO()
1467
- plt.savefig(in_memory, format='svg', bbox_inches='tight')
1468
- plt.clf()
1469
- return in_memory.getvalue()
1470
-
1527
+
1528
+ if svg:
1529
+ buf = StringIO()
1530
+ plt.savefig(buf, format='svg', bbox_inches='tight')
1531
+ plt.close() # prevents duplicate plot outputs in Jupyter Notebook
1532
+ svg_string = buf.getvalue()
1533
+ # optimize SVG string
1534
+ scour_options = {
1535
+ 'strip_comments': True,
1536
+ 'strip_ids': True,
1537
+ 'shorten_ids': True,
1538
+ 'compact_paths': True,
1539
+ 'indent_type': 'none',
1540
+ }
1541
+ svg_string = scourString(svg_string, options=scour_options)
1542
+
1543
+ return svg_string
1544
+
1545
+ else:
1546
+ buf = BytesIO()
1547
+ plt.savefig(buf, format='png', bbox_inches='tight')
1548
+ plt.close() # prevents duplicate plot outputs in Jupyter Notebook
1549
+ buf.seek(0)
1550
+ img = Image.open(buf)
1551
+ plt.imshow(img)
1552
+ plt.axis('off') # Optional: remove axes
1553
+ plt.show()
1554
+
1471
1555
 
1472
- def to_html(self, htmlbody:bool=False) -> str:
1556
+ def to_html(self,
1557
+ htmlbody: bool = False,
1558
+ contents: str = 'torsion',
1559
+ ) -> str:
1473
1560
  """Returns HTML text of dihedral energy profile.
1474
1561
 
1475
1562
  Args:
@@ -1478,42 +1565,86 @@ class Mol:
1478
1565
  Returns:
1479
1566
  str: HTML text.
1480
1567
  """
1568
+ HTML = ''
1481
1569
  if htmlbody:
1482
- HTML = "<html><body>"
1483
- else:
1484
- HTML = ""
1485
- # start of content
1486
- HTML += f'<h1 style="text-align:left">{self.name}</h1>'
1487
- HTML += "<table>"
1488
- for datadict in self.props['torsion']: # list of dict
1489
- (a1, a2, a3, a4, _, _) = datadict['indices']
1490
- df = pd.DataFrame({k:datadict[k] for k in ['angle', 'E_rel(kcal/mol)']})
1491
- svg_rdmol = self.to_svg(highlight=[a1, a2, a3, a4], index=True)
1492
- svg_energy_plot = self.plot_energy(df)
1493
- HTML += f"<tr>"
1494
- HTML += f"<td>{a1}-{a2}-{a3}-{a4}</td>"
1495
- HTML += f"<td>{svg_rdmol}</td>"
1496
- HTML += f"<td>{svg_energy_plot}</td>"
1497
- HTML += f"</tr>"
1498
- HTML += '</table>'
1499
- HTML += '<hr style="height:2px;border-width:0;color:gray;background-color:gray">'
1500
- # end of content
1570
+ HTML = '<html><body>'
1571
+
1572
+ if contents.lower() == 'torsion':
1573
+ # start of content
1574
+ HTML += f'<h1 style="text-align:left">{self.name}</h1>'
1575
+ HTML += '<table>'
1576
+ for tk, dictdata in self.props['torsion'].items():
1577
+ ijkl = dictdata['indices']
1578
+ ijkl_str = '-'.join([str(i) for i in ijkl])
1579
+ svg_mol = self.to_svg(highlight_atoms=ijkl, atom_index=True)
1580
+ svg_plot = self.plot_torsion_energies(torsion_key=tk, svg=True)
1581
+ frag = dictdata.get('frag', None)
1582
+ if frag is not None:
1583
+ frag = Chem.MolFromMolBlock(frag)
1584
+ pqrs = dictdata['frag_indices']
1585
+ pqrs_str = '-'.join([str(i) for i in pqrs])
1586
+ svg_frag = render_svg(frag, highlight_atoms=pqrs, atom_index=True)
1587
+ HTML += f'<tr><td>{ijkl_str}</td><td>{svg_mol}</td>'
1588
+ HTML += f'<td>{pqrs_str}<td>{svg_frag}</td><td>{svg_plot}</td></tr>'
1589
+ else:
1590
+ HTML += f'<tr><td>{ijkl_str}</td><td>{svg_mol}</td><td>{svg_plot}</td></tr>'
1591
+ HTML += '</table>'
1592
+ HTML += '<hr style="height:2px;border-width:0;color:gray;background-color:gray">'
1593
+ # end of content
1594
+
1501
1595
  if htmlbody:
1502
- HTML += "</body></html>"
1596
+ HTML += '</body></html>'
1597
+
1503
1598
  return HTML
1504
1599
 
1505
1600
 
1506
- def serialize(self, key: str | None = None, decimal_places:int=2) -> str:
1601
+ def dumps(self, key: str = "", decimals: int = 2) -> str:
1507
1602
  """Returns JSON dumps of properties.
1508
1603
 
1509
1604
  Args:
1510
1605
  key (str | None): key for a subset of properties. Defaults to None.
1511
- decimal_places (int, optional): decimal places for float numbers. Defaults to 2.
1606
+ decimals (int, optional): decimal places for float numbers. Defaults to 2.
1512
1607
 
1513
1608
  Returns:
1514
- str: serialized JSON dumps.
1609
+ str: JSON dumps.
1515
1610
  """
1516
- props = fix_decimal_places_in_dict(self.props, decimal_places)
1611
+ props = recursive_round(self.props, decimals)
1612
+
1517
1613
  if key:
1518
1614
  return json.dumps({key:props[key]})
1615
+
1519
1616
  return json.dumps(props)
1617
+
1618
+
1619
+ def serialize(self, decimals: int = 2) -> str:
1620
+ serialized = json.dumps({
1621
+ 'name' : self.name,
1622
+ 'smiles': self.smiles,
1623
+ 'props' : recursive_round(self.props, decimals),
1624
+ 'confs' : [conf.serialize() for conf in self.confs],
1625
+ })
1626
+
1627
+ return serialized
1628
+
1629
+
1630
+ def deserialize(self, serialized: str) -> Self:
1631
+ """Updates self with the serialized string input.
1632
+
1633
+ Args:
1634
+ serialized (str): input
1635
+
1636
+ Returns:
1637
+ Self: modified self.
1638
+ """
1639
+ data = json.loads(serialized)
1640
+
1641
+ self.name = data['name']
1642
+ self.smiles = data['smiles'] # isomeric SMILES, no H
1643
+ self.rdmol = Chem.MolFromSmiles(data['smiles']) # for 2D depiction
1644
+ self.rdmol.SetProp('_Name', self.name)
1645
+ self.InChI = Chem.MolToInchi(self.rdmol)
1646
+ self.InChIKey = inchi.InchiToInchiKey(self.InChI)
1647
+ self.props = data['props']
1648
+ self.confs = [Conf().deserialize(_) for _ in data['confs']] # for 3D conformers (iterable)
1649
+
1650
+ return self