rdworks 0.25.7__py3-none-any.whl → 0.35.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rdworks/mol.py CHANGED
@@ -1,19 +1,13 @@
1
- # rdworks/mol.py
2
-
3
- import os
4
- import io
5
1
  import copy
6
- import types
7
- import pathlib
8
2
  import itertools
9
- import math
10
3
  import json
11
4
  import logging
12
5
  import tempfile
13
6
 
7
+ from io import StringIO, BytesIO
8
+ from pathlib import Path
14
9
  from collections import defaultdict
15
10
  from collections.abc import Callable
16
- from pathlib import Path
17
11
  from typing import Iterator, Self
18
12
 
19
13
  import numpy as np
@@ -27,34 +21,35 @@ import CDPL.Chem
27
21
  import CDPL.ConfGen
28
22
 
29
23
  from rdkit import Chem, DataStructs
30
-
31
24
  from rdkit.Chem import (
32
25
  rdMolDescriptors, AllChem, Descriptors, QED,
33
26
  rdFingerprintGenerator,
34
- Draw, rdDepictor,
27
+ Draw, rdDepictor, inchi,
35
28
  rdDistGeom, rdMolAlign, rdMolTransforms, rdmolops
36
29
  )
37
30
  from rdkit.Chem.Draw import rdMolDraw2D
38
-
39
31
  from rdkit.ML.Cluster import Butina
32
+ from PIL import Image
40
33
 
41
- from rdworks.std import desalt_smiles, standardize
34
+ from rdworks.conf import Conf
35
+ from rdworks.std import desalt_smiles, standardize, clean_2d
42
36
  from rdworks.xml import list_predefined_xml, get_predefined_xml, parse_xml
43
37
  from rdworks.scaffold import rigid_fragment_indices
44
38
  from rdworks.descriptor import rd_descriptor, rd_descriptor_f
45
- from rdworks.display import svg
46
- from rdworks.utils import convert_tril_to_symm, QT, fix_decimal_places_in_dict
39
+ from rdworks.utils import convert_tril_to_symm, QT, recursive_round
47
40
  from rdworks.units import ev2kcalpermol
48
41
  from rdworks.autograph import NMRCLUST, DynamicTreeCut, RCKmeans, AutoGraph
49
42
  from rdworks.bitqt import BitQT
50
- from rdworks.conf import Conf
43
+ from rdworks.torsion import create_torsion_fragment, get_torsion_atoms
44
+ from rdworks.display import render_svg, render_png
45
+
46
+ from scour.scour import scourString
51
47
 
52
48
  main_logger = logging.getLogger()
53
49
 
54
50
 
55
51
  class Mol:
56
- """Container for molecular structure, conformers, and other information.
57
- """
52
+ """Container for molecular structure, conformers, and other information."""
58
53
 
59
54
  MFP2 = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
60
55
 
@@ -62,100 +57,98 @@ class Mol:
62
57
  ETKDG_params.useSmallRingTorsions = True
63
58
  ETKDG_params.maxIterations = 2000
64
59
 
65
-
66
60
  def __init__(self,
67
- molecular_input: str | Chem.Mol,
68
- name:str='',
69
- std:bool=False,
70
- max_workers:int=1,
71
- chunksize:int=4,
72
- progress:bool=False) -> None:
73
- """Create a rdworks.Mol object.
61
+ molecule: str | Chem.Mol | Conf | None = None,
62
+ name: str = '',
63
+ std: bool = False,
64
+ reset_isotope: bool = True,
65
+ remove_H: bool = True,
66
+ max_workers: int = 1,
67
+ chunksize: int = 4,
68
+ progress: bool = False) -> None:
69
+ """Initialize.
74
70
 
75
71
  Examples:
76
- >>> import rdworks
77
- >>> m = rdworks.Mol('c1ccccc1', name='benzene')
72
+ >>> from rdworks import Mol
73
+ >>> m = Mol('c1ccccc1', name='benzene')
78
74
 
79
75
  Args:
80
- molecular_input (str | Chem.Mol): SMILES or rdkit.Chem.Mol object
81
- name (str, optional): name of the molecule. Defaults to ''.
82
- std (bool, optional): whether to standardize the molecule. Defaults to False.
83
-
84
- Raises:
85
- ValueError: Invalid SMILES or rdkit.Chem.Mol object.
86
- TypeError: No SMILES or rdkit.Chem.Mol object is provided.
87
- RuntimeError: Desalting or standardization process failed.
76
+ molecule (str | Chem.Mol | None): SMILES or rdkit.Chem.Mol or None
77
+ name (str): name of the molecule. Defaults to ''.
78
+ std (bool): whether to standardize the molecule. Defaults to False.
79
+ max_workers (int): number of maximum workers for parallelization. Defaults to 1.
80
+ chunksize (int): batch size for parallelization. Defaults to 4.
81
+ progress (bool): whether to show progress bar. Defaults to False.
88
82
  """
89
-
90
- self.rdmol = None # rdkit.Chem.Mol object
91
- self.smiles = None # isomeric SMILES
92
- self.name = None
83
+ assert isinstance(molecule, str | Chem.Mol | Conf) or molecule is None
84
+
85
+ self.rdmol = None # 2D, one and only one Conformer
86
+ self.smiles = '' # isomeric SMILES
87
+ self.confs = [] # container for 3D conformers
88
+ self.name = ''
89
+ self.InChIKey = '' # 27 characters (SHA-256 hash of InChI)
90
+ self.InChI = ''
93
91
  self.props = {}
94
- self.confs = [] # 3D conformers (iterable)
95
92
  self.fp = None
96
93
  self.max_workers = max_workers
97
94
  self.chunksize = chunksize
98
95
  self.progress = progress
99
-
100
- if isinstance(molecular_input, str):
96
+
97
+ if molecule is None:
98
+ return
99
+
100
+ if isinstance(molecule, str): # 1-D SMILES
101
101
  try:
102
- self.rdmol = Chem.MolFromSmiles(molecular_input)
103
- assert self.rdmol
104
- self.smiles = Chem.MolToSmiles(self.rdmol)
102
+ if "." in molecule: # mandatory desalting
103
+ (self.smiles, self.rdmol) = desalt_smiles(molecule)
104
+ else:
105
+ self.rdmol = Chem.MolFromSmiles(molecule)
106
+ self.smiles = Chem.MolToSmiles(self.rdmol)
105
107
  except:
106
- raise ValueError(f'Mol() received invalid SMILES: {molecular_input}')
107
- elif isinstance(molecular_input, Chem.Mol):
108
+ raise ValueError(f'Mol() Error: invalid SMILES {molecule}')
109
+
110
+ elif isinstance(molecule, Chem.Mol): # 2-D or 3-D Chem.Mol
108
111
  try:
109
- self.rdmol = molecular_input
110
- assert self.rdmol
112
+ self.rdmol, _ = clean_2d(molecule, reset_isotope, remove_H)
111
113
  self.smiles = Chem.MolToSmiles(self.rdmol)
114
+ self.confs = [Conf(x) for x in _]
112
115
  except:
113
- raise ValueError('Mol() received invalid rdkit.Chem.Mol object')
114
- else:
115
- raise TypeError('Mol() expects SMILES or rdkit.Chem.Mol object')
116
-
117
- ### desalting
118
- if "." in self.smiles:
116
+ raise ValueError(f'Mol() Error: invalid Chem.Mol object')
117
+
118
+ elif isinstance(molecule, Conf): # 3-D input
119
119
  try:
120
- (self.smiles, self.rdmol) = desalt_smiles(self.smiles)
121
- assert self.smiles
122
- assert self.rdmol
120
+ self.rdmol, _ = clean_2d(molecule.rdmol, reset_isotope, remove_H)
121
+ self.smiles = Chem.MolToSmiles(self.rdmol)
122
+ self.confs = [molecule]
123
123
  except:
124
- raise RuntimeError(f'Mol() error occurred in desalting: {self.smiles}')
125
-
126
- ### standardization
127
- if std:
128
- # standardization changes self.rdmol
129
- try:
124
+ raise ValueError(f'Mol() Error: invalid Conf object')
125
+
126
+ try:
127
+ if std:
130
128
  self.rdmol = standardize(self.rdmol)
131
129
  self.smiles = Chem.MolToSmiles(self.rdmol)
132
- assert self.smiles
133
- assert self.rdmol
134
- except:
135
- raise RuntimeError('Mol() error occurred in standardization')
136
-
137
- ### naming
130
+ except:
131
+ raise RuntimeError('Mol() Error: standardization')
132
+
133
+ assert self.smiles and self.rdmol, "Mol() Error: invalid molecule"
134
+
135
+ rdDepictor.Compute2DCoords(self.rdmol)
136
+
138
137
  try:
139
138
  self.name = str(name)
140
139
  except:
141
140
  self.name = 'untitled'
141
+
142
142
  self.rdmol.SetProp('_Name', self.name) # _Name can't be None
143
-
144
- ### set default properties
143
+ self.InChI = Chem.MolToInchi(self.rdmol)
144
+ self.InChIKey = inchi.InchiToInchiKey(self.InChI)
145
145
  self.props.update({
146
146
  'aka' : [], # <-- to be set by MolLibr.unique()
147
- 'atoms' : self.rdmol.GetNumAtoms(),
148
- # hydrogens not excluded
149
- # m = Chem.MolFromSmiles("c1c[nH]cc1")
150
- # m.GetNumAtoms()
151
- # >> 5
152
- # Chem.AddHs(m).GetNumAtoms()
153
- # >> 10
147
+ 'atoms' : self.rdmol.GetNumAtoms(), # hydrogens not excluded?
154
148
  'charge': rdmolops.GetFormalCharge(self.rdmol),
155
- # number of rotatable bonds
156
149
  "nrb" : Descriptors.NumRotatableBonds(self.rdmol),
157
150
  })
158
-
151
+
159
152
 
160
153
  def __str__(self) -> str:
161
154
  """String representation of the molecule.
@@ -187,21 +180,21 @@ class Mol:
187
180
  return hash(self.smiles)
188
181
 
189
182
 
190
- def __eq__(self, other:object) -> bool:
191
- """True if `other` molecule is identical with the molecule.
183
+ def __eq__(self, other: Self) -> bool:
184
+ """True if `other` Mol is identical with this Mol.
192
185
 
193
- It compares canonicalized SMILES.
186
+ It compares InChIKeys.
194
187
 
195
188
  Examples:
196
189
  >>> m1 == m2
197
190
 
198
191
  Args:
199
- other (object): other rdworks.Mol object.
192
+ other (object): other Mol object.
200
193
 
201
194
  Returns:
202
195
  bool: True if identical.
203
196
  """
204
- return self.smiles == other.smiles
197
+ return self.InChIKey == other.InChIKey
205
198
 
206
199
 
207
200
  def __iter__(self) -> Iterator:
@@ -226,7 +219,7 @@ class Mol:
226
219
  return next(self.confs)
227
220
 
228
221
 
229
- def __getitem__(self, index: int | slice) -> Conf:
222
+ def __getitem__(self, index: int | slice) -> Conf | Self:
230
223
  """Conformer object of conformers of the molecule with given index or slice of indexes.
231
224
 
232
225
  Examples:
@@ -235,31 +228,34 @@ class Mol:
235
228
  Args:
236
229
  index (int | slice): index for conformers.
237
230
 
238
- Raises:
239
- ValueError: conformers are not defined in the molecule or index is out of range.
240
-
241
231
  Returns:
242
- Conf: Conf object matching the index of the molecule.
232
+ Conf or Mol(copy) with conformers specified by index.
243
233
  """
244
- if self.count() == 0:
245
- raise ValueError(f"no conformers")
246
- try:
247
- return self.confs[index]
248
- except:
249
- raise ValueError(f"index should be 0..{self.count()-1}")
234
+ assert self.count() > 0, "no conformers"
250
235
 
236
+ if isinstance(index, slice):
237
+ new_object = self.copy()
238
+ new_object.confs = new_object.confs[index]
239
+ return new_object
240
+
241
+ else:
242
+ return self.confs[index]
251
243
 
244
+
252
245
  def copy(self) -> Self:
253
246
  """Returns a copy of self.
254
247
 
255
248
  Returns:
256
- Self: a copy of self (rdworks.Mol) object.
249
+ a copy of self.
257
250
  """
258
251
  return copy.deepcopy(self)
259
252
 
260
253
 
261
- def rename(self, prefix:str='', sep:str='/', start:int=1) -> Self:
262
- """Rename conformer names and returns self
254
+ def rename(self,
255
+ prefix: str = '',
256
+ sep: str = '/',
257
+ start: int = 1) -> Self:
258
+ """Updates name and conformer names.
263
259
 
264
260
  The first conformer name is {prefix}{sep}{start}
265
261
 
@@ -269,11 +265,12 @@ class Mol:
269
265
  start (int, optional): first serial number. Defaults to 1.
270
266
 
271
267
  Returns:
272
- Self: rdworks.Mol object.
268
+ Self: modified self.
273
269
  """
274
270
  if prefix :
275
271
  self.name = prefix
276
272
  self.rdmol.SetProp('_Name', prefix)
273
+
277
274
  # update conformer names
278
275
  num_digits = len(str(self.count())) # ex. '100' -> 3
279
276
  for (serial, conf) in enumerate(self.confs, start=start):
@@ -281,11 +278,13 @@ class Mol:
281
278
  while len(serial_str) < num_digits:
282
279
  serial_str = '0' + serial_str
283
280
  conf.rename(f'{self.name}{sep}{serial_str}')
281
+
284
282
  return self
285
283
 
286
284
 
287
- def qed(self, properties:list[str]=['QED', 'MolWt', 'LogP', 'TPSA', 'HBD']) -> Self:
288
- """Updates quantitative estimate of drug-likeness (QED).
285
+ def qed(self,
286
+ properties: list[str] = ['QED', 'MolWt', 'LogP', 'TPSA', 'HBD']) -> Self:
287
+ """Updates quantitative estimate of drug-likeness (QED) and other descriptors.
289
288
 
290
289
  Args:
291
290
  properties (list[str], optional): Defaults to ['QED', 'MolWt', 'LogP', 'TPSA', 'HBD'].
@@ -294,53 +293,54 @@ class Mol:
294
293
  KeyError: if property key is unknown.
295
294
 
296
295
  Returns:
297
- Self: rdworks.Mol object.
296
+ Self: modified self.
298
297
  """
299
298
  props_dict = {}
300
299
  for k in properties:
301
300
  try:
302
301
  props_dict[k] = rd_descriptor_f[k](self.rdmol)
303
302
  except:
304
- raise KeyError(f'Mol.qed() received undefined property {k} for {self}')
303
+ raise KeyError(f'qed() Error: unknown property {k}')
305
304
  self.props.update(props_dict)
305
+
306
306
  return self
307
307
 
308
308
 
309
309
  def remove_stereo(self) -> Self:
310
- """Removes stereochemistry and returns a copy of self.
310
+ """Removes stereochemistry.
311
311
 
312
312
  Examples:
313
- >>> m = rdworks.Mol("C/C=C/C=C\\C", "double_bond")
313
+ >>> m = Mol("C/C=C/C=C\\C", "double_bond")
314
314
  >>> m.remove_stereo().smiles == "CC=CC=CC"
315
315
 
316
316
  Returns:
317
- Self: rdworks.Mol object.
317
+ Self: modified self.
318
318
  """
319
- obj = copy.deepcopy(self)
320
319
  # keep the original stereo info. for ring double bond
321
- Chem.RemoveStereochemistry(obj.rdmol)
322
- Chem.AssignStereochemistry(obj.rdmol,
320
+ Chem.RemoveStereochemistry(self.rdmol)
321
+ Chem.AssignStereochemistry(self.rdmol,
323
322
  cleanIt=False,
324
323
  force=False,
325
324
  flagPossibleStereoCenters=False)
326
- obj.smiles = Chem.MolToSmiles(obj.rdmol)
327
- return obj
325
+ self.smiles = Chem.MolToSmiles(self.rdmol)
326
+
327
+ return self
328
328
 
329
329
 
330
330
  def make_confs(self,
331
331
  n:int = 50,
332
- method:str = 'RDKit_ETKDG',
333
- calculator:str | Callable = 'MMFF94') -> Self:
332
+ method:str = 'ETKDG',
333
+ calculator:str | Callable = 'MMFF94',
334
+ ) -> Self:
334
335
  """Generates 3D conformers.
335
336
 
336
337
  Args:
337
338
  n (int, optional): number of conformers to generate. Defaults to 50.
338
339
  method (str, optional): conformer generation method.
339
- Choices are `RDKit_ETKDG`, `CDPL_CONFORGE`.
340
- Defaults to 'RDKit_ETKDG'.
340
+ Choices are `ETKDG`, `CONFORGE`. Defaults to 'ETKDG'.
341
341
 
342
342
  Returns:
343
- Self: rdworks.Mol object
343
+ Self: modified self.
344
344
 
345
345
  Reference:
346
346
  T. Seidel, C. Permann, O. Wieder, S. M. Kohlbacher, T. Langer,
@@ -355,9 +355,9 @@ class Mol:
355
355
 
356
356
  self.confs = []
357
357
 
358
- if method.upper() == 'RDKIT_ETKDG':
358
+ if method.upper() == 'ETKDG':
359
359
  rdmol_H = Chem.AddHs(self.rdmol, addCoords=True) # returns a copy with hydrogens added
360
- conf_ids = rdDistGeom.EmbedMultipleConfs(rdmol_H, n, params=self.ETKDG_params)
360
+ conf_ids = rdDistGeom.EmbedMultipleConfs(rdmol_H, numConfs=n, params=self.ETKDG_params)
361
361
  for rdConformer in rdmol_H.GetConformers():
362
362
  # number of atoms should match with conformer(s)
363
363
  rdmol_conf = Chem.Mol(rdmol_H)
@@ -366,7 +366,7 @@ class Mol:
366
366
  conf = Conf(rdmol_conf)
367
367
  self.confs.append(conf)
368
368
 
369
- elif method.upper() == 'CDPL_CONFORGE':
369
+ elif method.upper() == 'CONFORGE':
370
370
  with tempfile.NamedTemporaryFile() as tmpfile:
371
371
  mol = CDPL.Chem.parseSMILES(self.smiles)
372
372
  # create and initialize an instance of the class ConfGen.ConformerGenerator which
@@ -415,7 +415,7 @@ class Mol:
415
415
 
416
416
  # energy evaluations for ranking
417
417
  for conf in self.confs:
418
- conf.get_potential_energy(calculator) # default: MMFF94
418
+ conf.potential_energy(calculator) # default: MMFF94
419
419
 
420
420
  # set relative energy, E_rel(kcal/mol)
421
421
  sort_by = 'E_tot(kcal/mol)'
@@ -424,50 +424,97 @@ class Mol:
424
424
  for conf in self.confs:
425
425
  conf.props.update({"E_rel(kcal/mol)": conf.props[sort_by] - lowest_energy})
426
426
 
427
- return self.rename()
427
+ self = self.rename()
428
+
429
+ return self
428
430
 
429
431
 
430
- def optimize(self, calculator:str | Callable = 'MMFF94', fmax:float=0.05) -> Self:
431
- """Optimizes 3D conformers
432
+ def optimize_confs(self,
433
+ calculator: str | Callable = 'MMFF94',
434
+ fmax: float = 0.05,
435
+ max_iter: int = 1000,
436
+ ) -> Self:
437
+ """Optimizes 3D geometry of conformers.
432
438
 
433
439
  Args:
434
- calculator (str | Callable): _description_
435
- fmax (float, optional): _description_. Defaults to 0.05.
440
+ calculator (str | Callable): MMFF94 (= MMFF), MMFF94s, UFF, or ASE calculator.
441
+ `MMFF94` or `MMFF` - Intended for general use, including organic molecules and proteins,
442
+ and primarily relies on data from quantum mechanical calculations.
443
+ It's often used in molecular dynamics simulations.
444
+ `MMFF94s` - A "static" variant of MMFF94, with adjusted parameters for out-of-plane
445
+ bending and dihedral torsions to favor planar geometries for specific nitrogen atoms.
446
+ This makes it better suited for geometry optimization studies where a static,
447
+ time-averaged structure is desired. The "s" stands for "static".
448
+ `UFF` - UFF refers to the "Universal Force Field," a force field model used for
449
+ molecular mechanics calculations. It's a tool for geometry optimization,
450
+ energy minimization, and exploring molecular conformations in 3D space.
451
+ UFF is often used to refine conformers generated by other methods,
452
+ such as random conformer generation, to produce more physically plausible
453
+ and stable structures.
454
+ fmax (float, optional): fmax for the calculator convergence. Defaults to 0.05.
455
+ max_iter (int, optional): max iterations for the calculator. Defaults to 1000.
436
456
 
437
457
  Returns:
438
- Self: _description_
458
+ Self: modified self.
439
459
  """
440
- self.confs = [ conf.optimize(calculator, fmax) for conf in self.confs ]
460
+ self.confs = [ conf.optimize(calculator, fmax, max_iter) for conf in self.confs ]
461
+
441
462
  return self
442
463
 
443
464
 
444
- def sort_confs(self) -> Self:
445
- """Sorts conformers by `E_tot(eV)` or `E_tot(kcal/mol)` and sets `E_rel(kcal/mol)`.
465
+ def sort_confs(self, calculator: str | Callable | None = None) -> Self:
466
+ """Sorts by `E_tot(kcal/mol)` or `E_tot(eV)` and sets `E_rel(kcal/mol)`.
446
467
 
468
+ Args:
469
+ calculator (str | Callable | None): MMFF94 (= MMFF), MMFF94s, UFF, or ASE calculator.
470
+ `MMFF94` or `MMFF` - Intended for general use, including organic molecules and proteins,
471
+ and primarily relies on data from quantum mechanical calculations.
472
+ It's often used in molecular dynamics simulations.
473
+ `MMFF94s` - A "static" variant of MMFF94, with adjusted parameters for out-of-plane
474
+ bending and dihedral torsions to favor planar geometries for specific nitrogen atoms.
475
+ This makes it better suited for geometry optimization studies where a static,
476
+ time-averaged structure is desired. The "s" stands for "static".
477
+ `UFF` - UFF refers to the "Universal Force Field," a force field model used for
478
+ molecular mechanics calculations. It's a tool for geometry optimization,
479
+ energy minimization, and exploring molecular conformations in 3D space.
480
+ UFF is often used to refine conformers generated by other methods,
481
+ such as random conformer generation, to produce more physically plausible
482
+ and stable structures.
447
483
  Raises:
448
484
  KeyError: if `E_tot(eV)` or `E_tot(kcal/mol)` is not defined.
449
485
 
450
486
  Returns:
451
- Self: rdworks.Mol object.
487
+ Self: modified self.
452
488
  """
453
- if all(['E_tot(eV)' in c.props for c in self.confs]):
454
- sort_by = 'E_tot(eV)'
455
- conversion = 23.060547830619026 # eV to kcal/mol
456
- elif all(['E_tot(kcal/mol)' in c.props for c in self.confs]):
489
+ if calculator is not None:
490
+ # re-calculate potential energies
491
+ for conf in self.confs:
492
+ PE = conf.potential_energy(calculator=calculator) # sets `E_tot(kcal/mol)`
493
+
494
+ if all(['E_tot(kcal/mol)' in conf.props for conf in self.confs]):
457
495
  sort_by = 'E_tot(kcal/mol)'
458
496
  conversion = 1.0
497
+
498
+ elif all(['E_tot(eV)' in conf.props for conf in self.confs]):
499
+ sort_by = 'E_tot(eV)'
500
+ conversion = ev2kcalpermol # eV to kcal/mol
501
+
459
502
  else:
460
- raise KeyError(f'Mol.sort_confs() requires E_tot(eV) or E_tot(kcal/mol) property')
461
- self.confs = sorted(self.confs, key=lambda c: c.props[sort_by]) # ascending order
503
+ raise KeyError(f'sort_confs() requires `E_tot(eV)` or `E_tot(kcal/mol)` property')
504
+
505
+ # ascending order
506
+ self.confs = sorted(self.confs, key=lambda c: c.props[sort_by])
507
+
462
508
  if self.count() > 0:
463
- E_lowest = self.confs[0].props[sort_by]
509
+ E_min = self.confs[0].props[sort_by]
464
510
  for conf in self.confs:
465
- E_rel = (conf.props[sort_by] - E_lowest)* conversion
511
+ E_rel = conversion * (conf.props[sort_by] - E_min)
466
512
  conf.props.update({"E_rel(kcal/mol)": E_rel})
513
+
467
514
  return self
468
515
 
469
516
 
470
- def align_confs(self, method:str='rigid_fragment') -> Self:
517
+ def align_confs(self, method: str = 'rigid_fragment') -> Self:
471
518
  """Aligns all conformers to the first conformer.
472
519
 
473
520
  Args:
@@ -476,7 +523,7 @@ class Mol:
476
523
  Defaults to `rigid_fragment`.
477
524
 
478
525
  Returns:
479
- Self: rdworks.Mol object.
526
+ Self: modified self.
480
527
  """
481
528
 
482
529
  if self.count() < 2: # nothing to do
@@ -527,7 +574,10 @@ class Mol:
527
574
  return self
528
575
 
529
576
 
530
- def cluster_confs(self, method:str='QT', threshold:float=1.0, sortby:str='size') -> Self:
577
+ def cluster_confs(self,
578
+ method: str = 'QT',
579
+ threshold: float = 1.0,
580
+ sort: str = 'size') -> Self:
531
581
  """Clusters all conformers and sets cluster properties.
532
582
 
533
583
  Following cluster properties will be added: `cluster`, `cluster_mean_energy`,
@@ -546,14 +596,14 @@ class Mol:
546
596
  `AutoGraph`.
547
597
  Defaults to `QT`.
548
598
  threshold (float, optional): RMSD threshold of a cluster. Defaults to 1.0.
549
- sortby (str, optional): sort cluster(s) by mean `energy` or cluster `size`.
599
+ sort (str, optional): sort cluster(s) by mean `energy` or cluster `size`.
550
600
  Defaults to `size`.
551
601
 
552
602
  Raises:
553
603
  NotImplementedError: if unsupported method is requested.
554
604
 
555
605
  Returns:
556
- Self: rdworks.Mol object
606
+ Self: modified self.
557
607
  """
558
608
  if method != 'DQT': # rmsd of x,y,z coordinates (non-H)
559
609
  conf_rdmols_noH = [Chem.RemoveHs(Chem.Mol(conf.rdmol)) for conf in self.confs]
@@ -572,7 +622,7 @@ class Mol:
572
622
  torsions = []
573
623
  for conf in self.confs:
574
624
  t_radians = []
575
- for (i, j, k, l, rot_indices, fix_indices) in torsion_atom_indices:
625
+ for torsion_key, (i, j, k, l) in torsion_atom_indices.items():
576
626
  t_radians.append(
577
627
  rdMolTransforms.GetDihedralRad(conf.rdmol.GetConformer(), i, j, k, l))
578
628
  torsions.append(np.array(t_radians))
@@ -664,14 +714,14 @@ class Mol:
664
714
  'iqr_energy' : iqr_energy,
665
715
  })
666
716
  # sort cluster index
667
- if sortby == 'size':
717
+ if sort == 'size':
668
718
  cluster_list = sorted(cluster_list, key=lambda x: x['size'], reverse=True)
669
719
 
670
- elif sortby == 'energy':
720
+ elif sort == 'energy':
671
721
  cluster_list = sorted(cluster_list, key=lambda x: x['median_energy'], reverse=False)
672
722
 
673
723
  else:
674
- raise NotImplementedError(f'{sortby} is not implemented yet.')
724
+ raise NotImplementedError(f'{sort} is not implemented yet.')
675
725
 
676
726
  for cluster_idx, cluster_dict in enumerate(cluster_list, start=1):
677
727
  for conf_idx in cluster_dict['confs']:
@@ -697,10 +747,10 @@ class Mol:
697
747
 
698
748
 
699
749
  def drop_confs(self,
700
- stereo_flipped:bool=True,
701
- unconverged:bool=True,
750
+ stereo_flipped: bool = True,
751
+ unconverged: bool = True,
702
752
  similar: bool | None = None,
703
- similar_rmsd:float=0.3,
753
+ similar_rmsd: float = 0.3,
704
754
  cluster: bool | None =None,
705
755
  k: int | None = None,
706
756
  window: float | None = None,
@@ -718,39 +768,46 @@ class Mol:
718
768
  k (int, optional): drop all except for `k` lowest energy conformers.
719
769
  window (float, optional): drop all except for conformers within `window` of relative energy.
720
770
 
721
- Returns:
722
- Self: a copy of rdworks.Mol object.
723
-
724
771
  Examples:
725
772
  To drop similar conformers within rmsd of 0.5 A
726
773
  >>> mol.drop_confs(similar=True, similar_rmsd=0.5)
727
774
 
728
775
  To drop conformers beyond 5 kcal/mol
729
776
  >>> mol.drop_confs(window=5.0)
730
-
777
+
778
+ Returns:
779
+ Self: modified self.
731
780
  """
732
- obj = copy.deepcopy(self)
781
+
782
+ reasons = [f'stereo flipped',
783
+ f'unconverged',
784
+ f'similar({similar_rmsd})',
785
+ f'cluster(non-centroid)',
786
+ f'k and/or energy window',
787
+ ]
788
+
789
+ w = max([len(s) for s in reasons])
733
790
 
734
- if stereo_flipped and obj.count() > 0:
735
- mask = [Chem.MolToSmiles(Chem.RemoveHs(_.rdmol)) == obj.smiles for _ in obj.confs]
736
- obj.confs = list(itertools.compress(obj.confs, mask))
791
+ if stereo_flipped and self.count() > 0:
792
+ mask = [Chem.MolToSmiles(Chem.RemoveHs(_.rdmol)) == self.smiles for _ in self.confs]
793
+ self.confs = list(itertools.compress(self.confs, mask))
737
794
  if verbose:
738
- main_logger.info(f'drop_confs stereo_flipped={mask.count(False)} -> {obj.count()}')
795
+ main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[0]:<{w}} -> {self.count()}')
739
796
 
740
- if unconverged and obj.count() > 0:
741
- mask = [_.props['Converged'] if 'Converged' in _.props else True for _ in obj.confs]
742
- obj.confs = list(itertools.compress(obj.confs, mask))
797
+ if unconverged and self.count() > 0:
798
+ mask = [_.props['Converged'] if 'Converged' in _.props else True for _ in self.confs]
799
+ self.confs = list(itertools.compress(self.confs, mask))
743
800
  if verbose:
744
- main_logger.info(f'drop_confs unconverged={mask.count(False)} -> {obj.count()}')
801
+ main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[1]:<{w}} -> {self.count()}')
745
802
 
746
- if similar and obj.count() > 1:
803
+ if similar and self.count() > 1:
747
804
  # it is observed that there are essentially identical conformers
748
805
  # such as 180-degree ring rotation and there is not minor conformational variations
749
806
  # in the RDKit ETKDG generated conformers.
750
- conf_rdmols_noH = [Chem.RemoveHs(Chem.Mol(_.rdmol)) for _ in obj.confs]
807
+ conf_rdmols_noH = [Chem.RemoveHs(Chem.Mol(_.rdmol)) for _ in self.confs]
751
808
  # copies are made for rmsd calculations to prevent coordinates changes
752
809
  lower_triangle_values = []
753
- for i in range(obj.count()): # number of conformers
810
+ for i in range(self.count()): # number of conformers
754
811
  for j in range(i):
755
812
  # rdMolAlign.GetBestRMS takes symmetry into account
756
813
  # removed hydrogens to speed up
@@ -758,10 +815,10 @@ class Mol:
758
815
  lower_triangle_values.append(best_rms)
759
816
  symm_matrix = convert_tril_to_symm(lower_triangle_values)
760
817
  cluster_assignment, centroid_indices = QT(symm_matrix, similar_rmsd)
761
- mask = [conf_idx in centroid_indices for conf_idx, conf in enumerate(obj.confs)]
762
- obj.confs = list(itertools.compress(obj.confs, mask))
818
+ mask = [conf_idx in centroid_indices for conf_idx, conf in enumerate(self.confs)]
819
+ self.confs = list(itertools.compress(self.confs, mask))
763
820
  if verbose:
764
- main_logger.info(f'drop_confs similar({similar_rmsd})={mask.count(False)} -> {obj.count()}')
821
+ main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[2]:<{w}} -> {self.count()}')
765
822
 
766
823
  # note: it will retain the conformers with lower index
767
824
  # so, it should be sorted before dropping
@@ -781,29 +838,29 @@ class Mol:
781
838
  # retained_confs.append(Chem.RemoveHs(conf_i.rdmol)) # store a copy of H-removed rdmol
782
839
  # obj.confs = list(itertools.compress(obj.confs, mask))
783
840
 
784
- if cluster and obj.count() > 1:
841
+ if cluster and self.count() > 1:
785
842
  # drop non-centroid cluster member(s)
786
- mask = [_.props['centroid'] if 'centroid' in _.props else True for _ in obj.confs]
787
- obj.confs = list(itertools.compress(obj.confs, mask))
843
+ mask = [_.props['centroid'] if 'centroid' in _.props else True for _ in self.confs]
844
+ self.confs = list(itertools.compress(self.confs, mask))
788
845
  if verbose:
789
- main_logger.info(f'drop_confs cluster(non-centroid)={mask.count(False)} -> {obj.count()}')
846
+ main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[3]:<{w}} -> {self.count()}')
790
847
 
791
- if (k or window) and obj.count() > 0:
848
+ if (k or window) and self.count() > 0:
792
849
  if k:
793
- mask_k = [i < k for i,_ in enumerate(obj.confs)]
850
+ mask_k = [i < k for i,_ in enumerate(self.confs)]
794
851
  else:
795
- mask_k = [True,] * obj.count()
852
+ mask_k = [True,] * self.count()
796
853
  if window:
797
- mask_window = [_.props['E_rel(kcal/mol)'] < window if 'E_rel(kcal/mol)' in _.props else True for _ in obj.confs]
854
+ mask_window = [_.props['E_rel(kcal/mol)'] < window if 'E_rel(kcal/mol)' in _.props else True for _ in self.confs]
798
855
  else:
799
- mask_window = [True,] * obj.count()
856
+ mask_window = [True,] * self.count()
800
857
  # retain conformer(s) that satisfy both k and window conditions
801
858
  mask = [(x and y) for (x,y) in zip(mask_k, mask_window)]
802
- obj.confs = list(itertools.compress(obj.confs, mask))
859
+ self.confs = list(itertools.compress(self.confs, mask))
803
860
  if verbose:
804
- main_logger.info(f'drop_confs k and/or window={mask.count(False)} -> {obj.count()}')
861
+ main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[4]:<{w}} -> {self.count()}')
805
862
 
806
- return obj
863
+ return self
807
864
 
808
865
 
809
866
  def count(self) -> int:
@@ -815,7 +872,7 @@ class Mol:
815
872
  return len(self.confs)
816
873
 
817
874
 
818
- def is_nn_applicable(self, model:str) -> bool:
875
+ def nnp_ready(self, model: str = 'aimnet2') -> bool:
819
876
  """Check if a particular neural network model is applicable to current molecule.
820
877
 
821
878
  Args:
@@ -833,19 +890,18 @@ class Mol:
833
890
  # H, C, N, O, F, S, Cl
834
891
  atomic_numbers = [1, 6, 7, 8, 9, 16, 17 ]
835
892
 
836
- elif model in ['aimnet', 'aimnet2']:
893
+ elif model.lower() in ['aimnet', 'aimnet2']:
837
894
  # H, B, C, N, O, F, Si, P, S, Cl, As, Se, Br, I
838
895
  atomic_numbers = [1, 5, 6, 7, 8, 9, 14, 15, 16, 17, 33, 34, 35, 53 ]
839
896
 
840
897
  else:
841
- raise ValueError('is_nn_applicable() supports ANI-2x, ANI-2xt, or AIMNET')
898
+ raise ValueError('nnp_ready() supports ANI-2x, ANI-2xt, AIMNet, or AIMNet2')
842
899
 
843
- for a in self.rdmol.GetAtoms():
844
- if a.GetAtomicNum() not in atomic_numbers:
845
- return False
900
+ if all([ a.GetAtomicNum() in atomic_numbers for a in self.rdmol.GetAtoms() ]):
901
+ return True
902
+ else:
903
+ return False
846
904
 
847
- return True
848
-
849
905
 
850
906
  def charge(self) -> int:
851
907
  """Returns molecular formal charge
@@ -862,7 +918,7 @@ class Mol:
862
918
  Returns:
863
919
  list: list of element symbols.
864
920
  """
865
- return [ a.GetSymbol() for a in self.rdmol.GetAtoms() ]
921
+ return [atom.GetSymbol() for atom in self.rdmol.GetAtoms()]
866
922
 
867
923
 
868
924
  def numbers(self) -> list[int]:
@@ -871,111 +927,19 @@ class Mol:
871
927
  Returns:
872
928
  list: list of atomic numbers.
873
929
  """
874
- return [ a.GetAtomicNum() for a in self.rdmol.GetAtoms() ]
930
+ return [atom.GetAtomicNum() for atom in self.rdmol.GetAtoms()]
875
931
 
876
932
 
877
- def torsion_atoms(self, strict:bool=True) -> list[tuple]:
878
- """Determine dihedral angle atoms (a-b-c-d) and rotating group for each rotatable bond (b-c).
933
+ def torsion_atoms(self, strict: bool = True) -> dict[int, tuple]:
934
+ """Determine torsion/dihedral angle atoms (i-j-k-l) and rotating group for each rotatable bond (j-k).
879
935
 
880
936
  Args:
881
937
  strict (bool): whether to exclude amide/imide/ester/acid bonds.
882
938
 
883
939
  Returns:
884
- [ (a, b, c, d, rot_atom_indices, fix_atom_indices),
885
- (a, b, c, d, rot_atom_indices, fix_atom_indices),
886
- ...,
887
- ]
940
+ {torsion_key: (i, j, k, l), ...,}
888
941
  """
889
- # https://github.com/rdkit/rdkit/blob/1bf6ef3d65f5c7b06b56862b3fb9116a3839b229/rdkit/Chem/Lipinski.py#L47%3E
890
- # https://github.com/rdkit/rdkit/blob/de602c88809ea6ceba1e8ed50fd543b6e406e9c4/Code/GraphMol/Descriptors/Lipinski.cpp#L108
891
- if strict :
892
- # excludes amide/imide/ester/acid bonds
893
- rotatable_bond_pattern = Chem.MolFromSmarts(
894
- (
895
- "[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])("
896
- "[CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])&!$([#7,O,S!D1]-!@[CD3]="
897
- "[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])&!$([#7!D1]-!@[CD3]=[N+])]-,:;!@[!$"
898
- "(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])(["
899
- "CH3])[CH3])]"
900
- )
901
- )
902
- else:
903
- rotatable_bond_pattern = Chem.MolFromSmarts('[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]')
904
- rotatable_bonds = self.rdmol.GetSubstructMatches(rotatable_bond_pattern)
905
- torsion_angle_atom_indices = []
906
-
907
- # small rings (n=3 or 4)
908
- small_rings = [ r for r in list(self.rdmol.GetRingInfo().AtomRings()) if len(r) < 5 ]
909
- # ex. = [(1, 37, 35, 34, 3, 2), (29, 28, 30)]
910
-
911
- forbidden_terminal_nuclei = [1, 9, 17, 35, 53] # H,F,Cl,Br,I
912
-
913
- for (b_idx, c_idx) in rotatable_bonds:
914
- # determine a atom ``a`` that define a dihedral angle
915
- a_candidates = []
916
- for neighbor in self.rdmol.GetAtomWithIdx(b_idx).GetNeighbors():
917
- neighbor_idx = neighbor.GetIdx()
918
- if neighbor_idx == c_idx:
919
- continue
920
- neighbor_atomic_num = neighbor.GetAtomicNum()
921
- if neighbor_atomic_num not in forbidden_terminal_nuclei:
922
- a_candidates.append((neighbor_atomic_num, neighbor_idx))
923
-
924
- if not a_candidates:
925
- continue
926
-
927
- (a_atomic_num, a_idx) = sorted(a_candidates, key=lambda x: (x[0], -x[1]), reverse=True)[0]
928
-
929
- # is a-b in a small ring (n=3 or 4)?
930
- is_in_small_ring = False
931
- for small_ring in small_rings:
932
- if (a_idx in small_ring) and (b_idx in small_ring):
933
- is_in_small_ring = True
934
- break
935
-
936
- if is_in_small_ring:
937
- continue
938
-
939
- # determine a atom ``d`` that define a dihedral angle
940
- d_candidates = []
941
- for neighbor in self.rdmol.GetAtomWithIdx(c_idx).GetNeighbors():
942
- neighbor_idx = neighbor.GetIdx()
943
- if (neighbor_idx == b_idx):
944
- continue
945
- neighbor_atomic_num = neighbor.GetAtomicNum()
946
- if neighbor_atomic_num not in forbidden_terminal_nuclei:
947
- d_candidates.append((neighbor_atomic_num, neighbor_idx))
948
-
949
- if not d_candidates:
950
- continue
951
-
952
- (d_atomic_num, d_idx) = sorted(d_candidates, key=lambda x: (x[0], -x[1]), reverse=True)[0]
953
-
954
- # is c-d in a small ring?
955
- is_in_small_ring = False
956
- for small_ring in small_rings:
957
- if (c_idx in small_ring) and (d_idx in small_ring):
958
- is_in_small_ring = True
959
- break
960
-
961
- if is_in_small_ring:
962
- continue
963
-
964
- # determine a group of atoms to be rotated
965
- # https://ctr.fandom.com/wiki/Break_rotatable_bonds_and_report_the_fragments
966
- em = Chem.EditableMol(self.rdmol)
967
- em.RemoveBond(b_idx, c_idx)
968
- fragmented = em.GetMol()
969
- (frag1, frag2) = Chem.GetMolFrags(fragmented, asMols=False) # returns tuple of tuple
970
- hac1 = sum([ 1 for i in frag1 if self.rdmol.GetAtomWithIdx(i).GetAtomicNum() > 1 ])
971
- hac2 = sum([ 1 for i in frag2 if self.rdmol.GetAtomWithIdx(i).GetAtomicNum() > 1 ])
972
-
973
- # smaller fragment will be rotated and must contain at least three heavy atoms
974
- if min(hac1, hac2) >= 3:
975
- (frag_rot, frag_fix) = sorted([(hac1, frag1), (hac2, frag2)])
976
- torsion_angle_atom_indices.append((a_idx, b_idx, c_idx, d_idx, frag_rot[1], frag_fix[1]))
977
-
978
- return torsion_angle_atom_indices
942
+ return {i: d[:4] for i, d in enumerate(get_torsion_atoms(self.rdmol, strict))}
979
943
 
980
944
 
981
945
  def compute(self, **kwargs) -> Self:
@@ -987,147 +951,152 @@ class Mol:
987
951
  progress (bool): whether to show progress bar.
988
952
 
989
953
  Returns:
990
- Self: rdworks.MolLibr object.
954
+ Self: modified self.
991
955
  """
992
956
  self.max_workers = kwargs.get('max_workers', self.max_workers)
993
957
  self.chunksize = kwargs.get('chunksize', self.chunksize)
994
958
  self.progress = kwargs.get('progress', self.progress)
959
+
995
960
  return self
996
961
 
997
962
 
998
- @staticmethod
999
- def _map_optimize_conf(conf:Conf, targs:tuple) -> Conf:
1000
- """A map function to apply Conf.optimize() on `conf`.
1001
-
1002
- The default behavior of map() is to pass the elements of the iterable to the function by reference.
1003
- This means that if the function modifies the elements of the iterable,
1004
- those changes will be reflected in the iterable itself.
1005
-
1006
- Args:
1007
- conf (Conf): subject rdworks.Conf object.
1008
- targs (tuple): tuple of arguments to be passed to Conf.optimize().
1009
-
1010
- Returns:
1011
- Conf: rdworks.Conf object
1012
- """
1013
- return conf.optimize(*targs)
1014
-
1015
-
1016
963
  def torsion_energies(self,
1017
- calculator:str | Callable,
1018
- fmax:float = 0.05,
1019
- interval:float = 15.0,
964
+ calculator: str | Callable,
965
+ torsion_key: int | None = None,
966
+ simplify: bool = True,
967
+ fmax: float = 0.05,
968
+ interval: float = 20.0,
1020
969
  use_converged_only: bool = True,
1021
- optimize_ref: bool = False,
1022
970
  **kwargs,
1023
971
  ) -> Self:
1024
972
  """Calculates potential energy profiles for each torsion angle using ASE optimizer.
1025
973
 
974
+ It uses the first conformer as a reference.
975
+
1026
976
  Args:
1027
977
  calculator (str | Callable): 'MMFF', 'UFF', or ASE calculator.
978
+ torsion_key (int | None): torsion index to calculate. Defaults to None (all).
979
+ simplify (bool, optional): whether to use fragment surrogate. Defaults to True.
1028
980
  fmax (float, optional): fmax of ASE optimizer. Defaults to 0.05.
1029
981
  interval (float, optional): interval of torsion angles in degree. Defaults to 20.0.
1030
982
  use_converged_only (bool, optional): whether to use only converged data. Defaults to True.
1031
983
 
1032
984
  Returns:
1033
- list[dict]: [{'indices':list, 'angle':list, 'E_rel(kcal/mol)':list}, ...]
985
+ Self: modified self.
1034
986
  """
987
+ assert self.count() > 0, "torsion_energies() requires at least one conformer"
988
+
1035
989
  self = self.compute(**kwargs)
1036
990
 
1037
- torsion_atoms_indices = self.torsion_atoms()
1038
-
1039
- ref_conf = self.confs[0].copy() # use the lowest energy conformer as a reference
1040
- if optimize_ref:
1041
- ref_conf = ref_conf.optimize(calculator, fmax)
1042
-
1043
- # mol.confs will be populated with torsion conformers.
1044
- # It is designed for a batch optimization in the future.
1045
- mol = self.copy()
1046
- mol.confs = []
1047
- data = []
1048
-
1049
- for k, (a, b, c, d, rot_indices, fix_indices) in enumerate(torsion_atoms_indices):
1050
- data.append({'angle':[], 'init':[], 'final':[], 'Converged':[]})
1051
- for angle in np.arange(-180.0, 180.0, interval):
1052
- # Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
1053
- x = ref_conf.copy()
1054
- x.props.update({'torsion_index': k, 'angle': float(angle)})
1055
- AllChem.SetDihedralDeg(x.rdmol.GetConformer(), a, b, c, d, angle)
1056
- # All atoms bonded to atom d will move.
1057
- mol.confs.append(x)
1058
-
1059
- # Optimize
1060
- # with ProcessPoolExecutor(max_workers=self.max_workers) as executor:
1061
- # largs = [ (calculator, fmax,) ] * mol.count()
1062
- # if self.progress:
1063
- # lconfs = list(tqdm(
1064
- # executor.map(Mol._map_optimize_conf, mol.confs, largs, chunksize=1),
1065
- # desc="Optimize conformers",
1066
- # total=mol.count()))
1067
- # else:
1068
- # lconfs = list(
1069
- # executor.map(Mol._map_optimize_conf, mol.confs, largs, chunksize=1))
1070
- # mol.confs = lconfs
1071
-
1072
- # Calculate relaxation energies
1073
- for conf in mol.confs:
1074
- conf = conf.optimize(calculator, fmax)
1075
- # conf.optimize() updates coordinates and conf.props:
1076
- # `angle`, `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
1077
- i = conf.props['torsion_index']
1078
- data[i]['angle'].append(conf.props['angle'])
1079
- data[i]['init'].append(conf.props['E_tot_init(kcal/mol)'])
1080
- data[i]['final'].append(conf.props['E_tot(kcal/mol)'])
1081
- data[i]['Converged'].append(conf.props['Converged'])
991
+ if torsion_key is None:
992
+ torsion_atoms_indices = self.torsion_atoms()
993
+ else:
994
+ torsion_atoms_indices = {torsion_key: self.torsion_atoms()[torsion_key]}
995
+
996
+ ref_conf = self.confs[0].copy()
997
+
998
+ data = {}
999
+
1000
+ if simplify:
1001
+ for tk, indices in torsion_atoms_indices.items():
1002
+ frag, frag_ijkl = create_torsion_fragment(ref_conf.rdmol, indices)
1003
+ frag_conf = Conf(frag)
1004
+ data[tk] = {'indices': indices, 'angle':[], 'init':[], 'last':[], 'Converged':[]}
1005
+ for angle in np.arange(-180.0, 180.0, interval):
1006
+ # Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
1007
+ conf = frag_conf.copy()
1008
+ conf.props.update({'torsion_key': tk, 'angle': float(angle)})
1009
+ conf.set_torsion(*frag_ijkl, angle) # atoms bonded to `l` move.
1010
+ conf = conf.optimize(calculator, fmax, **kwargs)
1011
+ # conf.optimize() updates coordinates and conf.props:
1012
+ # `angle`, `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
1013
+ tk = conf.props['torsion_key']
1014
+ data[tk]['angle'].append(conf.props['angle'])
1015
+ data[tk]['init'].append(conf.props['E_tot_init(kcal/mol)'])
1016
+ data[tk]['last'].append(conf.props['E_tot(kcal/mol)'])
1017
+ data[tk]['Converged'].append(conf.props['Converged'])
1018
+ frag_cleaned, _ = clean_2d(frag, reset_isotope=True, remove_H=True)
1019
+ rdDepictor.Compute2DCoords(frag_cleaned)
1020
+ # to serialize the molecule
1021
+ data[tk]['frag'] = Chem.MolToMolBlock(frag_cleaned)
1022
+ data[tk]['frag_indices'] = frag_ijkl
1023
+
1024
+ else:
1025
+ # mol.confs will be populated with torsion conformers.
1026
+ # It is designed for a batch optimization in the future.
1027
+ mol = self.copy()
1028
+ mol.confs = []
1029
+ for tk, indices in torsion_atoms_indices.items():
1030
+ data[tk] = {'indices': indices, 'angle':[], 'init':[], 'last':[], 'Converged':[]}
1031
+ for angle in np.arange(-180.0, 180.0, interval):
1032
+ # Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
1033
+ x = ref_conf.copy()
1034
+ x.props.update({'torsion_key': tk, 'angle': float(angle)})
1035
+ x.set_torsion(*indices, angle) # atoms bonded to `l` move.
1036
+ mol.confs.append(x)
1037
+
1038
+ # Calculate relaxation energies
1039
+ for conf in mol.confs:
1040
+ conf = conf.optimize(calculator, fmax, **kwargs)
1041
+ # conf.optimize() updates coordinates and conf.props:
1042
+ # `angle`, `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
1043
+ tk = conf.props['torsion_key']
1044
+ data[tk]['angle'].append(conf.props['angle'])
1045
+ data[tk]['init'].append(conf.props['E_tot_init(kcal/mol)'])
1046
+ data[tk]['last'].append(conf.props['E_tot(kcal/mol)'])
1047
+ data[tk]['Converged'].append(conf.props['Converged'])
1082
1048
 
1083
1049
  # Post-processing
1084
- torsion_energy_profiles = []
1085
- for indices, datadict in zip(torsion_atoms_indices, data):
1050
+ torsion_energy_profiles = {}
1051
+ for tk, dictdata in data.items():
1086
1052
  if use_converged_only:
1087
- datadict['angle'] = list(itertools.compress(datadict['angle'], datadict['Converged']))
1088
- datadict['init'] = list(itertools.compress(datadict['init'], datadict['Converged']))
1089
- datadict['final'] = list(itertools.compress(datadict['final'], datadict['Converged']))
1090
- relax = np.array(datadict['init']) - np.median(datadict['final'])
1053
+ dictdata['angle'] = list(itertools.compress(dictdata['angle'], dictdata['Converged']))
1054
+ dictdata['init'] = list(itertools.compress(dictdata['init'], dictdata['Converged']))
1055
+ dictdata['last'] = list(itertools.compress(dictdata['last'], dictdata['Converged']))
1056
+ relax = np.array(dictdata['init']) - np.median(dictdata['last'])
1091
1057
  E_rel = relax - np.min(relax)
1092
- torsion_energy_profiles.append({
1093
- 'indices': indices, # (a, b, c, d, rot_indices, fix_indices)
1094
- 'angle': np.array(datadict['angle']).tolist(), # np.ndarray -> list for serialization
1095
- 'E_rel(kcal/mol)': E_rel.tolist(), # np.ndarray -> list for serialization
1096
- })
1058
+ torsion_energy_profiles[tk] = {
1059
+ 'indices' : dictdata['indices'],
1060
+ 'angle' : np.round(np.array(dictdata['angle']), 1).tolist(), # np.ndarray -> list for serialization
1061
+ 'E_rel(kcal/mol)': np.round(E_rel, 2).tolist(), # np.ndarray -> list for serialization
1062
+ 'frag' : dictdata.get('frag', None),
1063
+ 'frag_indices' : dictdata.get('frag_indices', None),
1064
+ }
1065
+
1097
1066
  self.props['torsion'] = torsion_energy_profiles
1098
1067
  self.props['torsion_calculator'] = str(calculator)
1099
1068
 
1100
1069
  return self
1101
1070
 
1102
1071
 
1103
-
1104
-
1105
- def similarity(self, other:object) -> float:
1106
- """Returns Tanimoto similarity with `other` rdworks.Mol object.
1072
+ def similarity(self, other: Self) -> float:
1073
+ """Returns Tanimoto similarity with other Mol object.
1107
1074
 
1108
1075
  Args:
1109
- other (rdworks.Mol): other rdworks.Mol object.
1076
+ other (Mol): other Mol object.
1110
1077
 
1111
1078
  Raises:
1112
- TypeError: if `other` is not rdworks.Mol object type.
1079
+ AssertionError: if `other` is not a Mol object.
1113
1080
 
1114
1081
  Returns:
1115
1082
  float: Tanimoto similarity.
1116
1083
  """
1117
- if not isinstance(other, Mol):
1118
- raise TypeError("Mol.is_similar() expects Mol object")
1084
+ assert isinstance(other, Mol), "similarity() Error: invalid Mol object"
1085
+
1119
1086
  if not self.fp:
1120
1087
  self.fp = self.MFP2.GetFingerprint(self.rdmol)
1088
+
1121
1089
  if not other.fp:
1122
1090
  other.fp = other.MFP2.GetFingerprint(other.rdmol)
1091
+
1123
1092
  return DataStructs.TanimotoSimilarity(self.fp, other.fp)
1124
1093
 
1125
1094
 
1126
- def is_similar(self, other:object, threshold:float) -> bool:
1127
- """Check if `other` molecule is similar within `threshold`.
1095
+ def is_similar(self, other: Self, threshold: float) -> bool:
1096
+ """Check if other molecule is similar within Tanimoto similarity threshold.
1128
1097
 
1129
1098
  Args:
1130
- other (rdworks.Mol): other rdworks.Mol object to compare with.
1099
+ other (Mol): other Mol object to compare with.
1131
1100
  threshold (float): Tanimoto similarity threshold.
1132
1101
 
1133
1102
  Returns:
@@ -1135,8 +1104,21 @@ class Mol:
1135
1104
  """
1136
1105
  return self.similarity(other) >= threshold
1137
1106
 
1138
-
1139
- def is_matching(self, terms: str | Path, invert:bool=False) -> bool:
1107
+
1108
+ def has_substr(self, substr: str) -> bool:
1109
+ """Determine if the molecule has the substructure match.
1110
+
1111
+ Args:
1112
+ substr (str): SMARTS or SMILES.
1113
+
1114
+ Returns:
1115
+ bool: True if matches.
1116
+ """
1117
+ query = Chem.MolFromSmarts(substr)
1118
+ return self.rdmol.HasSubstructMatch(query)
1119
+
1120
+
1121
+ def is_matching(self, terms: str | Path, invert: bool = False) -> bool:
1140
1122
  """Determines if the molecule matches the predefined substructure and/or descriptor ranges.
1141
1123
 
1142
1124
  invert | terms(~ or !) | effect
@@ -1154,14 +1136,15 @@ class Mol:
1154
1136
  Returns:
1155
1137
  bool: True if matches.
1156
1138
  """
1157
- if isinstance(terms, pathlib.PosixPath):
1139
+ if isinstance(terms, Path):
1158
1140
  path = terms.as_posix()
1141
+
1159
1142
  elif isinstance(terms, str):
1160
1143
  if terms.startswith('~') or terms.startswith('!'):
1161
1144
  terms = terms.replace('~','').replace('!','')
1162
1145
  invert = (invert ^ True)
1163
1146
  try:
1164
- path = pathlib.Path(terms) # test if terms points to a xml file
1147
+ path = Path(terms) # test if terms points to a xml file
1165
1148
  assert path.is_file()
1166
1149
  except:
1167
1150
  path = get_predefined_xml(terms)
@@ -1192,8 +1175,10 @@ class Mol:
1192
1175
  if combine.lower() == 'or' and any(mask):
1193
1176
  # early termination if any term is satisfied
1194
1177
  return invert ^ True # XOR(^) inverts only if invert is True
1178
+
1195
1179
  if combine.lower() == 'and' and all(mask):
1196
1180
  return invert ^ True
1181
+
1197
1182
  return invert ^ False
1198
1183
 
1199
1184
 
@@ -1246,6 +1231,7 @@ class Mol:
1246
1231
  continue
1247
1232
  else:
1248
1233
  stereos.append(element.specified == Chem.StereoSpecified.Specified)
1234
+
1249
1235
  # note all([]) returns True
1250
1236
  return all(stereos)
1251
1237
 
@@ -1262,12 +1248,12 @@ class Mol:
1262
1248
  if element.type == Chem.StereoType.Bond_Double:
1263
1249
  if self.rdmol.GetBondWithIdx(element.centeredOn).IsInRing():
1264
1250
  ring_bond_stereo_info.append((element.centeredOn, element.descriptor))
1251
+
1265
1252
  return ring_bond_stereo_info
1266
1253
 
1267
1254
 
1268
1255
  def report_stereo(self) -> None:
1269
- """Print out stereochemistry information.
1270
- """
1256
+ """Report stereochemistry information for debug"""
1271
1257
  num_chiral_centers = rdMolDescriptors.CalcNumAtomStereoCenters(self.rdmol)
1272
1258
  # Returns the total number of atomic stereocenters (specified and unspecified)
1273
1259
  num_unspecified_chiral_centers = rdMolDescriptors.CalcNumUnspecifiedAtomStereoCenters(self.rdmol)
@@ -1292,8 +1278,7 @@ class Mol:
1292
1278
 
1293
1279
 
1294
1280
  def report_props(self) -> None:
1295
- """Print out properties.
1296
- """
1281
+ """Report properties"""
1297
1282
  if self.props:
1298
1283
  print(f"Properties({len(self.props)}):")
1299
1284
  fixed_width = max([len(k) for k in self.props]) + 4
@@ -1305,7 +1290,59 @@ class Mol:
1305
1290
  print(f"Properties: None")
1306
1291
 
1307
1292
 
1308
- def to_sdf(self, confs:bool=False, props:bool=True) -> str:
1293
+ def draw(self,
1294
+ coordgen: bool = False,
1295
+ rotate: bool = False,
1296
+ axis: str = 'z',
1297
+ degree: float = 0.0,
1298
+ ) -> Self:
1299
+ """Draw molecule in 2D.
1300
+
1301
+ Args:
1302
+ coordgen (bool, optional): whether to use `coordgen`. Defaults to False.
1303
+ rotate (bool, optional): whether to rotate drawing. Defaults to False.
1304
+ axis (str, optional): axis for rotation. Defaults to 'z'.
1305
+ degree (float, optional): degree for rotation. Defaults to 0.0.
1306
+
1307
+ Returns:
1308
+ Self.
1309
+ """
1310
+ rdDepictor.SetPreferCoordGen(coordgen)
1311
+ rdDepictor.Compute2DCoords(self.rdmol)
1312
+
1313
+ if rotate:
1314
+ rad = (np.pi/180.0) * degree
1315
+ c = np.cos(rad)
1316
+ s = np.sin(rad)
1317
+ if axis.lower() == 'x':
1318
+ rotmat = np.array([
1319
+ [1., 0., 0., 0.],
1320
+ [0., c, -s, 0.],
1321
+ [0., s, c, 0.],
1322
+ [0., 0., 0., 1.],
1323
+ ])
1324
+ elif axis.lower() == 'y':
1325
+ rotmat = np.array([
1326
+ [ c, 0., s, 0.],
1327
+ [ 0., 1., 0., 0.],
1328
+ [-s, 0., c, 0.],
1329
+ [ 0., 0., 0., 1.],
1330
+ ])
1331
+ elif axis.lower() == 'z':
1332
+ rotmat = np.array([
1333
+ [c, -s, 0., 0.],
1334
+ [s, c, 0., 0.],
1335
+ [0., 0., 1., 0.],
1336
+ [0., 0., 0., 1.],
1337
+ ])
1338
+ rdMolTransforms.TransformConformer(
1339
+ self.rdmol.GetConformer(),
1340
+ rotmat)
1341
+
1342
+ return self
1343
+
1344
+
1345
+ def to_sdf(self, confs: bool = False, props: bool = True) -> str:
1309
1346
  """Returns strings of SDF output.
1310
1347
 
1311
1348
  Args:
@@ -1315,8 +1352,8 @@ class Mol:
1315
1352
  Returns:
1316
1353
  str: strings of SDF output.
1317
1354
  """
1318
- in_memory = io.StringIO()
1319
- with Chem.SDWriter(in_memory) as f:
1355
+ buf = StringIO()
1356
+ with Chem.SDWriter(buf) as f:
1320
1357
  if confs:
1321
1358
  for conf in self.confs:
1322
1359
  rdmol = Chem.Mol(conf.rdmol)
@@ -1336,45 +1373,61 @@ class Mol:
1336
1373
  for k,v in self.props.items():
1337
1374
  rdmol.SetProp(k, str(v))
1338
1375
  f.write(rdmol)
1339
- return in_memory.getvalue()
1340
-
1341
-
1342
- def to_image(self, width:int=300, height:int=300, index:bool=False, svg:bool=True) -> object:
1343
- """Returns PIL(Python Image Library) image object.
1376
+
1377
+ return buf.getvalue()
1378
+
1344
1379
 
1345
- Use .save(output_filename) method to save as an image file.
1380
+ def to_png(self,
1381
+ width: int = 300,
1382
+ height: int = 300,
1383
+ legend: str = '',
1384
+ atom_index: bool = False,
1385
+ highlight_atoms: list[int] | None = None,
1386
+ highlight_bonds: list[int] | None = None,
1387
+ redraw: bool = False,
1388
+ coordgen: bool = False,
1389
+ trim: bool = True,
1390
+ ) -> Image.Image:
1391
+ """Draw 2D molecule in PNG format.
1346
1392
 
1347
1393
  Args:
1348
- width (int, optional): width of image. Defaults to 300.
1349
- height (int, optional): height of image. Defaults to 300.
1350
- index (bool, optional): whether to highlight atom indexes. Defaults to False.
1351
- svg (bool, optional): whether to return in SVG format. Defaults to True.
1352
-
1394
+ width (int, optional): width. Defaults to 300.
1395
+ height (int, optional): height. Defaults to 300.
1396
+ legend (str, optional): legend. Defaults to ''.
1397
+ atom_index (bool, optional): whether to show atom index. Defaults to False.
1398
+ highlight_atoms (list[int] | None, optional): atom(s) to highlight. Defaults to None.
1399
+ highlight_bonds (list[int] | None, optional): bond(s) to highlight. Defaults to None.
1400
+ redraw (bool, optional): whether to redraw. Defaults to False.
1401
+ coordgen (bool, optional): whether to use coordgen. Defaults to False.
1402
+ trim (bool, optional): whether to trim white margins. Defaults to True.
1403
+
1353
1404
  Returns:
1354
- object: PIL image object.
1405
+ Image.Image: output PIL Image object.
1355
1406
  """
1356
- if index:
1357
- for a in self.rdmol.GetAtoms():
1358
- a.SetProp("atomNote", str(a.GetIdx()+1))
1359
1407
 
1360
- return Draw.MolsToImage(self.rdmol,
1361
- size=(width,height),
1362
- kekulize=True,
1363
- wedgeBonds=True, # draw wedge (stereo)
1364
- fitImage=False,
1365
- options=None,
1366
- canvas=None,
1367
- useSVG=svg)
1368
-
1408
+ return render_png(self.rdmol,
1409
+ width = width,
1410
+ height = height,
1411
+ legend = legend,
1412
+ atom_index = atom_index,
1413
+ highlight_atoms = highlight_atoms,
1414
+ highlight_bonds = highlight_bonds,
1415
+ redraw = redraw,
1416
+ coordgen = coordgen,
1417
+ trim = trim)
1369
1418
 
1370
1419
  def to_svg(self,
1371
- width:int = 400,
1372
- height:int = 400,
1373
- legend:str = '',
1374
- index:bool = False,
1375
- highlight: list[int] | None = None,
1376
- coordgen:bool = False) -> str:
1377
- """Returns depiction strings in SVG format.
1420
+ width: int = 300,
1421
+ height: int = 300,
1422
+ legend: str = '',
1423
+ atom_index: bool = False,
1424
+ highlight_atoms: list[int] | None = None,
1425
+ highlight_bonds: list[int] | None = None,
1426
+ redraw: bool = False,
1427
+ coordgen: bool = False,
1428
+ optimize: bool = True,
1429
+ ) -> str:
1430
+ """Draw 2D molecule in SVG format.
1378
1431
 
1379
1432
  Examples:
1380
1433
  For Jupyter Notebook, wrap the output with SVG:
@@ -1383,55 +1436,60 @@ class Mol:
1383
1436
  >>> SVG(libr[0].to_svg())
1384
1437
 
1385
1438
  Args:
1386
- width (int): width (default:400)
1387
- height (int): height (default:400)
1388
- legend (str): legend
1389
- index (bool): True/False whether to display atom index
1390
- highlight (list): list of atom indices to highlight
1439
+ width (int, optional): width. Defaults to 300.
1440
+ height (int, optional): height. Defaults to 300.
1441
+ legend (str, optional): legend. Defaults to ''.
1442
+ atom_index (bool, optional): whether to show atom index. Defaults to False.
1443
+ highlight_atoms (list[int] | None, optional): atom(s) to highlight. Defaults to None.
1444
+ highlight_bonds (list[int] | None, optional): bond(s) to highlight. Defaults to None.
1445
+ redraw (bool, optional): whether to redraw. Defaults to False.
1446
+ coordgen (bool, optional): whether to use coordgen. Defaults to False.
1447
+ optimize (bool, optional): whether to optimize SVG string. Defaults to True.
1391
1448
 
1392
1449
  Returns:
1393
- str: SVG text
1450
+ str: SVG string
1394
1451
  """
1395
- rdDepictor.SetPreferCoordGen(coordgen)
1396
-
1397
- rdmol_2d = Chem.Mol(self.rdmol)
1398
- rdDepictor.Compute2DCoords(rdmol_2d)
1399
- rdDepictor.StraightenDepiction(rdmol_2d)
1400
-
1401
- for atom in rdmol_2d.GetAtoms():
1402
- for key in atom.GetPropsAsDict():
1403
- atom.ClearProp(key)
1404
-
1405
- if index: # index hides polar hydrogens
1406
- for atom in rdmol_2d.GetAtoms():
1407
- atom.SetProp("atomLabel", str(atom.GetIdx()))
1408
- # atom.SetProp("atomNote", str(atom.GetIdx()))
1409
- # atom.SetProp("molAtomMapNumber", str(atom.GetIdx()))
1410
-
1411
- drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
1412
- if highlight:
1413
- drawer.DrawMolecule(rdmol_2d, legend=legend, highlightAtoms=highlight)
1414
- else:
1415
- drawer.DrawMolecule(rdmol_2d, legend=legend)
1416
- drawer.FinishDrawing()
1417
- return drawer.GetDrawingText()
1418
-
1419
-
1420
- def plot_energy(self, df:pd.DataFrame) -> str:
1421
- """Returns Seaborn plot strings for dihedral energy profile in SVG format.
1452
+ return render_svg(self.rdmol,
1453
+ width = width,
1454
+ height = height,
1455
+ legend = legend,
1456
+ atom_index = atom_index,
1457
+ highlight_atoms = highlight_atoms,
1458
+ highlight_bonds = highlight_bonds,
1459
+ redraw = redraw,
1460
+ coordgen = coordgen,
1461
+ optimize = optimize)
1462
+
1422
1463
 
1423
- Input pandas DataFrame must have columns: `angle` and `E_rel(kcal/mol)`
1464
+ def plot_torsion_energies(self,
1465
+ torsion_key: int,
1466
+ svg: bool = False,
1467
+ upper_limit: float = 35.0,
1468
+ zoomin_limit: float = 5.0,
1469
+ **kwargs,
1470
+ ) -> str | None:
1471
+ """Plot torsion energies.
1424
1472
 
1425
1473
  Args:
1426
- df (pd.DataFrame): input dataframe.
1474
+ torsion_key (int): torsion data to plot.
1475
+ svg (bool, optional): whether to return SVG strings. Defaults to False.
1476
+ upper_limit (float, optional): upper limit for E_rel(kcal/mol). Defaults to 35.0.
1477
+ zoomin_limit (float, optional): y-axis upper limit used when the maximum E_rel(kcal/mol) is below this value. Defaults to 5.0.
1478
+ **kwargs: matplotlib.pyplot.figure options.
1427
1479
 
1428
1480
  Returns:
1429
- str: Seaborn plot in strings.
1481
+ SVG strings or None for Jupyter Notebook.
1430
1482
  """
1483
+ data = self.props['torsion'][torsion_key]
1484
+ df = pd.DataFrame({ax: data[ax] for ax in ['angle', 'E_rel(kcal/mol)']})
1431
1485
 
1432
- # sns.set_theme()
1486
+ plt.figure(**kwargs)
1487
+ plt.clf() # Clear the current figure to prevent overlapping plots
1488
+
1489
+ sns.set_theme()
1433
1490
  sns.color_palette("tab10")
1434
1491
  sns.set_style("whitegrid")
1492
+
1435
1493
  if len(df['angle']) == len(df['angle'].drop_duplicates()):
1436
1494
  g = sns.lineplot(x="angle",
1437
1495
  y="E_rel(kcal/mol)",
@@ -1448,31 +1506,57 @@ class Mol:
1448
1506
  markersize=10)
1449
1507
  g.xaxis.set_major_locator(ticker.MultipleLocator(30))
1450
1508
  g.xaxis.set_major_formatter(ticker.ScalarFormatter())
1451
- if df["E_rel(kcal/mol)"].max() > 35.0:
1509
+ if df["E_rel(kcal/mol)"].max() > upper_limit:
1452
1510
  g.set(title=self.name,
1453
1511
  xlabel='Dihedral Angle (degree)',
1454
1512
  ylabel='Relative Energy (Kcal/mol)',
1455
1513
  xlim=(-190, 190),
1456
- ylim=(-1.5, 35.0))
1457
- elif df["E_rel(kcal/mol)"].max() < 5.0:
1514
+ ylim=(-1.5, upper_limit))
1515
+ elif df["E_rel(kcal/mol)"].max() < zoomin_limit:
1458
1516
  g.set(title=self.name,
1459
1517
  xlabel='Dihedral Angle (degree)',
1460
1518
  ylabel='Relative Energy (Kcal/mol)',
1461
1519
  xlim=(-190, 190),
1462
- ylim=(-1.5, 5.0))
1520
+ ylim=(-1.5, zoomin_limit))
1463
1521
  else:
1464
1522
  g.set(title=self.name,
1465
1523
  xlabel='Dihedral Angle (degree)',
1466
1524
  ylabel='Relative Energy (Kcal/mol)',
1467
1525
  xlim=(-190, 190),)
1468
1526
  g.tick_params(axis='x', rotation=30)
1469
- in_memory = io.StringIO()
1470
- plt.savefig(in_memory, format='svg', bbox_inches='tight')
1471
- plt.clf()
1472
- return in_memory.getvalue()
1473
-
1527
+
1528
+ if svg:
1529
+ buf = StringIO()
1530
+ plt.savefig(buf, format='svg', bbox_inches='tight')
1531
+ plt.close() # prevents duplicate plot outputs in Jupyter Notebook
1532
+ svg_string = buf.getvalue()
1533
+ # optimize SVG string
1534
+ scour_options = {
1535
+ 'strip_comments': True,
1536
+ 'strip_ids': True,
1537
+ 'shorten_ids': True,
1538
+ 'compact_paths': True,
1539
+ 'indent_type': 'none',
1540
+ }
1541
+ svg_string = scourString(svg_string, options=scour_options)
1542
+
1543
+ return svg_string
1544
+
1545
+ else:
1546
+ buf = BytesIO()
1547
+ plt.savefig(buf, format='png', bbox_inches='tight')
1548
+ plt.close() # prevents duplicate plot outputs in Jupyter Notebook
1549
+ buf.seek(0)
1550
+ img = Image.open(buf)
1551
+ plt.imshow(img)
1552
+ plt.axis('off') # Optional: remove axes
1553
+ plt.show()
1554
+
1474
1555
 
1475
def to_html(self,
            htmlbody: bool = False,
            contents: str = 'torsion',
            ) -> str:
    """Returns HTML text of dihedral energy profile.

    Args:
        htmlbody (bool, optional): if True, the content is wrapped in
            `<html><body>` and `</body></html>`. Defaults to False.
        contents (str, optional): which content to render (compared
            case-insensitively). Only 'torsion' produces content here;
            any other value yields no content. Defaults to 'torsion'.

    Returns:
        str: HTML text.

    Raises:
        KeyError: if `contents` is 'torsion' and `self.props` has no
            'torsion' entry, or a torsion entry has no 'indices'
            (or has 'frag' without 'frag_indices').
    """
    # Collect pieces and join once instead of repeated string concatenation.
    parts = []

    if htmlbody:
        parts.append('<html><body>')

    if contents.lower() == 'torsion':
        # start of content
        parts.append(f'<h1 style="text-align:left">{self.name}</h1>')
        parts.append('<table>')
        for tk, dictdata in self.props['torsion'].items():
            ijkl = dictdata['indices']
            ijkl_str = '-'.join(str(i) for i in ijkl)
            svg_mol = self.to_svg(highlight_atoms=ijkl, atom_index=True)
            svg_plot = self.plot_torsion_energies(torsion_key=tk, svg=True)

            row = f'<tr><td>{ijkl_str}</td><td>{svg_mol}</td>'

            frag = dictdata.get('frag')
            if frag is not None:
                # 'frag' is stored as a mol block; it is drawn with its own
                # torsion atom indices in two extra cells.
                frag = Chem.MolFromMolBlock(frag)
                pqrs = dictdata['frag_indices']
                pqrs_str = '-'.join(str(i) for i in pqrs)
                svg_frag = render_svg(frag, highlight_atoms=pqrs, atom_index=True)
                # Every cell is explicitly closed (the index cell was
                # previously left without its closing </td>).
                row += f'<td>{pqrs_str}</td><td>{svg_frag}</td>'

            row += f'<td>{svg_plot}</td></tr>'
            parts.append(row)
        parts.append('</table>')
        parts.append('<hr style="height:2px;border-width:0;color:gray;background-color:gray">')
        # end of content

    if htmlbody:
        parts.append('</body></html>')

    return ''.join(parts)
1507
1599
 
1508
1600
 
1509
def dumps(self, key: str | None = "", decimals: int = 2) -> str:
    """Returns JSON dumps of properties.

    Args:
        key (str | None, optional): key for a subset of properties.
            An empty string or None dumps all properties. Defaults to "".
        decimals (int, optional): decimal places for float numbers,
            passed to `recursive_round`. Defaults to 2.

    Returns:
        str: JSON dumps of all rounded properties, or of `{key: value}`
            when `key` is given.

    Raises:
        KeyError: if `key` is given but not found in the rounded properties.
    """
    props = recursive_round(self.props, decimals)

    if key:
        return json.dumps({key: props[key]})

    return json.dumps(props)
1617
+
1618
+
1619
def serialize(self, decimals: int = 2) -> str:
    """Returns a JSON string holding name, SMILES, properties and conformers.

    Args:
        decimals (int, optional): passed to `recursive_round` when
            rounding the properties. Defaults to 2.

    Returns:
        str: JSON dumps of a dict with the keys 'name', 'smiles',
            'props', and 'confs' (one serialized entry per conformer).
    """
    payload = {}
    payload['name'] = self.name
    payload['smiles'] = self.smiles
    payload['props'] = recursive_round(self.props, decimals)
    payload['confs'] = [c.serialize() for c in self.confs]

    return json.dumps(payload)
1628
+
1629
+
1630
+ def deserialize(self, serialized: str) -> Self:
1631
+ """Updates self with the serialized string input.
1632
+
1633
+ Args:
1634
+ serialized (str): input
1635
+
1636
+ Returns:
1637
+ Self: modified self.
1638
+ """
1639
+ data = json.loads(serialized)
1640
+
1641
+ self.name = data['name']
1642
+ self.smiles = data['smiles'] # isomeric SMILES, no H
1643
+ self.rdmol = Chem.MolFromSmiles(data['smiles']) # for 2D depiction
1644
+ self.rdmol.SetProp('_Name', self.name)
1645
+ self.InChI = Chem.MolToInchi(self.rdmol)
1646
+ self.InChIKey = inchi.InchiToInchiKey(self.InChI)
1647
+ self.props = data['props']
1648
+ self.confs = [Conf().deserialize(_) for _ in data['confs']] # for 3D conformers (iterable)
1649
+
1650
+ return self