rdworks 0.25.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. rdworks/__init__.py +35 -0
  2. rdworks/autograph/__init__.py +4 -0
  3. rdworks/autograph/autograph.py +184 -0
  4. rdworks/autograph/centroid.py +90 -0
  5. rdworks/autograph/dynamictreecut.py +135 -0
  6. rdworks/autograph/nmrclust.py +123 -0
  7. rdworks/autograph/rckmeans.py +74 -0
  8. rdworks/bitqt/__init__.py +1 -0
  9. rdworks/bitqt/bitqt.py +355 -0
  10. rdworks/conf.py +374 -0
  11. rdworks/descriptor.py +36 -0
  12. rdworks/display.py +206 -0
  13. rdworks/ionized.py +170 -0
  14. rdworks/matchedseries.py +260 -0
  15. rdworks/mol.py +1522 -0
  16. rdworks/mollibr.py +887 -0
  17. rdworks/pka.py +38 -0
  18. rdworks/predefined/Asinex_fragment.xml +20 -0
  19. rdworks/predefined/Astex_RO3.xml +16 -0
  20. rdworks/predefined/Baell2010_PAINS/Baell2010A.xml +52 -0
  21. rdworks/predefined/Baell2010_PAINS/Baell2010B.xml +169 -0
  22. rdworks/predefined/Baell2010_PAINS/Baell2010C.xml +1231 -0
  23. rdworks/predefined/Baell2010_PAINS/PAINS-less-than-015-hits.xml +2048 -0
  24. rdworks/predefined/Baell2010_PAINS/PAINS-less-than-150-hits.xml +278 -0
  25. rdworks/predefined/Baell2010_PAINS/PAINS-more-than-150-hits.xml +83 -0
  26. rdworks/predefined/Baell2010_PAINS/makexml.py +70 -0
  27. rdworks/predefined/Brenk2008_Dundee/makexml.py +21 -0
  28. rdworks/predefined/CNS.xml +18 -0
  29. rdworks/predefined/ChEMBL_Walters/BMS.xml +543 -0
  30. rdworks/predefined/ChEMBL_Walters/Dundee.xml +318 -0
  31. rdworks/predefined/ChEMBL_Walters/Glaxo.xml +168 -0
  32. rdworks/predefined/ChEMBL_Walters/Inpharmatica.xml +276 -0
  33. rdworks/predefined/ChEMBL_Walters/LINT.xml +174 -0
  34. rdworks/predefined/ChEMBL_Walters/MLSMR.xml +351 -0
  35. rdworks/predefined/ChEMBL_Walters/PAINS.xml +1446 -0
  36. rdworks/predefined/ChEMBL_Walters/SureChEMBL.xml +501 -0
  37. rdworks/predefined/ChEMBL_Walters/makexml.py +40 -0
  38. rdworks/predefined/Hann1999_Glaxo/Hann1999.xml +168 -0
  39. rdworks/predefined/Hann1999_Glaxo/Hann1999Acid.xml +102 -0
  40. rdworks/predefined/Hann1999_Glaxo/Hann1999Base.xml +6 -0
  41. rdworks/predefined/Hann1999_Glaxo/Hann1999ElPh.xml +6 -0
  42. rdworks/predefined/Hann1999_Glaxo/Hann1999NuPh.xml +6 -0
  43. rdworks/predefined/Hann1999_Glaxo/makexml.py +83 -0
  44. rdworks/predefined/Kazius2005/Kazius2005.xml +114 -0
  45. rdworks/predefined/Kazius2005/makexml.py +66 -0
  46. rdworks/predefined/ZINC_druglike.xml +24 -0
  47. rdworks/predefined/ZINC_fragment.xml +14 -0
  48. rdworks/predefined/ZINC_leadlike.xml +15 -0
  49. rdworks/predefined/fragment.xml +7 -0
  50. rdworks/predefined/ionized/simple_smarts_pattern.csv +57 -0
  51. rdworks/predefined/ionized/smarts_pattern.csv +107 -0
  52. rdworks/predefined/misc/makexml.py +119 -0
  53. rdworks/predefined/misc/reactive-part-2.xml +104 -0
  54. rdworks/predefined/misc/reactive-part-3.xml +74 -0
  55. rdworks/predefined/misc/reactive.xml +321 -0
  56. rdworks/readin.py +312 -0
  57. rdworks/rgroup.py +2173 -0
  58. rdworks/scaffold.py +520 -0
  59. rdworks/std.py +143 -0
  60. rdworks/stereoisomers.py +127 -0
  61. rdworks/tautomers.py +20 -0
  62. rdworks/units.py +63 -0
  63. rdworks/utils.py +495 -0
  64. rdworks/xml.py +260 -0
  65. rdworks-0.25.7.dist-info/METADATA +37 -0
  66. rdworks-0.25.7.dist-info/RECORD +69 -0
  67. rdworks-0.25.7.dist-info/WHEEL +5 -0
  68. rdworks-0.25.7.dist-info/licenses/LICENSE +21 -0
  69. rdworks-0.25.7.dist-info/top_level.txt +1 -0
rdworks/mol.py ADDED
@@ -0,0 +1,1522 @@
1
+ # rdworks/mol.py
2
+
3
+ import os
4
+ import io
5
+ import copy
6
+ import types
7
+ import pathlib
8
+ import itertools
9
+ import math
10
+ import json
11
+ import logging
12
+ import tempfile
13
+
14
+ from collections import defaultdict
15
+ from collections.abc import Callable
16
+ from pathlib import Path
17
+ from typing import Iterator, Self
18
+
19
+ import numpy as np
20
+ import pandas as pd
21
+ import matplotlib.ticker as ticker
22
+ import matplotlib.pyplot as plt
23
+ import seaborn as sns
24
+
25
+ import CDPL
26
+ import CDPL.Chem
27
+ import CDPL.ConfGen
28
+
29
+ from rdkit import Chem, DataStructs
30
+
31
+ from rdkit.Chem import (
32
+ rdMolDescriptors, AllChem, Descriptors, QED,
33
+ rdFingerprintGenerator,
34
+ Draw, rdDepictor,
35
+ rdDistGeom, rdMolAlign, rdMolTransforms, rdmolops
36
+ )
37
+ from rdkit.Chem.Draw import rdMolDraw2D
38
+
39
+ from rdkit.ML.Cluster import Butina
40
+
41
+ from rdworks.std import desalt_smiles, standardize
42
+ from rdworks.xml import list_predefined_xml, get_predefined_xml, parse_xml
43
+ from rdworks.scaffold import rigid_fragment_indices
44
+ from rdworks.descriptor import rd_descriptor, rd_descriptor_f
45
+ from rdworks.display import svg
46
+ from rdworks.utils import convert_tril_to_symm, QT, fix_decimal_places_in_dict
47
+ from rdworks.units import ev2kcalpermol
48
+ from rdworks.autograph import NMRCLUST, DynamicTreeCut, RCKmeans, AutoGraph
49
+ from rdworks.bitqt import BitQT
50
+ from rdworks.conf import Conf
51
+
52
+ main_logger = logging.getLogger()
53
+
54
+
55
+ class Mol:
56
+ """Container for molecular structure, conformers, and other information.
57
+ """
58
+
59
+ MFP2 = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
60
+
61
+ ETKDG_params = rdDistGeom.ETKDGv3()
62
+ ETKDG_params.useSmallRingTorsions = True
63
+ ETKDG_params.maxIterations = 2000
64
+
65
+
66
def __init__(self,
             molecular_input: str | Chem.Mol,
             name: str = '',
             std: bool = False,
             max_workers: int = 1,
             chunksize: int = 4,
             progress: bool = False) -> None:
    """Create a rdworks.Mol object.

    Examples:
        >>> import rdworks
        >>> m = rdworks.Mol('c1ccccc1', name='benzene')

    Args:
        molecular_input (str | Chem.Mol): SMILES or rdkit.Chem.Mol object.
            A Chem.Mol is stored as given (not copied).
        name (str, optional): name of the molecule. Defaults to ''.
        std (bool, optional): whether to standardize the molecule. Defaults to False.
        max_workers (int, optional): stored on the instance as-is. Defaults to 1.
        chunksize (int, optional): stored on the instance as-is. Defaults to 4.
        progress (bool, optional): stored on the instance as-is. Defaults to False.

    Raises:
        ValueError: Invalid SMILES or rdkit.Chem.Mol object.
        TypeError: No SMILES or rdkit.Chem.Mol object is provided.
        RuntimeError: Desalting or standardization process failed.
    """
    self.rdmol = None  # rdkit.Chem.Mol object
    self.smiles = None  # isomeric SMILES
    self.name = None
    self.props = {}
    self.confs = []  # 3D conformers (iterable)
    self.fp = None
    self.max_workers = max_workers
    self.chunksize = chunksize
    self.progress = progress

    ### parsing
    if isinstance(molecular_input, str):
        invalid_msg = f'Mol() received invalid SMILES: {molecular_input}'
        try:
            rdmol = Chem.MolFromSmiles(molecular_input)
        except Exception as err:
            raise ValueError(invalid_msg) from err
    elif isinstance(molecular_input, Chem.Mol):
        invalid_msg = 'Mol() received invalid rdkit.Chem.Mol object'
        rdmol = molecular_input
    else:
        raise TypeError('Mol() expects SMILES or rdkit.Chem.Mol object')

    # Explicit checks instead of `assert`: assertions are stripped when
    # Python runs with -O, which would let an unparsable input through.
    if rdmol is None:
        raise ValueError(invalid_msg)
    try:
        smiles = Chem.MolToSmiles(rdmol)
    except Exception as err:
        raise ValueError(invalid_msg) from err
    self.rdmol = rdmol
    self.smiles = smiles

    ### desalting
    if "." in self.smiles:
        # keep the input SMILES so that the error message shows what failed
        salted_smiles = self.smiles
        desalt_msg = f'Mol() error occurred in desalting: {salted_smiles}'
        try:
            (desalted_smiles, desalted_rdmol) = desalt_smiles(salted_smiles)
        except Exception as err:
            raise RuntimeError(desalt_msg) from err
        if not desalted_smiles or desalted_rdmol is None:
            raise RuntimeError(desalt_msg)
        self.smiles = desalted_smiles
        self.rdmol = desalted_rdmol

    ### standardization
    if std:
        # standardization changes self.rdmol
        std_msg = 'Mol() error occurred in standardization'
        try:
            std_rdmol = standardize(self.rdmol)
            std_smiles = Chem.MolToSmiles(std_rdmol)
        except Exception as err:
            raise RuntimeError(std_msg) from err
        if std_rdmol is None or not std_smiles:
            raise RuntimeError(std_msg)
        self.rdmol = std_rdmol
        self.smiles = std_smiles

    ### naming
    try:
        self.name = str(name)
    except Exception:
        self.name = 'untitled'
    self.rdmol.SetProp('_Name', self.name)  # _Name can't be None

    ### set default properties
    self.props.update({
        'aka': [],  # <-- to be set by MolLibr.unique()
        # hydrogens are not excluded, but implicit ones are not counted:
        #   m = Chem.MolFromSmiles("c1c[nH]cc1")
        #   m.GetNumAtoms()              >> 5
        #   Chem.AddHs(m).GetNumAtoms()  >> 10
        'atoms': self.rdmol.GetNumAtoms(),
        'charge': rdmolops.GetFormalCharge(self.rdmol),
        # number of rotatable bonds
        'nrb': Descriptors.NumRotatableBonds(self.rdmol),
    })
158
+
159
+
160
def __str__(self) -> str:
    """Human-readable summary of the molecule.

    Shows the SMILES, the name, and the current number of conformers.

    Examples:
        >>> m = Mol('CCO', name='ethanol')
        >>> print(m)

    Returns:
        str: string representation.
    """
    num_confs = self.count()
    return f"<Mol({self.smiles} name={self.name} conformers={num_confs})>"
171
+
172
+
173
def __hash__(self) -> int:
    """Hash of the SMILES string of the molecule.

    The hash is derived from the same attribute that `__eq__` compares
    (`self.smiles`), so molecules that compare equal also hash equal.
    This is what allows Mol objects to be used as dictionary keys or
    set elements. Note that the `==` operator itself does not consult
    the hash; only hash-based containers do.

    Examples:
        >>> unique = {m1, m2}

    Returns:
        int: hash value of the SMILES string.
    """
    return hash(self.smiles)
188
+
189
+
190
def __eq__(self, other: object) -> bool:
    """True if `other` molecule is identical with the molecule.

    It compares the `smiles` attributes of both objects.

    Examples:
        >>> m1 == m2

    Args:
        other (object): other rdworks.Mol object.

    Returns:
        bool: True if identical. `NotImplemented` is returned when `other`
            has no `smiles` attribute, so that Python falls back to its
            default comparison instead of raising AttributeError.
    """
    try:
        other_smiles = other.smiles
    except AttributeError:
        return NotImplemented
    return self.smiles == other_smiles
205
+
206
+
207
def __iter__(self) -> Iterator:
    """Iterate over the conformers of the molecule.

    Examples:
        >>> for conformer in mol:
        >>>     print(conformer.name)

    Returns:
        Iterator: a fresh iterator over `self.confs`.
    """
    conformers = self.confs
    return iter(conformers)
218
+
219
+
220
def __next__(self) -> Conf:
    """Next conformer of the molecule.

    `self.confs` is a list, which is not an iterator, so calling `next()`
    on it directly always raises TypeError. A cursor is kept on the
    instance instead, so that repeated `next(mol)` calls walk through the
    conformers in order. The cursor is reset once the conformers are
    exhausted, so a later `next(mol)` starts again from the first one.

    Raises:
        StopIteration: when there is no more conformer.

    Returns:
        Conf: Conf object of one of conformers of the molecule.
    """
    cursor = getattr(self, '_next_cursor', 0)
    if cursor >= len(self.confs):
        self._next_cursor = 0
        raise StopIteration
    self._next_cursor = cursor + 1
    return self.confs[cursor]
227
+
228
+
229
def __getitem__(self, index: int | slice) -> Conf | list[Conf]:
    """Conformer(s) of the molecule with given index or slice of indexes.

    Examples:
        >>> first_conformer = mol[0]
        >>> first_three = mol[:3]

    Args:
        index (int | slice): index for conformers.

    Raises:
        ValueError: conformers are not defined in the molecule or index is out of range.

    Returns:
        Conf | list[Conf]: a Conf object for an integer index, or a list of
            Conf objects for a slice.
    """
    if self.count() == 0:
        raise ValueError("no conformers")
    try:
        return self.confs[index]
    except (IndexError, TypeError) as err:
        # only indexing errors are translated; anything else propagates
        raise ValueError(f"index should be 0..{self.count()-1}") from err
250
+
251
+
252
def copy(self) -> Self:
    """Make an independent deep copy of this object.

    Returns:
        Self: a deep copy of self (rdworks.Mol) object.
    """
    from copy import deepcopy
    duplicate = deepcopy(self)
    return duplicate
259
+
260
+
261
def rename(self, prefix: str = '', sep: str = '/', start: int = 1) -> Self:
    """Rename the molecule (optionally) and all of its conformers.

    Each conformer is named {name}{sep}{serial}, where the serial number
    starts at `start` and is left-padded with zeros to the number of
    digits in the conformer count (ex. 100 conformers -> 3 digits).

    Args:
        prefix (str, optional): new name of the molecule. If empty, the
            current name is kept. Defaults to ''.
        sep (str, optional): separator between name and serial number. Defaults to '/'.
        start (int, optional): first serial number. Defaults to 1.

    Returns:
        Self: rdworks.Mol object.
    """
    if prefix:
        self.name = prefix
        self.rdmol.SetProp('_Name', prefix)
    # update conformer names
    width = len(str(self.count()))  # ex. '100' -> 3
    for serial, conformer in enumerate(self.confs, start=start):
        padded = str(serial).rjust(width, '0')
        conformer.rename(f'{self.name}{sep}{padded}')
    return self
285
+
286
+
287
def qed(self, properties: list[str] | None = None) -> Self:
    """Updates quantitative estimate of drug-likeness (QED) and related properties.

    Each requested key is looked up in `rd_descriptor_f`, the descriptor
    function is applied to `self.rdmol`, and the results are merged into
    `self.props`. Nothing is merged if any key is unknown.

    Args:
        properties (list[str], optional): property keys to calculate.
            Defaults to ['QED', 'MolWt', 'LogP', 'TPSA', 'HBD'].

    Raises:
        KeyError: if property key is unknown.

    Returns:
        Self: rdworks.Mol object.
    """
    if properties is None:
        # default is built per call to avoid a shared mutable default argument
        properties = ['QED', 'MolWt', 'LogP', 'TPSA', 'HBD']
    props_dict = {}
    for key in properties:
        try:
            descriptor = rd_descriptor_f[key]
        except KeyError:
            raise KeyError(f'Mol.qed() received undefined property {key} for {self}') from None
        # a failure inside the descriptor itself is not an "undefined property"
        # and therefore propagates unchanged
        props_dict[key] = descriptor(self.rdmol)
    self.props.update(props_dict)
    return self
307
+
308
+
309
def remove_stereo(self) -> Self:
    """Return a copy of self with stereochemistry removed.

    The original object is left untouched; the copy gets its `smiles`
    regenerated from the stripped structure.

    Examples:
        >>> m = rdworks.Mol("C/C=C/C=C\\C", "double_bond")
        >>> m.remove_stereo().smiles == "CC=CC=CC"

    Returns:
        Self: rdworks.Mol object.
    """
    stripped = copy.deepcopy(self)
    rdmol = stripped.rdmol
    Chem.RemoveStereochemistry(rdmol)
    # original note: keep the original stereo info. for ring double bond
    Chem.AssignStereochemistry(
        rdmol,
        cleanIt=False,
        force=False,
        flagPossibleStereoCenters=False,
    )
    stripped.smiles = Chem.MolToSmiles(rdmol)
    return stripped
328
+
329
+
330
def make_confs(self,
               n: int = 50,
               method: str = 'RDKit_ETKDG',
               calculator: str | Callable = 'MMFF94') -> Self:
    """Generates 3D conformers.

    Existing conformers are discarded. Generated conformers are scored with
    `calculator`, sorted by `E_tot(kcal/mol)` in ascending order, annotated
    with `E_rel(kcal/mol)` relative to the lowest one, and renamed.

    Args:
        n (int, optional): number of conformers to generate. Defaults to 50.
        method (str, optional): conformer generation method (case-insensitive).
            Choices are `RDKit_ETKDG`, `CDPL_CONFORGE`.
            Defaults to 'RDKit_ETKDG'.
        calculator (str | Callable, optional): passed to
            `Conf.get_potential_energy()` for ranking. Defaults to 'MMFF94'.

    Raises:
        ValueError: if `method` is not one of the supported choices.
        RuntimeError: if conformer generation failed or produced no conformer.

    Returns:
        Self: rdworks.Mol object

    Reference:
        T. Seidel, C. Permann, O. Wieder, S. M. Kohlbacher, T. Langer,
        High-Quality Conformer Generation with CONFORGE: Algorithm and Performance Assessment.
        J. Chem. Inf. Model. 63, 5549-5570 (2023).
    """
    method_key = method.upper()
    if method_key not in ('RDKIT_ETKDG', 'CDPL_CONFORGE'):
        # checked before the existing conformers are discarded
        raise ValueError(
            f'Mol.make_confs() received unknown method {method}; '
            'choices are RDKit_ETKDG and CDPL_CONFORGE')

    self.confs = []

    if method_key == 'RDKIT_ETKDG':
        rdmol_H = Chem.AddHs(self.rdmol, addCoords=True)  # returns a copy with hydrogens added
        rdDistGeom.EmbedMultipleConfs(rdmol_H, n, params=self.ETKDG_params)
        for rd_conformer in rdmol_H.GetConformers():
            # one rdkit.Chem.Mol per conformer;
            # number of atoms should match with conformer(s)
            rdmol_conf = Chem.Mol(rdmol_H)
            rdmol_conf.RemoveAllConformers()
            rdmol_conf.AddConformer(Chem.Conformer(rd_conformer))
            self.confs.append(Conf(rdmol_conf))

    else:  # CDPL_CONFORGE
        mol = CDPL.Chem.parseSMILES(self.smiles)
        # create and initialize an instance of the class ConfGen.ConformerGenerator which
        # will perform the actual conformer ensemble generation work
        conf_gen = CDPL.ConfGen.ConformerGenerator()
        conf_gen.settings.timeout = 60 * 1000  # 60 sec.
        conf_gen.settings.minRMSD = 0.5
        conf_gen.settings.energyWindow = 20.0  # kcal/mol(?)
        conf_gen.settings.maxNumOutputConformers = n
        # dictionary mapping status codes to human readable strings
        status_to_str = {
            CDPL.ConfGen.ReturnCode.UNINITIALIZED: 'uninitialized',
            CDPL.ConfGen.ReturnCode.TIMEOUT: 'max. processing time exceeded',
            CDPL.ConfGen.ReturnCode.ABORTED: 'aborted',
            CDPL.ConfGen.ReturnCode.FORCEFIELD_SETUP_FAILED: 'force field setup failed',
            CDPL.ConfGen.ReturnCode.FORCEFIELD_MINIMIZATION_FAILED: 'force field structure refinement failed',
            CDPL.ConfGen.ReturnCode.FRAGMENT_LIBRARY_NOT_SET: 'fragment library not available',
            CDPL.ConfGen.ReturnCode.FRAGMENT_CONF_GEN_FAILED: 'fragment conformer generation failed',
            CDPL.ConfGen.ReturnCode.FRAGMENT_CONF_GEN_TIMEOUT: 'fragment conformer generation timeout',
            CDPL.ConfGen.ReturnCode.FRAGMENT_ALREADY_PROCESSED: 'fragment already processed',
            CDPL.ConfGen.ReturnCode.TORSION_DRIVING_FAILED: 'torsion driving failed',
            CDPL.ConfGen.ReturnCode.CONF_GEN_FAILED: 'conformer generation failed',
        }
        accepted = (
            CDPL.ConfGen.ReturnCode.SUCCESS,
            # TOO_MUCH_SYMMETRY: output ensemble may contain duplicates
            CDPL.ConfGen.ReturnCode.TOO_MUCH_SYMMETRY,
        )
        try:
            # prepare the molecule for conformer generation
            CDPL.ConfGen.prepareForConformerGeneration(mol)
            # generate the conformer ensemble
            status = conf_gen.generate(mol)
        except Exception as err:
            raise RuntimeError('Error: conformer generation failed: %s' % str(err)) from err
        if status not in accepted:
            raise RuntimeError(
                'Error: conformer generation failed: %s' % status_to_str.get(status, str(status)))

        # SB - io.StringIO does not work with Chem.MolecularGraphWriter()
        # We have to create a temporary file and re-read it for storing individual conformers.
        # A temporary directory is used so that the .sdf file is removed afterwards.
        with tempfile.TemporaryDirectory() as tmpdir:
            sdf_path = os.path.join(tmpdir, 'conformers.sdf')
            try:
                # store the generated conformer ensemble as per atom 3D coordinates
                # arrays (= the way conformers are represented in CDPKit)
                conf_gen.setConformers(mol)
                writer = CDPL.Chem.MolecularGraphWriter(sdf_path, "sdf")
                try:
                    writer.write(mol)
                finally:
                    # close before re-reading so that the output is flushed to disk
                    writer.close()
                with Chem.SDMolSupplier(sdf_path, sanitize=True, removeHs=False) as sdf:
                    self.confs = [Conf(m) for m in sdf if m is not None]
            except Exception as err:
                raise RuntimeError('Error: conformer generation failed: %s' % str(err)) from err

    if not self.confs:
        raise RuntimeError('Error: conformer generation failed: no conformers were generated')

    # energy evaluations for ranking
    for conf in self.confs:
        conf.get_potential_energy(calculator)  # default: MMFF94

    # set relative energy, E_rel(kcal/mol)
    sort_by = 'E_tot(kcal/mol)'
    self.confs = sorted(self.confs, key=lambda c: c.props[sort_by])  # ascending order
    lowest_energy = self.confs[0].props[sort_by]
    for conf in self.confs:
        conf.props.update({"E_rel(kcal/mol)": conf.props[sort_by] - lowest_energy})

    return self.rename()
428
+
429
+
430
def optimize(self, calculator: str | Callable = 'MMFF94', fmax: float = 0.05) -> Self:
    """Optimizes 3D conformers.

    Every conformer is replaced by the return value of its own
    `optimize(calculator, fmax)` call.

    Args:
        calculator (str | Callable): forwarded to `Conf.optimize()`.
            Defaults to 'MMFF94'.
        fmax (float, optional): forwarded to `Conf.optimize()`
            (presumably a force convergence criterion -- confirm against
            `Conf.optimize`). Defaults to 0.05.

    Returns:
        Self: rdworks.Mol object.
    """
    optimized = []
    for conformer in self.confs:
        optimized.append(conformer.optimize(calculator, fmax))
    self.confs = optimized
    return self
442
+
443
+
444
def sort_confs(self) -> Self:
    """Sorts conformers by `E_tot(eV)` or `E_tot(kcal/mol)` and sets `E_rel(kcal/mol)`.

    `E_tot(eV)` takes precedence when every conformer has it; otherwise
    `E_tot(kcal/mol)` is used when every conformer has that. The relative
    energy is always stored in kcal/mol.

    Raises:
        KeyError: if neither `E_tot(eV)` nor `E_tot(kcal/mol)` is defined
            for all conformers.

    Returns:
        Self: rdworks.Mol object.
    """
    # (property key, factor that converts the property to kcal/mol)
    candidates = (
        ('E_tot(eV)', 23.060547830619026),  # eV to kcal/mol
        ('E_tot(kcal/mol)', 1.0),
    )
    for key, factor in candidates:
        if all(key in conformer.props for conformer in self.confs):
            sort_by, conversion = key, factor
            break
    else:
        raise KeyError('Mol.sort_confs() requires E_tot(eV) or E_tot(kcal/mol) property')

    self.confs = sorted(self.confs, key=lambda c: c.props[sort_by])  # ascending order
    if self.count() > 0:
        E_lowest = self.confs[0].props[sort_by]
        for conformer in self.confs:
            conformer.props.update(
                {"E_rel(kcal/mol)": (conformer.props[sort_by] - E_lowest) * conversion})
    return self
468
+
469
+
470
def align_confs(self, method: str = 'rigid_fragment') -> Self:
    """Aligns all conformers to the first conformer.

    Conformer coordinates are modified in place.

    Args:
        method (str, optional): alignment method:
            `rigid_fragment`, `CrippenO3A`, `MMFFO3A`, `best_rms`.
            Defaults to `rigid_fragment`.

    Raises:
        NotImplementedError: if unsupported method is requested.

    Returns:
        Self: rdworks.Mol object.
    """
    # reject unknown names up front instead of silently doing nothing
    if method not in ('rigid_fragment', 'CrippenO3A', 'MMFFO3A', 'best_rms'):
        raise NotImplementedError(f'{method} alignment is not implemented yet.')

    if self.count() < 2:  # nothing to do
        return self

    ref_rdmol = self.confs[0].rdmol
    prb_rdmols = [conf.rdmol for conf in self.confs[1:]]

    if method == 'rigid_fragment':
        indices = rigid_fragment_indices(ref_rdmol)[0]  # 3D and H, largest fragment
        atom_map = [(i, i) for i in indices]
        for prb_rdmol in prb_rdmols:
            # rdMolAlign.AlignMol does not take symmetry into account
            # but we will use atom indices for alignment anyway.
            # If atomMap is not given, AlignMol() will attempt to generate
            # atomMap by substructure matching.
            rdMolAlign.AlignMol(prbMol=prb_rdmol,
                                refMol=ref_rdmol,
                                atomMap=atom_map)

    elif method == 'CrippenO3A':
        crippen_ref_contrib = rdMolDescriptors._CalcCrippenContribs(ref_rdmol)
        for prb_rdmol in prb_rdmols:
            crippen_prb_contrib = rdMolDescriptors._CalcCrippenContribs(prb_rdmol)
            crippen_O3A = rdMolAlign.GetCrippenO3A(prbMol=prb_rdmol,
                                                   refMol=ref_rdmol,
                                                   prbCrippenContribs=crippen_prb_contrib,
                                                   refCrippenContribs=crippen_ref_contrib)
            crippen_O3A.Align()

    elif method == 'MMFFO3A':
        mmff_ref_params = AllChem.MMFFGetMoleculeProperties(ref_rdmol)
        for prb_rdmol in prb_rdmols:
            mmff_prb_params = AllChem.MMFFGetMoleculeProperties(prb_rdmol)
            mmff_O3A = rdMolAlign.GetO3A(prbMol=prb_rdmol,
                                         refMol=ref_rdmol,
                                         prbPyMMFFMolProperties=mmff_prb_params,
                                         refPyMMFFMolProperties=mmff_ref_params)
            mmff_O3A.Align()

    else:  # best_rms
        for prb_rdmol in prb_rdmols:
            # symmetry-aware alignment / speed can be improved by removing Hs.
            # GetBestRMS leaves the probe molecule in the aligned state;
            # the returned RMSD value is not needed here.
            rdMolAlign.GetBestRMS(prbMol=prb_rdmol, refMol=ref_rdmol)

    return self
528
+
529
+
530
def cluster_confs(self, method: str = 'QT', threshold: float = 1.0, sortby: str = 'size') -> Self:
    """Clusters all conformers and sets cluster properties.

    Following cluster properties will be added: `cluster`, `cluster_mean_energy`,
    `cluster_median_energy`, `cluster_IQR_energy`, `cluster_size`, `cluster_centroid` (True or False)

    Every conformer must already have the `E_rel(kcal/mol)` property.

    `RCKMeans` algorithm is unreliable and not supported for now.

    Args:
        method (str, optional): clustering algorithm:
            `Butina`,
            `QT`,
            `NMRCLUST`,
            `DQT`,
            `BitQT`,
            `DynamicTreeCut`,
            `AutoGraph`.
            Defaults to `QT`.
        threshold (float, optional): RMSD threshold of a cluster. Used by
            `Butina`, `QT`, `DQT` (radian) and `BitQT`. Defaults to 1.0.
        sortby (str, optional): number clusters by descending cluster `size`
            or by ascending median `energy`. Defaults to `size`.

    Raises:
        NotImplementedError: if unsupported method or sortby is requested.

    Returns:
        Self: rdworks.Mol object
    """
    # validate before doing the O(n^2) distance calculations
    supported = ('Butina', 'QT', 'NMRCLUST', 'DQT', 'BitQT', 'DynamicTreeCut', 'AutoGraph')
    if method not in supported:
        raise NotImplementedError(f'{method} clustering is not implemented yet.')
    if sortby not in ('size', 'energy'):
        raise NotImplementedError(f'{sortby} is not implemented yet.')

    num_confs = self.count()
    if num_confs == 0:  # nothing to cluster
        return self

    lower_triangle_values = []
    if method != 'DQT':  # rmsd of x,y,z coordinates (non-H)
        # copies are made for rmsd calculations to prevent coordinates changes
        conf_rdmols_noH = [Chem.RemoveHs(Chem.Mol(conf.rdmol)) for conf in self.confs]
        for i in range(num_confs):
            for j in range(i):
                # rdMolAlign.GetBestRMS takes symmetry into account
                # removed hydrogens to speed up
                best_rms = rdMolAlign.GetBestRMS(prbMol=conf_rdmols_noH[i],
                                                 refMol=conf_rdmols_noH[j])
                lower_triangle_values.append(best_rms)

    else:  # rmsd (radian) of dihedral angles
        # symmetry-related equivalence is not considered
        torsion_atom_indices = self.torsion_atoms()
        torsions = []  # num.confs x num.torsions
        for conf in self.confs:
            rd_conformer = conf.rdmol.GetConformer()
            torsions.append(np.array([
                rdMolTransforms.GetDihedralRad(rd_conformer, i, j, k, l)
                for (i, j, k, l, *_) in torsion_atom_indices]))
        for i in range(num_confs):
            for j in range(i):
                rad_diff = torsions[i] - torsions[j]
                # wrap into [-pi, pi) so that e.g. +179 and -179 degrees
                # are 2 degrees apart, not 358
                rad_diff = (rad_diff + np.pi) % (2.0 * np.pi) - np.pi
                # root-mean-square over the torsions (not over the conformers)
                rmsd = np.sqrt(np.mean(rad_diff ** 2)) if rad_diff.size else 0.0
                lower_triangle_values.append(rmsd)

    cluster_assignment = None
    centroid_indices = None

    if method == 'Butina':
        clusters = Butina.ClusterData(data=lower_triangle_values,
                                      nPts=num_confs,
                                      distThresh=threshold,
                                      isDistData=True,
                                      reordering=True)
        cluster_assignment = [None] * num_confs
        centroid_indices = []
        for cluster_idx, indices in enumerate(clusters):
            for conf_idx in indices:
                cluster_assignment[conf_idx] = cluster_idx
            # the first member of each cluster is taken as its centroid
            centroid_indices.append(indices[0])
    else:
        symm_matrix = convert_tril_to_symm(lower_triangle_values)
        if method in ('QT', 'DQT'):
            # QT: my implementation of the original QT algorithm, tighter than Butina
            # DQT: issues with symmetry related multiplicities
            cluster_assignment, centroid_indices = QT(symm_matrix, threshold)
        elif method == 'BitQT':
            # supposed to produce identical result as QT but it does not
            cluster_assignment, centroid_indices = BitQT(symm_matrix, threshold)
        elif method == 'NMRCLUST':
            # looser than Butina, does not require threshold
            cluster_assignment, centroid_indices = NMRCLUST(symm_matrix)
        elif method == 'DynamicTreeCut':
            # often collapses into single cluster. so not very useful.
            cluster_assignment, centroid_indices = DynamicTreeCut(symm_matrix)
        else:  # AutoGraph
            # not reliable
            cluster_assignment, centroid_indices = AutoGraph(symm_matrix)

    # cluster_assignment: ex. [0,1,0,0,2,..]
    # centroid_indices: ex. [10,5,..] i.e. centroids of clusters 0 and 1 are 10 and 5, respectively.
    if cluster_assignment is None or centroid_indices is None:
        return self

    cluster_raw_data = defaultdict(list)
    for conf_idx, cluster_idx in enumerate(cluster_assignment):
        cluster_raw_data[cluster_idx].append(conf_idx)

    cluster_list = []
    for i, key in enumerate(sorted(cluster_raw_data.keys())):
        members = cluster_raw_data[key]
        energies = [self.confs[conf_idx].props['E_rel(kcal/mol)'] for conf_idx in members]
        q75, q25 = np.percentile(energies, [75, 25])
        cluster_list.append({
            'confs': members,
            'centroid': centroid_indices[i],  # conformer index
            'size': len(members),
            'mean_energy': np.mean(energies),
            'median_energy': np.median(energies),
            'iqr_energy': q75 - q25,  # interquartile range (IQR)
        })

    # cluster numbering follows the requested order
    if sortby == 'size':
        cluster_list = sorted(cluster_list, key=lambda x: x['size'], reverse=True)
    else:  # energy
        cluster_list = sorted(cluster_list, key=lambda x: x['median_energy'], reverse=False)

    for cluster_idx, cluster_dict in enumerate(cluster_list, start=1):
        for conf_idx in cluster_dict['confs']:
            self.confs[conf_idx].props.update({
                'cluster': cluster_idx,
                'cluster_mean_energy': cluster_dict['mean_energy'],
                'cluster_median_energy': cluster_dict['median_energy'],
                'cluster_IQR_energy': cluster_dict['iqr_energy'],
                'cluster_size': cluster_dict['size'],
                'cluster_centroid': conf_idx == cluster_dict['centroid'],
            })
    return self
697
+
698
+
699
def drop_confs(self,
               stereo_flipped: bool = True,
               unconverged: bool = True,
               similar: bool | None = None,
               similar_rmsd: float = 0.3,
               cluster: bool | None = None,
               k: int | None = None,
               window: float | None = None,
               verbose: bool = False) -> Self:
    """Drop conformers that meet some condition(s).

    The filters are applied in the order of the arguments, each one on the
    conformers retained by the previous one. The original object is not
    modified.

    Args:
        stereo_flipped (bool): drop conformers whose R/S and cis/trans stereo is unintentionally flipped.
            For example, a trans double bond in a macrocyle can end up with both trans
            and cis isomers in the final optimized conformers.
        unconverged (bool): drop unconverged conformers. see `Converged` property.
            Conformers without the property are retained.
        similar (bool, optional): drop similar conformers. see `similar_rmsd`.
        similar_rmsd (float): RMSD (A) below `similar_rmsd` is regarded similar (default: 0.3)
        cluster (bool, optional): drop all except for the centroid conformer in each cluster.
            see `cluster_centroid` property set by `cluster_confs()`.
            Conformers without the property are retained.
        k (int, optional): drop all except for the first `k` conformers
            (the `k` lowest energy conformers if they are sorted by energy).
        window (float, optional): drop all except for conformers within `window` of relative energy.
            see `E_rel(kcal/mol)` property. Conformers without the property are retained.
        verbose (bool): log the number of dropped conformers at each step.

    Returns:
        Self: a copy of rdworks.Mol object.

    Examples:
        To drop similar conformers within rmsd of 0.5 A
        >>> mol.drop_confs(similar=True, similar_rmsd=0.5)

        To drop conformers beyond 5 kcal/mol
        >>> mol.drop_confs(window=5.0)

    """
    obj = copy.deepcopy(self)

    if stereo_flipped and obj.count() > 0:
        mask = [Chem.MolToSmiles(Chem.RemoveHs(conf.rdmol)) == obj.smiles for conf in obj.confs]
        obj.confs = list(itertools.compress(obj.confs, mask))
        if verbose:
            main_logger.info(f'drop_confs stereo_flipped={mask.count(False)} -> {obj.count()}')

    if unconverged and obj.count() > 0:
        mask = [conf.props.get('Converged', True) for conf in obj.confs]
        obj.confs = list(itertools.compress(obj.confs, mask))
        if verbose:
            main_logger.info(f'drop_confs unconverged={mask.count(False)} -> {obj.count()}')

    if similar and obj.count() > 1:
        # it is observed that there are essentially identical conformers
        # such as 180-degree ring rotation and there is not minor conformational variations
        # in the RDKit ETKDG generated conformers.
        # copies are made for rmsd calculations to prevent coordinates changes
        conf_rdmols_noH = [Chem.RemoveHs(Chem.Mol(conf.rdmol)) for conf in obj.confs]
        lower_triangle_values = []
        for i in range(obj.count()):  # number of conformers
            for j in range(i):
                # rdMolAlign.GetBestRMS takes symmetry into account
                # removed hydrogens to speed up
                best_rms = rdMolAlign.GetBestRMS(prbMol=conf_rdmols_noH[i],
                                                 refMol=conf_rdmols_noH[j])
                lower_triangle_values.append(best_rms)
        symm_matrix = convert_tril_to_symm(lower_triangle_values)
        cluster_assignment, centroid_indices = QT(symm_matrix, similar_rmsd)
        # only the centroid of each group of similar conformers is retained
        centroid_set = set(centroid_indices)
        mask = [conf_idx in centroid_set for conf_idx in range(obj.count())]
        obj.confs = list(itertools.compress(obj.confs, mask))
        if verbose:
            main_logger.info(f'drop_confs similar({similar_rmsd})={mask.count(False)} -> {obj.count()}')

    if cluster and obj.count() > 1:
        # drop non-centroid cluster member(s)
        # cluster_confs() stores the flag as `cluster_centroid`;
        # `centroid` is still honored as a fallback key.
        mask = [conf.props.get('cluster_centroid', conf.props.get('centroid', True))
                for conf in obj.confs]
        obj.confs = list(itertools.compress(obj.confs, mask))
        if verbose:
            main_logger.info(f'drop_confs cluster(non-centroid)={mask.count(False)} -> {obj.count()}')

    if (k or window) and obj.count() > 0:
        if k:
            mask_k = [i < k for i in range(obj.count())]
        else:
            mask_k = [True] * obj.count()
        if window:
            mask_window = [conf.props['E_rel(kcal/mol)'] < window
                           if 'E_rel(kcal/mol)' in conf.props else True
                           for conf in obj.confs]
        else:
            mask_window = [True] * obj.count()
        # retain conformer(s) that satisfy both k and window conditions
        mask = [(x and y) for (x, y) in zip(mask_k, mask_window)]
        obj.confs = list(itertools.compress(obj.confs, mask))
        if verbose:
            main_logger.info(f'drop_confs k and/or window={mask.count(False)} -> {obj.count()}')

    return obj
807
+
808
+
809
def count(self) -> int:
    """Report how many conformers this molecule currently holds.

    Returns:
        int: length of ``self.confs``.
    """
    n_confs = len(self.confs)
    return n_confs
816
+
817
+
818
def is_nn_applicable(self, model:str) -> bool:
    """Check if a particular neural network model is applicable to current molecule.

    The model name is matched case-insensitively, so `AIMNET`, `AIMNet2`
    and `aimnet` are all accepted (as are `ANI-2x` / `ani-2x`).

    A model is reported as applicable when every atom of ``self.rdmol``
    belongs to the element set listed for that model below. For the ANI
    models, ``self.props['charge']`` must additionally be zero.

    Args:
        model (str): neural network models: `ANI-2x`, `ANI-2xt`, `AIMNET`, `AIMNET2`

    Raises:
        ValueError: if model is not supported.

    Returns:
        bool: True if applicable.
    """
    # Normalize once so that every branch compares the same spelling.
    model_key = model.lower()

    if model_key in ('ani-2x', 'ani-2xt'):
        if self.props['charge'] != 0:
            return False
        # H, C, N, O, F, S, Cl
        supported = {1, 6, 7, 8, 9, 16, 17}

    elif model_key in ('aimnet', 'aimnet2'):
        # H, B, C, N, O, F, Si, P, S, Cl, As, Se, Br, I
        supported = {1, 5, 6, 7, 8, 9, 14, 15, 16, 17, 33, 34, 35, 53}

    else:
        raise ValueError('is_nn_applicable() supports ANI-2x, ANI-2xt, or AIMNET')

    return all(a.GetAtomicNum() in supported for a in self.rdmol.GetAtoms())
848
+
849
+
850
def charge(self) -> int:
    """Compute the formal charge of the whole molecule.

    Returns:
        int: value reported by ``rdmolops.GetFormalCharge`` for ``self.rdmol``.
    """
    formal_charge = rdmolops.GetFormalCharge(self.rdmol)
    return formal_charge
857
+
858
+
859
def symbols(self) -> list[str]:
    """Collect the element symbol of every atom, in atom order.

    Returns:
        list: one symbol string per atom of ``self.rdmol``.
    """
    collected = []
    for atom in self.rdmol.GetAtoms():
        collected.append(atom.GetSymbol())
    return collected
866
+
867
+
868
def numbers(self) -> list[int]:
    """Collect the atomic number of every atom, in atom order.

    Returns:
        list: one atomic number per atom of ``self.rdmol``.
    """
    collected = []
    for atom in self.rdmol.GetAtoms():
        collected.append(atom.GetAtomicNum())
    return collected
875
+
876
+
877
def torsion_atoms(self, strict:bool=True) -> list[tuple]:
    """Determine dihedral angle atoms (a-b-c-d) and rotating group for each rotatable bond (b-c).

    For every bond (b-c) matched by the rotatable-bond SMARTS:

    1. ``a`` is picked among the neighbors of ``b`` (other than ``c``) and
       ``d`` among the neighbors of ``c`` (other than ``b``). H, F, Cl, Br
       and I are never picked. The neighbor with the highest atomic number
       wins; ties go to the lowest atom index.
    2. The bond is skipped if no such neighbor exists, or if a-b (or c-d)
       both sit in the same ring of size 3 or 4.
    3. The b-c bond is removed on a scratch copy to split the molecule. The
       side with fewer heavy atoms becomes the rotating group; the bond is
       skipped unless both sides hold at least three heavy atoms.

    Args:
        strict (bool): whether to exclude amide/imide/ester/acid bonds.

    Returns:
        [ (a, b, c, d, rot_atom_indices, fix_atom_indices),
          (a, b, c, d, rot_atom_indices, fix_atom_indices),
          ...,
        ]
    """
    # https://github.com/rdkit/rdkit/blob/1bf6ef3d65f5c7b06b56862b3fb9116a3839b229/rdkit/Chem/Lipinski.py#L47%3E
    # https://github.com/rdkit/rdkit/blob/de602c88809ea6ceba1e8ed50fd543b6e406e9c4/Code/GraphMol/Descriptors/Lipinski.cpp#L108
    if strict:
        # excludes amide/imide/ester/acid bonds
        rotatable_bond_pattern = Chem.MolFromSmarts(
            (
                "[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])("
                "[CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])&!$([#7,O,S!D1]-!@[CD3]="
                "[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])&!$([#7!D1]-!@[CD3]=[N+])]-,:;!@[!$"
                "(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])(["
                "CH3])[CH3])]"
            )
        )
    else:
        rotatable_bond_pattern = Chem.MolFromSmarts('[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]')
    rotatable_bonds = self.rdmol.GetSubstructMatches(rotatable_bond_pattern)

    # small rings (n=3 or 4), stored as sets for cheap membership tests
    small_rings = [set(r) for r in self.rdmol.GetRingInfo().AtomRings() if len(r) < 5]

    forbidden_terminal_nuclei = {1, 9, 17, 35, 53}  # H,F,Cl,Br,I

    def pick_terminal(center_idx, partner_idx):
        """Return the terminal atom index on ``center_idx``, or None to skip the bond."""
        candidates = [
            (nbr.GetAtomicNum(), nbr.GetIdx())
            for nbr in self.rdmol.GetAtomWithIdx(center_idx).GetNeighbors()
            if nbr.GetIdx() != partner_idx
            and nbr.GetAtomicNum() not in forbidden_terminal_nuclei
        ]
        if not candidates:
            return None
        # highest atomic number first, then lowest atom index
        (_, terminal_idx) = max(candidates, key=lambda x: (x[0], -x[1]))
        # is terminal-center in a small ring (n=3 or 4)?
        if any((terminal_idx in ring) and (center_idx in ring) for ring in small_rings):
            return None
        return terminal_idx

    def heavy_atom_count(atom_indices):
        """Count atoms with atomic number > 1 among ``atom_indices``."""
        return sum(1 for i in atom_indices if self.rdmol.GetAtomWithIdx(i).GetAtomicNum() > 1)

    torsion_angle_atom_indices = []

    for (b_idx, c_idx) in rotatable_bonds:
        a_idx = pick_terminal(b_idx, c_idx)
        if a_idx is None:
            continue

        d_idx = pick_terminal(c_idx, b_idx)
        if d_idx is None:
            continue

        # determine a group of atoms to be rotated
        # https://ctr.fandom.com/wiki/Break_rotatable_bonds_and_report_the_fragments
        em = Chem.EditableMol(self.rdmol)
        em.RemoveBond(b_idx, c_idx)
        fragments = Chem.GetMolFrags(em.GetMol(), asMols=False)  # tuple of tuples

        # Select the two sides by membership instead of unpacking exactly two
        # fragments: a multi-component molecule (e.g. with a counter-ion)
        # yields more than two fragments after the bond is removed.
        frag_b = next(f for f in fragments if b_idx in f)
        frag_c = next(f for f in fragments if c_idx in f)
        if frag_b is frag_c:
            # removing the bond did not separate b from c; nothing to rotate
            continue

        hac_b = heavy_atom_count(frag_b)
        hac_c = heavy_atom_count(frag_c)

        # smaller fragment will be rotated and must contain at least three heavy atoms
        if min(hac_b, hac_c) >= 3:
            (frag_rot, frag_fix) = sorted([(hac_b, frag_b), (hac_c, frag_c)])
            torsion_angle_atom_indices.append(
                (a_idx, b_idx, c_idx, d_idx, frag_rot[1], frag_fix[1]))

    return torsion_angle_atom_indices
979
+
980
+
981
+ def compute(self, **kwargs) -> Self:
982
+ """Change settings for parallel computing.
983
+
984
+ Args:
985
+ max_workers (int): max number of workers.
986
+ chunksize (int): chunksize of splitted workload.
987
+ progress (bool): whether to show progress bar.
988
+
989
+ Returns:
990
+ Self: rdworks.MolLibr object.
991
+ """
992
+ self.max_workers = kwargs.get('max_workers', self.max_workers)
993
+ self.chunksize = kwargs.get('chunksize', self.chunksize)
994
+ self.progress = kwargs.get('progress', self.progress)
995
+ return self
996
+
997
+
998
@staticmethod
def _map_optimize_conf(conf:Conf, targs:tuple) -> Conf:
    """Two-argument adapter that runs Conf.optimize() on `conf`.

    The positional arguments for Conf.optimize() arrive packed in a single
    tuple, so this function can be used with map()-style callers that
    supply one conformer and one argument tuple per call.

    Args:
        conf (Conf): subject rdworks.Conf object.
        targs (tuple): positional arguments forwarded to Conf.optimize().

    Returns:
        Conf: whatever Conf.optimize() returns.
    """
    optimized = conf.optimize(*targs)
    return optimized
1014
+
1015
+
1016
+ def torsion_energies(self,
1017
+ calculator:str | Callable,
1018
+ fmax:float = 0.05,
1019
+ interval:float = 15.0,
1020
+ use_converged_only: bool = True,
1021
+ optimize_ref: bool = False,
1022
+ **kwargs,
1023
+ ) -> Self:
1024
+ """Calculates potential energy profiles for each torsion angle using ASE optimizer.
1025
+
1026
+ Args:
1027
+ calculator (str | Callable): 'MMFF', 'UFF', or ASE calculator.
1028
+ fmax (float, optional): fmax of ASE optimizer. Defaults to 0.05.
1029
+ interval (float, optional): interval of torsion angles in degree. Defaults to 15.0.
1030
+ use_converged_only (bool, optional): whether to use only converged data. Defaults to True.
1031
+
1032
+ Returns:
1033
+ list[dict]: [{'indices':list, 'angle':list, 'E_rel(kcal/mol)':list}, ...]
1034
+ """
1035
+ self = self.compute(**kwargs)
1036
+
1037
+ torsion_atoms_indices = self.torsion_atoms()
1038
+
1039
+ ref_conf = self.confs[0].copy() # use the lowest energy conformer as a reference
1040
+ if optimize_ref:
1041
+ ref_conf = ref_conf.optimize(calculator, fmax)
1042
+
1043
+ # mol.confs will be populated with torsion conformers.
1044
+ # It is designed for a batch optimization in the future.
1045
+ mol = self.copy()
1046
+ mol.confs = []
1047
+ data = []
1048
+
1049
+ for k, (a, b, c, d, rot_indices, fix_indices) in enumerate(torsion_atoms_indices):
1050
+ data.append({'angle':[], 'init':[], 'final':[], 'Converged':[]})
1051
+ for angle in np.arange(-180.0, 180.0, interval):
1052
+ # Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
1053
+ x = ref_conf.copy()
1054
+ x.props.update({'torsion_index': k, 'angle': float(angle)})
1055
+ AllChem.SetDihedralDeg(x.rdmol.GetConformer(), a, b, c, d, angle)
1056
+ # All atoms bonded to atom d will move.
1057
+ mol.confs.append(x)
1058
+
1059
+ # Optimize
1060
+ # with ProcessPoolExecutor(max_workers=self.max_workers) as executor:
1061
+ # largs = [ (calculator, fmax,) ] * mol.count()
1062
+ # if self.progress:
1063
+ # lconfs = list(tqdm(
1064
+ # executor.map(Mol._map_optimize_conf, mol.confs, largs, chunksize=1),
1065
+ # desc="Optimize conformers",
1066
+ # total=mol.count()))
1067
+ # else:
1068
+ # lconfs = list(
1069
+ # executor.map(Mol._map_optimize_conf, mol.confs, largs, chunksize=1))
1070
+ # mol.confs = lconfs
1071
+
1072
+ # Calculate relaxation energies
1073
+ for conf in mol.confs:
1074
+ conf = conf.optimize(calculator, fmax)
1075
+ # conf.optimize() updates coordinates and conf.props:
1076
+ # `angle`, `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
1077
+ i = conf.props['torsion_index']
1078
+ data[i]['angle'].append(conf.props['angle'])
1079
+ data[i]['init'].append(conf.props['E_tot_init(kcal/mol)'])
1080
+ data[i]['final'].append(conf.props['E_tot(kcal/mol)'])
1081
+ data[i]['Converged'].append(conf.props['Converged'])
1082
+
1083
+ # Post-processing
1084
+ torsion_energy_profiles = []
1085
+ for indices, datadict in zip(torsion_atoms_indices, data):
1086
+ if use_converged_only:
1087
+ datadict['angle'] = list(itertools.compress(datadict['angle'], datadict['Converged']))
1088
+ datadict['init'] = list(itertools.compress(datadict['init'], datadict['Converged']))
1089
+ datadict['final'] = list(itertools.compress(datadict['final'], datadict['Converged']))
1090
+ relax = np.array(datadict['init']) - np.median(datadict['final'])
1091
+ E_rel = relax - np.min(relax)
1092
+ torsion_energy_profiles.append({
1093
+ 'indices': indices, # (a, b, c, d, rot_indices, fix_indices)
1094
+ 'angle': np.array(datadict['angle']).tolist(), # np.ndarray -> list for serialization
1095
+ 'E_rel(kcal/mol)': E_rel.tolist(), # np.ndarray -> list for serialization
1096
+ })
1097
+ self.props['torsion'] = torsion_energy_profiles
1098
+ self.props['torsion_calculator'] = str(calculator)
1099
+
1100
+ return self
1101
+
1102
+
1103
+
1104
+
1105
def similarity(self, other:object) -> float:
    """Compute the Tanimoto similarity between this molecule and `other`.

    A fingerprint is generated with ``MFP2`` and cached on ``.fp`` for
    whichever of the two molecules does not have one yet.

    Args:
        other (rdworks.Mol): other rdworks.Mol object.

    Raises:
        TypeError: if `other` is not rdworks.Mol object type.

    Returns:
        float: Tanimoto similarity.
    """
    if not isinstance(other, Mol):
        raise TypeError("Mol.is_similar() expects Mol object")
    # lazily fill in missing fingerprints, self first and then other
    for mol in (self, other):
        if not mol.fp:
            mol.fp = mol.MFP2.GetFingerprint(mol.rdmol)
    score = DataStructs.TanimotoSimilarity(self.fp, other.fp)
    return score
1124
+
1125
+
1126
def is_similar(self, other:object, threshold:float) -> bool:
    """Tell whether `other` reaches the similarity `threshold`.

    Args:
        other (rdworks.Mol): other rdworks.Mol object to compare with.
        threshold (float): Tanimoto similarity threshold (inclusive).

    Returns:
        bool: True if ``self.similarity(other)`` is at least `threshold`.
    """
    score = self.similarity(other)
    return score >= threshold
1137
+
1138
+
1139
def is_matching(self, terms: str | Path, invert:bool=False) -> bool:
    """Determines if the molecule matches the predefined substructure and/or descriptor ranges.

    invert | terms(~ or !) | effect
    ------ | ------------- | -------------
    True   |      ~        | No inversion
    True   |               | Inversion
    False  |      ~        | Inversion
    False  |               | No inversion

    `terms` is resolved to an XML definition: a path object is used as
    given; a string is used as a file path if such a file exists, and is
    otherwise looked up with ``get_predefined_xml()``. Leading ``~`` / ``!``
    characters of a string are stripped and toggle `invert`.

    Descriptor values computed here are cached in ``self.props``.

    Args:
        terms (str | Path):
            name of a predefined definition or a path to an XML file.
        invert (bool, optional): whether to invert the result. Defaults to False.

    Returns:
        bool: True if matches. For a `terms` that is neither a string nor
            a path, the available predefined definitions are printed and
            False is returned.
    """
    if isinstance(terms, pathlib.PurePath):
        # PurePath covers every pathlib flavour, not only PosixPath
        path = terms.as_posix()
    elif isinstance(terms, str):
        if terms.startswith(('~', '!')):
            # strip only the leading marker(s); keep the rest of the text intact
            terms = terms.lstrip('~!')
            invert = not invert
        candidate = pathlib.Path(terms) # test if terms points to a xml file
        try:
            is_xml_file = candidate.is_file()
        except (OSError, ValueError):
            # unusable as a file-system path; treat as a predefined name
            is_xml_file = False
        path = candidate if is_xml_file else get_predefined_xml(terms)
    else:
        print(list_predefined_xml())
        return False

    (lterms, combine) = parse_xml(path)
    mask = []
    for (name, smarts, lb, ub) in lterms:
        if smarts:
            query = Chem.MolFromSmarts(smarts)
            mask.append(len(self.rdmol.GetSubstructMatches(query)) > 0)
        else: # descriptor lower and upper bounds
            if name not in self.props:
                val = rd_descriptor_f[name](self.rdmol)
                self.props.update({name: val})
            else:
                val = self.props[name]
            # NOTE(review): a falsy bound (including a numeric 0) is treated
            # as "no bound" - confirm against what parse_xml() returns.
            above_lb = (not lb) or (val >= lb)
            below_ub = (not ub) or (val <= ub)
            mask.append(bool(above_lb and below_ub))

    combine = combine.lower()
    matched = (combine == 'or' and any(mask)) or (combine == 'and' and all(mask))
    return invert ^ matched # XOR(^) inverts only if invert is True
1198
+
1199
+
1200
def is_stereo_specified(self) -> bool:
    """Check if every considered stereo element of the molecule is specified.

    The elements returned by `Chem.FindPotentialStereo()` are inspected.
    Only two element types are considered:

    - ``Atom_Tetrahedral``: always considered.
    - ``Bond_Double``: considered unless either bonded atom is nitrogen.

    All other element types (Atom_Octahedral, Atom_SquarePlanar,
    Atom_TrigonalBipyramidal, Bond_Atropisomer, Bond_Cumulene_Even,
    Unspecified) are ignored.

    Fields of an element used here:
        element.type:
            the kind of stereo element (a `Chem.StereoType` value).
        element.centeredOn:
            the atom or bond index where the stereochemistry is centered.
        element.specified:
            a `Chem.StereoSpecified` value (Unspecified, Specified, Unknown);
            only ``Specified`` counts as specified.

    Returns:
        bool: True if stereo-specified. Also True when no element is
            considered, because ``all([])`` is True.
    """
    def _is_specified(element):
        return element.specified == Chem.StereoSpecified.Specified

    flags = []
    for element in Chem.FindPotentialStereo(self.rdmol):
        kind = element.type
        if kind == Chem.StereoType.Atom_Tetrahedral:
            flags.append(_is_specified(element))
            continue
        if kind != Chem.StereoType.Bond_Double:
            continue
        bond = self.rdmol.GetBondWithIdx(element.centeredOn)
        end_symbols = (bond.GetBeginAtom().GetSymbol(), bond.GetEndAtom().GetSymbol())
        if 'N' in end_symbols:
            # double bonds involving nitrogen are not required to be specified
            continue
        flags.append(_is_specified(element))
    # note all([]) returns True
    return all(flags)
1251
+
1252
+
1253
def get_ring_bond_stereo(self) -> list[tuple]:
    """List the stereo descriptor of every potential stereo double bond that is in a ring.

    Returns:
        list[tuple]: [(element.centeredOn, element.descriptor), ...]
    """
    return [
        (element.centeredOn, element.descriptor)
        for element in Chem.FindPotentialStereo(self.rdmol)
        if element.type == Chem.StereoType.Bond_Double
        and self.rdmol.GetBondWithIdx(element.centeredOn).IsInRing()
    ]
1266
+
1267
+
1268
def report_stereo(self) -> None:
    """Print a stereochemistry summary to standard output.

    The first line gives the number of unspecified and total atomic
    stereocenters. After a heading line, one line is printed for each
    element returned by `Chem.FindPotentialStereo()`; double bonds and
    tetrahedral atoms get extra detail in front of the element's
    ``specified`` and ``descriptor`` values.
    """
    # total = specified and unspecified atomic stereocenters
    total = rdMolDescriptors.CalcNumAtomStereoCenters(self.rdmol)
    unspecified = rdMolDescriptors.CalcNumUnspecifiedAtomStereoCenters(self.rdmol)
    print(f"chiral centers = unspecified {unspecified} / total {total}")
    print("stereogenic double bonds =")
    for element in Chem.FindPotentialStereo(self.rdmol):
        # element.type= Atom_Octahedral, Atom_SquarePlanar, Atom_Tetrahedral,
        # Atom_TrigonalBipyramidal,
        # Bond_Atropisomer, Bond_Cumulene_Even, Bond_Double,
        # Unspecified
        fields = []
        if element.type == Chem.StereoType.Bond_Double:
            bond = self.rdmol.GetBondWithIdx(element.centeredOn)
            end_symbols = (bond.GetBeginAtom().GetSymbol(), bond.GetEndAtom().GetSymbol())
            is_nitrogen = 'N' in end_symbols
            fields.append(f' {element.type} bond: {element.centeredOn}')
            fields.append(f'ring: {bond.IsInRing()} N: {is_nitrogen}')
        elif element.type == Chem.StereoType.Atom_Tetrahedral:
            fields.append(f' {element.type} atom: {element.centeredOn}')
            fields.append(f'atoms {list(element.controllingAtoms)}')
        fields.append(f'{element.specified} {element.descriptor}') # type: Chem.StereoDescriptor
        # print() joins the fields with single spaces on one line
        print(*fields)
1292
+
1293
+
1294
def report_props(self) -> None:
    """Print the properties as an aligned key/value listing.

    Keys are right-padded with spaces to a common width (five characters
    wider than the longest key) so that the values line up in a column.
    """
    if not self.props:
        print("Properties: None")
        return

    print(f"Properties({len(self.props)}):")
    padded_width = max(len(key) for key in self.props) + 4 + 1
    for key, value in self.props.items():
        k = key.ljust(padded_width)
        v = value
        print(f" {k} {v}")
1306
+
1307
+
1308
def to_sdf(self, confs:bool=False, props:bool=True) -> str:
    """Render the molecule, or each of its conformers, as SDF text.

    Every record is written from a copy of the underlying RDKit molecule,
    so the stored molecules are not modified. With `props`, molecule
    properties are written first and conformer properties second, so a
    conformer property wins when both use the same key.

    Args:
        confs (bool, optional): whether to include conformers. Defaults to False.
        props (bool, optional): whether to include properties. Defaults to True.

    Returns:
        str: strings of SDF output.
    """
    # (source rdmol, record title, property dicts in write order)
    if confs:
        records = [(conf.rdmol, conf.name, (self.props, conf.props)) for conf in self.confs]
    else:
        records = [(self.rdmol, self.name, (self.props,))]

    in_memory = io.StringIO()
    with Chem.SDWriter(in_memory) as f:
        for source_rdmol, title, prop_dicts in records:
            rdmol = Chem.Mol(source_rdmol)
            rdmol.SetProp('_Name', title)
            if props:
                for prop_dict in prop_dicts:
                    for k, v in prop_dict.items():
                        rdmol.SetProp(k, str(v))
            f.write(rdmol)
    return in_memory.getvalue()
1340
+
1341
+
1342
def to_image(self, width:int=300, height:int=300, index:bool=False, svg:bool=True) -> object:
    """Returns a depiction of the molecule.

    The drawing is made from a copy of ``self.rdmol``, so atom index notes
    added for `index` do not stay on the stored molecule.

    With ``svg=False`` the single-molecule ``Draw.MolToImage`` is used and
    a PIL image is returned; use its ``.save(output_filename)`` method to
    save as an image file. With ``svg=True`` a one-cell
    ``Draw.MolsToGridImage(..., useSVG=True)`` is used.

    NOTE(review): the previous code called ``Draw.MolsToImage`` (a
    list-of-molecules API) with a single molecule and ``MolToImage``
    keywords; confirm the intended return type for ``svg=True``.

    Args:
        width (int, optional): width of image. Defaults to 300.
        height (int, optional): height of image. Defaults to 300.
        index (bool, optional): whether to show 1-based atom indexes as atom notes. Defaults to False.
        svg (bool, optional): whether to return in SVG format. Defaults to True.

    Returns:
        object: SVG drawing if `svg` is True, otherwise a PIL image object.
    """
    rdmol = Chem.Mol(self.rdmol)  # work on a copy; do not annotate self.rdmol
    if index:
        for a in rdmol.GetAtoms():
            a.SetProp("atomNote", str(a.GetIdx()+1))

    if svg:
        return Draw.MolsToGridImage([rdmol],
                                    molsPerRow=1,
                                    subImgSize=(width,height),
                                    useSVG=True)

    return Draw.MolToImage(rdmol,
                           size=(width,height),
                           kekulize=True,
                           wedgeBonds=True, # draw wedge (stereo)
                           fitImage=False,
                           options=None,
                           canvas=None)
1368
+
1369
+
1370
def to_svg(self,
           width:int = 400,
           height:int = 400,
           legend:str = '',
           index:bool = False,
           highlight: list[int] | None = None,
           coordgen:bool = False) -> str:
    """Draw a 2D depiction of the molecule and return it as SVG text.

    The depiction is computed on a copy of ``self.rdmol``. Atom properties
    listed by ``GetPropsAsDict()`` are cleared on that copy before drawing.

    Examples:
        For Jupyternotebook, wrap the output with SVG:

        >>> from IPython.display import SVG
        >>> SVG(libr[0].to_svg())

    Args:
        width (int): width (default:400)
        height (int): height (default:400)
        legend (str): legend
        index (bool): True/False whether to display atom index
        highlight (list): list of atom indices to highlight
        coordgen (bool): value passed to ``rdDepictor.SetPreferCoordGen()``
            before the 2D coordinates are computed (default:False)

    Returns:
        str: SVG text
    """
    rdDepictor.SetPreferCoordGen(coordgen)

    depiction = Chem.Mol(self.rdmol)
    rdDepictor.Compute2DCoords(depiction)
    rdDepictor.StraightenDepiction(depiction)

    for atom in depiction.GetAtoms():
        for prop_name in atom.GetPropsAsDict():
            atom.ClearProp(prop_name)
        if index: # index hides polar hydrogens
            atom.SetProp("atomLabel", str(atom.GetIdx()))

    draw_kwargs = {'legend': legend}
    if highlight:
        draw_kwargs['highlightAtoms'] = highlight

    drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
    drawer.DrawMolecule(depiction, **draw_kwargs)
    drawer.FinishDrawing()
    return drawer.GetDrawingText()
1418
+
1419
+
1420
def plot_energy(self, df:pd.DataFrame) -> str:
    """Plot a dihedral energy profile with Seaborn and return it as SVG text.

    Input pandas DataFrame must have columns: `angle` and `E_rel(kcal/mol)`

    When an angle occurs more than once, 95% confidence-interval error
    bars are drawn. The y-axis is limited to (-1.5, 35.0) when the largest
    energy exceeds 35.0, to (-1.5, 5.0) when it is below 5.0, and is left
    automatic otherwise. The current matplotlib figure is cleared after
    it has been saved.

    Args:
        df (pd.DataFrame): input dataframe.

    Returns:
        str: Seaborn plot in strings.
    """
    sns.color_palette("tab10")
    sns.set_style("whitegrid")

    line_kwargs = dict(x="angle",
                       y="E_rel(kcal/mol)",
                       data=df,
                       marker='o',
                       markersize=10)
    has_repeated_angles = len(df['angle']) != len(df['angle'].drop_duplicates())
    if has_repeated_angles:
        line_kwargs.update(errorbar=('ci', 95), err_style='bars')
    g = sns.lineplot(**line_kwargs)

    g.xaxis.set_major_locator(ticker.MultipleLocator(30))
    g.xaxis.set_major_formatter(ticker.ScalarFormatter())

    axis_kwargs = dict(title=self.name,
                       xlabel='Dihedral Angle (degree)',
                       ylabel='Relative Energy (Kcal/mol)',
                       xlim=(-190, 190))
    if df["E_rel(kcal/mol)"].max() > 35.0:
        axis_kwargs['ylim'] = (-1.5, 35.0)
    elif df["E_rel(kcal/mol)"].max() < 5.0:
        axis_kwargs['ylim'] = (-1.5, 5.0)
    g.set(**axis_kwargs)

    g.tick_params(axis='x', rotation=30)
    in_memory = io.StringIO()
    plt.savefig(in_memory, format='svg', bbox_inches='tight')
    plt.clf()
    return in_memory.getvalue()
1473
+
1474
+
1475
def to_html(self, htmlbody:bool=False) -> str:
    """Build an HTML report of the dihedral energy profiles.

    One table row is produced per entry of ``self.props['torsion']``: the
    a-b-c-d atom indices, the molecule depiction with those four atoms
    highlighted, and the energy plot.

    Args:
        htmlbody (bool, optional): whether to wrap around with `<html><body>`. Defaults to False.

    Returns:
        str: HTML text.
    """
    pieces = []
    if htmlbody:
        pieces.append("<html><body>")
    # start of content
    pieces.append(f'<h1 style="text-align:left">{self.name}</h1>')
    pieces.append("<table>")
    for profile in self.props['torsion']: # list of dict
        (a1, a2, a3, a4, _, _) = profile['indices']
        df = pd.DataFrame({k:profile[k] for k in ['angle', 'E_rel(kcal/mol)']})
        svg_rdmol = self.to_svg(highlight=[a1, a2, a3, a4], index=True)
        svg_energy_plot = self.plot_energy(df)
        pieces.extend([
            "<tr>",
            f"<td>{a1}-{a2}-{a3}-{a4}</td>",
            f"<td>{svg_rdmol}</td>",
            f"<td>{svg_energy_plot}</td>",
            "</tr>",
        ])
    pieces.append('</table>')
    pieces.append('<hr style="height:2px;border-width:0;color:gray;background-color:gray">')
    # end of content
    if htmlbody:
        pieces.append("</body></html>")
    return "".join(pieces)
1507
+
1508
+
1509
def serialize(self, key: str | None = None, decimal_places:int=2) -> str:
    """Dump the properties, or a single property, as a JSON string.

    The properties are first passed through ``fix_decimal_places_in_dict``
    with `decimal_places`.

    Args:
        key (str | None): key for a subset of properties. Defaults to None.
        decimal_places (int, optional): decimal places for float numbers. Defaults to 2.

    Returns:
        str: serialized JSON dumps; ``{key: value}`` only when `key` is given.
    """
    props = fix_decimal_places_in_dict(self.props, decimal_places)
    payload = {key: props[key]} if key else props
    return json.dumps(payload)