rdworks 0.25.8__py3-none-any.whl → 0.36.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdworks/__init__.py +19 -20
- rdworks/conf.py +319 -118
- rdworks/display.py +244 -83
- rdworks/mol.py +620 -489
- rdworks/mollibr.py +336 -180
- rdworks/readin.py +2 -4
- rdworks/scaffold.py +1 -1
- rdworks/std.py +64 -24
- rdworks/torsion.py +477 -0
- rdworks/units.py +7 -58
- rdworks/utils.py +141 -258
- rdworks/xtb/__init__.py +0 -0
- rdworks/xtb/wrapper.py +304 -0
- {rdworks-0.25.8.dist-info → rdworks-0.36.1.dist-info}/METADATA +6 -9
- {rdworks-0.25.8.dist-info → rdworks-0.36.1.dist-info}/RECORD +18 -15
- {rdworks-0.25.8.dist-info → rdworks-0.36.1.dist-info}/WHEEL +1 -1
- {rdworks-0.25.8.dist-info → rdworks-0.36.1.dist-info}/licenses/LICENSE +0 -0
- {rdworks-0.25.8.dist-info → rdworks-0.36.1.dist-info}/top_level.txt +0 -0
rdworks/mol.py
CHANGED
@@ -1,14 +1,13 @@
|
|
1
|
-
import io
|
2
1
|
import copy
|
3
|
-
import pathlib
|
4
2
|
import itertools
|
5
3
|
import json
|
6
4
|
import logging
|
7
5
|
import tempfile
|
8
6
|
|
7
|
+
from io import StringIO, BytesIO
|
8
|
+
from pathlib import Path
|
9
9
|
from collections import defaultdict
|
10
10
|
from collections.abc import Callable
|
11
|
-
from pathlib import Path
|
12
11
|
from typing import Iterator, Self
|
13
12
|
|
14
13
|
import numpy as np
|
@@ -22,36 +21,35 @@ import CDPL.Chem
|
|
22
21
|
import CDPL.ConfGen
|
23
22
|
|
24
23
|
from rdkit import Chem, DataStructs
|
25
|
-
|
26
24
|
from rdkit.Chem import (
|
27
25
|
rdMolDescriptors, AllChem, Descriptors, QED,
|
28
26
|
rdFingerprintGenerator,
|
29
|
-
Draw, rdDepictor,
|
27
|
+
Draw, rdDepictor, inchi,
|
30
28
|
rdDistGeom, rdMolAlign, rdMolTransforms, rdmolops
|
31
29
|
)
|
32
|
-
|
33
30
|
from rdkit.Chem.Draw import rdMolDraw2D
|
34
|
-
|
35
31
|
from rdkit.ML.Cluster import Butina
|
32
|
+
from PIL import Image
|
36
33
|
|
37
|
-
from rdworks.
|
34
|
+
from rdworks.conf import Conf
|
35
|
+
from rdworks.std import desalt_smiles, standardize, clean_2d
|
38
36
|
from rdworks.xml import list_predefined_xml, get_predefined_xml, parse_xml
|
39
37
|
from rdworks.scaffold import rigid_fragment_indices
|
40
38
|
from rdworks.descriptor import rd_descriptor, rd_descriptor_f
|
41
|
-
from rdworks.
|
42
|
-
from rdworks.utils import convert_tril_to_symm, QT, fix_decimal_places_in_dict
|
39
|
+
from rdworks.utils import convert_tril_to_symm, QT, recursive_round
|
43
40
|
from rdworks.units import ev2kcalpermol
|
44
41
|
from rdworks.autograph import NMRCLUST, DynamicTreeCut, RCKmeans, AutoGraph
|
45
42
|
from rdworks.bitqt import BitQT
|
46
|
-
from rdworks.
|
43
|
+
from rdworks.torsion import create_torsion_fragment, get_torsion_atoms
|
44
|
+
from rdworks.display import render_svg, render_png
|
47
45
|
|
46
|
+
from scour.scour import scourString
|
48
47
|
|
49
48
|
main_logger = logging.getLogger()
|
50
49
|
|
51
50
|
|
52
51
|
class Mol:
|
53
|
-
"""Container for molecular structure, conformers, and other information.
|
54
|
-
"""
|
52
|
+
"""Container for molecular structure, conformers, and other information."""
|
55
53
|
|
56
54
|
MFP2 = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
|
57
55
|
|
@@ -59,100 +57,98 @@ class Mol:
|
|
59
57
|
ETKDG_params.useSmallRingTorsions = True
|
60
58
|
ETKDG_params.maxIterations = 2000
|
61
59
|
|
62
|
-
|
63
60
|
def __init__(self,
|
64
|
-
|
65
|
-
name:str='',
|
66
|
-
std:bool=False,
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
61
|
+
molecule: str | Chem.Mol | Conf | None = None,
|
62
|
+
name: str = '',
|
63
|
+
std: bool = False,
|
64
|
+
reset_isotope: bool = True,
|
65
|
+
remove_H: bool = True,
|
66
|
+
max_workers: int = 1,
|
67
|
+
chunksize: int = 4,
|
68
|
+
progress: bool = False) -> None:
|
69
|
+
"""Initialize.
|
71
70
|
|
72
71
|
Examples:
|
73
|
-
>>> import
|
74
|
-
>>> m =
|
72
|
+
>>> from rdworks import Mol
|
73
|
+
>>> m = Mol('c1ccccc1', name='benzene')
|
75
74
|
|
76
75
|
Args:
|
77
|
-
|
78
|
-
name (str
|
79
|
-
std (bool
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
TypeError: No SMILES or rdkit.Chem.Mol object is provided.
|
84
|
-
RuntimeError: Desalting or standardization process failed.
|
76
|
+
molecule (str | Chem.Mol | None): SMILES or rdkit.Chem.Mol or None
|
77
|
+
name (str): name of the molecule. Defaults to ''.
|
78
|
+
std (bool): whether to standardize the molecule. Defaults to False.
|
79
|
+
max_workers (int): number of maximum workers for parallelization. Defaults to 1.
|
80
|
+
chunksize (int): batch size for parallelization. Defaults to 4.
|
81
|
+
progress (bool): whether to show progress bar. Defaults to False.
|
85
82
|
"""
|
86
|
-
|
87
|
-
|
88
|
-
self.
|
89
|
-
self.
|
83
|
+
assert isinstance(molecule, str | Chem.Mol | Conf) or molecule is None
|
84
|
+
|
85
|
+
self.rdmol = None # 2D, one and only one Conformer
|
86
|
+
self.smiles = '' # isomeric SMILES
|
87
|
+
self.confs = [] # container for 3D conformers
|
88
|
+
self.name = ''
|
89
|
+
self.InChIKey = '' # 27 characters (SHA-256 hash of InChI)
|
90
|
+
self.InChI = ''
|
90
91
|
self.props = {}
|
91
|
-
self.confs = [] # 3D conformers (iterable)
|
92
92
|
self.fp = None
|
93
93
|
self.max_workers = max_workers
|
94
94
|
self.chunksize = chunksize
|
95
95
|
self.progress = progress
|
96
|
-
|
97
|
-
if
|
96
|
+
|
97
|
+
if molecule is None:
|
98
|
+
return
|
99
|
+
|
100
|
+
if isinstance(molecule, str): # 1-D SMILES
|
98
101
|
try:
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
+
if "." in molecule: # mandatory desalting
|
103
|
+
(self.smiles, self.rdmol) = desalt_smiles(molecule)
|
104
|
+
else:
|
105
|
+
self.rdmol = Chem.MolFromSmiles(molecule)
|
106
|
+
self.smiles = Chem.MolToSmiles(self.rdmol)
|
102
107
|
except:
|
103
|
-
raise ValueError(f'Mol()
|
104
|
-
|
108
|
+
raise ValueError(f'Mol() Error: invalid SMILES {molecule}')
|
109
|
+
|
110
|
+
elif isinstance(molecule, Chem.Mol): # 2-D or 3-D Chem.Mol
|
105
111
|
try:
|
106
|
-
self.rdmol =
|
107
|
-
assert self.rdmol
|
112
|
+
self.rdmol, _ = clean_2d(molecule, reset_isotope, remove_H)
|
108
113
|
self.smiles = Chem.MolToSmiles(self.rdmol)
|
114
|
+
self.confs = [Conf(x) for x in _]
|
109
115
|
except:
|
110
|
-
raise ValueError('Mol()
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
### desalting
|
115
|
-
if "." in self.smiles:
|
116
|
+
raise ValueError(f'Mol() Error: invalid Chem.Mol object')
|
117
|
+
|
118
|
+
elif isinstance(molecule, Conf): # 3-D input
|
116
119
|
try:
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
+
self.rdmol, _ = clean_2d(molecule.rdmol, reset_isotope, remove_H)
|
121
|
+
self.smiles = Chem.MolToSmiles(self.rdmol)
|
122
|
+
self.confs = [molecule]
|
120
123
|
except:
|
121
|
-
raise
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
# standardization changes self.rdmol
|
126
|
-
try:
|
124
|
+
raise ValueError(f'Mol() Error: invalid Conf object')
|
125
|
+
|
126
|
+
try:
|
127
|
+
if std:
|
127
128
|
self.rdmol = standardize(self.rdmol)
|
128
129
|
self.smiles = Chem.MolToSmiles(self.rdmol)
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
130
|
+
except:
|
131
|
+
raise RuntimeError('Mol() Error: standardization')
|
132
|
+
|
133
|
+
assert self.smiles and self.rdmol, "Mol() Error: invalid molecule"
|
134
|
+
|
135
|
+
rdDepictor.Compute2DCoords(self.rdmol)
|
136
|
+
|
135
137
|
try:
|
136
138
|
self.name = str(name)
|
137
139
|
except:
|
138
140
|
self.name = 'untitled'
|
141
|
+
|
139
142
|
self.rdmol.SetProp('_Name', self.name) # _Name can't be None
|
140
|
-
|
141
|
-
|
143
|
+
self.InChI = Chem.MolToInchi(self.rdmol)
|
144
|
+
self.InChIKey = inchi.InchiToInchiKey(self.InChI)
|
142
145
|
self.props.update({
|
143
146
|
'aka' : [], # <-- to be set by MolLibr.unique()
|
144
|
-
'atoms' : self.rdmol.GetNumAtoms(),
|
145
|
-
# hydrogens not excluded
|
146
|
-
# m = Chem.MolFromSmiles("c1c[nH]cc1")
|
147
|
-
# m.GetNumAtoms()
|
148
|
-
# >> 5
|
149
|
-
# Chem.AddHs(m).GetNumAtoms()
|
150
|
-
# >> 10
|
147
|
+
'atoms' : self.rdmol.GetNumAtoms(), # hydrogens not excluded?
|
151
148
|
'charge': rdmolops.GetFormalCharge(self.rdmol),
|
152
|
-
# number of rotatable bonds
|
153
149
|
"nrb" : Descriptors.NumRotatableBonds(self.rdmol),
|
154
150
|
})
|
155
|
-
|
151
|
+
|
156
152
|
|
157
153
|
def __str__(self) -> str:
|
158
154
|
"""String representation of the molecule.
|
@@ -184,21 +180,21 @@ class Mol:
|
|
184
180
|
return hash(self.smiles)
|
185
181
|
|
186
182
|
|
187
|
-
def __eq__(self, other:
|
188
|
-
"""True if `other`
|
183
|
+
def __eq__(self, other: Self) -> bool:
|
184
|
+
"""True if `other` Mol is identical with this Mol.
|
189
185
|
|
190
|
-
It compares
|
186
|
+
It compares InChIKeys.
|
191
187
|
|
192
188
|
Examples:
|
193
189
|
>>> m1 == m2
|
194
190
|
|
195
191
|
Args:
|
196
|
-
other (object): other
|
192
|
+
other (object): other Mol object.
|
197
193
|
|
198
194
|
Returns:
|
199
195
|
bool: True if identical.
|
200
196
|
"""
|
201
|
-
return self.
|
197
|
+
return self.InChIKey == other.InChIKey
|
202
198
|
|
203
199
|
|
204
200
|
def __iter__(self) -> Iterator:
|
@@ -223,7 +219,7 @@ class Mol:
|
|
223
219
|
return next(self.confs)
|
224
220
|
|
225
221
|
|
226
|
-
def __getitem__(self, index: int | slice) -> Conf:
|
222
|
+
def __getitem__(self, index: int | slice) -> Conf | Self:
|
227
223
|
"""Conformer object of conformers of the molecule with given index or slice of indexes.
|
228
224
|
|
229
225
|
Examples:
|
@@ -232,31 +228,34 @@ class Mol:
|
|
232
228
|
Args:
|
233
229
|
index (int | slice): index for conformers.
|
234
230
|
|
235
|
-
Raises:
|
236
|
-
ValueError: conformers are not defined in the molecule or index is out of range.
|
237
|
-
|
238
231
|
Returns:
|
239
|
-
Conf
|
232
|
+
Conf or Mol(copy) with conformers specified by index.
|
240
233
|
"""
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
234
|
+
assert self.count() > 0, "no conformers"
|
235
|
+
|
236
|
+
if isinstance(index, slice):
|
237
|
+
new_object = self.copy()
|
238
|
+
new_object.confs = new_object.confs[index]
|
239
|
+
return new_object
|
247
240
|
|
241
|
+
else:
|
242
|
+
return self.confs[index]
|
248
243
|
|
244
|
+
|
249
245
|
def copy(self) -> Self:
|
250
246
|
"""Returns a copy of self.
|
251
247
|
|
252
248
|
Returns:
|
253
|
-
|
249
|
+
a copy of self.
|
254
250
|
"""
|
255
251
|
return copy.deepcopy(self)
|
256
252
|
|
257
253
|
|
258
|
-
def rename(self,
|
259
|
-
|
254
|
+
def rename(self,
|
255
|
+
prefix: str = '',
|
256
|
+
sep: str = '/',
|
257
|
+
start: int = 1) -> Self:
|
258
|
+
"""Updates name and conformer names.
|
260
259
|
|
261
260
|
The first conformer name is {prefix}{sep}{start}
|
262
261
|
|
@@ -266,11 +265,12 @@ class Mol:
|
|
266
265
|
start (int, optional): first serial number. Defaults to 1.
|
267
266
|
|
268
267
|
Returns:
|
269
|
-
Self:
|
268
|
+
Self: modified self.
|
270
269
|
"""
|
271
270
|
if prefix :
|
272
271
|
self.name = prefix
|
273
272
|
self.rdmol.SetProp('_Name', prefix)
|
273
|
+
|
274
274
|
# update conformer names
|
275
275
|
num_digits = len(str(self.count())) # ex. '100' -> 3
|
276
276
|
for (serial, conf) in enumerate(self.confs, start=start):
|
@@ -278,11 +278,13 @@ class Mol:
|
|
278
278
|
while len(serial_str) < num_digits:
|
279
279
|
serial_str = '0' + serial_str
|
280
280
|
conf.rename(f'{self.name}{sep}{serial_str}')
|
281
|
+
|
281
282
|
return self
|
282
283
|
|
283
284
|
|
284
|
-
def qed(self,
|
285
|
-
|
285
|
+
def qed(self,
|
286
|
+
properties: list[str] = ['QED', 'MolWt', 'LogP', 'TPSA', 'HBD']) -> Self:
|
287
|
+
"""Updates quantitative estimate of drug-likeness (QED) and other descriptors.
|
286
288
|
|
287
289
|
Args:
|
288
290
|
properties (list[str], optional): Defaults to ['QED', 'MolWt', 'LogP', 'TPSA', 'HBD'].
|
@@ -291,53 +293,54 @@ class Mol:
|
|
291
293
|
KeyError: if property key is unknown.
|
292
294
|
|
293
295
|
Returns:
|
294
|
-
Self:
|
296
|
+
Self: modified self.
|
295
297
|
"""
|
296
298
|
props_dict = {}
|
297
299
|
for k in properties:
|
298
300
|
try:
|
299
301
|
props_dict[k] = rd_descriptor_f[k](self.rdmol)
|
300
302
|
except:
|
301
|
-
raise KeyError(f'
|
303
|
+
raise KeyError(f'qed() Error: unknown property {k}')
|
302
304
|
self.props.update(props_dict)
|
305
|
+
|
303
306
|
return self
|
304
307
|
|
305
308
|
|
306
309
|
def remove_stereo(self) -> Self:
|
307
|
-
"""Removes stereochemistry
|
310
|
+
"""Removes stereochemistry.
|
308
311
|
|
309
312
|
Examples:
|
310
|
-
>>> m =
|
313
|
+
>>> m = Mol("C/C=C/C=C\\C", "double_bond")
|
311
314
|
>>> m.remove_stereo().smiles == "CC=CC=CC"
|
312
315
|
|
313
316
|
Returns:
|
314
|
-
Self:
|
317
|
+
Self: modified self.
|
315
318
|
"""
|
316
|
-
obj = copy.deepcopy(self)
|
317
319
|
# keep the original stereo info. for ring double bond
|
318
|
-
Chem.RemoveStereochemistry(
|
319
|
-
Chem.AssignStereochemistry(
|
320
|
+
Chem.RemoveStereochemistry(self.rdmol)
|
321
|
+
Chem.AssignStereochemistry(self.rdmol,
|
320
322
|
cleanIt=False,
|
321
323
|
force=False,
|
322
324
|
flagPossibleStereoCenters=False)
|
323
|
-
|
324
|
-
|
325
|
+
self.smiles = Chem.MolToSmiles(self.rdmol)
|
326
|
+
|
327
|
+
return self
|
325
328
|
|
326
329
|
|
327
330
|
def make_confs(self,
|
328
331
|
n:int = 50,
|
329
|
-
method:str = '
|
330
|
-
calculator:str | Callable = 'MMFF94'
|
332
|
+
method:str = 'ETKDG',
|
333
|
+
calculator:str | Callable = 'MMFF94',
|
334
|
+
) -> Self:
|
331
335
|
"""Generates 3D conformers.
|
332
336
|
|
333
337
|
Args:
|
334
338
|
n (int, optional): number of conformers to generate. Defaults to 50.
|
335
339
|
method (str, optional): conformer generation method.
|
336
|
-
Choices are `
|
337
|
-
Defaults to 'RDKit_ETKDG'.
|
340
|
+
Choices are `ETKDG`, `CONFORGE`. Defaults to 'ETKDG'.
|
338
341
|
|
339
342
|
Returns:
|
340
|
-
Self:
|
343
|
+
Self: modified self.
|
341
344
|
|
342
345
|
Reference:
|
343
346
|
T. Seidel, C. Permann, O. Wieder, S. M. Kohlbacher, T. Langer,
|
@@ -352,9 +355,9 @@ class Mol:
|
|
352
355
|
|
353
356
|
self.confs = []
|
354
357
|
|
355
|
-
if method.upper() == '
|
358
|
+
if method.upper() == 'ETKDG':
|
356
359
|
rdmol_H = Chem.AddHs(self.rdmol, addCoords=True) # returns a copy with hydrogens added
|
357
|
-
conf_ids = rdDistGeom.EmbedMultipleConfs(rdmol_H, n, params=self.ETKDG_params)
|
360
|
+
conf_ids = rdDistGeom.EmbedMultipleConfs(rdmol_H, numConfs=n, params=self.ETKDG_params)
|
358
361
|
for rdConformer in rdmol_H.GetConformers():
|
359
362
|
# number of atoms should match with conformer(s)
|
360
363
|
rdmol_conf = Chem.Mol(rdmol_H)
|
@@ -363,7 +366,7 @@ class Mol:
|
|
363
366
|
conf = Conf(rdmol_conf)
|
364
367
|
self.confs.append(conf)
|
365
368
|
|
366
|
-
elif method.upper() == '
|
369
|
+
elif method.upper() == 'CONFORGE':
|
367
370
|
with tempfile.NamedTemporaryFile() as tmpfile:
|
368
371
|
mol = CDPL.Chem.parseSMILES(self.smiles)
|
369
372
|
# create and initialize an instance of the class ConfGen.ConformerGenerator which
|
@@ -412,7 +415,7 @@ class Mol:
|
|
412
415
|
|
413
416
|
# energy evaluations for ranking
|
414
417
|
for conf in self.confs:
|
415
|
-
conf.
|
418
|
+
conf.potential_energy(calculator) # default: MMFF94
|
416
419
|
|
417
420
|
# set relative energy, E_rel(kcal/mol)
|
418
421
|
sort_by = 'E_tot(kcal/mol)'
|
@@ -421,50 +424,97 @@ class Mol:
|
|
421
424
|
for conf in self.confs:
|
422
425
|
conf.props.update({"E_rel(kcal/mol)": conf.props[sort_by] - lowest_energy})
|
423
426
|
|
424
|
-
|
427
|
+
self = self.rename()
|
428
|
+
|
429
|
+
return self
|
425
430
|
|
426
431
|
|
427
|
-
def
|
428
|
-
|
432
|
+
def optimize_confs(self,
|
433
|
+
calculator: str | Callable = 'MMFF94',
|
434
|
+
fmax: float = 0.05,
|
435
|
+
max_iter: int = 1000,
|
436
|
+
) -> Self:
|
437
|
+
"""Optimizes 3D geometry of conformers.
|
429
438
|
|
430
439
|
Args:
|
431
|
-
calculator (str | Callable):
|
432
|
-
|
440
|
+
calculator (str | Callable): MMFF94 (= MMFF), MMFF94s, UFF, or ASE calculator.
|
441
|
+
`MMFF94` or `MMFF` - Intended for general use, including organic molecules and proteins,
|
442
|
+
and primarily relies on data from quantum mechanical calculations.
|
443
|
+
It's often used in molecular dynamics simulations.
|
444
|
+
`MMFF94s` - A "static" variant of MMFF94, with adjusted parameters for out-of-plane
|
445
|
+
bending and dihedral torsions to favor planar geometries for specific nitrogen atoms.
|
446
|
+
This makes it better suited for geometry optimization studies where a static,
|
447
|
+
time-averaged structure is desired. The "s" stands for "static".
|
448
|
+
`UFF` - UFF refers to the "Universal Force Field," a force field model used for
|
449
|
+
molecular mechanics calculations. It's a tool for geometry optimization,
|
450
|
+
energy minimization, and exploring molecular conformations in 3D space.
|
451
|
+
UFF is often used to refine conformers generated by other methods,
|
452
|
+
such as random conformer generation, to produce more physically plausible
|
453
|
+
and stable structures.
|
454
|
+
fmax (float, optional): fmax for the calculator convergence. Defaults to 0.05.
|
455
|
+
max_iter (int, optional): max iterations for the calculator. Defaults to 1000.
|
433
456
|
|
434
457
|
Returns:
|
435
|
-
Self:
|
458
|
+
Self: modified self.
|
436
459
|
"""
|
437
|
-
self.confs = [ conf.optimize(calculator, fmax) for conf in self.confs ]
|
460
|
+
self.confs = [ conf.optimize(calculator, fmax, max_iter) for conf in self.confs ]
|
461
|
+
|
438
462
|
return self
|
439
463
|
|
440
464
|
|
441
|
-
def sort_confs(self) -> Self:
|
442
|
-
"""Sorts
|
465
|
+
def sort_confs(self, calculator: str | Callable | None = None) -> Self:
|
466
|
+
"""Sorts by `E_tot(kcal/mol)` or `E_tot(eV)` and sets `E_rel(kcal/mol)`.
|
443
467
|
|
468
|
+
Args:
|
469
|
+
calculator (str | Callable | None): MMFF94 (= MMFF), MMFF94s, UFF, or ASE calculator.
|
470
|
+
`MMFF94` or `MMFF` - Intended for general use, including organic molecules and proteins,
|
471
|
+
and primarily relies on data from quantum mechanical calculations.
|
472
|
+
It's often used in molecular dynamics simulations.
|
473
|
+
`MMFF94s` - A "static" variant of MMFF94, with adjusted parameters for out-of-plane
|
474
|
+
bending and dihedral torsions to favor planar geometries for specific nitrogen atoms.
|
475
|
+
This makes it better suited for geometry optimization studies where a static,
|
476
|
+
time-averaged structure is desired. The "s" stands for "static".
|
477
|
+
`UFF` - UFF refers to the "Universal Force Field," a force field model used for
|
478
|
+
molecular mechanics calculations. It's a tool for geometry optimization,
|
479
|
+
energy minimization, and exploring molecular conformations in 3D space.
|
480
|
+
UFF is often used to refine conformers generated by other methods,
|
481
|
+
such as random conformer generation, to produce more physically plausible
|
482
|
+
and stable structures.
|
444
483
|
Raises:
|
445
484
|
KeyError: if `E_tot(eV)` or `E_tot(kcal/mol)` is not defined.
|
446
485
|
|
447
486
|
Returns:
|
448
|
-
Self:
|
487
|
+
Self: modified self.
|
449
488
|
"""
|
450
|
-
if
|
451
|
-
|
452
|
-
|
453
|
-
|
489
|
+
if calculator is not None:
|
490
|
+
# re-calculate potential energies
|
491
|
+
for conf in self.confs:
|
492
|
+
PE = conf.potential_energy(calculator=calculator) # sets `E_tot(kcal/mol)`
|
493
|
+
|
494
|
+
if all(['E_tot(kcal/mol)' in conf.props for conf in self.confs]):
|
454
495
|
sort_by = 'E_tot(kcal/mol)'
|
455
496
|
conversion = 1.0
|
497
|
+
|
498
|
+
elif all(['E_tot(eV)' in conf.props for conf in self.confs]):
|
499
|
+
sort_by = 'E_tot(eV)'
|
500
|
+
conversion = ev2kcalpermol # eV to kcal/mol
|
501
|
+
|
456
502
|
else:
|
457
|
-
raise KeyError(f'
|
458
|
-
|
503
|
+
raise KeyError(f'sort_confs() requires `E_tot(eV)` or `E_tot(kcal/mol)` property')
|
504
|
+
|
505
|
+
# ascending order
|
506
|
+
self.confs = sorted(self.confs, key=lambda c: c.props[sort_by])
|
507
|
+
|
459
508
|
if self.count() > 0:
|
460
|
-
|
509
|
+
E_min = self.confs[0].props[sort_by]
|
461
510
|
for conf in self.confs:
|
462
|
-
E_rel = (conf.props[sort_by] -
|
511
|
+
E_rel = conversion * (conf.props[sort_by] - E_min)
|
463
512
|
conf.props.update({"E_rel(kcal/mol)": E_rel})
|
513
|
+
|
464
514
|
return self
|
465
515
|
|
466
516
|
|
467
|
-
def align_confs(self, method:str='rigid_fragment') -> Self:
|
517
|
+
def align_confs(self, method: str = 'rigid_fragment') -> Self:
|
468
518
|
"""Aligns all conformers to the first conformer.
|
469
519
|
|
470
520
|
Args:
|
@@ -473,7 +523,7 @@ class Mol:
|
|
473
523
|
Defaults to `rigid_fragment`.
|
474
524
|
|
475
525
|
Returns:
|
476
|
-
Self:
|
526
|
+
Self: modified self.
|
477
527
|
"""
|
478
528
|
|
479
529
|
if self.count() < 2: # nothing to do
|
@@ -524,7 +574,10 @@ class Mol:
|
|
524
574
|
return self
|
525
575
|
|
526
576
|
|
527
|
-
def cluster_confs(self,
|
577
|
+
def cluster_confs(self,
|
578
|
+
method: str = 'QT',
|
579
|
+
threshold: float = 1.0,
|
580
|
+
sort: str = 'size') -> Self:
|
528
581
|
"""Clusters all conformers and sets cluster properties.
|
529
582
|
|
530
583
|
Following cluster properties will be added: `cluster`, `cluster_mean_energy`,
|
@@ -543,14 +596,14 @@ class Mol:
|
|
543
596
|
`AutoGraph`.
|
544
597
|
Defaults to `QT`.
|
545
598
|
threshold (float, optional): RMSD threshold of a cluster. Defaults to 1.0.
|
546
|
-
|
599
|
+
sort (str, optional): sort cluster(s) by mean `energy` or cluster `size`.
|
547
600
|
Defaults to `size`.
|
548
601
|
|
549
602
|
Raises:
|
550
603
|
NotImplementedError: if unsupported method is requested.
|
551
604
|
|
552
605
|
Returns:
|
553
|
-
Self:
|
606
|
+
Self: modified self.
|
554
607
|
"""
|
555
608
|
if method != 'DQT': # rmsd of x,y,z coordinates (non-H)
|
556
609
|
conf_rdmols_noH = [Chem.RemoveHs(Chem.Mol(conf.rdmol)) for conf in self.confs]
|
@@ -569,7 +622,7 @@ class Mol:
|
|
569
622
|
torsions = []
|
570
623
|
for conf in self.confs:
|
571
624
|
t_radians = []
|
572
|
-
for (i, j, k, l
|
625
|
+
for torsion_key, (i, j, k, l) in torsion_atom_indices.items():
|
573
626
|
t_radians.append(
|
574
627
|
rdMolTransforms.GetDihedralRad(conf.rdmol.GetConformer(), i, j, k, l))
|
575
628
|
torsions.append(np.array(t_radians))
|
@@ -661,14 +714,14 @@ class Mol:
|
|
661
714
|
'iqr_energy' : iqr_energy,
|
662
715
|
})
|
663
716
|
# sort cluster index
|
664
|
-
if
|
717
|
+
if sort == 'size':
|
665
718
|
cluster_list = sorted(cluster_list, key=lambda x: x['size'], reverse=True)
|
666
719
|
|
667
|
-
elif
|
720
|
+
elif sort == 'energy':
|
668
721
|
cluster_list = sorted(cluster_list, key=lambda x: x['median_energy'], reverse=False)
|
669
722
|
|
670
723
|
else:
|
671
|
-
raise NotImplementedError(f'{
|
724
|
+
raise NotImplementedError(f'{sort} is not implemented yet.')
|
672
725
|
|
673
726
|
for cluster_idx, cluster_dict in enumerate(cluster_list, start=1):
|
674
727
|
for conf_idx in cluster_dict['confs']:
|
@@ -694,10 +747,10 @@ class Mol:
|
|
694
747
|
|
695
748
|
|
696
749
|
def drop_confs(self,
|
697
|
-
stereo_flipped:bool=True,
|
698
|
-
unconverged:bool=True,
|
750
|
+
stereo_flipped: bool = True,
|
751
|
+
unconverged: bool = True,
|
699
752
|
similar: bool | None = None,
|
700
|
-
similar_rmsd:float=0.3,
|
753
|
+
similar_rmsd: float = 0.3,
|
701
754
|
cluster: bool | None =None,
|
702
755
|
k: int | None = None,
|
703
756
|
window: float | None = None,
|
@@ -715,39 +768,46 @@ class Mol:
|
|
715
768
|
k (int, optional): drop all except for `k` lowest energy conformers.
|
716
769
|
window (float, optional): drop all except for conformers within `window` of relative energy.
|
717
770
|
|
718
|
-
Returns:
|
719
|
-
Self: a copy of rdworks.Mol object.
|
720
|
-
|
721
771
|
Examples:
|
722
772
|
To drop similar conformers within rmsd of 0.5 A
|
723
773
|
>>> mol.drop_confs(similar=True, similar_rmsd=0.5)
|
724
774
|
|
725
775
|
To drop conformers beyond 5 kcal/mol
|
726
776
|
>>> mol.drop_confs(window=5.0)
|
727
|
-
|
777
|
+
|
778
|
+
Returns:
|
779
|
+
Self: modified self.
|
728
780
|
"""
|
729
|
-
|
781
|
+
|
782
|
+
reasons = [f'stereo flipped',
|
783
|
+
f'unconverged',
|
784
|
+
f'similar({similar_rmsd})',
|
785
|
+
f'cluster(non-centroid)',
|
786
|
+
f'k and/or energy window',
|
787
|
+
]
|
788
|
+
|
789
|
+
w = max([len(s) for s in reasons])
|
730
790
|
|
731
|
-
if stereo_flipped and
|
732
|
-
mask = [Chem.MolToSmiles(Chem.RemoveHs(_.rdmol)) ==
|
733
|
-
|
791
|
+
if stereo_flipped and self.count() > 0:
|
792
|
+
mask = [Chem.MolToSmiles(Chem.RemoveHs(_.rdmol)) == self.smiles for _ in self.confs]
|
793
|
+
self.confs = list(itertools.compress(self.confs, mask))
|
734
794
|
if verbose:
|
735
|
-
main_logger.info(f'drop_confs
|
795
|
+
main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[0]:<{w}} -> {self.count()}')
|
736
796
|
|
737
|
-
if unconverged and
|
738
|
-
mask = [_.props['Converged'] if 'Converged' in _.props else True for _ in
|
739
|
-
|
797
|
+
if unconverged and self.count() > 0:
|
798
|
+
mask = [_.props['Converged'] if 'Converged' in _.props else True for _ in self.confs]
|
799
|
+
self.confs = list(itertools.compress(self.confs, mask))
|
740
800
|
if verbose:
|
741
|
-
main_logger.info(f'drop_confs
|
801
|
+
main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[1]:<{w}} -> {self.count()}')
|
742
802
|
|
743
|
-
if similar and
|
803
|
+
if similar and self.count() > 1:
|
744
804
|
# it is observed that there are essentially identical conformers
|
745
805
|
# such as 180-degree ring rotation and there is not minor conformational variations
|
746
806
|
# in the RDKit ETKDG generated conformers.
|
747
|
-
conf_rdmols_noH = [Chem.RemoveHs(Chem.Mol(_.rdmol)) for _ in
|
807
|
+
conf_rdmols_noH = [Chem.RemoveHs(Chem.Mol(_.rdmol)) for _ in self.confs]
|
748
808
|
# copies are made for rmsd calculations to prevent coordinates changes
|
749
809
|
lower_triangle_values = []
|
750
|
-
for i in range(
|
810
|
+
for i in range(self.count()): # number of conformers
|
751
811
|
for j in range(i):
|
752
812
|
# rdMolAlign.GetBestRMS takes symmetry into account
|
753
813
|
# removed hydrogens to speed up
|
@@ -755,10 +815,10 @@ class Mol:
|
|
755
815
|
lower_triangle_values.append(best_rms)
|
756
816
|
symm_matrix = convert_tril_to_symm(lower_triangle_values)
|
757
817
|
cluster_assignment, centroid_indices = QT(symm_matrix, similar_rmsd)
|
758
|
-
mask = [conf_idx in centroid_indices for conf_idx, conf in enumerate(
|
759
|
-
|
818
|
+
mask = [conf_idx in centroid_indices for conf_idx, conf in enumerate(self.confs)]
|
819
|
+
self.confs = list(itertools.compress(self.confs, mask))
|
760
820
|
if verbose:
|
761
|
-
main_logger.info(f'drop_confs
|
821
|
+
main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[2]:<{w}} -> {self.count()}')
|
762
822
|
|
763
823
|
# note: it will retain the conformers with lower index
|
764
824
|
# so, it should be sorted before dropping
|
@@ -778,29 +838,29 @@ class Mol:
|
|
778
838
|
# retained_confs.append(Chem.RemoveHs(conf_i.rdmol)) # store a copy of H-removed rdmol
|
779
839
|
# obj.confs = list(itertools.compress(obj.confs, mask))
|
780
840
|
|
781
|
-
if cluster and
|
841
|
+
if cluster and self.count() > 1:
|
782
842
|
# drop non-centroid cluster member(s)
|
783
|
-
mask = [_.props['centroid'] if 'centroid' in _.props else True for _ in
|
784
|
-
|
843
|
+
mask = [_.props['centroid'] if 'centroid' in _.props else True for _ in self.confs]
|
844
|
+
self.confs = list(itertools.compress(self.confs, mask))
|
785
845
|
if verbose:
|
786
|
-
main_logger.info(f'drop_confs
|
846
|
+
main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[3]:<{w}} -> {self.count()}')
|
787
847
|
|
788
|
-
if (k or window) and
|
848
|
+
if (k or window) and self.count() > 0:
|
789
849
|
if k:
|
790
|
-
mask_k = [i < k for i,_ in enumerate(
|
850
|
+
mask_k = [i < k for i,_ in enumerate(self.confs)]
|
791
851
|
else:
|
792
|
-
mask_k = [True,] *
|
852
|
+
mask_k = [True,] * self.count()
|
793
853
|
if window:
|
794
|
-
mask_window = [_.props['E_rel(kcal/mol)'] < window if 'E_rel(kcal/mol)' in _.props else True for _ in
|
854
|
+
mask_window = [_.props['E_rel(kcal/mol)'] < window if 'E_rel(kcal/mol)' in _.props else True for _ in self.confs]
|
795
855
|
else:
|
796
|
-
mask_window = [True,] *
|
856
|
+
mask_window = [True,] * self.count()
|
797
857
|
# retain conformer(s) that satisfy both k and window conditions
|
798
858
|
mask = [(x and y) for (x,y) in zip(mask_k, mask_window)]
|
799
|
-
|
859
|
+
self.confs = list(itertools.compress(self.confs, mask))
|
800
860
|
if verbose:
|
801
|
-
main_logger.info(f'drop_confs
|
861
|
+
main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[4]:<{w}} -> {self.count()}')
|
802
862
|
|
803
|
-
return
|
863
|
+
return self
|
804
864
|
|
805
865
|
|
806
866
|
def count(self) -> int:
|
@@ -812,7 +872,7 @@ class Mol:
|
|
812
872
|
return len(self.confs)
|
813
873
|
|
814
874
|
|
815
|
-
def
|
875
|
+
def nnp_ready(self, model: str = 'aimnet2') -> bool:
|
816
876
|
"""Check if a particular neural network model is applicable to current molecule.
|
817
877
|
|
818
878
|
Args:
|
@@ -830,19 +890,18 @@ class Mol:
|
|
830
890
|
# H, C, N, O, F, S, Cl
|
831
891
|
atomic_numbers = [1, 6, 7, 8, 9, 16, 17 ]
|
832
892
|
|
833
|
-
elif model in ['aimnet', 'aimnet2']:
|
893
|
+
elif model.lower() in ['aimnet', 'aimnet2']:
|
834
894
|
# H, B, C, N, O, F, Si, P, S, Cl, As, Se, Br, I
|
835
895
|
atomic_numbers = [1, 5, 6, 7, 8, 9, 14, 15, 16, 17, 33, 34, 35, 53 ]
|
836
896
|
|
837
897
|
else:
|
838
|
-
raise ValueError('
|
898
|
+
raise ValueError('nnp_ready() supports ANI-2x, ANI-2xt, AIMNet, or AIMNet2')
|
839
899
|
|
840
|
-
for a in self.rdmol.GetAtoms():
|
841
|
-
|
842
|
-
|
900
|
+
if all([ a.GetAtomicNum() in atomic_numbers for a in self.rdmol.GetAtoms() ]):
|
901
|
+
return True
|
902
|
+
else:
|
903
|
+
return False
|
843
904
|
|
844
|
-
return True
|
845
|
-
|
846
905
|
|
847
906
|
def charge(self) -> int:
|
848
907
|
"""Returns molecular formal charge
|
@@ -859,7 +918,7 @@ class Mol:
|
|
859
918
|
Returns:
|
860
919
|
list: list of element symbols.
|
861
920
|
"""
|
862
|
-
return [
|
921
|
+
return [atom.GetSymbol() for atom in self.rdmol.GetAtoms()]
|
863
922
|
|
864
923
|
|
865
924
|
def numbers(self) -> list[int]:
|
@@ -868,111 +927,19 @@ class Mol:
|
|
868
927
|
Returns:
|
869
928
|
list: list of atomic numbers.
|
870
929
|
"""
|
871
|
-
return [
|
930
|
+
return [atom.GetAtomicNum() for atom in self.rdmol.GetAtoms()]
|
872
931
|
|
873
932
|
|
874
|
-
def torsion_atoms(self, strict:bool=True) ->
|
875
|
-
"""Determine dihedral angle atoms (
|
933
|
+
def torsion_atoms(self, strict: bool = True) -> dict[int, tuple]:
|
934
|
+
"""Determine torsion/dihedral angle atoms (i-j-k-l) and rotating group for each rotatable bond (j-k).
|
876
935
|
|
877
936
|
Args:
|
878
937
|
strict (bool): whether to exclude amide/imide/ester/acid bonds.
|
879
938
|
|
880
939
|
Returns:
|
881
|
-
|
882
|
-
(a, b, c, d, rot_atom_indices, fix_atom_indices),
|
883
|
-
...,
|
884
|
-
]
|
940
|
+
{torsion_key: (i, j, k, l), ...,}
|
885
941
|
"""
|
886
|
-
|
887
|
-
# https://github.com/rdkit/rdkit/blob/de602c88809ea6ceba1e8ed50fd543b6e406e9c4/Code/GraphMol/Descriptors/Lipinski.cpp#L108
|
888
|
-
if strict :
|
889
|
-
# excludes amide/imide/ester/acid bonds
|
890
|
-
rotatable_bond_pattern = Chem.MolFromSmarts(
|
891
|
-
(
|
892
|
-
"[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])("
|
893
|
-
"[CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])&!$([#7,O,S!D1]-!@[CD3]="
|
894
|
-
"[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])&!$([#7!D1]-!@[CD3]=[N+])]-,:;!@[!$"
|
895
|
-
"(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])(["
|
896
|
-
"CH3])[CH3])]"
|
897
|
-
)
|
898
|
-
)
|
899
|
-
else:
|
900
|
-
rotatable_bond_pattern = Chem.MolFromSmarts('[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]')
|
901
|
-
rotatable_bonds = self.rdmol.GetSubstructMatches(rotatable_bond_pattern)
|
902
|
-
torsion_angle_atom_indices = []
|
903
|
-
|
904
|
-
# small rings (n=3 or 4)
|
905
|
-
small_rings = [ r for r in list(self.rdmol.GetRingInfo().AtomRings()) if len(r) < 5 ]
|
906
|
-
# ex. = [(1, 37, 35, 34, 3, 2), (29, 28, 30)]
|
907
|
-
|
908
|
-
forbidden_terminal_nuclei = [1, 9, 17, 35, 53] # H,F,Cl,Br,I
|
909
|
-
|
910
|
-
for (b_idx, c_idx) in rotatable_bonds:
|
911
|
-
# determine a atom ``a`` that define a dihedral angle
|
912
|
-
a_candidates = []
|
913
|
-
for neighbor in self.rdmol.GetAtomWithIdx(b_idx).GetNeighbors():
|
914
|
-
neighbor_idx = neighbor.GetIdx()
|
915
|
-
if neighbor_idx == c_idx:
|
916
|
-
continue
|
917
|
-
neighbor_atomic_num = neighbor.GetAtomicNum()
|
918
|
-
if neighbor_atomic_num not in forbidden_terminal_nuclei:
|
919
|
-
a_candidates.append((neighbor_atomic_num, neighbor_idx))
|
920
|
-
|
921
|
-
if not a_candidates:
|
922
|
-
continue
|
923
|
-
|
924
|
-
(a_atomic_num, a_idx) = sorted(a_candidates, key=lambda x: (x[0], -x[1]), reverse=True)[0]
|
925
|
-
|
926
|
-
# is a-b in a small ring (n=3 or 4)?
|
927
|
-
is_in_small_ring = False
|
928
|
-
for small_ring in small_rings:
|
929
|
-
if (a_idx in small_ring) and (b_idx in small_ring):
|
930
|
-
is_in_small_ring = True
|
931
|
-
break
|
932
|
-
|
933
|
-
if is_in_small_ring:
|
934
|
-
continue
|
935
|
-
|
936
|
-
# determine a atom ``d`` that define a dihedral angle
|
937
|
-
d_candidates = []
|
938
|
-
for neighbor in self.rdmol.GetAtomWithIdx(c_idx).GetNeighbors():
|
939
|
-
neighbor_idx = neighbor.GetIdx()
|
940
|
-
if (neighbor_idx == b_idx):
|
941
|
-
continue
|
942
|
-
neighbor_atomic_num = neighbor.GetAtomicNum()
|
943
|
-
if neighbor_atomic_num not in forbidden_terminal_nuclei:
|
944
|
-
d_candidates.append((neighbor_atomic_num, neighbor_idx))
|
945
|
-
|
946
|
-
if not d_candidates:
|
947
|
-
continue
|
948
|
-
|
949
|
-
(d_atomic_num, d_idx) = sorted(d_candidates, key=lambda x: (x[0], -x[1]), reverse=True)[0]
|
950
|
-
|
951
|
-
# is c-d in a small ring?
|
952
|
-
is_in_small_ring = False
|
953
|
-
for small_ring in small_rings:
|
954
|
-
if (c_idx in small_ring) and (d_idx in small_ring):
|
955
|
-
is_in_small_ring = True
|
956
|
-
break
|
957
|
-
|
958
|
-
if is_in_small_ring:
|
959
|
-
continue
|
960
|
-
|
961
|
-
# determine a group of atoms to be rotated
|
962
|
-
# https://ctr.fandom.com/wiki/Break_rotatable_bonds_and_report_the_fragments
|
963
|
-
em = Chem.EditableMol(self.rdmol)
|
964
|
-
em.RemoveBond(b_idx, c_idx)
|
965
|
-
fragmented = em.GetMol()
|
966
|
-
(frag1, frag2) = Chem.GetMolFrags(fragmented, asMols=False) # returns tuple of tuple
|
967
|
-
hac1 = sum([ 1 for i in frag1 if self.rdmol.GetAtomWithIdx(i).GetAtomicNum() > 1 ])
|
968
|
-
hac2 = sum([ 1 for i in frag2 if self.rdmol.GetAtomWithIdx(i).GetAtomicNum() > 1 ])
|
969
|
-
|
970
|
-
# smaller fragment will be rotated and must contain at least three heavy atoms
|
971
|
-
if min(hac1, hac2) >= 3:
|
972
|
-
(frag_rot, frag_fix) = sorted([(hac1, frag1), (hac2, frag2)])
|
973
|
-
torsion_angle_atom_indices.append((a_idx, b_idx, c_idx, d_idx, frag_rot[1], frag_fix[1]))
|
974
|
-
|
975
|
-
return torsion_angle_atom_indices
|
942
|
+
return {i: d[:4] for i, d in enumerate(get_torsion_atoms(self.rdmol, strict))}
|
976
943
|
|
977
944
|
|
978
945
|
def compute(self, **kwargs) -> Self:
|
@@ -984,147 +951,152 @@ class Mol:
|
|
984
951
|
progress (bool): whether to show progress bar.
|
985
952
|
|
986
953
|
Returns:
|
987
|
-
Self:
|
954
|
+
Self: modified self.
|
988
955
|
"""
|
989
956
|
self.max_workers = kwargs.get('max_workers', self.max_workers)
|
990
957
|
self.chunksize = kwargs.get('chunksize', self.chunksize)
|
991
958
|
self.progress = kwargs.get('progress', self.progress)
|
959
|
+
|
992
960
|
return self
|
993
961
|
|
994
962
|
|
995
|
-
@staticmethod
|
996
|
-
def _map_optimize_conf(conf:Conf, targs:tuple) -> Conf:
|
997
|
-
"""A map function to apply Conf.optimize() on `conf`.
|
998
|
-
|
999
|
-
The default behavior of map() is to pass the elements of the iterable to the function by reference.
|
1000
|
-
This means that if the function modifies the elements of the iterable,
|
1001
|
-
those changes will be reflected in the iterable itself.
|
1002
|
-
|
1003
|
-
Args:
|
1004
|
-
conf (Conf): subject rdworks.Conf object.
|
1005
|
-
targs (tuple): tuple of arguments to be passed to Conf.optimize().
|
1006
|
-
|
1007
|
-
Returns:
|
1008
|
-
Conf: rdworks.Conf object
|
1009
|
-
"""
|
1010
|
-
return conf.optimize(*targs)
|
1011
|
-
|
1012
|
-
|
1013
963
|
def torsion_energies(self,
|
1014
|
-
calculator:str | Callable,
|
1015
|
-
|
1016
|
-
|
964
|
+
calculator: str | Callable,
|
965
|
+
torsion_key: int | None = None,
|
966
|
+
simplify: bool = True,
|
967
|
+
fmax: float = 0.05,
|
968
|
+
interval: float = 20.0,
|
1017
969
|
use_converged_only: bool = True,
|
1018
|
-
optimize_ref: bool = False,
|
1019
970
|
**kwargs,
|
1020
971
|
) -> Self:
|
1021
972
|
"""Calculates potential energy profiles for each torsion angle using ASE optimizer.
|
1022
973
|
|
974
|
+
It uses the first conformer as a reference.
|
975
|
+
|
1023
976
|
Args:
|
1024
977
|
calculator (str | Callable): 'MMFF', 'UFF', or ASE calculator.
|
978
|
+
torsion_key (int | None): torsion index to calculate. Defaults to None (all).
|
979
|
+
simplify (bool, optional): whether to use fragment surrogate. Defaults to True.
|
1025
980
|
fmax (float, optional): fmax of ASE optimizer. Defaults to 0.05.
|
1026
981
|
interval (float, optional): interval of torsion angles in degree. Defaults to 20.0.
|
1027
982
|
use_converged_only (bool, optional): whether to use only converged data. Defaults to True.
|
1028
983
|
|
1029
984
|
Returns:
|
1030
|
-
|
985
|
+
Self: modified self.
|
1031
986
|
"""
|
987
|
+
assert self.count() > 0, "torsion_energies() requires at least one conformer"
|
988
|
+
|
1032
989
|
self = self.compute(**kwargs)
|
1033
990
|
|
1034
|
-
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1038
|
-
|
1039
|
-
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1043
|
-
|
1044
|
-
|
1045
|
-
|
1046
|
-
|
1047
|
-
|
1048
|
-
|
1049
|
-
|
1050
|
-
|
1051
|
-
|
1052
|
-
|
1053
|
-
|
1054
|
-
|
1055
|
-
|
1056
|
-
|
1057
|
-
|
1058
|
-
|
1059
|
-
|
1060
|
-
|
1061
|
-
|
1062
|
-
|
1063
|
-
|
1064
|
-
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1071
|
-
|
1072
|
-
|
1073
|
-
|
1074
|
-
|
1075
|
-
|
1076
|
-
|
1077
|
-
|
1078
|
-
|
991
|
+
if torsion_key is None:
|
992
|
+
torsion_atoms_indices = self.torsion_atoms()
|
993
|
+
else:
|
994
|
+
torsion_atoms_indices = {torsion_key: self.torsion_atoms()[torsion_key]}
|
995
|
+
|
996
|
+
ref_conf = self.confs[0].copy()
|
997
|
+
|
998
|
+
data = {}
|
999
|
+
|
1000
|
+
if simplify:
|
1001
|
+
for tk, indices in torsion_atoms_indices.items():
|
1002
|
+
frag, frag_ijkl = create_torsion_fragment(ref_conf.rdmol, indices)
|
1003
|
+
frag_conf = Conf(frag)
|
1004
|
+
data[tk] = {'indices': indices, 'angle':[], 'init':[], 'last':[], 'Converged':[]}
|
1005
|
+
for angle in np.arange(-180.0, 180.0, interval):
|
1006
|
+
# Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
|
1007
|
+
conf = frag_conf.copy()
|
1008
|
+
conf.props.update({'torsion_key': tk, 'angle': float(angle)})
|
1009
|
+
conf.set_torsion(*frag_ijkl, angle) # atoms bonded to `l` move.
|
1010
|
+
conf = conf.optimize(calculator, fmax, **kwargs)
|
1011
|
+
# conf.optimize() updates coordinates and conf.props:
|
1012
|
+
# `angle`, `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
|
1013
|
+
tk = conf.props['torsion_key']
|
1014
|
+
data[tk]['angle'].append(conf.props['angle'])
|
1015
|
+
data[tk]['init'].append(conf.props['E_tot_init(kcal/mol)'])
|
1016
|
+
data[tk]['last'].append(conf.props['E_tot(kcal/mol)'])
|
1017
|
+
data[tk]['Converged'].append(conf.props['Converged'])
|
1018
|
+
frag_cleaned, _ = clean_2d(frag, reset_isotope=True, remove_H=True)
|
1019
|
+
rdDepictor.Compute2DCoords(frag_cleaned)
|
1020
|
+
# to serialize the molecule
|
1021
|
+
data[tk]['frag'] = Chem.MolToMolBlock(frag_cleaned)
|
1022
|
+
data[tk]['frag_indices'] = frag_ijkl
|
1023
|
+
|
1024
|
+
else:
|
1025
|
+
# mol.confs will be populated with torsion conformers.
|
1026
|
+
# It is designed for a batch optimization in the future.
|
1027
|
+
mol = self.copy()
|
1028
|
+
mol.confs = []
|
1029
|
+
for tk, indices in torsion_atoms_indices.items():
|
1030
|
+
data[tk] = {'indices': indices, 'angle':[], 'init':[], 'last':[], 'Converged':[]}
|
1031
|
+
for angle in np.arange(-180.0, 180.0, interval):
|
1032
|
+
# Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
|
1033
|
+
x = ref_conf.copy()
|
1034
|
+
x.props.update({'torsion_key': tk, 'angle': float(angle)})
|
1035
|
+
x.set_torsion(*indices, angle) # atoms bonded to `l` move.
|
1036
|
+
mol.confs.append(x)
|
1037
|
+
|
1038
|
+
# Calculate relaxation energies
|
1039
|
+
for conf in mol.confs:
|
1040
|
+
conf = conf.optimize(calculator, fmax, **kwargs)
|
1041
|
+
# conf.optimize() updates coordinates and conf.props:
|
1042
|
+
# `angle`, `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
|
1043
|
+
tk = conf.props['torsion_key']
|
1044
|
+
data[tk]['angle'].append(conf.props['angle'])
|
1045
|
+
data[tk]['init'].append(conf.props['E_tot_init(kcal/mol)'])
|
1046
|
+
data[tk]['last'].append(conf.props['E_tot(kcal/mol)'])
|
1047
|
+
data[tk]['Converged'].append(conf.props['Converged'])
|
1079
1048
|
|
1080
1049
|
# Post-processing
|
1081
|
-
torsion_energy_profiles =
|
1082
|
-
for
|
1050
|
+
torsion_energy_profiles = {}
|
1051
|
+
for tk, dictdata in data.items():
|
1083
1052
|
if use_converged_only:
|
1084
|
-
|
1085
|
-
|
1086
|
-
|
1087
|
-
relax = np.array(
|
1053
|
+
dictdata['angle'] = list(itertools.compress(dictdata['angle'], dictdata['Converged']))
|
1054
|
+
dictdata['init'] = list(itertools.compress(dictdata['init'], dictdata['Converged']))
|
1055
|
+
dictdata['last'] = list(itertools.compress(dictdata['last'], dictdata['Converged']))
|
1056
|
+
relax = np.array(dictdata['init']) - np.median(dictdata['last'])
|
1088
1057
|
E_rel = relax - np.min(relax)
|
1089
|
-
torsion_energy_profiles
|
1090
|
-
'indices': indices,
|
1091
|
-
'angle': np.array(
|
1092
|
-
'E_rel(kcal/mol)': E_rel.tolist(), # np.ndarray -> list for serialization
|
1093
|
-
|
1058
|
+
torsion_energy_profiles[tk] = {
|
1059
|
+
'indices' : dictdata['indices'],
|
1060
|
+
'angle' : np.round(np.array(dictdata['angle']), 1).tolist(), # np.ndarray -> list for serialization
|
1061
|
+
'E_rel(kcal/mol)': np.round(E_rel, 2).tolist(), # np.ndarray -> list for serialization
|
1062
|
+
'frag' : dictdata.get('frag', None),
|
1063
|
+
'frag_indices' : dictdata.get('frag_indices', None),
|
1064
|
+
}
|
1065
|
+
|
1094
1066
|
self.props['torsion'] = torsion_energy_profiles
|
1095
1067
|
self.props['torsion_calculator'] = str(calculator)
|
1096
1068
|
|
1097
1069
|
return self
|
1098
1070
|
|
1099
1071
|
|
1100
|
-
|
1101
|
-
|
1102
|
-
def similarity(self, other:object) -> float:
|
1103
|
-
"""Returns Tanimoto similarity with `other` rdworks.Mol object.
|
1072
|
+
def similarity(self, other: Self) -> float:
|
1073
|
+
"""Returns Tanimoto similarity with other Mol object.
|
1104
1074
|
|
1105
1075
|
Args:
|
1106
|
-
other (
|
1076
|
+
other (Mol): other Mol object.
|
1107
1077
|
|
1108
1078
|
Raises:
|
1109
|
-
TypeError: if `other` is not
|
1079
|
+
TypeError: if `other` is not Mol object type.
|
1110
1080
|
|
1111
1081
|
Returns:
|
1112
1082
|
float: Tanimoto similarity.
|
1113
1083
|
"""
|
1114
|
-
|
1115
|
-
|
1084
|
+
assert isinstance(other, Mol), "similarity() Error: invalid Mol object"
|
1085
|
+
|
1116
1086
|
if not self.fp:
|
1117
1087
|
self.fp = self.MFP2.GetFingerprint(self.rdmol)
|
1088
|
+
|
1118
1089
|
if not other.fp:
|
1119
1090
|
other.fp = other.MFP2.GetFingerprint(other.rdmol)
|
1091
|
+
|
1120
1092
|
return DataStructs.TanimotoSimilarity(self.fp, other.fp)
|
1121
1093
|
|
1122
1094
|
|
1123
|
-
def is_similar(self, other:
|
1124
|
-
"""Check if
|
1095
|
+
def is_similar(self, other: Self, threshold: float) -> bool:
|
1096
|
+
"""Check if other molecule is similar within Tanimoto similarity threshold.
|
1125
1097
|
|
1126
1098
|
Args:
|
1127
|
-
other (
|
1099
|
+
other (Mol): other Mol object to compare with.
|
1128
1100
|
threshold (float): Tanimoto similarity threshold.
|
1129
1101
|
|
1130
1102
|
Returns:
|
@@ -1132,8 +1104,21 @@ class Mol:
|
|
1132
1104
|
"""
|
1133
1105
|
return self.similarity(other) >= threshold
|
1134
1106
|
|
1135
|
-
|
1136
|
-
def
|
1107
|
+
|
1108
|
+
def has_substr(self, substr: str) -> bool:
|
1109
|
+
"""Determine if the molecule has the substructure match.
|
1110
|
+
|
1111
|
+
Args:
|
1112
|
+
substr (str): SMARTS or SMILES.
|
1113
|
+
|
1114
|
+
Returns:
|
1115
|
+
bool: True if matches.
|
1116
|
+
"""
|
1117
|
+
query = Chem.MolFromSmarts(substr)
|
1118
|
+
return self.rdmol.HasSubstructMatch(query)
|
1119
|
+
|
1120
|
+
|
1121
|
+
def is_matching(self, terms: str | Path, invert: bool = False) -> bool:
|
1137
1122
|
"""Determines if the molecule matches the predefined substructure and/or descriptor ranges.
|
1138
1123
|
|
1139
1124
|
invert | terms(~ or !) | effect
|
@@ -1151,14 +1136,15 @@ class Mol:
|
|
1151
1136
|
Returns:
|
1152
1137
|
bool: True if matches.
|
1153
1138
|
"""
|
1154
|
-
if isinstance(terms,
|
1139
|
+
if isinstance(terms, Path):
|
1155
1140
|
path = terms.as_posix()
|
1141
|
+
|
1156
1142
|
elif isinstance(terms, str):
|
1157
1143
|
if terms.startswith('~') or terms.startswith('!'):
|
1158
1144
|
terms = terms.replace('~','').replace('!','')
|
1159
1145
|
invert = (invert ^ True)
|
1160
1146
|
try:
|
1161
|
-
path =
|
1147
|
+
path = Path(terms) # test if terms points to a xml file
|
1162
1148
|
assert path.is_file()
|
1163
1149
|
except:
|
1164
1150
|
path = get_predefined_xml(terms)
|
@@ -1189,8 +1175,10 @@ class Mol:
|
|
1189
1175
|
if combine.lower() == 'or' and any(mask):
|
1190
1176
|
# early termination if any term is satisfied
|
1191
1177
|
return invert ^ True # XOR(^) inverts only if invert is True
|
1178
|
+
|
1192
1179
|
if combine.lower() == 'and' and all(mask):
|
1193
1180
|
return invert ^ True
|
1181
|
+
|
1194
1182
|
return invert ^ False
|
1195
1183
|
|
1196
1184
|
|
@@ -1243,6 +1231,7 @@ class Mol:
|
|
1243
1231
|
continue
|
1244
1232
|
else:
|
1245
1233
|
stereos.append(element.specified == Chem.StereoSpecified.Specified)
|
1234
|
+
|
1246
1235
|
# note all([]) returns True
|
1247
1236
|
return all(stereos)
|
1248
1237
|
|
@@ -1259,12 +1248,12 @@ class Mol:
|
|
1259
1248
|
if element.type == Chem.StereoType.Bond_Double:
|
1260
1249
|
if self.rdmol.GetBondWithIdx(element.centeredOn).IsInRing():
|
1261
1250
|
ring_bond_stereo_info.append((element.centeredOn, element.descriptor))
|
1251
|
+
|
1262
1252
|
return ring_bond_stereo_info
|
1263
1253
|
|
1264
1254
|
|
1265
1255
|
def report_stereo(self) -> None:
|
1266
|
-
"""
|
1267
|
-
"""
|
1256
|
+
"""Report stereochemistry information for debug"""
|
1268
1257
|
num_chiral_centers = rdMolDescriptors.CalcNumAtomStereoCenters(self.rdmol)
|
1269
1258
|
# Returns the total number of atomic stereocenters (specified and unspecified)
|
1270
1259
|
num_unspecified_chiral_centers = rdMolDescriptors.CalcNumUnspecifiedAtomStereoCenters(self.rdmol)
|
@@ -1289,8 +1278,7 @@ class Mol:
|
|
1289
1278
|
|
1290
1279
|
|
1291
1280
|
def report_props(self) -> None:
|
1292
|
-
"""
|
1293
|
-
"""
|
1281
|
+
"""Report properties"""
|
1294
1282
|
if self.props:
|
1295
1283
|
print(f"Properties({len(self.props)}):")
|
1296
1284
|
fixed_width = max([len(k) for k in self.props]) + 4
|
@@ -1302,7 +1290,59 @@ class Mol:
|
|
1302
1290
|
print(f"Properties: None")
|
1303
1291
|
|
1304
1292
|
|
1305
|
-
def
|
1293
|
+
def draw(self,
|
1294
|
+
coordgen: bool = False,
|
1295
|
+
rotate: bool = False,
|
1296
|
+
axis: str = 'z',
|
1297
|
+
degree: float = 0.0,
|
1298
|
+
) -> Self:
|
1299
|
+
"""Draw molecule in 2D.
|
1300
|
+
|
1301
|
+
Args:
|
1302
|
+
coordgen (bool, optional): whether to use `coordgen`. Defaults to False.
|
1303
|
+
rotate (bool, optional): whether to rotate drawing. Defaults to False.
|
1304
|
+
axis (str, optional): axis for rotation. Defaults to 'z'.
|
1305
|
+
degree (float, optional): degree for rotation. Defaults to 0.0.
|
1306
|
+
|
1307
|
+
Returns:
|
1308
|
+
Self.
|
1309
|
+
"""
|
1310
|
+
rdDepictor.SetPreferCoordGen(coordgen)
|
1311
|
+
rdDepictor.Compute2DCoords(self.rdmol)
|
1312
|
+
|
1313
|
+
if rotate:
|
1314
|
+
rad = (np.pi/180.0) * degree
|
1315
|
+
c = np.cos(rad)
|
1316
|
+
s = np.sin(rad)
|
1317
|
+
if axis.lower() == 'x':
|
1318
|
+
rotmat = np.array([
|
1319
|
+
[1., 0., 0., 0.],
|
1320
|
+
[0., c, -s, 0.],
|
1321
|
+
[0., s, c, 0.],
|
1322
|
+
[0., 0., 0., 1.],
|
1323
|
+
])
|
1324
|
+
elif axis.lower() == 'y':
|
1325
|
+
rotmat = np.array([
|
1326
|
+
[ c, 0., s, 0.],
|
1327
|
+
[ 0., 1., 0., 0.],
|
1328
|
+
[-s, 0., c, 0.],
|
1329
|
+
[ 0., 0., 0., 1.],
|
1330
|
+
])
|
1331
|
+
elif axis.lower() == 'z':
|
1332
|
+
rotmat = np.array([
|
1333
|
+
[c, -s, 0., 0.],
|
1334
|
+
[s, c, 0., 0.],
|
1335
|
+
[0., 0., 1., 0.],
|
1336
|
+
[0., 0., 0., 1.],
|
1337
|
+
])
|
1338
|
+
rdMolTransforms.TransformConformer(
|
1339
|
+
self.rdmol.GetConformer(),
|
1340
|
+
rotmat)
|
1341
|
+
|
1342
|
+
return self
|
1343
|
+
|
1344
|
+
|
1345
|
+
def to_sdf(self, confs: bool = False, props: bool = True) -> str:
|
1306
1346
|
"""Returns strings of SDF output.
|
1307
1347
|
|
1308
1348
|
Args:
|
@@ -1312,8 +1352,8 @@ class Mol:
|
|
1312
1352
|
Returns:
|
1313
1353
|
str: strings of SDF output.
|
1314
1354
|
"""
|
1315
|
-
|
1316
|
-
with Chem.SDWriter(
|
1355
|
+
buf = StringIO()
|
1356
|
+
with Chem.SDWriter(buf) as f:
|
1317
1357
|
if confs:
|
1318
1358
|
for conf in self.confs:
|
1319
1359
|
rdmol = Chem.Mol(conf.rdmol)
|
@@ -1333,45 +1373,61 @@ class Mol:
|
|
1333
1373
|
for k,v in self.props.items():
|
1334
1374
|
rdmol.SetProp(k, str(v))
|
1335
1375
|
f.write(rdmol)
|
1336
|
-
|
1337
|
-
|
1338
|
-
|
1339
|
-
def to_image(self, width:int=300, height:int=300, index:bool=False, svg:bool=True) -> object:
|
1340
|
-
"""Returns PIL(Python Image Library) image object.
|
1376
|
+
|
1377
|
+
return buf.getvalue()
|
1378
|
+
|
1341
1379
|
|
1342
|
-
|
1380
|
+
def to_png(self,
|
1381
|
+
width: int = 300,
|
1382
|
+
height: int = 300,
|
1383
|
+
legend: str = '',
|
1384
|
+
atom_index: bool = False,
|
1385
|
+
highlight_atoms: list[int] | None = None,
|
1386
|
+
highlight_bonds: list[int] | None = None,
|
1387
|
+
redraw: bool = False,
|
1388
|
+
coordgen: bool = False,
|
1389
|
+
trim: bool = True,
|
1390
|
+
) -> Image.Image:
|
1391
|
+
"""Draw 2D molecule in PNG format.
|
1343
1392
|
|
1344
1393
|
Args:
|
1345
|
-
width (int, optional): width
|
1346
|
-
height (int, optional): height
|
1347
|
-
|
1348
|
-
|
1349
|
-
|
1394
|
+
width (int, optional): width. Defaults to 300.
|
1395
|
+
height (int, optional): height. Defaults to 300.
|
1396
|
+
legend (str, optional): legend. Defaults to ''.
|
1397
|
+
atom_index (bool, optional): whether to show atom index. Defaults to False.
|
1398
|
+
highlight_atoms (list[int] | None, optional): atom(s) to highlight. Defaults to None.
|
1399
|
+
highlight_bonds (list[int] | None, optional): bond(s) to highlight. Defaults to None.
|
1400
|
+
redraw (bool, optional): whether to redraw. Defaults to False.
|
1401
|
+
coordgen (bool, optional): whether to use coordgen. Defaults to False.
|
1402
|
+
trim (bool, optional): whether to trim white margins. Defaults to True.
|
1403
|
+
|
1350
1404
|
Returns:
|
1351
|
-
|
1405
|
+
Image.Image: output PIL Image object.
|
1352
1406
|
"""
|
1353
|
-
if index:
|
1354
|
-
for a in self.rdmol.GetAtoms():
|
1355
|
-
a.SetProp("atomNote", str(a.GetIdx()+1))
|
1356
1407
|
|
1357
|
-
return
|
1358
|
-
|
1359
|
-
|
1360
|
-
|
1361
|
-
|
1362
|
-
|
1363
|
-
|
1364
|
-
|
1365
|
-
|
1408
|
+
return render_png(self.rdmol,
|
1409
|
+
width = width,
|
1410
|
+
height = height,
|
1411
|
+
legend = legend,
|
1412
|
+
atom_index = atom_index,
|
1413
|
+
highlight_atoms = highlight_atoms,
|
1414
|
+
highlight_bonds = highlight_bonds,
|
1415
|
+
redraw = redraw,
|
1416
|
+
coordgen = coordgen,
|
1417
|
+
trim = trim)
|
1366
1418
|
|
1367
1419
|
def to_svg(self,
|
1368
|
-
width:int =
|
1369
|
-
height:int =
|
1370
|
-
legend:str = '',
|
1371
|
-
|
1372
|
-
|
1373
|
-
|
1374
|
-
|
1420
|
+
width: int = 300,
|
1421
|
+
height: int = 300,
|
1422
|
+
legend: str = '',
|
1423
|
+
atom_index: bool = False,
|
1424
|
+
highlight_atoms: list[int] | None = None,
|
1425
|
+
highlight_bonds: list[int] | None = None,
|
1426
|
+
redraw: bool = False,
|
1427
|
+
coordgen: bool = False,
|
1428
|
+
optimize: bool = True,
|
1429
|
+
) -> str:
|
1430
|
+
"""Draw 2D molecule in SVG format.
|
1375
1431
|
|
1376
1432
|
Examples:
|
1377
1433
|
For Jupyter Notebook, wrap the output with SVG:
|
@@ -1380,55 +1436,60 @@ class Mol:
|
|
1380
1436
|
>>> SVG(libr[0].to_svg())
|
1381
1437
|
|
1382
1438
|
Args:
|
1383
|
-
width (int): width
|
1384
|
-
height (int): height
|
1385
|
-
legend (str): legend
|
1386
|
-
|
1387
|
-
|
1439
|
+
width (int, optional): width. Defaults to 300.
|
1440
|
+
height (int, optional): height. Defaults to 300.
|
1441
|
+
legend (str, optional): legend. Defaults to ''.
|
1442
|
+
atom_index (bool, optional): whether to show atom index. Defaults to False.
|
1443
|
+
highlight_atoms (list[int] | None, optional): atom(s) to highlight. Defaults to None.
|
1444
|
+
highlight_bonds (list[int] | None, optional): bond(s) to highlight. Defaults to None.
|
1445
|
+
redraw (bool, optional): whether to redraw. Defaults to False.
|
1446
|
+
coordgen (bool, optional): whether to use coordgen. Defaults to False.
|
1447
|
+
optimize (bool, optional): whether to optimize SVG string. Defaults to True.
|
1388
1448
|
|
1389
1449
|
Returns:
|
1390
|
-
str: SVG
|
1450
|
+
str: SVG string
|
1391
1451
|
"""
|
1392
|
-
|
1393
|
-
|
1394
|
-
|
1395
|
-
|
1396
|
-
|
1397
|
-
|
1398
|
-
|
1399
|
-
|
1400
|
-
|
1401
|
-
|
1402
|
-
|
1403
|
-
for atom in rdmol_2d.GetAtoms():
|
1404
|
-
atom.SetProp("atomLabel", str(atom.GetIdx()))
|
1405
|
-
# atom.SetProp("atomNote", str(atom.GetIdx()))
|
1406
|
-
# atom.SetProp("molAtomMapNumber", str(atom.GetIdx()))
|
1407
|
-
|
1408
|
-
drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
|
1409
|
-
if highlight:
|
1410
|
-
drawer.DrawMolecule(rdmol_2d, legend=legend, highlightAtoms=highlight)
|
1411
|
-
else:
|
1412
|
-
drawer.DrawMolecule(rdmol_2d, legend=legend)
|
1413
|
-
drawer.FinishDrawing()
|
1414
|
-
return drawer.GetDrawingText()
|
1415
|
-
|
1416
|
-
|
1417
|
-
def plot_energy(self, df:pd.DataFrame) -> str:
|
1418
|
-
"""Returns Seaborn plot strings for dihedral energy profile in SVG format.
|
1452
|
+
return render_svg(self.rdmol,
|
1453
|
+
width = width,
|
1454
|
+
height = height,
|
1455
|
+
legend = legend,
|
1456
|
+
atom_index = atom_index,
|
1457
|
+
highlight_atoms = highlight_atoms,
|
1458
|
+
highlight_bonds = highlight_bonds,
|
1459
|
+
redraw = redraw,
|
1460
|
+
coordgen = coordgen,
|
1461
|
+
optimize = optimize)
|
1462
|
+
|
1419
1463
|
|
1420
|
-
|
1464
|
+
def plot_torsion_energies(self,
|
1465
|
+
torsion_key: int,
|
1466
|
+
svg: bool = False,
|
1467
|
+
upper_limit: float = 35.0,
|
1468
|
+
zoomin_limit: float = 5.0,
|
1469
|
+
**kwargs,
|
1470
|
+
) -> str | None:
|
1471
|
+
"""Plot torsion energies.
|
1421
1472
|
|
1422
1473
|
Args:
|
1423
|
-
|
1474
|
+
torsion_key (int): torsion data to plot.
|
1475
|
+
svg (bool, optional): whether to return SVG strings. Defaults to False.
|
1476
|
+
upper_limit (float, optional): upper limit for E_rel(kcal/mol). Defaults to 35.0.
|
1477
|
+
zoomin_limit (float, optional): if the maximum E_rel(kcal/mol) is below this value, the y-axis is zoomed in to end at it. Defaults to 5.0.
|
1478
|
+
**kwargs: matplotlib.pyplot.figure options.
|
1424
1479
|
|
1425
1480
|
Returns:
|
1426
|
-
|
1481
|
+
str | None: SVG string if `svg` is True, otherwise None (the plot is displayed with plt.show()).
|
1427
1482
|
"""
|
1483
|
+
data = self.props['torsion'][torsion_key]
|
1484
|
+
df = pd.DataFrame({ax: data[ax] for ax in ['angle', 'E_rel(kcal/mol)']})
|
1428
1485
|
|
1429
|
-
|
1486
|
+
plt.figure(**kwargs)
|
1487
|
+
plt.clf() # Clear the current figure to prevent overlapping plots
|
1488
|
+
|
1489
|
+
sns.set_theme()
|
1430
1490
|
sns.color_palette("tab10")
|
1431
1491
|
sns.set_style("whitegrid")
|
1492
|
+
|
1432
1493
|
if len(df['angle']) == len(df['angle'].drop_duplicates()):
|
1433
1494
|
g = sns.lineplot(x="angle",
|
1434
1495
|
y="E_rel(kcal/mol)",
|
@@ -1445,31 +1506,57 @@ class Mol:
|
|
1445
1506
|
markersize=10)
|
1446
1507
|
g.xaxis.set_major_locator(ticker.MultipleLocator(30))
|
1447
1508
|
g.xaxis.set_major_formatter(ticker.ScalarFormatter())
|
1448
|
-
if df["E_rel(kcal/mol)"].max() >
|
1509
|
+
if df["E_rel(kcal/mol)"].max() > upper_limit:
|
1449
1510
|
g.set(title=self.name,
|
1450
1511
|
xlabel='Dihedral Angle (degree)',
|
1451
1512
|
ylabel='Relative Energy (Kcal/mol)',
|
1452
1513
|
xlim=(-190, 190),
|
1453
|
-
ylim=(-1.5,
|
1454
|
-
elif df["E_rel(kcal/mol)"].max() <
|
1514
|
+
ylim=(-1.5, upper_limit))
|
1515
|
+
elif df["E_rel(kcal/mol)"].max() < zoomin_limit:
|
1455
1516
|
g.set(title=self.name,
|
1456
1517
|
xlabel='Dihedral Angle (degree)',
|
1457
1518
|
ylabel='Relative Energy (Kcal/mol)',
|
1458
1519
|
xlim=(-190, 190),
|
1459
|
-
ylim=(-1.5,
|
1520
|
+
ylim=(-1.5, zoomin_limit))
|
1460
1521
|
else:
|
1461
1522
|
g.set(title=self.name,
|
1462
1523
|
xlabel='Dihedral Angle (degree)',
|
1463
1524
|
ylabel='Relative Energy (Kcal/mol)',
|
1464
1525
|
xlim=(-190, 190),)
|
1465
1526
|
g.tick_params(axis='x', rotation=30)
|
1466
|
-
|
1467
|
-
|
1468
|
-
|
1469
|
-
|
1470
|
-
|
1527
|
+
|
1528
|
+
if svg:
|
1529
|
+
buf = StringIO()
|
1530
|
+
plt.savefig(buf, format='svg', bbox_inches='tight')
|
1531
|
+
plt.close() # prevents duplicate plot outputs in Jupyter Notebook
|
1532
|
+
svg_string = buf.getvalue()
|
1533
|
+
# optimize SVG string
|
1534
|
+
scour_options = {
|
1535
|
+
'strip_comments': True,
|
1536
|
+
'strip_ids': True,
|
1537
|
+
'shorten_ids': True,
|
1538
|
+
'compact_paths': True,
|
1539
|
+
'indent_type': 'none',
|
1540
|
+
}
|
1541
|
+
svg_string = scourString(svg_string, options=scour_options)
|
1542
|
+
|
1543
|
+
return svg_string
|
1544
|
+
|
1545
|
+
else:
|
1546
|
+
buf = BytesIO()
|
1547
|
+
plt.savefig(buf, format='png', bbox_inches='tight')
|
1548
|
+
plt.close() # prevents duplicate plot outputs in Jupyter Notebook
|
1549
|
+
buf.seek(0)
|
1550
|
+
img = Image.open(buf)
|
1551
|
+
plt.imshow(img)
|
1552
|
+
plt.axis('off') # Optional: remove axes
|
1553
|
+
plt.show()
|
1554
|
+
|
1471
1555
|
|
1472
|
-
def to_html(self,
|
1556
|
+
def to_html(self,
|
1557
|
+
htmlbody: bool = False,
|
1558
|
+
contents: str = 'torsion',
|
1559
|
+
) -> str:
|
1473
1560
|
"""Returns HTML text of dihedral energy profile.
|
1474
1561
|
|
1475
1562
|
Args:
|
@@ -1478,42 +1565,86 @@ class Mol:
|
|
1478
1565
|
Returns:
|
1479
1566
|
str: HTML text.
|
1480
1567
|
"""
|
1568
|
+
HTML = ''
|
1481
1569
|
if htmlbody:
|
1482
|
-
HTML =
|
1483
|
-
|
1484
|
-
|
1485
|
-
|
1486
|
-
|
1487
|
-
|
1488
|
-
|
1489
|
-
|
1490
|
-
|
1491
|
-
|
1492
|
-
|
1493
|
-
|
1494
|
-
|
1495
|
-
|
1496
|
-
|
1497
|
-
|
1498
|
-
|
1499
|
-
|
1500
|
-
|
1570
|
+
HTML = '<html><body>'
|
1571
|
+
|
1572
|
+
if contents.lower() == 'torsion':
|
1573
|
+
# start of content
|
1574
|
+
HTML += f'<h1 style="text-align:left">{self.name}</h1>'
|
1575
|
+
HTML += '<table>'
|
1576
|
+
for tk, dictdata in self.props['torsion'].items():
|
1577
|
+
ijkl = dictdata['indices']
|
1578
|
+
ijkl_str = '-'.join([str(i) for i in ijkl])
|
1579
|
+
svg_mol = self.to_svg(highlight_atoms=ijkl, atom_index=True)
|
1580
|
+
svg_plot = self.plot_torsion_energies(torsion_key=tk, svg=True)
|
1581
|
+
frag = dictdata.get('frag', None)
|
1582
|
+
if frag is not None:
|
1583
|
+
frag = Chem.MolFromMolBlock(frag)
|
1584
|
+
pqrs = dictdata['frag_indices']
|
1585
|
+
pqrs_str = '-'.join([str(i) for i in pqrs])
|
1586
|
+
svg_frag = render_svg(frag, highlight_atoms=pqrs, atom_index=True)
|
1587
|
+
HTML += f'<tr><td>{ijkl_str}</td><td>{svg_mol}</td>'
|
1588
|
+
HTML += f'<td>{pqrs_str}<td>{svg_frag}</td><td>{svg_plot}</td></tr>'
|
1589
|
+
else:
|
1590
|
+
HTML += f'<tr><td>{ijkl_str}</td><td>{svg_mol}</td><td>{svg_plot}</td></tr>'
|
1591
|
+
HTML += '</table>'
|
1592
|
+
HTML += '<hr style="height:2px;border-width:0;color:gray;background-color:gray">'
|
1593
|
+
# end of content
|
1594
|
+
|
1501
1595
|
if htmlbody:
|
1502
|
-
HTML +=
|
1596
|
+
HTML += '</body></html>'
|
1597
|
+
|
1503
1598
|
return HTML
|
1504
1599
|
|
1505
1600
|
|
1506
|
-
def
|
1601
|
+
def dumps(self, key: str = "", decimals: int = 2) -> str:
|
1507
1602
|
"""Returns JSON dumps of properties.
|
1508
1603
|
|
1509
1604
|
Args:
|
1510
1605
|
key (str): key for a subset of properties. Defaults to "" (dump all properties).
|
1511
|
-
|
1606
|
+
decimals (int, optional): decimal places for float numbers. Defaults to 2.
|
1512
1607
|
|
1513
1608
|
Returns:
|
1514
|
-
str:
|
1609
|
+
str: JSON dumps.
|
1515
1610
|
"""
|
1516
|
-
props =
|
1611
|
+
props = recursive_round(self.props, decimals)
|
1612
|
+
|
1517
1613
|
if key:
|
1518
1614
|
return json.dumps({key:props[key]})
|
1615
|
+
|
1519
1616
|
return json.dumps(props)
|
1617
|
+
|
1618
|
+
|
1619
|
+
def serialize(self, decimals: int = 2) -> str:
|
1620
|
+
serialized = json.dumps({
|
1621
|
+
'name' : self.name,
|
1622
|
+
'smiles': self.smiles,
|
1623
|
+
'props' : recursive_round(self.props, decimals),
|
1624
|
+
'confs' : [conf.serialize() for conf in self.confs],
|
1625
|
+
})
|
1626
|
+
|
1627
|
+
return serialized
|
1628
|
+
|
1629
|
+
|
1630
|
+
def deserialize(self, serialized: str) -> Self:
|
1631
|
+
"""Updates self with the serialized string input.
|
1632
|
+
|
1633
|
+
Args:
|
1634
|
+
serialized (str): input
|
1635
|
+
|
1636
|
+
Returns:
|
1637
|
+
Self: modified self.
|
1638
|
+
"""
|
1639
|
+
data = json.loads(serialized)
|
1640
|
+
|
1641
|
+
self.name = data['name']
|
1642
|
+
self.smiles = data['smiles'] # isomeric SMILES, no H
|
1643
|
+
self.rdmol = Chem.MolFromSmiles(data['smiles']) # for 2D depiction
|
1644
|
+
self.rdmol.SetProp('_Name', self.name)
|
1645
|
+
self.InChI = Chem.MolToInchi(self.rdmol)
|
1646
|
+
self.InChIKey = inchi.InchiToInchiKey(self.InChI)
|
1647
|
+
self.props = data['props']
|
1648
|
+
self.confs = [Conf().deserialize(_) for _ in data['confs']] # for 3D conformers (iterable)
|
1649
|
+
|
1650
|
+
return self
|