rdworks 0.25.7__py3-none-any.whl → 0.35.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdworks/__init__.py +19 -20
- rdworks/conf.py +308 -117
- rdworks/display.py +244 -83
- rdworks/mol.py +621 -493
- rdworks/mollibr.py +336 -182
- rdworks/readin.py +2 -4
- rdworks/scaffold.py +1 -1
- rdworks/std.py +64 -24
- rdworks/torsion.py +477 -0
- rdworks/units.py +7 -58
- rdworks/utils.py +141 -258
- rdworks/xtb/__init__.py +0 -0
- rdworks/xtb/wrapper.py +304 -0
- {rdworks-0.25.7.dist-info → rdworks-0.35.1.dist-info}/METADATA +7 -10
- {rdworks-0.25.7.dist-info → rdworks-0.35.1.dist-info}/RECORD +18 -15
- {rdworks-0.25.7.dist-info → rdworks-0.35.1.dist-info}/WHEEL +1 -1
- {rdworks-0.25.7.dist-info → rdworks-0.35.1.dist-info}/licenses/LICENSE +0 -0
- {rdworks-0.25.7.dist-info → rdworks-0.35.1.dist-info}/top_level.txt +0 -0
rdworks/mol.py
CHANGED
@@ -1,19 +1,13 @@
|
|
1
|
-
# rdworks/mol.py
|
2
|
-
|
3
|
-
import os
|
4
|
-
import io
|
5
1
|
import copy
|
6
|
-
import types
|
7
|
-
import pathlib
|
8
2
|
import itertools
|
9
|
-
import math
|
10
3
|
import json
|
11
4
|
import logging
|
12
5
|
import tempfile
|
13
6
|
|
7
|
+
from io import StringIO, BytesIO
|
8
|
+
from pathlib import Path
|
14
9
|
from collections import defaultdict
|
15
10
|
from collections.abc import Callable
|
16
|
-
from pathlib import Path
|
17
11
|
from typing import Iterator, Self
|
18
12
|
|
19
13
|
import numpy as np
|
@@ -27,34 +21,35 @@ import CDPL.Chem
|
|
27
21
|
import CDPL.ConfGen
|
28
22
|
|
29
23
|
from rdkit import Chem, DataStructs
|
30
|
-
|
31
24
|
from rdkit.Chem import (
|
32
25
|
rdMolDescriptors, AllChem, Descriptors, QED,
|
33
26
|
rdFingerprintGenerator,
|
34
|
-
Draw, rdDepictor,
|
27
|
+
Draw, rdDepictor, inchi,
|
35
28
|
rdDistGeom, rdMolAlign, rdMolTransforms, rdmolops
|
36
29
|
)
|
37
30
|
from rdkit.Chem.Draw import rdMolDraw2D
|
38
|
-
|
39
31
|
from rdkit.ML.Cluster import Butina
|
32
|
+
from PIL import Image
|
40
33
|
|
41
|
-
from rdworks.
|
34
|
+
from rdworks.conf import Conf
|
35
|
+
from rdworks.std import desalt_smiles, standardize, clean_2d
|
42
36
|
from rdworks.xml import list_predefined_xml, get_predefined_xml, parse_xml
|
43
37
|
from rdworks.scaffold import rigid_fragment_indices
|
44
38
|
from rdworks.descriptor import rd_descriptor, rd_descriptor_f
|
45
|
-
from rdworks.
|
46
|
-
from rdworks.utils import convert_tril_to_symm, QT, fix_decimal_places_in_dict
|
39
|
+
from rdworks.utils import convert_tril_to_symm, QT, recursive_round
|
47
40
|
from rdworks.units import ev2kcalpermol
|
48
41
|
from rdworks.autograph import NMRCLUST, DynamicTreeCut, RCKmeans, AutoGraph
|
49
42
|
from rdworks.bitqt import BitQT
|
50
|
-
from rdworks.
|
43
|
+
from rdworks.torsion import create_torsion_fragment, get_torsion_atoms
|
44
|
+
from rdworks.display import render_svg, render_png
|
45
|
+
|
46
|
+
from scour.scour import scourString
|
51
47
|
|
52
48
|
main_logger = logging.getLogger()
|
53
49
|
|
54
50
|
|
55
51
|
class Mol:
|
56
|
-
"""Container for molecular structure, conformers, and other information.
|
57
|
-
"""
|
52
|
+
"""Container for molecular structure, conformers, and other information."""
|
58
53
|
|
59
54
|
MFP2 = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
|
60
55
|
|
@@ -62,100 +57,98 @@ class Mol:
|
|
62
57
|
ETKDG_params.useSmallRingTorsions = True
|
63
58
|
ETKDG_params.maxIterations = 2000
|
64
59
|
|
65
|
-
|
66
60
|
def __init__(self,
|
67
|
-
|
68
|
-
name:str='',
|
69
|
-
std:bool=False,
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
61
|
+
molecule: str | Chem.Mol | Conf | None = None,
|
62
|
+
name: str = '',
|
63
|
+
std: bool = False,
|
64
|
+
reset_isotope: bool = True,
|
65
|
+
remove_H: bool = True,
|
66
|
+
max_workers: int = 1,
|
67
|
+
chunksize: int = 4,
|
68
|
+
progress: bool = False) -> None:
|
69
|
+
"""Initialize.
|
74
70
|
|
75
71
|
Examples:
|
76
|
-
>>> import
|
77
|
-
>>> m =
|
72
|
+
>>> from rdworks import Mol
|
73
|
+
>>> m = Mol('c1ccccc1', name='benzene')
|
78
74
|
|
79
75
|
Args:
|
80
|
-
|
81
|
-
name (str
|
82
|
-
std (bool
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
TypeError: No SMILES or rdkit.Chem.Mol object is provided.
|
87
|
-
RuntimeError: Desalting or standardization process failed.
|
76
|
+
molecule (str | Chem.Mol | None): SMILES or rdkit.Chem.Mol or None
|
77
|
+
name (str): name of the molecule. Defaults to ''.
|
78
|
+
std (bool): whether to standardize the molecule. Defaults to False.
|
79
|
+
max_workers (int): number of maximum workers for parallelization. Defaults to 1.
|
80
|
+
chunksize (int): batch size for parallelization. Defaults to 4.
|
81
|
+
progress (bool): whether to show progress bar. Defaults to False.
|
88
82
|
"""
|
89
|
-
|
90
|
-
|
91
|
-
self.
|
92
|
-
self.
|
83
|
+
assert isinstance(molecule, str | Chem.Mol | Conf) or molecule is None
|
84
|
+
|
85
|
+
self.rdmol = None # 2D, one and only one Conformer
|
86
|
+
self.smiles = '' # isomeric SMILES
|
87
|
+
self.confs = [] # container for 3D conformers
|
88
|
+
self.name = ''
|
89
|
+
self.InChIKey = '' # 27 characters (SHA-256 hash of InChI)
|
90
|
+
self.InChI = ''
|
93
91
|
self.props = {}
|
94
|
-
self.confs = [] # 3D conformers (iterable)
|
95
92
|
self.fp = None
|
96
93
|
self.max_workers = max_workers
|
97
94
|
self.chunksize = chunksize
|
98
95
|
self.progress = progress
|
99
|
-
|
100
|
-
if
|
96
|
+
|
97
|
+
if molecule is None:
|
98
|
+
return
|
99
|
+
|
100
|
+
if isinstance(molecule, str): # 1-D SMILES
|
101
101
|
try:
|
102
|
-
|
103
|
-
|
104
|
-
|
102
|
+
if "." in molecule: # mandatory desalting
|
103
|
+
(self.smiles, self.rdmol) = desalt_smiles(molecule)
|
104
|
+
else:
|
105
|
+
self.rdmol = Chem.MolFromSmiles(molecule)
|
106
|
+
self.smiles = Chem.MolToSmiles(self.rdmol)
|
105
107
|
except:
|
106
|
-
raise ValueError(f'Mol()
|
107
|
-
|
108
|
+
raise ValueError(f'Mol() Error: invalid SMILES {molecule}')
|
109
|
+
|
110
|
+
elif isinstance(molecule, Chem.Mol): # 2-D or 3-D Chem.Mol
|
108
111
|
try:
|
109
|
-
self.rdmol =
|
110
|
-
assert self.rdmol
|
112
|
+
self.rdmol, _ = clean_2d(molecule, reset_isotope, remove_H)
|
111
113
|
self.smiles = Chem.MolToSmiles(self.rdmol)
|
114
|
+
self.confs = [Conf(x) for x in _]
|
112
115
|
except:
|
113
|
-
raise ValueError('Mol()
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
### desalting
|
118
|
-
if "." in self.smiles:
|
116
|
+
raise ValueError(f'Mol() Error: invalid Chem.Mol object')
|
117
|
+
|
118
|
+
elif isinstance(molecule, Conf): # 3-D input
|
119
119
|
try:
|
120
|
-
|
121
|
-
|
122
|
-
|
120
|
+
self.rdmol, _ = clean_2d(molecule.rdmol, reset_isotope, remove_H)
|
121
|
+
self.smiles = Chem.MolToSmiles(self.rdmol)
|
122
|
+
self.confs = [molecule]
|
123
123
|
except:
|
124
|
-
raise
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
# standardization changes self.rdmol
|
129
|
-
try:
|
124
|
+
raise ValueError(f'Mol() Error: invalid Conf object')
|
125
|
+
|
126
|
+
try:
|
127
|
+
if std:
|
130
128
|
self.rdmol = standardize(self.rdmol)
|
131
129
|
self.smiles = Chem.MolToSmiles(self.rdmol)
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
130
|
+
except:
|
131
|
+
raise RuntimeError('Mol() Error: standardization')
|
132
|
+
|
133
|
+
assert self.smiles and self.rdmol, "Mol() Error: invalid molecule"
|
134
|
+
|
135
|
+
rdDepictor.Compute2DCoords(self.rdmol)
|
136
|
+
|
138
137
|
try:
|
139
138
|
self.name = str(name)
|
140
139
|
except:
|
141
140
|
self.name = 'untitled'
|
141
|
+
|
142
142
|
self.rdmol.SetProp('_Name', self.name) # _Name can't be None
|
143
|
-
|
144
|
-
|
143
|
+
self.InChI = Chem.MolToInchi(self.rdmol)
|
144
|
+
self.InChIKey = inchi.InchiToInchiKey(self.InChI)
|
145
145
|
self.props.update({
|
146
146
|
'aka' : [], # <-- to be set by MolLibr.unique()
|
147
|
-
'atoms' : self.rdmol.GetNumAtoms(),
|
148
|
-
# hydrogens not excluded
|
149
|
-
# m = Chem.MolFromSmiles("c1c[nH]cc1")
|
150
|
-
# m.GetNumAtoms()
|
151
|
-
# >> 5
|
152
|
-
# Chem.AddHs(m).GetNumAtoms()
|
153
|
-
# >> 10
|
147
|
+
'atoms' : self.rdmol.GetNumAtoms(), # hydrogens not excluded?
|
154
148
|
'charge': rdmolops.GetFormalCharge(self.rdmol),
|
155
|
-
# number of rotatable bonds
|
156
149
|
"nrb" : Descriptors.NumRotatableBonds(self.rdmol),
|
157
150
|
})
|
158
|
-
|
151
|
+
|
159
152
|
|
160
153
|
def __str__(self) -> str:
|
161
154
|
"""String representation of the molecule.
|
@@ -187,21 +180,21 @@ class Mol:
|
|
187
180
|
return hash(self.smiles)
|
188
181
|
|
189
182
|
|
190
|
-
def __eq__(self, other:
|
191
|
-
"""True if `other`
|
183
|
+
def __eq__(self, other: Self) -> bool:
|
184
|
+
"""True if `other` Mol is identical with this Mol.
|
192
185
|
|
193
|
-
It compares
|
186
|
+
It compares InChIKeys.
|
194
187
|
|
195
188
|
Examples:
|
196
189
|
>>> m1 == m2
|
197
190
|
|
198
191
|
Args:
|
199
|
-
other (object): other
|
192
|
+
other (object): other Mol object.
|
200
193
|
|
201
194
|
Returns:
|
202
195
|
bool: True if identical.
|
203
196
|
"""
|
204
|
-
return self.
|
197
|
+
return self.InChIKey == other.InChIKey
|
205
198
|
|
206
199
|
|
207
200
|
def __iter__(self) -> Iterator:
|
@@ -226,7 +219,7 @@ class Mol:
|
|
226
219
|
return next(self.confs)
|
227
220
|
|
228
221
|
|
229
|
-
def __getitem__(self, index: int | slice) -> Conf:
|
222
|
+
def __getitem__(self, index: int | slice) -> Conf | Self:
|
230
223
|
"""Conformer object of conformers of the molecule with given index or slice of indexes.
|
231
224
|
|
232
225
|
Examples:
|
@@ -235,31 +228,34 @@ class Mol:
|
|
235
228
|
Args:
|
236
229
|
index (int | slice): index for conformers.
|
237
230
|
|
238
|
-
Raises:
|
239
|
-
ValueError: conformers are not defined in the molecule or index is out of range.
|
240
|
-
|
241
231
|
Returns:
|
242
|
-
Conf
|
232
|
+
Conf or Mol(copy) with conformers specified by index.
|
243
233
|
"""
|
244
|
-
|
245
|
-
raise ValueError(f"no conformers")
|
246
|
-
try:
|
247
|
-
return self.confs[index]
|
248
|
-
except:
|
249
|
-
raise ValueError(f"index should be 0..{self.count()-1}")
|
234
|
+
assert self.count() > 0, "no conformers"
|
250
235
|
|
236
|
+
if isinstance(index, slice):
|
237
|
+
new_object = self.copy()
|
238
|
+
new_object.confs = new_object.confs[index]
|
239
|
+
return new_object
|
240
|
+
|
241
|
+
else:
|
242
|
+
return self.confs[index]
|
251
243
|
|
244
|
+
|
252
245
|
def copy(self) -> Self:
|
253
246
|
"""Returns a copy of self.
|
254
247
|
|
255
248
|
Returns:
|
256
|
-
|
249
|
+
a copy of self.
|
257
250
|
"""
|
258
251
|
return copy.deepcopy(self)
|
259
252
|
|
260
253
|
|
261
|
-
def rename(self,
|
262
|
-
|
254
|
+
def rename(self,
|
255
|
+
prefix: str = '',
|
256
|
+
sep: str = '/',
|
257
|
+
start: int = 1) -> Self:
|
258
|
+
"""Updates name and conformer names.
|
263
259
|
|
264
260
|
The first conformer name is {prefix}{sep}{start}
|
265
261
|
|
@@ -269,11 +265,12 @@ class Mol:
|
|
269
265
|
start (int, optional): first serial number. Defaults to 1.
|
270
266
|
|
271
267
|
Returns:
|
272
|
-
Self:
|
268
|
+
Self: modified self.
|
273
269
|
"""
|
274
270
|
if prefix :
|
275
271
|
self.name = prefix
|
276
272
|
self.rdmol.SetProp('_Name', prefix)
|
273
|
+
|
277
274
|
# update conformer names
|
278
275
|
num_digits = len(str(self.count())) # ex. '100' -> 3
|
279
276
|
for (serial, conf) in enumerate(self.confs, start=start):
|
@@ -281,11 +278,13 @@ class Mol:
|
|
281
278
|
while len(serial_str) < num_digits:
|
282
279
|
serial_str = '0' + serial_str
|
283
280
|
conf.rename(f'{self.name}{sep}{serial_str}')
|
281
|
+
|
284
282
|
return self
|
285
283
|
|
286
284
|
|
287
|
-
def qed(self,
|
288
|
-
|
285
|
+
def qed(self,
|
286
|
+
properties: list[str] = ['QED', 'MolWt', 'LogP', 'TPSA', 'HBD']) -> Self:
|
287
|
+
"""Updates quantitative estimate of drug-likeness (QED) and other descriptors.
|
289
288
|
|
290
289
|
Args:
|
291
290
|
properties (list[str], optional): Defaults to ['QED', 'MolWt', 'LogP', 'TPSA', 'HBD'].
|
@@ -294,53 +293,54 @@ class Mol:
|
|
294
293
|
KeyError: if property key is unknown.
|
295
294
|
|
296
295
|
Returns:
|
297
|
-
Self:
|
296
|
+
Self: modified self.
|
298
297
|
"""
|
299
298
|
props_dict = {}
|
300
299
|
for k in properties:
|
301
300
|
try:
|
302
301
|
props_dict[k] = rd_descriptor_f[k](self.rdmol)
|
303
302
|
except:
|
304
|
-
raise KeyError(f'
|
303
|
+
raise KeyError(f'qed() Error: unknown property {k}')
|
305
304
|
self.props.update(props_dict)
|
305
|
+
|
306
306
|
return self
|
307
307
|
|
308
308
|
|
309
309
|
def remove_stereo(self) -> Self:
|
310
|
-
"""Removes stereochemistry
|
310
|
+
"""Removes stereochemistry.
|
311
311
|
|
312
312
|
Examples:
|
313
|
-
>>> m =
|
313
|
+
>>> m = Mol("C/C=C/C=C\\C", "double_bond")
|
314
314
|
>>> m.remove_stereo().smiles == "CC=CC=CC"
|
315
315
|
|
316
316
|
Returns:
|
317
|
-
Self:
|
317
|
+
Self: modified self.
|
318
318
|
"""
|
319
|
-
obj = copy.deepcopy(self)
|
320
319
|
# keep the original stereo info. for ring double bond
|
321
|
-
Chem.RemoveStereochemistry(
|
322
|
-
Chem.AssignStereochemistry(
|
320
|
+
Chem.RemoveStereochemistry(self.rdmol)
|
321
|
+
Chem.AssignStereochemistry(self.rdmol,
|
323
322
|
cleanIt=False,
|
324
323
|
force=False,
|
325
324
|
flagPossibleStereoCenters=False)
|
326
|
-
|
327
|
-
|
325
|
+
self.smiles = Chem.MolToSmiles(self.rdmol)
|
326
|
+
|
327
|
+
return self
|
328
328
|
|
329
329
|
|
330
330
|
def make_confs(self,
|
331
331
|
n:int = 50,
|
332
|
-
method:str = '
|
333
|
-
calculator:str | Callable = 'MMFF94'
|
332
|
+
method:str = 'ETKDG',
|
333
|
+
calculator:str | Callable = 'MMFF94',
|
334
|
+
) -> Self:
|
334
335
|
"""Generates 3D conformers.
|
335
336
|
|
336
337
|
Args:
|
337
338
|
n (int, optional): number of conformers to generate. Defaults to 50.
|
338
339
|
method (str, optional): conformer generation method.
|
339
|
-
Choices are `
|
340
|
-
Defaults to 'RDKit_ETKDG'.
|
340
|
+
Choices are `ETKDG`, `CONFORGE`. Defaults to 'ETKDG'.
|
341
341
|
|
342
342
|
Returns:
|
343
|
-
Self:
|
343
|
+
Self: modified self.
|
344
344
|
|
345
345
|
Reference:
|
346
346
|
T. Seidel, C. Permann, O. Wieder, S. M. Kohlbacher, T. Langer,
|
@@ -355,9 +355,9 @@ class Mol:
|
|
355
355
|
|
356
356
|
self.confs = []
|
357
357
|
|
358
|
-
if method.upper() == '
|
358
|
+
if method.upper() == 'ETKDG':
|
359
359
|
rdmol_H = Chem.AddHs(self.rdmol, addCoords=True) # returns a copy with hydrogens added
|
360
|
-
conf_ids = rdDistGeom.EmbedMultipleConfs(rdmol_H, n, params=self.ETKDG_params)
|
360
|
+
conf_ids = rdDistGeom.EmbedMultipleConfs(rdmol_H, numConfs=n, params=self.ETKDG_params)
|
361
361
|
for rdConformer in rdmol_H.GetConformers():
|
362
362
|
# number of atoms should match with conformer(s)
|
363
363
|
rdmol_conf = Chem.Mol(rdmol_H)
|
@@ -366,7 +366,7 @@ class Mol:
|
|
366
366
|
conf = Conf(rdmol_conf)
|
367
367
|
self.confs.append(conf)
|
368
368
|
|
369
|
-
elif method.upper() == '
|
369
|
+
elif method.upper() == 'CONFORGE':
|
370
370
|
with tempfile.NamedTemporaryFile() as tmpfile:
|
371
371
|
mol = CDPL.Chem.parseSMILES(self.smiles)
|
372
372
|
# create and initialize an instance of the class ConfGen.ConformerGenerator which
|
@@ -415,7 +415,7 @@ class Mol:
|
|
415
415
|
|
416
416
|
# energy evaluations for ranking
|
417
417
|
for conf in self.confs:
|
418
|
-
conf.
|
418
|
+
conf.potential_energy(calculator) # default: MMFF94
|
419
419
|
|
420
420
|
# set relative energy, E_rel(kcal/mol)
|
421
421
|
sort_by = 'E_tot(kcal/mol)'
|
@@ -424,50 +424,97 @@ class Mol:
|
|
424
424
|
for conf in self.confs:
|
425
425
|
conf.props.update({"E_rel(kcal/mol)": conf.props[sort_by] - lowest_energy})
|
426
426
|
|
427
|
-
|
427
|
+
self = self.rename()
|
428
|
+
|
429
|
+
return self
|
428
430
|
|
429
431
|
|
430
|
-
def
|
431
|
-
|
432
|
+
def optimize_confs(self,
|
433
|
+
calculator: str | Callable = 'MMFF94',
|
434
|
+
fmax: float = 0.05,
|
435
|
+
max_iter: int = 1000,
|
436
|
+
) -> Self:
|
437
|
+
"""Optimizes 3D geometry of conformers.
|
432
438
|
|
433
439
|
Args:
|
434
|
-
calculator (str | Callable):
|
435
|
-
|
440
|
+
calculator (str | Callable): MMFF94 (= MMFF), MMFF94s, UFF, or ASE calculator.
|
441
|
+
`MMFF94` or `MMFF` - Intended for general use, including organic molecules and proteins,
|
442
|
+
and primarily relies on data from quantum mechanical calculations.
|
443
|
+
It's often used in molecular dynamics simulations.
|
444
|
+
`MMFF94s` - A "static" variant of MMFF94, with adjusted parameters for out-of-plane
|
445
|
+
bending and dihedral torsions to favor planar geometries for specific nitrogen atoms.
|
446
|
+
This makes it better suited for geometry optimization studies where a static,
|
447
|
+
time-averaged structure is desired. The "s" stands for "static".
|
448
|
+
`UFF` - UFF refers to the "Universal Force Field," a force field model used for
|
449
|
+
molecular mechanics calculations. It's a tool for geometry optimization,
|
450
|
+
energy minimization, and exploring molecular conformations in 3D space.
|
451
|
+
UFF is often used to refine conformers generated by other methods,
|
452
|
+
such as random conformer generation, to produce more physically plausible
|
453
|
+
and stable structures.
|
454
|
+
fmax (float, optional): fmax for the calculator convergence. Defaults to 0.05.
|
455
|
+
max_iter (int, optional): max iterations for the calculator. Defaults to 1000.
|
436
456
|
|
437
457
|
Returns:
|
438
|
-
Self:
|
458
|
+
Self: modified self.
|
439
459
|
"""
|
440
|
-
self.confs = [ conf.optimize(calculator, fmax) for conf in self.confs ]
|
460
|
+
self.confs = [ conf.optimize(calculator, fmax, max_iter) for conf in self.confs ]
|
461
|
+
|
441
462
|
return self
|
442
463
|
|
443
464
|
|
444
|
-
def sort_confs(self) -> Self:
|
445
|
-
"""Sorts
|
465
|
+
def sort_confs(self, calculator: str | Callable | None = None) -> Self:
|
466
|
+
"""Sorts by `E_tot(kcal/mol)` or `E_tot(eV)` and sets `E_rel(kcal/mol)`.
|
446
467
|
|
468
|
+
Args:
|
469
|
+
calculator (str | Callable | None): MMFF94 (= MMFF), MMFF94s, UFF, or ASE calculator.
|
470
|
+
`MMFF94` or `MMFF` - Intended for general use, including organic molecules and proteins,
|
471
|
+
and primarily relies on data from quantum mechanical calculations.
|
472
|
+
It's often used in molecular dynamics simulations.
|
473
|
+
`MMFF94s` - A "static" variant of MMFF94, with adjusted parameters for out-of-plane
|
474
|
+
bending and dihedral torsions to favor planar geometries for specific nitrogen atoms.
|
475
|
+
This makes it better suited for geometry optimization studies where a static,
|
476
|
+
time-averaged structure is desired. The "s" stands for "static".
|
477
|
+
`UFF` - UFF refers to the "Universal Force Field," a force field model used for
|
478
|
+
molecular mechanics calculations. It's a tool for geometry optimization,
|
479
|
+
energy minimization, and exploring molecular conformations in 3D space.
|
480
|
+
UFF is often used to refine conformers generated by other methods,
|
481
|
+
such as random conformer generation, to produce more physically plausible
|
482
|
+
and stable structures.
|
447
483
|
Raises:
|
448
484
|
KeyError: if `E_tot(eV)` or `E_tot(kcal/mol)` is not defined.
|
449
485
|
|
450
486
|
Returns:
|
451
|
-
Self:
|
487
|
+
Self: modified self.
|
452
488
|
"""
|
453
|
-
if
|
454
|
-
|
455
|
-
|
456
|
-
|
489
|
+
if calculator is not None:
|
490
|
+
# re-calculate potential energies
|
491
|
+
for conf in self.confs:
|
492
|
+
PE = conf.potential_energy(calculator=calculator) # sets `E_tot(kcal/mol)`
|
493
|
+
|
494
|
+
if all(['E_tot(kcal/mol)' in conf.props for conf in self.confs]):
|
457
495
|
sort_by = 'E_tot(kcal/mol)'
|
458
496
|
conversion = 1.0
|
497
|
+
|
498
|
+
elif all(['E_tot(eV)' in conf.props for conf in self.confs]):
|
499
|
+
sort_by = 'E_tot(eV)'
|
500
|
+
conversion = ev2kcalpermol # eV to kcal/mol
|
501
|
+
|
459
502
|
else:
|
460
|
-
raise KeyError(f'
|
461
|
-
|
503
|
+
raise KeyError(f'sort_confs() requires `E_tot(eV)` or `E_tot(kcal/mol)` property')
|
504
|
+
|
505
|
+
# ascending order
|
506
|
+
self.confs = sorted(self.confs, key=lambda c: c.props[sort_by])
|
507
|
+
|
462
508
|
if self.count() > 0:
|
463
|
-
|
509
|
+
E_min = self.confs[0].props[sort_by]
|
464
510
|
for conf in self.confs:
|
465
|
-
E_rel = (conf.props[sort_by] -
|
511
|
+
E_rel = conversion * (conf.props[sort_by] - E_min)
|
466
512
|
conf.props.update({"E_rel(kcal/mol)": E_rel})
|
513
|
+
|
467
514
|
return self
|
468
515
|
|
469
516
|
|
470
|
-
def align_confs(self, method:str='rigid_fragment') -> Self:
|
517
|
+
def align_confs(self, method: str = 'rigid_fragment') -> Self:
|
471
518
|
"""Aligns all conformers to the first conformer.
|
472
519
|
|
473
520
|
Args:
|
@@ -476,7 +523,7 @@ class Mol:
|
|
476
523
|
Defaults to `rigid_fragment`.
|
477
524
|
|
478
525
|
Returns:
|
479
|
-
Self:
|
526
|
+
Self: modified self.
|
480
527
|
"""
|
481
528
|
|
482
529
|
if self.count() < 2: # nothing to do
|
@@ -527,7 +574,10 @@ class Mol:
|
|
527
574
|
return self
|
528
575
|
|
529
576
|
|
530
|
-
def cluster_confs(self,
|
577
|
+
def cluster_confs(self,
|
578
|
+
method: str = 'QT',
|
579
|
+
threshold: float = 1.0,
|
580
|
+
sort: str = 'size') -> Self:
|
531
581
|
"""Clusters all conformers and sets cluster properties.
|
532
582
|
|
533
583
|
Following cluster properties will be added: `cluster`, `cluster_mean_energy`,
|
@@ -546,14 +596,14 @@ class Mol:
|
|
546
596
|
`AutoGraph`.
|
547
597
|
Defaults to `QT`.
|
548
598
|
threshold (float, optional): RMSD threshold of a cluster. Defaults to 1.0.
|
549
|
-
|
599
|
+
sort (str, optional): sort cluster(s) by mean `energy` or cluster `size`.
|
550
600
|
Defaults to `size`.
|
551
601
|
|
552
602
|
Raises:
|
553
603
|
NotImplementedError: if unsupported method is requested.
|
554
604
|
|
555
605
|
Returns:
|
556
|
-
Self:
|
606
|
+
Self: modified self.
|
557
607
|
"""
|
558
608
|
if method != 'DQT': # rmsd of x,y,z coordinates (non-H)
|
559
609
|
conf_rdmols_noH = [Chem.RemoveHs(Chem.Mol(conf.rdmol)) for conf in self.confs]
|
@@ -572,7 +622,7 @@ class Mol:
|
|
572
622
|
torsions = []
|
573
623
|
for conf in self.confs:
|
574
624
|
t_radians = []
|
575
|
-
for (i, j, k, l
|
625
|
+
for torsion_key, (i, j, k, l) in torsion_atom_indices.items():
|
576
626
|
t_radians.append(
|
577
627
|
rdMolTransforms.GetDihedralRad(conf.rdmol.GetConformer(), i, j, k, l))
|
578
628
|
torsions.append(np.array(t_radians))
|
@@ -664,14 +714,14 @@ class Mol:
|
|
664
714
|
'iqr_energy' : iqr_energy,
|
665
715
|
})
|
666
716
|
# sort cluster index
|
667
|
-
if
|
717
|
+
if sort == 'size':
|
668
718
|
cluster_list = sorted(cluster_list, key=lambda x: x['size'], reverse=True)
|
669
719
|
|
670
|
-
elif
|
720
|
+
elif sort == 'energy':
|
671
721
|
cluster_list = sorted(cluster_list, key=lambda x: x['median_energy'], reverse=False)
|
672
722
|
|
673
723
|
else:
|
674
|
-
raise NotImplementedError(f'{
|
724
|
+
raise NotImplementedError(f'{sort} is not implemented yet.')
|
675
725
|
|
676
726
|
for cluster_idx, cluster_dict in enumerate(cluster_list, start=1):
|
677
727
|
for conf_idx in cluster_dict['confs']:
|
@@ -697,10 +747,10 @@ class Mol:
|
|
697
747
|
|
698
748
|
|
699
749
|
def drop_confs(self,
|
700
|
-
stereo_flipped:bool=True,
|
701
|
-
unconverged:bool=True,
|
750
|
+
stereo_flipped: bool = True,
|
751
|
+
unconverged: bool = True,
|
702
752
|
similar: bool | None = None,
|
703
|
-
similar_rmsd:float=0.3,
|
753
|
+
similar_rmsd: float = 0.3,
|
704
754
|
cluster: bool | None =None,
|
705
755
|
k: int | None = None,
|
706
756
|
window: float | None = None,
|
@@ -718,39 +768,46 @@ class Mol:
|
|
718
768
|
k (int, optional): drop all except for `k` lowest energy conformers.
|
719
769
|
window (float, optional): drop all except for conformers within `window` of relative energy.
|
720
770
|
|
721
|
-
Returns:
|
722
|
-
Self: a copy of rdworks.Mol object.
|
723
|
-
|
724
771
|
Examples:
|
725
772
|
To drop similar conformers within rmsd of 0.5 A
|
726
773
|
>>> mol.drop_confs(similar=True, similar_rmsd=0.5)
|
727
774
|
|
728
775
|
To drop conformers beyond 5 kcal/mol
|
729
776
|
>>> mol.drop_confs(window=5.0)
|
730
|
-
|
777
|
+
|
778
|
+
Returns:
|
779
|
+
Self: modified self.
|
731
780
|
"""
|
732
|
-
|
781
|
+
|
782
|
+
reasons = [f'stereo flipped',
|
783
|
+
f'unconverged',
|
784
|
+
f'similar({similar_rmsd})',
|
785
|
+
f'cluster(non-centroid)',
|
786
|
+
f'k and/or energy window',
|
787
|
+
]
|
788
|
+
|
789
|
+
w = max([len(s) for s in reasons])
|
733
790
|
|
734
|
-
if stereo_flipped and
|
735
|
-
mask = [Chem.MolToSmiles(Chem.RemoveHs(_.rdmol)) ==
|
736
|
-
|
791
|
+
if stereo_flipped and self.count() > 0:
|
792
|
+
mask = [Chem.MolToSmiles(Chem.RemoveHs(_.rdmol)) == self.smiles for _ in self.confs]
|
793
|
+
self.confs = list(itertools.compress(self.confs, mask))
|
737
794
|
if verbose:
|
738
|
-
main_logger.info(f'drop_confs
|
795
|
+
main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[0]:<{w}} -> {self.count()}')
|
739
796
|
|
740
|
-
if unconverged and
|
741
|
-
mask = [_.props['Converged'] if 'Converged' in _.props else True for _ in
|
742
|
-
|
797
|
+
if unconverged and self.count() > 0:
|
798
|
+
mask = [_.props['Converged'] if 'Converged' in _.props else True for _ in self.confs]
|
799
|
+
self.confs = list(itertools.compress(self.confs, mask))
|
743
800
|
if verbose:
|
744
|
-
main_logger.info(f'drop_confs
|
801
|
+
main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[1]:<{w}} -> {self.count()}')
|
745
802
|
|
746
|
-
if similar and
|
803
|
+
if similar and self.count() > 1:
|
747
804
|
# it is observed that there are essentially identical conformers
|
748
805
|
# such as 180-degree ring rotation and there is not minor conformational variations
|
749
806
|
# in the RDKit ETKDG generated conformers.
|
750
|
-
conf_rdmols_noH = [Chem.RemoveHs(Chem.Mol(_.rdmol)) for _ in
|
807
|
+
conf_rdmols_noH = [Chem.RemoveHs(Chem.Mol(_.rdmol)) for _ in self.confs]
|
751
808
|
# copies are made for rmsd calculations to prevent coordinates changes
|
752
809
|
lower_triangle_values = []
|
753
|
-
for i in range(
|
810
|
+
for i in range(self.count()): # number of conformers
|
754
811
|
for j in range(i):
|
755
812
|
# rdMolAlign.GetBestRMS takes symmetry into account
|
756
813
|
# removed hydrogens to speed up
|
@@ -758,10 +815,10 @@ class Mol:
|
|
758
815
|
lower_triangle_values.append(best_rms)
|
759
816
|
symm_matrix = convert_tril_to_symm(lower_triangle_values)
|
760
817
|
cluster_assignment, centroid_indices = QT(symm_matrix, similar_rmsd)
|
761
|
-
mask = [conf_idx in centroid_indices for conf_idx, conf in enumerate(
|
762
|
-
|
818
|
+
mask = [conf_idx in centroid_indices for conf_idx, conf in enumerate(self.confs)]
|
819
|
+
self.confs = list(itertools.compress(self.confs, mask))
|
763
820
|
if verbose:
|
764
|
-
main_logger.info(f'drop_confs
|
821
|
+
main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[2]:<{w}} -> {self.count()}')
|
765
822
|
|
766
823
|
# note: it will retain the conformers with lower index
|
767
824
|
# so, it should be sorted before dropping
|
@@ -781,29 +838,29 @@ class Mol:
|
|
781
838
|
# retained_confs.append(Chem.RemoveHs(conf_i.rdmol)) # store a copy of H-removed rdmol
|
782
839
|
# obj.confs = list(itertools.compress(obj.confs, mask))
|
783
840
|
|
784
|
-
if cluster and
|
841
|
+
if cluster and self.count() > 1:
|
785
842
|
# drop non-centroid cluster member(s)
|
786
|
-
mask = [_.props['centroid'] if 'centroid' in _.props else True for _ in
|
787
|
-
|
843
|
+
mask = [_.props['centroid'] if 'centroid' in _.props else True for _ in self.confs]
|
844
|
+
self.confs = list(itertools.compress(self.confs, mask))
|
788
845
|
if verbose:
|
789
|
-
main_logger.info(f'drop_confs
|
846
|
+
main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[3]:<{w}} -> {self.count()}')
|
790
847
|
|
791
|
-
if (k or window) and
|
848
|
+
if (k or window) and self.count() > 0:
|
792
849
|
if k:
|
793
|
-
mask_k = [i < k for i,_ in enumerate(
|
850
|
+
mask_k = [i < k for i,_ in enumerate(self.confs)]
|
794
851
|
else:
|
795
|
-
mask_k = [True,] *
|
852
|
+
mask_k = [True,] * self.count()
|
796
853
|
if window:
|
797
|
-
mask_window = [_.props['E_rel(kcal/mol)'] < window if 'E_rel(kcal/mol)' in _.props else True for _ in
|
854
|
+
mask_window = [_.props['E_rel(kcal/mol)'] < window if 'E_rel(kcal/mol)' in _.props else True for _ in self.confs]
|
798
855
|
else:
|
799
|
-
mask_window = [True,] *
|
856
|
+
mask_window = [True,] * self.count()
|
800
857
|
# retain conformer(s) that satisfy both k and window conditions
|
801
858
|
mask = [(x and y) for (x,y) in zip(mask_k, mask_window)]
|
802
|
-
|
859
|
+
self.confs = list(itertools.compress(self.confs, mask))
|
803
860
|
if verbose:
|
804
|
-
main_logger.info(f'drop_confs
|
861
|
+
main_logger.info(f'drop_confs() {mask.count(False):3d} {reasons[4]:<{w}} -> {self.count()}')
|
805
862
|
|
806
|
-
return
|
863
|
+
return self
|
807
864
|
|
808
865
|
|
809
866
|
def count(self) -> int:
|
@@ -815,7 +872,7 @@ class Mol:
|
|
815
872
|
return len(self.confs)
|
816
873
|
|
817
874
|
|
818
|
-
def
|
875
|
+
def nnp_ready(self, model: str = 'aimnet2') -> bool:
|
819
876
|
"""Check if a particular neural network model is applicable to current molecule.
|
820
877
|
|
821
878
|
Args:
|
@@ -833,19 +890,18 @@ class Mol:
|
|
833
890
|
# H, C, N, O, F, S, Cl
|
834
891
|
atomic_numbers = [1, 6, 7, 8, 9, 16, 17 ]
|
835
892
|
|
836
|
-
elif model in ['aimnet', 'aimnet2']:
|
893
|
+
elif model.lower() in ['aimnet', 'aimnet2']:
|
837
894
|
# H, B, C, N, O, F, Si, P, S, Cl, As, Se, Br, I
|
838
895
|
atomic_numbers = [1, 5, 6, 7, 8, 9, 14, 15, 16, 17, 33, 34, 35, 53 ]
|
839
896
|
|
840
897
|
else:
|
841
|
-
raise ValueError('
|
898
|
+
raise ValueError('nnp_ready() supports ANI-2x, ANI-2xt, AIMNet, or AIMNet2')
|
842
899
|
|
843
|
-
for a in self.rdmol.GetAtoms():
|
844
|
-
|
845
|
-
|
900
|
+
if all([ a.GetAtomicNum() in atomic_numbers for a in self.rdmol.GetAtoms() ]):
|
901
|
+
return True
|
902
|
+
else:
|
903
|
+
return False
|
846
904
|
|
847
|
-
return True
|
848
|
-
|
849
905
|
|
850
906
|
def charge(self) -> int:
|
851
907
|
"""Returns molecular formal charge
|
@@ -862,7 +918,7 @@ class Mol:
|
|
862
918
|
Returns:
|
863
919
|
list: list of element symbols.
|
864
920
|
"""
|
865
|
-
return [
|
921
|
+
return [atom.GetSymbol() for atom in self.rdmol.GetAtoms()]
|
866
922
|
|
867
923
|
|
868
924
|
def numbers(self) -> list[int]:
|
@@ -871,111 +927,19 @@ class Mol:
|
|
871
927
|
Returns:
|
872
928
|
list: list of atomic numbers.
|
873
929
|
"""
|
874
|
-
return [
|
930
|
+
return [atom.GetAtomicNum() for atom in self.rdmol.GetAtoms()]
|
875
931
|
|
876
932
|
|
877
|
-
def torsion_atoms(self, strict:bool=True) ->
|
878
|
-
"""Determine dihedral angle atoms (
|
933
|
+
def torsion_atoms(self, strict: bool = True) -> dict[int, tuple]:
    """Determine torsion/dihedral angle atoms (i-j-k-l) for each rotatable bond (j-k).

    The torsions are obtained from ``get_torsion_atoms(self.rdmol, strict)``.
    Only the first four items of every returned entry are kept, and each
    entry is keyed by its position (0, 1, 2, ...) in that sequence.

    Args:
        strict (bool): whether to exclude amide/imide/ester/acid bonds.

    Returns:
        {torsion_key: (i, j, k, l), ...,}
    """
    torsions = get_torsion_atoms(self.rdmol, strict)
    return {key: entry[:4] for key, entry in enumerate(torsions)}
|
979
943
|
|
980
944
|
|
981
945
|
def compute(self, **kwargs) -> Self:
|
@@ -987,147 +951,152 @@ class Mol:
|
|
987
951
|
progress (bool): whether to show progress bar.
|
988
952
|
|
989
953
|
Returns:
|
990
|
-
Self:
|
954
|
+
Self: modified self.
|
991
955
|
"""
|
992
956
|
self.max_workers = kwargs.get('max_workers', self.max_workers)
|
993
957
|
self.chunksize = kwargs.get('chunksize', self.chunksize)
|
994
958
|
self.progress = kwargs.get('progress', self.progress)
|
959
|
+
|
995
960
|
return self
|
996
961
|
|
997
962
|
|
998
|
-
@staticmethod
|
999
|
-
def _map_optimize_conf(conf:Conf, targs:tuple) -> Conf:
|
1000
|
-
"""A map function to apply Conf.optimize() on `conf`.
|
1001
|
-
|
1002
|
-
The default behavior of map() is to pass the elements of the iterable to the function by reference.
|
1003
|
-
This means that if the function modifies the elements of the iterable,
|
1004
|
-
those changes will be reflected in the iterable itself.
|
1005
|
-
|
1006
|
-
Args:
|
1007
|
-
conf (Conf): subject rdworks.Conf object.
|
1008
|
-
targs (tuple): tuple of arguments to be passed to Conf.optimize().
|
1009
|
-
|
1010
|
-
Returns:
|
1011
|
-
Conf: rdworks.Conf object
|
1012
|
-
"""
|
1013
|
-
return conf.optimize(*targs)
|
1014
|
-
|
1015
|
-
|
1016
963
|
def torsion_energies(self,
                     calculator: str | Callable,
                     torsion_key: int | None = None,
                     simplify: bool = True,
                     fmax: float = 0.05,
                     interval: float = 20.0,
                     use_converged_only: bool = True,
                     **kwargs,
                     ) -> Self:
    """Calculates potential energy profiles for each torsion angle using ASE optimizer.

    It uses the first conformer as a reference. Every selected torsion is
    set to each angle in [-180, 180) in steps of `interval`, the geometry is
    passed to Conf.optimize(), and the resulting profile is stored in
    self.props['torsion'][torsion_key]. The calculator is recorded as a
    string in self.props['torsion_calculator'].

    Args:
        calculator (str | Callable): 'MMFF', 'UFF', or ASE calculator.
        torsion_key (int | None): torsion index to calculate. Defaults to None (all).
        simplify (bool, optional): whether to use fragment surrogate. Defaults to True.
        fmax (float, optional): fmax of ASE optimizer. Defaults to 0.05.
        interval (float, optional): interval of torsion angles in degree. Defaults to 20.0.
        use_converged_only (bool, optional): whether to use only converged data. Defaults to True.
        **kwargs: forwarded to both self.compute() and Conf.optimize().

    Returns:
        Self: modified self.

    Raises:
        KeyError: if `torsion_key` is given but is not a key of self.torsion_atoms().
    """
    assert self.count() > 0, "torsion_energies() requires at least one conformer"

    self = self.compute(**kwargs)

    if torsion_key is None:
        torsion_atoms_indices = self.torsion_atoms()
    else:
        torsion_atoms_indices = {torsion_key: self.torsion_atoms()[torsion_key]}

    ref_conf = self.confs[0].copy()

    data = {}

    def _record(conf) -> None:
        """Append the values that Conf.optimize() left in conf.props to `data`."""
        # conf.optimize() updates coordinates and conf.props:
        # `angle`, `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
        entry = data[conf.props['torsion_key']]
        entry['angle'].append(conf.props['angle'])
        entry['init'].append(conf.props['E_tot_init(kcal/mol)'])
        entry['last'].append(conf.props['E_tot(kcal/mol)'])
        entry['Converged'].append(conf.props['Converged'])

    if simplify:
        for tk, indices in torsion_atoms_indices.items():
            frag, frag_ijkl = create_torsion_fragment(ref_conf.rdmol, indices)
            frag_conf = Conf(frag)
            data[tk] = {'indices': indices, 'angle':[], 'init':[], 'last':[], 'Converged':[]}
            for angle in np.arange(-180.0, 180.0, interval):
                # Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
                conf = frag_conf.copy()
                conf.props.update({'torsion_key': tk, 'angle': float(angle)})
                conf.set_torsion(*frag_ijkl, angle) # atoms bonded to `l` move.
                conf = conf.optimize(calculator, fmax, **kwargs)
                _record(conf)
            frag_cleaned, _ = clean_2d(frag, reset_isotope=True, remove_H=True)
            rdDepictor.Compute2DCoords(frag_cleaned)
            # to serialize the molecule
            data[tk]['frag'] = Chem.MolToMolBlock(frag_cleaned)
            data[tk]['frag_indices'] = frag_ijkl

    else:
        # mol.confs will be populated with torsion conformers.
        # It is designed for a batch optimization in the future.
        mol = self.copy()
        mol.confs = []
        for tk, indices in torsion_atoms_indices.items():
            data[tk] = {'indices': indices, 'angle':[], 'init':[], 'last':[], 'Converged':[]}
            for angle in np.arange(-180.0, 180.0, interval):
                # Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
                x = ref_conf.copy()
                x.props.update({'torsion_key': tk, 'angle': float(angle)})
                x.set_torsion(*indices, angle) # atoms bonded to `l` move.
                mol.confs.append(x)

        # Calculate relaxation energies
        for conf in mol.confs:
            conf = conf.optimize(calculator, fmax, **kwargs)
            _record(conf)

    # Post-processing
    torsion_energy_profiles = {}
    for tk, dictdata in data.items():
        if use_converged_only:
            converged = dictdata['Converged']
            for field in ('angle', 'init', 'last'):
                dictdata[field] = list(itertools.compress(dictdata[field], converged))

        if dictdata['init']:
            # NOTE(review): the median of `last` is a constant shift that cancels
            # in `relax - np.min(relax)`, so E_rel depends on `init` only.
            # Confirm that this is the intended definition.
            relax = np.array(dictdata['init']) - np.median(dictdata['last'])
            E_rel = relax - np.min(relax)
        else:
            # np.min() raises ValueError on an empty array; when no data point
            # is left (e.g. nothing converged) store an empty profile instead.
            main_logger.warning(f'torsion_energies() no usable data for torsion {tk}')
            E_rel = np.array([])

        torsion_energy_profiles[tk] = {
            'indices'        : dictdata['indices'],
            'angle'          : np.round(np.array(dictdata['angle']), 1).tolist(), # np.ndarray -> list for serialization
            'E_rel(kcal/mol)': np.round(E_rel, 2).tolist(), # np.ndarray -> list for serialization
            'frag'           : dictdata.get('frag', None),
            'frag_indices'   : dictdata.get('frag_indices', None),
            }

    self.props['torsion'] = torsion_energy_profiles
    self.props['torsion_calculator'] = str(calculator)

    return self
|
1101
1070
|
|
1102
1071
|
|
1103
|
-
|
1104
|
-
|
1105
|
-
def similarity(self, other:object) -> float:
|
1106
|
-
"""Returns Tanimoto similarity with `other` rdworks.Mol object.
|
1072
|
+
def similarity(self, other: Self) -> float:
    """Returns Tanimoto similarity with other Mol object.

    Fingerprints are generated with each object's own `MFP2` generator the
    first time they are needed and cached on the object's `fp` attribute.

    Args:
        other (Mol): other Mol object.

    Raises:
        TypeError: if `other` is not Mol object type.

    Returns:
        float: Tanimoto similarity.
    """
    # Raise explicitly instead of `assert`: asserts are stripped under -O and
    # would raise AssertionError rather than the documented TypeError.
    if not isinstance(other, Mol):
        raise TypeError("similarity() Error: invalid Mol object")

    for mol in (self, other):
        if not mol.fp:
            mol.fp = mol.MFP2.GetFingerprint(mol.rdmol)

    return DataStructs.TanimotoSimilarity(self.fp, other.fp)
|
1124
1093
|
|
1125
1094
|
|
1126
|
-
def is_similar(self, other:
|
1127
|
-
"""Check if
|
1095
|
+
def is_similar(self, other: Self, threshold: float) -> bool:
|
1096
|
+
"""Check if other molecule is similar within Tanimoto similarity threshold.
|
1128
1097
|
|
1129
1098
|
Args:
|
1130
|
-
other (
|
1099
|
+
other (Mol): other Mol object to compare with.
|
1131
1100
|
threshold (float): Tanimoto similarity threshold.
|
1132
1101
|
|
1133
1102
|
Returns:
|
@@ -1135,8 +1104,21 @@ class Mol:
|
|
1135
1104
|
"""
|
1136
1105
|
return self.similarity(other) >= threshold
|
1137
1106
|
|
1138
|
-
|
1139
|
-
def
|
1107
|
+
|
1108
|
+
def has_substr(self, substr: str) -> bool:
    """Determine if the molecule has the substructure match.

    Args:
        substr (str): SMARTS or SMILES.

    Returns:
        bool: True if matches.

    Raises:
        ValueError: if `substr` cannot be parsed as a substructure query.
    """
    query = Chem.MolFromSmarts(substr)
    if query is None:
        # MolFromSmarts() reports a parse failure by returning None; passing
        # that on to HasSubstructMatch() only yields an opaque argument error.
        raise ValueError(f"has_substr() Error: invalid substructure pattern '{substr}'")

    return self.rdmol.HasSubstructMatch(query)
|
1119
|
+
|
1120
|
+
|
1121
|
+
def is_matching(self, terms: str | Path, invert: bool = False) -> bool:
|
1140
1122
|
"""Determines if the molecule matches the predefined substructure and/or descriptor ranges.
|
1141
1123
|
|
1142
1124
|
invert | terms(~ or !) | effect
|
@@ -1154,14 +1136,15 @@ class Mol:
|
|
1154
1136
|
Returns:
|
1155
1137
|
bool: True if matches.
|
1156
1138
|
"""
|
1157
|
-
if isinstance(terms,
|
1139
|
+
if isinstance(terms, Path):
|
1158
1140
|
path = terms.as_posix()
|
1141
|
+
|
1159
1142
|
elif isinstance(terms, str):
|
1160
1143
|
if terms.startswith('~') or terms.startswith('!'):
|
1161
1144
|
terms = terms.replace('~','').replace('!','')
|
1162
1145
|
invert = (invert ^ True)
|
1163
1146
|
try:
|
1164
|
-
path =
|
1147
|
+
path = Path(terms) # test if terms points to a xml file
|
1165
1148
|
assert path.is_file()
|
1166
1149
|
except:
|
1167
1150
|
path = get_predefined_xml(terms)
|
@@ -1192,8 +1175,10 @@ class Mol:
|
|
1192
1175
|
if combine.lower() == 'or' and any(mask):
|
1193
1176
|
# early termination if any term is satisfied
|
1194
1177
|
return invert ^ True # XOR(^) inverts only if invert is True
|
1178
|
+
|
1195
1179
|
if combine.lower() == 'and' and all(mask):
|
1196
1180
|
return invert ^ True
|
1181
|
+
|
1197
1182
|
return invert ^ False
|
1198
1183
|
|
1199
1184
|
|
@@ -1246,6 +1231,7 @@ class Mol:
|
|
1246
1231
|
continue
|
1247
1232
|
else:
|
1248
1233
|
stereos.append(element.specified == Chem.StereoSpecified.Specified)
|
1234
|
+
|
1249
1235
|
# note all([]) returns True
|
1250
1236
|
return all(stereos)
|
1251
1237
|
|
@@ -1262,12 +1248,12 @@ class Mol:
|
|
1262
1248
|
if element.type == Chem.StereoType.Bond_Double:
|
1263
1249
|
if self.rdmol.GetBondWithIdx(element.centeredOn).IsInRing():
|
1264
1250
|
ring_bond_stereo_info.append((element.centeredOn, element.descriptor))
|
1251
|
+
|
1265
1252
|
return ring_bond_stereo_info
|
1266
1253
|
|
1267
1254
|
|
1268
1255
|
def report_stereo(self) -> None:
|
1269
|
-
"""
|
1270
|
-
"""
|
1256
|
+
"""Report stereochemistry information for debug"""
|
1271
1257
|
num_chiral_centers = rdMolDescriptors.CalcNumAtomStereoCenters(self.rdmol)
|
1272
1258
|
# Returns the total number of atomic stereocenters (specified and unspecified)
|
1273
1259
|
num_unspecified_chiral_centers = rdMolDescriptors.CalcNumUnspecifiedAtomStereoCenters(self.rdmol)
|
@@ -1292,8 +1278,7 @@ class Mol:
|
|
1292
1278
|
|
1293
1279
|
|
1294
1280
|
def report_props(self) -> None:
|
1295
|
-
"""
|
1296
|
-
"""
|
1281
|
+
"""Report properties"""
|
1297
1282
|
if self.props:
|
1298
1283
|
print(f"Properties({len(self.props)}):")
|
1299
1284
|
fixed_width = max([len(k) for k in self.props]) + 4
|
@@ -1305,7 +1290,59 @@ class Mol:
|
|
1305
1290
|
print(f"Properties: None")
|
1306
1291
|
|
1307
1292
|
|
1308
|
-
def
|
1293
|
+
def draw(self,
|
1294
|
+
coordgen: bool = False,
|
1295
|
+
rotate: bool = False,
|
1296
|
+
axis: str = 'z',
|
1297
|
+
degree: float = 0.0,
|
1298
|
+
) -> Self:
|
1299
|
+
"""Draw molecule in 2D.
|
1300
|
+
|
1301
|
+
Args:
|
1302
|
+
coordgen (bool, optional): whether to use `coordgen`. Defaults to False.
|
1303
|
+
rotate (bool, optional): whether to rotate drawing. Defaults to False.
|
1304
|
+
axis (str, optional): axis for rotation. Defaults to 'z'.
|
1305
|
+
degree (float, optional): degree for rotation. Defaults to 0.0.
|
1306
|
+
|
1307
|
+
Returns:
|
1308
|
+
Self.
|
1309
|
+
"""
|
1310
|
+
rdDepictor.SetPreferCoordGen(coordgen)
|
1311
|
+
rdDepictor.Compute2DCoords(self.rdmol)
|
1312
|
+
|
1313
|
+
if rotate:
|
1314
|
+
rad = (np.pi/180.0) * degree
|
1315
|
+
c = np.cos(rad)
|
1316
|
+
s = np.sin(rad)
|
1317
|
+
if axis.lower() == 'x':
|
1318
|
+
rotmat = np.array([
|
1319
|
+
[1., 0., 0., 0.],
|
1320
|
+
[0., c, -s, 0.],
|
1321
|
+
[0., s, c, 0.],
|
1322
|
+
[0., 0., 0., 1.],
|
1323
|
+
])
|
1324
|
+
elif axis.lower() == 'y':
|
1325
|
+
rotmat = np.array([
|
1326
|
+
[ c, 0., s, 0.],
|
1327
|
+
[ 0., 1., 0., 0.],
|
1328
|
+
[-s, 0., c, 0.],
|
1329
|
+
[ 0., 0., 0., 1.],
|
1330
|
+
])
|
1331
|
+
elif axis.lower() == 'z':
|
1332
|
+
rotmat = np.array([
|
1333
|
+
[c, -s, 0., 0.],
|
1334
|
+
[s, c, 0., 0.],
|
1335
|
+
[0., 0., 1., 0.],
|
1336
|
+
[0., 0., 0., 1.],
|
1337
|
+
])
|
1338
|
+
rdMolTransforms.TransformConformer(
|
1339
|
+
self.rdmol.GetConformer(),
|
1340
|
+
rotmat)
|
1341
|
+
|
1342
|
+
return self
|
1343
|
+
|
1344
|
+
|
1345
|
+
def to_sdf(self, confs: bool = False, props: bool = True) -> str:
|
1309
1346
|
"""Returns strings of SDF output.
|
1310
1347
|
|
1311
1348
|
Args:
|
@@ -1315,8 +1352,8 @@ class Mol:
|
|
1315
1352
|
Returns:
|
1316
1353
|
str: strings of SDF output.
|
1317
1354
|
"""
|
1318
|
-
|
1319
|
-
with Chem.SDWriter(
|
1355
|
+
buf = StringIO()
|
1356
|
+
with Chem.SDWriter(buf) as f:
|
1320
1357
|
if confs:
|
1321
1358
|
for conf in self.confs:
|
1322
1359
|
rdmol = Chem.Mol(conf.rdmol)
|
@@ -1336,45 +1373,61 @@ class Mol:
|
|
1336
1373
|
for k,v in self.props.items():
|
1337
1374
|
rdmol.SetProp(k, str(v))
|
1338
1375
|
f.write(rdmol)
|
1339
|
-
|
1340
|
-
|
1341
|
-
|
1342
|
-
def to_image(self, width:int=300, height:int=300, index:bool=False, svg:bool=True) -> object:
|
1343
|
-
"""Returns PIL(Python Image Library) image object.
|
1376
|
+
|
1377
|
+
return buf.getvalue()
|
1378
|
+
|
1344
1379
|
|
1345
|
-
|
1380
|
+
def to_png(self,
           width: int = 300,
           height: int = 300,
           legend: str = '',
           atom_index: bool = False,
           highlight_atoms: list[int] | None = None,
           highlight_bonds: list[int] | None = None,
           redraw: bool = False,
           coordgen: bool = False,
           trim: bool = True,
           ) -> Image.Image:
    """Draw 2D molecule in PNG format.

    All arguments are passed unchanged, by keyword, to render_png() together
    with self.rdmol.

    Args:
        width (int, optional): width. Defaults to 300.
        height (int, optional): height. Defaults to 300.
        legend (str, optional): legend. Defaults to ''.
        atom_index (bool, optional): whether to show atom index. Defaults to False.
        highlight_atoms (list[int] | None, optional): atom(s) to highlight. Defaults to None.
        highlight_bonds (list[int] | None, optional): bond(s) to highlight. Defaults to None.
        redraw (bool, optional): whether to redraw. Defaults to False.
        coordgen (bool, optional): whether to use coordgen. Defaults to False.
        trim (bool, optional): whether to trim white margins. Defaults to True.

    Returns:
        Image.Image: output PIL Image object.
    """
    render_options = {
        'width': width,
        'height': height,
        'legend': legend,
        'atom_index': atom_index,
        'highlight_atoms': highlight_atoms,
        'highlight_bonds': highlight_bonds,
        'redraw': redraw,
        'coordgen': coordgen,
        'trim': trim,
    }
    return render_png(self.rdmol, **render_options)
|
1369
1418
|
|
1370
1419
|
def to_svg(self,
|
1371
|
-
width:int =
|
1372
|
-
height:int =
|
1373
|
-
legend:str = '',
|
1374
|
-
|
1375
|
-
|
1376
|
-
|
1377
|
-
|
1420
|
+
width: int = 300,
|
1421
|
+
height: int = 300,
|
1422
|
+
legend: str = '',
|
1423
|
+
atom_index: bool = False,
|
1424
|
+
highlight_atoms: list[int] | None = None,
|
1425
|
+
highlight_bonds: list[int] | None = None,
|
1426
|
+
redraw: bool = False,
|
1427
|
+
coordgen: bool = False,
|
1428
|
+
optimize: bool = True,
|
1429
|
+
) -> str:
|
1430
|
+
"""Draw 2D molecule in SVG format.
|
1378
1431
|
|
1379
1432
|
Examples:
|
1380
1433
|
For Jupyternotebook, wrap the output with SVG:
|
@@ -1383,55 +1436,60 @@ class Mol:
|
|
1383
1436
|
>>> SVG(libr[0].to_svg())
|
1384
1437
|
|
1385
1438
|
Args:
|
1386
|
-
width (int): width
|
1387
|
-
height (int): height
|
1388
|
-
legend (str): legend
|
1389
|
-
|
1390
|
-
|
1439
|
+
width (int, optional): width. Defaults to 300.
|
1440
|
+
height (int, optional): height. Defaults to 300.
|
1441
|
+
legend (str, optional): legend. Defaults to ''.
|
1442
|
+
atom_index (bool, optional): whether to show atom index. Defaults to False.
|
1443
|
+
highlight_atoms (list[int] | None, optional): atom(s) to highlight. Defaults to None.
|
1444
|
+
highlight_bonds (list[int] | None, optional): bond(s) to highlight. Defaults to None.
|
1445
|
+
redraw (bool, optional): whether to redraw. Defaults to False.
|
1446
|
+
coordgen (bool, optional): whether to use coordgen. Defaults to False.
|
1447
|
+
optimize (bool, optional): whether to optimize SVG string. Defaults to True.
|
1391
1448
|
|
1392
1449
|
Returns:
|
1393
|
-
str: SVG
|
1450
|
+
str: SVG string
|
1394
1451
|
"""
|
1395
|
-
|
1396
|
-
|
1397
|
-
|
1398
|
-
|
1399
|
-
|
1400
|
-
|
1401
|
-
|
1402
|
-
|
1403
|
-
|
1404
|
-
|
1405
|
-
|
1406
|
-
for atom in rdmol_2d.GetAtoms():
|
1407
|
-
atom.SetProp("atomLabel", str(atom.GetIdx()))
|
1408
|
-
# atom.SetProp("atomNote", str(atom.GetIdx()))
|
1409
|
-
# atom.SetProp("molAtomMapNumber", str(atom.GetIdx()))
|
1410
|
-
|
1411
|
-
drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
|
1412
|
-
if highlight:
|
1413
|
-
drawer.DrawMolecule(rdmol_2d, legend=legend, highlightAtoms=highlight)
|
1414
|
-
else:
|
1415
|
-
drawer.DrawMolecule(rdmol_2d, legend=legend)
|
1416
|
-
drawer.FinishDrawing()
|
1417
|
-
return drawer.GetDrawingText()
|
1418
|
-
|
1419
|
-
|
1420
|
-
def plot_energy(self, df:pd.DataFrame) -> str:
|
1421
|
-
"""Returns Seaborn plot strings for dihedral energy profile in SVG format.
|
1452
|
+
return render_svg(self.rdmol,
|
1453
|
+
width = width,
|
1454
|
+
height = height,
|
1455
|
+
legend = legend,
|
1456
|
+
atom_index = atom_index,
|
1457
|
+
highlight_atoms = highlight_atoms,
|
1458
|
+
highlight_bonds = highlight_bonds,
|
1459
|
+
redraw = redraw,
|
1460
|
+
coordgen = coordgen,
|
1461
|
+
optimize = optimize)
|
1462
|
+
|
1422
1463
|
|
1423
|
-
|
1464
|
+
def plot_torsion_energies(self,
|
1465
|
+
torsion_key: int,
|
1466
|
+
svg: bool = False,
|
1467
|
+
upper_limit: float = 35.0,
|
1468
|
+
zoomin_limit: float = 5.0,
|
1469
|
+
**kwargs,
|
1470
|
+
) -> str | None:
|
1471
|
+
"""Plot torsion energies.
|
1424
1472
|
|
1425
1473
|
Args:
|
1426
|
-
|
1474
|
+
torsion_key (int): torsion data to plot.
|
1475
|
+
svg (bool, optional): whether to return SVG strings. Defaults to False.
|
1476
|
+
upper_limit (float, optional): upper limit for E_rel(kcal/mol). Defaults to 35.0.
|
1477
|
+
zoomin_limit (float, optional): lower limit for E_rel(kcal/mol). Defaults to 5.0.
|
1478
|
+
**kwargs: matplotlib.pyplot.plt.figure options.
|
1427
1479
|
|
1428
1480
|
Returns:
|
1429
|
-
|
1481
|
+
SVG strings or None for Jupyter Notebook.
|
1430
1482
|
"""
|
1483
|
+
data = self.props['torsion'][torsion_key]
|
1484
|
+
df = pd.DataFrame({ax: data[ax] for ax in ['angle', 'E_rel(kcal/mol)']})
|
1431
1485
|
|
1432
|
-
|
1486
|
+
plt.figure(**kwargs)
|
1487
|
+
plt.clf() # Clear the current figure to prevent overlapping plots
|
1488
|
+
|
1489
|
+
sns.set_theme()
|
1433
1490
|
sns.color_palette("tab10")
|
1434
1491
|
sns.set_style("whitegrid")
|
1492
|
+
|
1435
1493
|
if len(df['angle']) == len(df['angle'].drop_duplicates()):
|
1436
1494
|
g = sns.lineplot(x="angle",
|
1437
1495
|
y="E_rel(kcal/mol)",
|
@@ -1448,31 +1506,57 @@ class Mol:
|
|
1448
1506
|
markersize=10)
|
1449
1507
|
g.xaxis.set_major_locator(ticker.MultipleLocator(30))
|
1450
1508
|
g.xaxis.set_major_formatter(ticker.ScalarFormatter())
|
1451
|
-
if df["E_rel(kcal/mol)"].max() >
|
1509
|
+
if df["E_rel(kcal/mol)"].max() > upper_limit:
|
1452
1510
|
g.set(title=self.name,
|
1453
1511
|
xlabel='Dihedral Angle (degree)',
|
1454
1512
|
ylabel='Relative Energy (Kcal/mol)',
|
1455
1513
|
xlim=(-190, 190),
|
1456
|
-
ylim=(-1.5,
|
1457
|
-
elif df["E_rel(kcal/mol)"].max() <
|
1514
|
+
ylim=(-1.5, upper_limit))
|
1515
|
+
elif df["E_rel(kcal/mol)"].max() < zoomin_limit:
|
1458
1516
|
g.set(title=self.name,
|
1459
1517
|
xlabel='Dihedral Angle (degree)',
|
1460
1518
|
ylabel='Relative Energy (Kcal/mol)',
|
1461
1519
|
xlim=(-190, 190),
|
1462
|
-
ylim=(-1.5,
|
1520
|
+
ylim=(-1.5, zoomin_limit))
|
1463
1521
|
else:
|
1464
1522
|
g.set(title=self.name,
|
1465
1523
|
xlabel='Dihedral Angle (degree)',
|
1466
1524
|
ylabel='Relative Energy (Kcal/mol)',
|
1467
1525
|
xlim=(-190, 190),)
|
1468
1526
|
g.tick_params(axis='x', rotation=30)
|
1469
|
-
|
1470
|
-
|
1471
|
-
|
1472
|
-
|
1473
|
-
|
1527
|
+
|
1528
|
+
if svg:
|
1529
|
+
buf = StringIO()
|
1530
|
+
plt.savefig(buf, format='svg', bbox_inches='tight')
|
1531
|
+
plt.close() # prevents duplicate plot outputs in Jupyter Notebook
|
1532
|
+
svg_string = buf.getvalue()
|
1533
|
+
# optimize SVG string
|
1534
|
+
scour_options = {
|
1535
|
+
'strip_comments': True,
|
1536
|
+
'strip_ids': True,
|
1537
|
+
'shorten_ids': True,
|
1538
|
+
'compact_paths': True,
|
1539
|
+
'indent_type': 'none',
|
1540
|
+
}
|
1541
|
+
svg_string = scourString(svg_string, options=scour_options)
|
1542
|
+
|
1543
|
+
return svg_string
|
1544
|
+
|
1545
|
+
else:
|
1546
|
+
buf = BytesIO()
|
1547
|
+
plt.savefig(buf, format='png', bbox_inches='tight')
|
1548
|
+
plt.close() # prevents duplicate plot outputs in Jupyter Notebook
|
1549
|
+
buf.seek(0)
|
1550
|
+
img = Image.open(buf)
|
1551
|
+
plt.imshow(img)
|
1552
|
+
plt.axis('off') # Optional: remove axes
|
1553
|
+
plt.show()
|
1554
|
+
|
1474
1555
|
|
1475
|
-
def to_html(self,
|
1556
|
+
def to_html(self,
|
1557
|
+
htmlbody: bool = False,
|
1558
|
+
contents: str = 'torsion',
|
1559
|
+
) -> str:
|
1476
1560
|
"""Returns HTML text of dihedral energy profile.
|
1477
1561
|
|
1478
1562
|
Args:
|
@@ -1481,42 +1565,86 @@ class Mol:
|
|
1481
1565
|
Returns:
|
1482
1566
|
str: HTML text.
|
1483
1567
|
"""
|
1568
|
+
HTML = ''
|
1484
1569
|
if htmlbody:
|
1485
|
-
HTML =
|
1486
|
-
|
1487
|
-
|
1488
|
-
|
1489
|
-
|
1490
|
-
|
1491
|
-
|
1492
|
-
|
1493
|
-
|
1494
|
-
|
1495
|
-
|
1496
|
-
|
1497
|
-
|
1498
|
-
|
1499
|
-
|
1500
|
-
|
1501
|
-
|
1502
|
-
|
1503
|
-
|
1570
|
+
HTML = '<html><body>'
|
1571
|
+
|
1572
|
+
if contents.lower() == 'torsion':
|
1573
|
+
# start of content
|
1574
|
+
HTML += f'<h1 style="text-align:left">{self.name}</h1>'
|
1575
|
+
HTML += '<table>'
|
1576
|
+
for tk, dictdata in self.props['torsion'].items():
|
1577
|
+
ijkl = dictdata['indices']
|
1578
|
+
ijkl_str = '-'.join([str(i) for i in ijkl])
|
1579
|
+
svg_mol = self.to_svg(highlight_atoms=ijkl, atom_index=True)
|
1580
|
+
svg_plot = self.plot_torsion_energies(torsion_key=tk, svg=True)
|
1581
|
+
frag = dictdata.get('frag', None)
|
1582
|
+
if frag is not None:
|
1583
|
+
frag = Chem.MolFromMolBlock(frag)
|
1584
|
+
pqrs = dictdata['frag_indices']
|
1585
|
+
pqrs_str = '-'.join([str(i) for i in pqrs])
|
1586
|
+
svg_frag = render_svg(frag, highlight_atoms=pqrs, atom_index=True)
|
1587
|
+
HTML += f'<tr><td>{ijkl_str}</td><td>{svg_mol}</td>'
|
1588
|
+
HTML += f'<td>{pqrs_str}<td>{svg_frag}</td><td>{svg_plot}</td></tr>'
|
1589
|
+
else:
|
1590
|
+
HTML += f'<tr><td>{ijkl_str}</td><td>{svg_mol}</td><td>{svg_plot}</td></tr>'
|
1591
|
+
HTML += '</table>'
|
1592
|
+
HTML += '<hr style="height:2px;border-width:0;color:gray;background-color:gray">'
|
1593
|
+
# end of content
|
1594
|
+
|
1504
1595
|
if htmlbody:
|
1505
|
-
HTML +=
|
1596
|
+
HTML += '</body></html>'
|
1597
|
+
|
1506
1598
|
return HTML
|
1507
1599
|
|
1508
1600
|
|
1509
|
-
def
|
1601
|
+
def dumps(self, key: str = "", decimals: int = 2) -> str:
    """Returns JSON dumps of properties.

    The properties are first passed through recursive_round() with
    `decimals`. With a non-empty `key`, only that entry is dumped, wrapped in
    a one-item object ``{key: value}``.

    Args:
        key (str): key for a subset of properties. Defaults to "" (all properties).
        decimals (int, optional): decimal places for float numbers. Defaults to 2.

    Returns:
        str: JSON dumps.
    """
    rounded = recursive_round(self.props, decimals)
    payload = {key: rounded[key]} if key else rounded
    return json.dumps(payload)
|
1617
|
+
|
1618
|
+
|
1619
|
+
def serialize(self, decimals: int = 2) -> str:
    """Returns a JSON string holding name, SMILES, properties, and conformers.

    The JSON object has the keys 'name', 'smiles', 'props' (self.props passed
    through recursive_round() with `decimals`), and 'confs' (the result of
    serialize() for every conformer in self.confs).

    Args:
        decimals (int, optional): decimal places passed to recursive_round(). Defaults to 2.

    Returns:
        str: JSON string.
    """
    rounded_props = recursive_round(self.props, decimals)
    serialized_confs = [conf.serialize() for conf in self.confs]
    payload = {
        'name'  : self.name,
        'smiles': self.smiles,
        'props' : rounded_props,
        'confs' : serialized_confs,
    }
    return json.dumps(payload)
|
1628
|
+
|
1629
|
+
|
1630
|
+
def deserialize(self, serialized: str) -> Self:
    """Updates self with the serialized string input.

    The input is expected to be a JSON object with the keys 'name',
    'smiles', 'props' and 'confs'. All values are parsed first and self is
    only modified once everything succeeded, so a failed call leaves the
    object unchanged.

    Note that JSON object keys are always strings: integer keys inside
    `props` that went through json.dumps() come back as strings.

    Args:
        serialized (str): input

    Returns:
        Self: modified self.

    Raises:
        KeyError: if a required key is missing from the input.
        ValueError: if the stored SMILES cannot be parsed.
    """
    data = json.loads(serialized)

    name = data['name']
    smiles = data['smiles'] # isomeric SMILES, no H
    rdmol = Chem.MolFromSmiles(smiles) # for 2D depiction
    if rdmol is None:
        # MolFromSmiles() returns None on a parse failure; fail here with a
        # clear message instead of an AttributeError on the next line.
        raise ValueError(f"deserialize() Error: invalid SMILES '{smiles}'")
    rdmol.SetProp('_Name', name)
    InChI = Chem.MolToInchi(rdmol)
    InChIKey = inchi.InchiToInchiKey(InChI)
    props = data['props']
    confs = [Conf().deserialize(_) for _ in data['confs']] # for 3D conformers (iterable)

    self.name = name
    self.smiles = smiles
    self.rdmol = rdmol
    self.InChI = InChI
    self.InChIKey = InChIKey
    self.props = props
    self.confs = confs

    return self
|