rdworks 0.25.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdworks/__init__.py +35 -0
- rdworks/autograph/__init__.py +4 -0
- rdworks/autograph/autograph.py +184 -0
- rdworks/autograph/centroid.py +90 -0
- rdworks/autograph/dynamictreecut.py +135 -0
- rdworks/autograph/nmrclust.py +123 -0
- rdworks/autograph/rckmeans.py +74 -0
- rdworks/bitqt/__init__.py +1 -0
- rdworks/bitqt/bitqt.py +355 -0
- rdworks/conf.py +374 -0
- rdworks/descriptor.py +36 -0
- rdworks/display.py +206 -0
- rdworks/ionized.py +170 -0
- rdworks/matchedseries.py +260 -0
- rdworks/mol.py +1522 -0
- rdworks/mollibr.py +887 -0
- rdworks/pka.py +38 -0
- rdworks/predefined/Asinex_fragment.xml +20 -0
- rdworks/predefined/Astex_RO3.xml +16 -0
- rdworks/predefined/Baell2010_PAINS/Baell2010A.xml +52 -0
- rdworks/predefined/Baell2010_PAINS/Baell2010B.xml +169 -0
- rdworks/predefined/Baell2010_PAINS/Baell2010C.xml +1231 -0
- rdworks/predefined/Baell2010_PAINS/PAINS-less-than-015-hits.xml +2048 -0
- rdworks/predefined/Baell2010_PAINS/PAINS-less-than-150-hits.xml +278 -0
- rdworks/predefined/Baell2010_PAINS/PAINS-more-than-150-hits.xml +83 -0
- rdworks/predefined/Baell2010_PAINS/makexml.py +70 -0
- rdworks/predefined/Brenk2008_Dundee/makexml.py +21 -0
- rdworks/predefined/CNS.xml +18 -0
- rdworks/predefined/ChEMBL_Walters/BMS.xml +543 -0
- rdworks/predefined/ChEMBL_Walters/Dundee.xml +318 -0
- rdworks/predefined/ChEMBL_Walters/Glaxo.xml +168 -0
- rdworks/predefined/ChEMBL_Walters/Inpharmatica.xml +276 -0
- rdworks/predefined/ChEMBL_Walters/LINT.xml +174 -0
- rdworks/predefined/ChEMBL_Walters/MLSMR.xml +351 -0
- rdworks/predefined/ChEMBL_Walters/PAINS.xml +1446 -0
- rdworks/predefined/ChEMBL_Walters/SureChEMBL.xml +501 -0
- rdworks/predefined/ChEMBL_Walters/makexml.py +40 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999.xml +168 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999Acid.xml +102 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999Base.xml +6 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999ElPh.xml +6 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999NuPh.xml +6 -0
- rdworks/predefined/Hann1999_Glaxo/makexml.py +83 -0
- rdworks/predefined/Kazius2005/Kazius2005.xml +114 -0
- rdworks/predefined/Kazius2005/makexml.py +66 -0
- rdworks/predefined/ZINC_druglike.xml +24 -0
- rdworks/predefined/ZINC_fragment.xml +14 -0
- rdworks/predefined/ZINC_leadlike.xml +15 -0
- rdworks/predefined/fragment.xml +7 -0
- rdworks/predefined/ionized/simple_smarts_pattern.csv +57 -0
- rdworks/predefined/ionized/smarts_pattern.csv +107 -0
- rdworks/predefined/misc/makexml.py +119 -0
- rdworks/predefined/misc/reactive-part-2.xml +104 -0
- rdworks/predefined/misc/reactive-part-3.xml +74 -0
- rdworks/predefined/misc/reactive.xml +321 -0
- rdworks/readin.py +312 -0
- rdworks/rgroup.py +2173 -0
- rdworks/scaffold.py +520 -0
- rdworks/std.py +143 -0
- rdworks/stereoisomers.py +127 -0
- rdworks/tautomers.py +20 -0
- rdworks/units.py +63 -0
- rdworks/utils.py +495 -0
- rdworks/xml.py +260 -0
- rdworks-0.25.7.dist-info/METADATA +37 -0
- rdworks-0.25.7.dist-info/RECORD +69 -0
- rdworks-0.25.7.dist-info/WHEEL +5 -0
- rdworks-0.25.7.dist-info/licenses/LICENSE +21 -0
- rdworks-0.25.7.dist-info/top_level.txt +1 -0
rdworks/mol.py
ADDED
@@ -0,0 +1,1522 @@
|
|
1
|
+
# rdworks/mol.py
|
2
|
+
|
3
|
+
import os
|
4
|
+
import io
|
5
|
+
import copy
|
6
|
+
import types
|
7
|
+
import pathlib
|
8
|
+
import itertools
|
9
|
+
import math
|
10
|
+
import json
|
11
|
+
import logging
|
12
|
+
import tempfile
|
13
|
+
|
14
|
+
from collections import defaultdict
|
15
|
+
from collections.abc import Callable
|
16
|
+
from pathlib import Path
|
17
|
+
from typing import Iterator, Self
|
18
|
+
|
19
|
+
import numpy as np
|
20
|
+
import pandas as pd
|
21
|
+
import matplotlib.ticker as ticker
|
22
|
+
import matplotlib.pyplot as plt
|
23
|
+
import seaborn as sns
|
24
|
+
|
25
|
+
import CDPL
|
26
|
+
import CDPL.Chem
|
27
|
+
import CDPL.ConfGen
|
28
|
+
|
29
|
+
from rdkit import Chem, DataStructs
|
30
|
+
|
31
|
+
from rdkit.Chem import (
|
32
|
+
rdMolDescriptors, AllChem, Descriptors, QED,
|
33
|
+
rdFingerprintGenerator,
|
34
|
+
Draw, rdDepictor,
|
35
|
+
rdDistGeom, rdMolAlign, rdMolTransforms, rdmolops
|
36
|
+
)
|
37
|
+
from rdkit.Chem.Draw import rdMolDraw2D
|
38
|
+
|
39
|
+
from rdkit.ML.Cluster import Butina
|
40
|
+
|
41
|
+
from rdworks.std import desalt_smiles, standardize
|
42
|
+
from rdworks.xml import list_predefined_xml, get_predefined_xml, parse_xml
|
43
|
+
from rdworks.scaffold import rigid_fragment_indices
|
44
|
+
from rdworks.descriptor import rd_descriptor, rd_descriptor_f
|
45
|
+
from rdworks.display import svg
|
46
|
+
from rdworks.utils import convert_tril_to_symm, QT, fix_decimal_places_in_dict
|
47
|
+
from rdworks.units import ev2kcalpermol
|
48
|
+
from rdworks.autograph import NMRCLUST, DynamicTreeCut, RCKmeans, AutoGraph
|
49
|
+
from rdworks.bitqt import BitQT
|
50
|
+
from rdworks.conf import Conf
|
51
|
+
|
52
|
+
main_logger = logging.getLogger()
|
53
|
+
|
54
|
+
|
55
|
+
class Mol:
|
56
|
+
"""Container for molecular structure, conformers, and other information.
|
57
|
+
"""
|
58
|
+
|
59
|
+
MFP2 = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
|
60
|
+
|
61
|
+
ETKDG_params = rdDistGeom.ETKDGv3()
|
62
|
+
ETKDG_params.useSmallRingTorsions = True
|
63
|
+
ETKDG_params.maxIterations = 2000
|
64
|
+
|
65
|
+
|
66
|
+
def __init__(self,
             molecular_input: str | Chem.Mol,
             name: str = '',
             std: bool = False,
             max_workers: int = 1,
             chunksize: int = 4,
             progress: bool = False) -> None:
    """Create a rdworks.Mol object.

    Examples:
        >>> import rdworks
        >>> m = rdworks.Mol('c1ccccc1', name='benzene')

    Args:
        molecular_input (str | Chem.Mol): SMILES or rdkit.Chem.Mol object
        name (str, optional): name of the molecule. Defaults to ''.
        std (bool, optional): whether to standardize the molecule. Defaults to False.
        max_workers (int, optional): stored as `self.max_workers`. Defaults to 1.
        chunksize (int, optional): stored as `self.chunksize`. Defaults to 4.
        progress (bool, optional): stored as `self.progress`. Defaults to False.

    Raises:
        ValueError: Invalid SMILES or rdkit.Chem.Mol object.
        TypeError: No SMILES or rdkit.Chem.Mol object is provided.
        RuntimeError: Desalting or standardization process failed.
    """

    self.rdmol = None  # rdkit.Chem.Mol object
    self.smiles = None  # isomeric SMILES
    self.name = None
    self.props = {}
    self.confs = []  # 3D conformers (iterable)
    self.fp = None
    self.max_workers = max_workers
    self.chunksize = chunksize
    self.progress = progress

    if isinstance(molecular_input, str):
        invalid_input = f'Mol() received invalid SMILES: {molecular_input}'
        try:
            self.rdmol = Chem.MolFromSmiles(molecular_input)
        except Exception as err:
            raise ValueError(invalid_input) from err
    elif isinstance(molecular_input, Chem.Mol):
        invalid_input = 'Mol() received invalid rdkit.Chem.Mol object'
        self.rdmol = molecular_input
    else:
        raise TypeError('Mol() expects SMILES or rdkit.Chem.Mol object')

    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if not self.rdmol:
        raise ValueError(invalid_input)
    try:
        self.smiles = Chem.MolToSmiles(self.rdmol)
    except Exception as err:
        raise ValueError(invalid_input) from err

    ### desalting
    if "." in self.smiles:
        # Keep the multi-component SMILES so the error message still shows
        # the offending input after self.smiles has been reassigned.
        salted_smiles = self.smiles
        desalting_failed = f'Mol() error occurred in desalting: {salted_smiles}'
        try:
            (self.smiles, self.rdmol) = desalt_smiles(salted_smiles)
        except Exception as err:
            raise RuntimeError(desalting_failed) from err
        if not self.smiles or not self.rdmol:
            raise RuntimeError(desalting_failed)

    ### standardization
    if std:
        # standardization changes self.rdmol
        try:
            self.rdmol = standardize(self.rdmol)
            self.smiles = Chem.MolToSmiles(self.rdmol)
        except Exception as err:
            raise RuntimeError('Mol() error occurred in standardization') from err
        if not self.smiles or not self.rdmol:
            raise RuntimeError('Mol() error occurred in standardization')

    ### naming
    try:
        self.name = str(name)
    except Exception:
        self.name = 'untitled'
    self.rdmol.SetProp('_Name', self.name)  # _Name can't be None

    ### set default properties
    self.props.update({
        'aka': [],  # <-- to be set by MolLibr.unique()
        # Atom count of the molecule as stored: implicit hydrogens are not
        # counted, e.g. Chem.MolFromSmiles("c1c[nH]cc1").GetNumAtoms() is 5
        # while Chem.AddHs(m).GetNumAtoms() is 10.
        'atoms': self.rdmol.GetNumAtoms(),
        'charge': rdmolops.GetFormalCharge(self.rdmol),
        # number of rotatable bonds
        "nrb": Descriptors.NumRotatableBonds(self.rdmol),
    })
|
158
|
+
|
159
|
+
|
160
|
+
def __str__(self) -> str:
    """Summarize the molecule as SMILES, name and conformer count.

    Examples:
        >>> m = Mol('CCO', name='ethanol')
        >>> print(m)

    Returns:
        str: text of the form `<Mol(SMILES name=NAME conformers=N)>`.
    """
    smiles, label = self.smiles, self.name
    n_conformers = self.count()
    return f"<Mol({smiles} name={label} conformers={n_conformers})>"
|
171
|
+
|
172
|
+
|
173
|
+
def __hash__(self) -> int:
    """Hash of the SMILES string of the molecule.

    The value is used when the molecule is a dictionary key or a set
    element. Hash-based containers compare hashes first and call
    `__eq__` only when the hashes match; the `==` operator itself always
    calls `__eq__` directly. Because both methods are derived from
    `self.smiles`, molecules that compare equal also hash equal.

    Examples:
        >>> len({m1, m2})

    Returns:
        int: hash of the SMILES string.
    """
    return hash(self.smiles)
|
188
|
+
|
189
|
+
|
190
|
+
def __eq__(self, other: object) -> bool:
    """True if `other` molecule is identical with the molecule.

    It compares the `smiles` attributes of both objects.

    Examples:
        >>> m1 == m2

    Args:
        other (object): other rdworks.Mol object.

    Returns:
        bool: True if identical. `NotImplemented` is returned when `other`
            has no `smiles` attribute, so that comparisons such as
            `mol == None` evaluate to False instead of raising
            AttributeError.
    """
    if not hasattr(other, 'smiles'):
        return NotImplemented
    return self.smiles == other.smiles
|
205
|
+
|
206
|
+
|
207
|
+
def __iter__(self) -> Iterator:
    """Iterate over the conformers stored in `self.confs`.

    Examples:
        >>> for conformer in mol:
        >>>     print(conformer.name)

    Returns:
        Iterator: a fresh iterator over the conformers of the molecule.
    """
    conformers = self.confs
    return iter(conformers)
|
218
|
+
|
219
|
+
|
220
|
+
def __next__(self) -> Conf:
    """Next conformer of the molecule.

    `self.confs` is a list, which is not an iterator, so calling
    `next()` on it directly raises TypeError. A position is kept in
    `self._next_index` instead; it restarts from the first conformer once
    the end has been reached.

    Raises:
        StopIteration: when all conformers have been returned.

    Returns:
        Conf: Conf object of one of conformers of the molecule.
    """
    position = getattr(self, '_next_index', 0)
    if position >= len(self.confs):
        self._next_index = 0  # allow a new pass after exhaustion
        raise StopIteration
    self._next_index = position + 1
    return self.confs[position]
|
227
|
+
|
228
|
+
|
229
|
+
def __getitem__(self, index: int | slice) -> Conf | list[Conf]:
    """Conformer(s) of the molecule with given index or slice of indexes.

    Examples:
        >>> first_conformer = mol[0]

    Args:
        index (int | slice): index for conformers.

    Raises:
        ValueError: conformers are not defined in the molecule or index is
            out of range (or is not a valid list index).

    Returns:
        Conf | list[Conf]: a single Conf for an integer index, a list of
            Conf objects for a slice.
    """
    total = self.count()
    if total == 0:
        raise ValueError("no conformers")
    try:
        return self.confs[index]
    except (IndexError, TypeError) as err:
        # Only indexing failures are translated; anything else propagates.
        raise ValueError(f"index should be 0..{total-1}") from err
|
250
|
+
|
251
|
+
|
252
|
+
def copy(self) -> Self:
|
253
|
+
"""Returns a copy of self.
|
254
|
+
|
255
|
+
Returns:
|
256
|
+
Self: a copy of self (rdworks.Mol) object.
|
257
|
+
"""
|
258
|
+
return copy.deepcopy(self)
|
259
|
+
|
260
|
+
|
261
|
+
def rename(self, prefix:str='', sep:str='/', start:int=1) -> Self:
|
262
|
+
"""Rename conformer names and returns self
|
263
|
+
|
264
|
+
The first conformer name is {prefix}{sep}{start}
|
265
|
+
|
266
|
+
Args:
|
267
|
+
prefix (str, optional): prefix of the name. Defaults to ''.
|
268
|
+
sep (str, optional): separtor betwween prefix and serial number. Defaults to '/'.
|
269
|
+
start (int, optional): first serial number. Defaults to 1.
|
270
|
+
|
271
|
+
Returns:
|
272
|
+
Self: rdworks.Mol object.
|
273
|
+
"""
|
274
|
+
if prefix :
|
275
|
+
self.name = prefix
|
276
|
+
self.rdmol.SetProp('_Name', prefix)
|
277
|
+
# update conformer names
|
278
|
+
num_digits = len(str(self.count())) # ex. '100' -> 3
|
279
|
+
for (serial, conf) in enumerate(self.confs, start=start):
|
280
|
+
serial_str = str(serial)
|
281
|
+
while len(serial_str) < num_digits:
|
282
|
+
serial_str = '0' + serial_str
|
283
|
+
conf.rename(f'{self.name}{sep}{serial_str}')
|
284
|
+
return self
|
285
|
+
|
286
|
+
|
287
|
+
def qed(self,
        properties: list[str] | tuple[str, ...] = ('QED', 'MolWt', 'LogP', 'TPSA', 'HBD')) -> Self:
    """Updates quantitative estimate of drug-likeness (QED).

    Each requested key is looked up in `rd_descriptor_f`, the resulting
    callable is applied to `self.rdmol`, and the values are merged into
    `self.props`. Nothing is written to `self.props` if any key fails.

    Args:
        properties (list[str] | tuple[str, ...], optional):
            Defaults to ('QED', 'MolWt', 'LogP', 'TPSA', 'HBD').

    Raises:
        KeyError: if property key is unknown (or its calculation fails).

    Returns:
        Self: rdworks.Mol object.
    """
    props_dict = {}
    for k in properties:
        try:
            props_dict[k] = rd_descriptor_f[k](self.rdmol)
        except Exception as err:
            # `Exception` rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not converted; the cause is chained for debugging.
            raise KeyError(f'Mol.qed() received undefined property {k} for {self}') from err
    self.props.update(props_dict)
    return self
|
307
|
+
|
308
|
+
|
309
|
+
def remove_stereo(self) -> Self:
    """Return a deep copy of the molecule with stereochemistry removed.

    The original object is left untouched; the copy gets its `rdmol`
    stripped of stereo information and its `smiles` regenerated.

    Examples:
        >>> m = rdworks.Mol("C/C=C/C=C\\C", "double_bond")
        >>> m.remove_stereo().smiles == "CC=CC=CC"

    Returns:
        Self: rdworks.Mol object.
    """
    flattened = copy.deepcopy(self)
    target = flattened.rdmol
    # keep the original stereo info. for ring double bond
    Chem.RemoveStereochemistry(target)
    stereo_options = {
        'cleanIt': False,
        'force': False,
        'flagPossibleStereoCenters': False,
    }
    Chem.AssignStereochemistry(target, **stereo_options)
    flattened.smiles = Chem.MolToSmiles(target)
    return flattened
|
328
|
+
|
329
|
+
|
330
|
+
def make_confs(self,
|
331
|
+
n:int = 50,
|
332
|
+
method:str = 'RDKit_ETKDG',
|
333
|
+
calculator:str | Callable = 'MMFF94') -> Self:
|
334
|
+
"""Generates 3D conformers.
|
335
|
+
|
336
|
+
Args:
|
337
|
+
n (int, optional): number of conformers to generate. Defaults to 50.
|
338
|
+
method (str, optional): conformer generation method.
|
339
|
+
Choices are `RDKit_ETKDG`, `CDPL_CONFORGE`.
|
340
|
+
Defaults to 'RDKit_ETKDG'.
|
341
|
+
|
342
|
+
Returns:
|
343
|
+
Self: rdworks.Mol object
|
344
|
+
|
345
|
+
Reference:
|
346
|
+
T. Seidel, C. Permann, O. Wieder, S. M. Kohlbacher, T. Langer,
|
347
|
+
High-Quality Conformer Generation with CONFORGE: Algorithm and Performance Assessment.
|
348
|
+
J. Chem. Inf. Model. 63, 5549-5570 (2023).
|
349
|
+
"""
|
350
|
+
|
351
|
+
# if n is None:
|
352
|
+
# rot_bonds = rd_descriptor_f['RotBonds'](self.rdmol)
|
353
|
+
# n = min(max(1, int(8.481 * (rot_bonds **1.642))), 1000)
|
354
|
+
# n = max(1, math.ceil(n * n_rel)) # ensures that n is at least 1
|
355
|
+
|
356
|
+
self.confs = []
|
357
|
+
|
358
|
+
if method.upper() == 'RDKIT_ETKDG':
|
359
|
+
rdmol_H = Chem.AddHs(self.rdmol, addCoords=True) # returns a copy with hydrogens added
|
360
|
+
conf_ids = rdDistGeom.EmbedMultipleConfs(rdmol_H, n, params=self.ETKDG_params)
|
361
|
+
for rdConformer in rdmol_H.GetConformers():
|
362
|
+
# number of atoms should match with conformer(s)
|
363
|
+
rdmol_conf = Chem.Mol(rdmol_H)
|
364
|
+
rdmol_conf.RemoveAllConformers()
|
365
|
+
rdmol_conf.AddConformer(Chem.Conformer(rdConformer))
|
366
|
+
conf = Conf(rdmol_conf)
|
367
|
+
self.confs.append(conf)
|
368
|
+
|
369
|
+
elif method.upper() == 'CDPL_CONFORGE':
|
370
|
+
with tempfile.NamedTemporaryFile() as tmpfile:
|
371
|
+
mol = CDPL.Chem.parseSMILES(self.smiles)
|
372
|
+
# create and initialize an instance of the class ConfGen.ConformerGenerator which
|
373
|
+
# will perform the actual conformer ensemble generation work
|
374
|
+
conf_gen = CDPL.ConfGen.ConformerGenerator()
|
375
|
+
conf_gen.settings.timeout = 60 * 1000 # 60 sec.
|
376
|
+
conf_gen.settings.minRMSD = 0.5
|
377
|
+
conf_gen.settings.energyWindow = 20.0 # kcal/mol(?)
|
378
|
+
conf_gen.settings.maxNumOutputConformers = n
|
379
|
+
# dictionary mapping status codes to human readable strings
|
380
|
+
status_to_str = {
|
381
|
+
CDPL.ConfGen.ReturnCode.UNINITIALIZED : 'uninitialized',
|
382
|
+
CDPL.ConfGen.ReturnCode.TIMEOUT : 'max. processing time exceeded',
|
383
|
+
CDPL.ConfGen.ReturnCode.ABORTED : 'aborted',
|
384
|
+
CDPL.ConfGen.ReturnCode.FORCEFIELD_SETUP_FAILED : 'force field setup failed',
|
385
|
+
CDPL.ConfGen.ReturnCode.FORCEFIELD_MINIMIZATION_FAILED : 'force field structure refinement failed',
|
386
|
+
CDPL.ConfGen.ReturnCode.FRAGMENT_LIBRARY_NOT_SET : 'fragment library not available',
|
387
|
+
CDPL.ConfGen.ReturnCode.FRAGMENT_CONF_GEN_FAILED : 'fragment conformer generation failed',
|
388
|
+
CDPL.ConfGen.ReturnCode.FRAGMENT_CONF_GEN_TIMEOUT : 'fragment conformer generation timeout',
|
389
|
+
CDPL.ConfGen.ReturnCode.FRAGMENT_ALREADY_PROCESSED : 'fragment already processed',
|
390
|
+
CDPL.ConfGen.ReturnCode.TORSION_DRIVING_FAILED : 'torsion driving failed',
|
391
|
+
CDPL.ConfGen.ReturnCode.CONF_GEN_FAILED : 'conformer generation failed',
|
392
|
+
}
|
393
|
+
writer = CDPL.Chem.MolecularGraphWriter( f"{tmpfile.name}.sdf", "sdf" )
|
394
|
+
# SB - io.StringIO does not work with Chem.MolecularGraphWriter()
|
395
|
+
# We have to create a temporary file and re-read it for storing individual conformers.
|
396
|
+
try:
|
397
|
+
# prepare the molecule for conformer generation
|
398
|
+
CDPL.ConfGen.prepareForConformerGeneration(mol)
|
399
|
+
# generate the conformer ensemble
|
400
|
+
status = conf_gen.generate(mol)
|
401
|
+
# if successful, store the generated conformer ensemble as
|
402
|
+
# per atom 3D coordinates arrays (= the way conformers are represented in CDPKit)
|
403
|
+
if status == CDPL.ConfGen.ReturnCode.SUCCESS or status == CDPL.ConfGen.ReturnCode.TOO_MUCH_SYMMETRY:
|
404
|
+
# TOO_MUCH_SYMMETRY: output ensemble may contain duplicates
|
405
|
+
conf_gen.setConformers(mol)
|
406
|
+
writer.write(mol)
|
407
|
+
with Chem.SDMolSupplier(f"{tmpfile.name}.sdf", sanitize=True, removeHs=False) as sdf:
|
408
|
+
self.confs = [ Conf(m) for m in sdf if m is not None ]
|
409
|
+
else:
|
410
|
+
raise RuntimeError('Error: conformer generation failed: %s' % status_to_str[status])
|
411
|
+
except Exception as e:
|
412
|
+
raise RuntimeError('Error: conformer generation failed: %s' % str(e))
|
413
|
+
# tmpfile is automatically closed and deleted here
|
414
|
+
|
415
|
+
|
416
|
+
# energy evaluations for ranking
|
417
|
+
for conf in self.confs:
|
418
|
+
conf.get_potential_energy(calculator) # default: MMFF94
|
419
|
+
|
420
|
+
# set relative energy, E_rel(kcal/mol)
|
421
|
+
sort_by = 'E_tot(kcal/mol)'
|
422
|
+
self.confs = sorted(self.confs, key=lambda c: c.props[sort_by]) # ascending order
|
423
|
+
lowest_energy = self.confs[0].props[sort_by]
|
424
|
+
for conf in self.confs:
|
425
|
+
conf.props.update({"E_rel(kcal/mol)": conf.props[sort_by] - lowest_energy})
|
426
|
+
|
427
|
+
return self.rename()
|
428
|
+
|
429
|
+
|
430
|
+
def optimize(self, calculator:str | Callable = 'MMFF94', fmax:float=0.05) -> Self:
|
431
|
+
"""Optimizes 3D conformers
|
432
|
+
|
433
|
+
Args:
|
434
|
+
calculator (str | Callable): _description_
|
435
|
+
fmax (float, optional): _description_. Defaults to 0.05.
|
436
|
+
|
437
|
+
Returns:
|
438
|
+
Self: _description_
|
439
|
+
"""
|
440
|
+
self.confs = [ conf.optimize(calculator, fmax) for conf in self.confs ]
|
441
|
+
return self
|
442
|
+
|
443
|
+
|
444
|
+
def sort_confs(self) -> Self:
|
445
|
+
"""Sorts conformers by `E_tot(eV)` or `E_tot(kcal/mol)` and sets `E_rel(kcal/mol)`.
|
446
|
+
|
447
|
+
Raises:
|
448
|
+
KeyError: if `E_tot(eV)` or `E_tot(kcal/mol)` is not defined.
|
449
|
+
|
450
|
+
Returns:
|
451
|
+
Self: rdworks.Mol object.
|
452
|
+
"""
|
453
|
+
if all(['E_tot(eV)' in c.props for c in self.confs]):
|
454
|
+
sort_by = 'E_tot(eV)'
|
455
|
+
conversion = 23.060547830619026 # eV to kcal/mol
|
456
|
+
elif all(['E_tot(kcal/mol)' in c.props for c in self.confs]):
|
457
|
+
sort_by = 'E_tot(kcal/mol)'
|
458
|
+
conversion = 1.0
|
459
|
+
else:
|
460
|
+
raise KeyError(f'Mol.sort_confs() requires E_tot(eV) or E_tot(kcal/mol) property')
|
461
|
+
self.confs = sorted(self.confs, key=lambda c: c.props[sort_by]) # ascending order
|
462
|
+
if self.count() > 0:
|
463
|
+
E_lowest = self.confs[0].props[sort_by]
|
464
|
+
for conf in self.confs:
|
465
|
+
E_rel = (conf.props[sort_by] - E_lowest)* conversion
|
466
|
+
conf.props.update({"E_rel(kcal/mol)": E_rel})
|
467
|
+
return self
|
468
|
+
|
469
|
+
|
470
|
+
def align_confs(self, method:str='rigid_fragment') -> Self:
|
471
|
+
"""Aligns all conformers to the first conformer.
|
472
|
+
|
473
|
+
Args:
|
474
|
+
method (str, optional): alignment method:
|
475
|
+
`rigid_fragment`, `CrippenO3A`, `MMFFO3A`, `best_rms`.
|
476
|
+
Defaults to `rigid_fragment`.
|
477
|
+
|
478
|
+
Returns:
|
479
|
+
Self: rdworks.Mol object.
|
480
|
+
"""
|
481
|
+
|
482
|
+
if self.count() < 2: # nothing to do
|
483
|
+
return self
|
484
|
+
|
485
|
+
if method == 'rigid_fragment':
|
486
|
+
indices = rigid_fragment_indices(self.confs[0].rdmol)[0] # 3D and H, largest fragment
|
487
|
+
atomMap = [(i, i) for i in indices]
|
488
|
+
for i in range(1, self.count()):
|
489
|
+
# rdMolAlign.AlignMol does not take symmetry into account
|
490
|
+
# but we will use atom indices for alignment anyway.
|
491
|
+
rmsd = rdMolAlign.AlignMol(prbMol=self.confs[i].rdmol,
|
492
|
+
refMol=self.confs[0].rdmol,
|
493
|
+
atomMap=atomMap)
|
494
|
+
# If atomMap is not given, AlignMol() will attempt to generate atomMap by
|
495
|
+
# substructure matching.
|
496
|
+
|
497
|
+
elif method == 'CrippenO3A':
|
498
|
+
crippen_ref_contrib = rdMolDescriptors._CalcCrippenContribs(self.confs[0].rdmol)
|
499
|
+
for i in range(1, self.count()):
|
500
|
+
crippen_prb_contrib = rdMolDescriptors._CalcCrippenContribs(self.confs[i].rdmol)
|
501
|
+
crippen_O3A = rdMolAlign.GetCrippenO3A(prbMol=self.confs[i].rdmol,
|
502
|
+
refMol=self.confs[0].rdmol,
|
503
|
+
prbCrippenContribs=crippen_prb_contrib,
|
504
|
+
refCrippenContribs=crippen_ref_contrib,
|
505
|
+
)
|
506
|
+
crippen_O3A.Align()
|
507
|
+
# crippen_O3A.Score()
|
508
|
+
|
509
|
+
elif method == 'MMFFO3A':
|
510
|
+
mmff_ref_params = AllChem.MMFFGetMoleculeProperties(self.confs[0].rdmol)
|
511
|
+
for i in range(1, self.count()):
|
512
|
+
mmff_prb_params = AllChem.MMFFGetMoleculeProperties(self.confs[i].rdmol)
|
513
|
+
mmff_O3A = rdMolAlign.GetO3A(prbMol=self.confs[i].rdmol,
|
514
|
+
refMol=self.confs[0].rdmol,
|
515
|
+
prbPyMMFFMolProperties=mmff_prb_params,
|
516
|
+
refPyMMFFMolProperties=mmff_ref_params,
|
517
|
+
)
|
518
|
+
mmff_O3A.Align()
|
519
|
+
# mmff_O3A.Score()
|
520
|
+
|
521
|
+
elif method == 'best_rms':
|
522
|
+
for i in range(1, self.count()):
|
523
|
+
# symmetry-aware alignment / speed can be improved by removing Hs
|
524
|
+
rmsd = rdMolAlign.GetBestRMS(prbMol=self.confs[i].rdmol,
|
525
|
+
refMol=self.confs[0].rdmol)
|
526
|
+
|
527
|
+
return self
|
528
|
+
|
529
|
+
|
530
|
+
def cluster_confs(self, method:str='QT', threshold:float=1.0, sortby:str='size') -> Self:
|
531
|
+
"""Clusters all conformers and sets cluster properties.
|
532
|
+
|
533
|
+
Following cluster properties will be added: `cluster`, `cluster_mean_energy`,
|
534
|
+
`cluster_median_energy`, `cluster_IQR_energy`, `cluster_size`, `cluster_centroid` (True or False)
|
535
|
+
|
536
|
+
`RCKMeans` algorithm is unreliable and not supported for now.
|
537
|
+
|
538
|
+
Args:
|
539
|
+
method (str, optional): clustering algorithm:
|
540
|
+
`Butina`,
|
541
|
+
`QT`,
|
542
|
+
`NMRCLUST`,
|
543
|
+
`DQT`,
|
544
|
+
`BitQT`,
|
545
|
+
`DynamicTreeCut`,
|
546
|
+
`AutoGraph`.
|
547
|
+
Defaults to `QT`.
|
548
|
+
threshold (float, optional): RMSD threshold of a cluster. Defaults to 1.0.
|
549
|
+
sortby (str, optional): sort cluster(s) by mean `energy` or cluster `size`.
|
550
|
+
Defaults to `size`.
|
551
|
+
|
552
|
+
Raises:
|
553
|
+
NotImplementedError: if unsupported method is requested.
|
554
|
+
|
555
|
+
Returns:
|
556
|
+
Self: rdworks.Mol object
|
557
|
+
"""
|
558
|
+
if method != 'DQT': # rmsd of x,y,z coordinates (non-H)
|
559
|
+
conf_rdmols_noH = [Chem.RemoveHs(Chem.Mol(conf.rdmol)) for conf in self.confs]
|
560
|
+
# copies are made for rmsd calculations to prevent coordinates changes
|
561
|
+
lower_triangle_values = []
|
562
|
+
for i in range(self.count()): # number of conformers
|
563
|
+
for j in range(i):
|
564
|
+
# rdMolAlign.GetBestRMS takes symmetry into account
|
565
|
+
# removed hydrogens to speed up
|
566
|
+
best_rms = rdMolAlign.GetBestRMS(prbMol=conf_rdmols_noH[i], refMol=conf_rdmols_noH[j])
|
567
|
+
lower_triangle_values.append(best_rms)
|
568
|
+
|
569
|
+
else: # rmsd (radian) of dihedral angles
|
570
|
+
torsion_atom_indices = self.torsion_atoms()
|
571
|
+
# symmmetry-related equivalence is not considered
|
572
|
+
torsions = []
|
573
|
+
for conf in self.confs:
|
574
|
+
t_radians = []
|
575
|
+
for (i, j, k, l, rot_indices, fix_indices) in torsion_atom_indices:
|
576
|
+
t_radians.append(
|
577
|
+
rdMolTransforms.GetDihedralRad(conf.rdmol.GetConformer(), i, j, k, l))
|
578
|
+
torsions.append(np.array(t_radians))
|
579
|
+
# torsions: num.confs x num.torsions
|
580
|
+
N = len(torsions)
|
581
|
+
lower_triangle_values = []
|
582
|
+
for i in range(N):
|
583
|
+
for j in range(i):
|
584
|
+
rad_diff = np.fmod(torsions[i] - torsions[j], 2.0*np.pi)
|
585
|
+
rmsd = np.sqrt(np.sum(rad_diff**2)/N)
|
586
|
+
# np.max(np.absolute(rad_diff))
|
587
|
+
lower_triangle_values.append(rmsd)
|
588
|
+
|
589
|
+
cluster_assignment = None
|
590
|
+
centroid_indices = None
|
591
|
+
|
592
|
+
if method == 'Butina':
|
593
|
+
clusters = Butina.ClusterData(data=lower_triangle_values,
|
594
|
+
nPts=self.count(),
|
595
|
+
distThresh=threshold,
|
596
|
+
isDistData=True,
|
597
|
+
reordering=True)
|
598
|
+
cluster_assignment = [None,] * self.count()
|
599
|
+
centroid_indices = []
|
600
|
+
for cluster_idx, indices in enumerate(clusters):
|
601
|
+
for conf_idx in indices:
|
602
|
+
cluster_assignment[conf_idx] = cluster_idx
|
603
|
+
centroid_indices.append(indices[0])
|
604
|
+
|
605
|
+
elif method == 'QT':
|
606
|
+
# my implementation of the original QT algorithm
|
607
|
+
# tighter than Butina
|
608
|
+
symm_matrix = convert_tril_to_symm(lower_triangle_values)
|
609
|
+
cluster_assignment, centroid_indices = QT(symm_matrix, threshold)
|
610
|
+
|
611
|
+
elif method == 'NMRCLUST':
|
612
|
+
# looser than Butina
|
613
|
+
# does not require threshold
|
614
|
+
symm_matrix = convert_tril_to_symm(lower_triangle_values)
|
615
|
+
cluster_assignment, centroid_indices = NMRCLUST(symm_matrix)
|
616
|
+
|
617
|
+
elif method == 'DQT':
|
618
|
+
# issues with symmetry related multiplicities
|
619
|
+
symm_matrix = convert_tril_to_symm(lower_triangle_values)
|
620
|
+
cluster_assignment, centroid_indices = QT(symm_matrix, threshold)
|
621
|
+
|
622
|
+
elif method == 'BitQT':
|
623
|
+
# supposed to produce identical result as QT but it does not
|
624
|
+
symm_matrix = convert_tril_to_symm(lower_triangle_values)
|
625
|
+
cluster_assignment, centroid_indices = BitQT(symm_matrix, threshold)
|
626
|
+
|
627
|
+
elif method == 'DynamicTreeCut':
|
628
|
+
# often collapses into single cluster. so not very useful.
|
629
|
+
symm_matrix = convert_tril_to_symm(lower_triangle_values)
|
630
|
+
cluster_assignment, centroid_indices = DynamicTreeCut(symm_matrix)
|
631
|
+
|
632
|
+
# elif method == 'RCKmeans':
|
633
|
+
# # buggy
|
634
|
+
# symm_matrix = convert_tril_to_symm(lower_triangle_values)
|
635
|
+
# cluster_assignment, centroid_indices = RCKmeans(symm_matrix)
|
636
|
+
|
637
|
+
elif method == 'AutoGraph':
|
638
|
+
# not reliable
|
639
|
+
symm_matrix = convert_tril_to_symm(lower_triangle_values)
|
640
|
+
cluster_assignment, centroid_indices = AutoGraph(symm_matrix)
|
641
|
+
|
642
|
+
else:
|
643
|
+
raise NotImplementedError(f'{method} clustering is not implemented yet.')
|
644
|
+
|
645
|
+
# cluster_assignment: ex. [0,1,0,0,2,..]
|
646
|
+
# centroid_indices: ex. [10,5,..] i.e. centroids of clusters 0 and 1 are 10 and 5, respectively.
|
647
|
+
|
648
|
+
if cluster_assignment is not None and centroid_indices is not None:
|
649
|
+
cluster_raw_data = defaultdict(list)
|
650
|
+
for conf_idx, cluster_idx in enumerate(cluster_assignment):
|
651
|
+
cluster_raw_data[cluster_idx].append(conf_idx)
|
652
|
+
cluster_list = []
|
653
|
+
for i, k in enumerate(sorted(cluster_raw_data.keys())):
|
654
|
+
energies = [self.confs[conf_idx].props['E_rel(kcal/mol)'] for conf_idx in cluster_raw_data[k]]
|
655
|
+
mean_energy = np.mean(energies)
|
656
|
+
median_energy = np.median(energies)
|
657
|
+
q75, q25 = np.percentile(energies, [75, 25])
|
658
|
+
iqr_energy = q75 - q25 # interquartile range (IQR)
|
659
|
+
cluster_list.append({'confs' : cluster_raw_data[k],
|
660
|
+
'centroid' : centroid_indices[i], # conformer index
|
661
|
+
'size' : len(cluster_raw_data[k]),
|
662
|
+
'mean_energy' : mean_energy,
|
663
|
+
'median_energy' : median_energy,
|
664
|
+
'iqr_energy' : iqr_energy,
|
665
|
+
})
|
666
|
+
# sort cluster index
|
667
|
+
if sortby == 'size':
|
668
|
+
cluster_list = sorted(cluster_list, key=lambda x: x['size'], reverse=True)
|
669
|
+
|
670
|
+
elif sortby == 'energy':
|
671
|
+
cluster_list = sorted(cluster_list, key=lambda x: x['median_energy'], reverse=False)
|
672
|
+
|
673
|
+
else:
|
674
|
+
raise NotImplementedError(f'{sortby} is not implemented yet.')
|
675
|
+
|
676
|
+
for cluster_idx, cluster_dict in enumerate(cluster_list, start=1):
|
677
|
+
for conf_idx in cluster_dict['confs']:
|
678
|
+
if conf_idx == cluster_dict['centroid']:
|
679
|
+
self.confs[conf_idx].props.update({
|
680
|
+
'cluster' : cluster_idx,
|
681
|
+
'cluster_mean_energy' : cluster_dict['mean_energy'],
|
682
|
+
'cluster_median_energy' : cluster_dict['median_energy'],
|
683
|
+
'cluster_IQR_energy' : cluster_dict['iqr_energy'],
|
684
|
+
'cluster_size' : cluster_dict['size'],
|
685
|
+
'cluster_centroid' : True,
|
686
|
+
})
|
687
|
+
else:
|
688
|
+
self.confs[conf_idx].props.update({
|
689
|
+
'cluster' : cluster_idx,
|
690
|
+
'cluster_mean_energy' : cluster_dict['mean_energy'],
|
691
|
+
'cluster_median_energy' : cluster_dict['median_energy'],
|
692
|
+
'cluster_IQR_energy' : cluster_dict['iqr_energy'],
|
693
|
+
'cluster_size' : cluster_dict['size'],
|
694
|
+
'cluster_centroid' : False,
|
695
|
+
})
|
696
|
+
return self
|
697
|
+
|
698
|
+
|
699
|
+
def drop_confs(self,
|
700
|
+
stereo_flipped:bool=True,
|
701
|
+
unconverged:bool=True,
|
702
|
+
similar: bool | None = None,
|
703
|
+
similar_rmsd:float=0.3,
|
704
|
+
cluster: bool | None =None,
|
705
|
+
k: int | None = None,
|
706
|
+
window: float | None = None,
|
707
|
+
verbose: bool = False) -> Self:
|
708
|
+
"""Drop conformers that meet some condition(s).
|
709
|
+
|
710
|
+
Args:
|
711
|
+
stereo_flipped (bool): drop conformers whose R/S and cis/trans stereo is unintentionally flipped.
|
712
|
+
For example, a trans double bond in a macrocyle can end up with both trans
|
713
|
+
and cis isomers in the final optimized conformers.
|
714
|
+
unconverged (bool): drop unconverged conformers. see `Converged` property.
|
715
|
+
similar (bool, optional): drop similar conformers. see `similar_rmsd`.
|
716
|
+
similar_rmsd (float): RMSD (A) below `similar_rmsd` is regarded similar (default: 0.3)
|
717
|
+
cluster (bool, optional): drop all except for the lowest energy conformer in each cluster.
|
718
|
+
k (int, optional): drop all except for `k` lowest energy conformers.
|
719
|
+
window (float, optional): drop all except for conformers within `window` of relative energy.
|
720
|
+
|
721
|
+
Returns:
|
722
|
+
Self: a copy of rdworks.Mol object.
|
723
|
+
|
724
|
+
Examples:
|
725
|
+
To drop similar conformers within rmsd of 0.5 A
|
726
|
+
>>> mol.drop_confs(similar=True, similar_rmsd=0.5)
|
727
|
+
|
728
|
+
To drop conformers beyond 5 kcal/mol
|
729
|
+
>>> mol.drop_confs(window=5.0)
|
730
|
+
|
731
|
+
"""
|
732
|
+
obj = copy.deepcopy(self)
|
733
|
+
|
734
|
+
if stereo_flipped and obj.count() > 0:
|
735
|
+
mask = [Chem.MolToSmiles(Chem.RemoveHs(_.rdmol)) == obj.smiles for _ in obj.confs]
|
736
|
+
obj.confs = list(itertools.compress(obj.confs, mask))
|
737
|
+
if verbose:
|
738
|
+
main_logger.info(f'drop_confs stereo_flipped={mask.count(False)} -> {obj.count()}')
|
739
|
+
|
740
|
+
if unconverged and obj.count() > 0:
|
741
|
+
mask = [_.props['Converged'] if 'Converged' in _.props else True for _ in obj.confs]
|
742
|
+
obj.confs = list(itertools.compress(obj.confs, mask))
|
743
|
+
if verbose:
|
744
|
+
main_logger.info(f'drop_confs unconverged={mask.count(False)} -> {obj.count()}')
|
745
|
+
|
746
|
+
if similar and obj.count() > 1:
|
747
|
+
# it is observed that there are essentially identical conformers
|
748
|
+
# such as 180-degree ring rotation and there is not minor conformational variations
|
749
|
+
# in the RDKit ETKDG generated conformers.
|
750
|
+
conf_rdmols_noH = [Chem.RemoveHs(Chem.Mol(_.rdmol)) for _ in obj.confs]
|
751
|
+
# copies are made for rmsd calculations to prevent coordinates changes
|
752
|
+
lower_triangle_values = []
|
753
|
+
for i in range(obj.count()): # number of conformers
|
754
|
+
for j in range(i):
|
755
|
+
# rdMolAlign.GetBestRMS takes symmetry into account
|
756
|
+
# removed hydrogens to speed up
|
757
|
+
best_rms = rdMolAlign.GetBestRMS(prbMol=conf_rdmols_noH[i], refMol=conf_rdmols_noH[j])
|
758
|
+
lower_triangle_values.append(best_rms)
|
759
|
+
symm_matrix = convert_tril_to_symm(lower_triangle_values)
|
760
|
+
cluster_assignment, centroid_indices = QT(symm_matrix, similar_rmsd)
|
761
|
+
mask = [conf_idx in centroid_indices for conf_idx, conf in enumerate(obj.confs)]
|
762
|
+
obj.confs = list(itertools.compress(obj.confs, mask))
|
763
|
+
if verbose:
|
764
|
+
main_logger.info(f'drop_confs similar({similar_rmsd})={mask.count(False)} -> {obj.count()}')
|
765
|
+
|
766
|
+
# note: it will retain the conformers with lower index
|
767
|
+
# so, it should be sorted before dropping
|
768
|
+
# obj = obj.sort_confs()
|
769
|
+
# mask = []
|
770
|
+
# retained_confs = []
|
771
|
+
# for conf_i in obj.confs:
|
772
|
+
# is_dissimilar = True
|
773
|
+
# for conf_j_rdmol_noH in retained_confs:
|
774
|
+
# # symmetry-aware alignment / removing Hs speeds up the calculation
|
775
|
+
# rmsd = rdMolAlign.GetBestRMS(Chem.RemoveHs(conf_i.rdmol), conf_j_rdmol_noH)
|
776
|
+
# if rmsd < similar_rmsd:
|
777
|
+
# is_dissimilar = False
|
778
|
+
# break
|
779
|
+
# mask.append(is_dissimilar)
|
780
|
+
# if is_dissimilar:
|
781
|
+
# retained_confs.append(Chem.RemoveHs(conf_i.rdmol)) # store a copy of H-removed rdmol
|
782
|
+
# obj.confs = list(itertools.compress(obj.confs, mask))
|
783
|
+
|
784
|
+
if cluster and obj.count() > 1:
|
785
|
+
# drop non-centroid cluster member(s)
|
786
|
+
mask = [_.props['centroid'] if 'centroid' in _.props else True for _ in obj.confs]
|
787
|
+
obj.confs = list(itertools.compress(obj.confs, mask))
|
788
|
+
if verbose:
|
789
|
+
main_logger.info(f'drop_confs cluster(non-centroid)={mask.count(False)} -> {obj.count()}')
|
790
|
+
|
791
|
+
if (k or window) and obj.count() > 0:
|
792
|
+
if k:
|
793
|
+
mask_k = [i < k for i,_ in enumerate(obj.confs)]
|
794
|
+
else:
|
795
|
+
mask_k = [True,] * obj.count()
|
796
|
+
if window:
|
797
|
+
mask_window = [_.props['E_rel(kcal/mol)'] < window if 'E_rel(kcal/mol)' in _.props else True for _ in obj.confs]
|
798
|
+
else:
|
799
|
+
mask_window = [True,] * obj.count()
|
800
|
+
# retain conformer(s) that satisfy both k and window conditions
|
801
|
+
mask = [(x and y) for (x,y) in zip(mask_k, mask_window)]
|
802
|
+
obj.confs = list(itertools.compress(obj.confs, mask))
|
803
|
+
if verbose:
|
804
|
+
main_logger.info(f'drop_confs k and/or window={mask.count(False)} -> {obj.count()}')
|
805
|
+
|
806
|
+
return obj
|
807
|
+
|
808
|
+
|
809
|
+
def count(self) -> int:
    """Return how many conformers this molecule currently holds.

    Returns:
        int: length of `self.confs`.
    """
    conformers = self.confs
    return len(conformers)
|
816
|
+
|
817
|
+
|
818
|
+
def is_nn_applicable(self, model:str) -> bool:
    """Check if a particular neural network model is applicable to current molecule.

    The model name is matched case-insensitively. A model is applicable when
    every atom of the molecule is one of the elements listed for that model;
    the ANI models additionally require `props['charge']` to be zero.

    Args:
        model (str): neural network models: `ANI-2x`, `ANI-2xt`, `AIMNET` (or `AIMNET2`)

    Raises:
        ValueError: if model is not supported.

    Returns:
        bool: True if applicable.
    """
    # normalise once so that every branch is case-insensitive
    model_key = model.lower()

    if model_key in ('ani-2x', 'ani-2xt'):
        if self.props['charge'] != 0:
            return False
        # H, C, N, O, F, S, Cl
        atomic_numbers = {1, 6, 7, 8, 9, 16, 17}

    elif model_key in ('aimnet', 'aimnet2'):
        # H, B, C, N, O, F, Si, P, S, Cl, As, Se, Br, I
        atomic_numbers = {1, 5, 6, 7, 8, 9, 14, 15, 16, 17, 33, 34, 35, 53}

    else:
        raise ValueError('is_nn_applicable() supports ANI-2x, ANI-2xt, or AIMNET')

    return all(a.GetAtomicNum() in atomic_numbers for a in self.rdmol.GetAtoms())
|
848
|
+
|
849
|
+
|
850
|
+
def charge(self) -> int:
    """Return the formal charge of the molecule.

    Returns:
        int: value reported by `rdmolops.GetFormalCharge` for `self.rdmol`.
    """
    formal_charge = rdmolops.GetFormalCharge(self.rdmol)
    return formal_charge
|
857
|
+
|
858
|
+
|
859
|
+
def symbols(self) -> list[str]:
    """Return the element symbol of every atom, in atom order.

    Returns:
        list: one symbol per atom of `self.rdmol`.
    """
    element_symbols = []
    for atom in self.rdmol.GetAtoms():
        element_symbols.append(atom.GetSymbol())
    return element_symbols
|
866
|
+
|
867
|
+
|
868
|
+
def numbers(self) -> list[int]:
    """Return the atomic number of every atom, in atom order.

    Returns:
        list: one atomic number per atom of `self.rdmol`.
    """
    atomic_nums = []
    for atom in self.rdmol.GetAtoms():
        atomic_nums.append(atom.GetAtomicNum())
    return atomic_nums
|
875
|
+
|
876
|
+
|
877
|
+
def torsion_atoms(self, strict:bool=True) -> list[tuple]:
    """Determine dihedral angle atoms (a-b-c-d) and rotating group for each rotatable bond (b-c).

    For every bond b-c matched by the rotatable-bond SMARTS, the terminal atoms
    `a` (neighbor of b) and `d` (neighbor of c) are the neighbors with the highest
    atomic number, ties going to the lowest atom index; H, F, Cl, Br and I are
    never used as terminal atoms. A bond is skipped when no terminal atom exists,
    when a-b or c-d lies in a 3- or 4-membered ring, or when either side of the
    bond has fewer than three heavy atoms.

    Args:
        strict (bool): whether to exclude amide/imide/ester/acid bonds.

    Returns:
        [ (a, b, c, d, rot_atom_indices, fix_atom_indices),
          (a, b, c, d, rot_atom_indices, fix_atom_indices),
          ...,
        ]
        where `rot_atom_indices` is the side with fewer heavy atoms.
    """
    # https://github.com/rdkit/rdkit/blob/1bf6ef3d65f5c7b06b56862b3fb9116a3839b229/rdkit/Chem/Lipinski.py#L47%3E
    # https://github.com/rdkit/rdkit/blob/de602c88809ea6ceba1e8ed50fd543b6e406e9c4/Code/GraphMol/Descriptors/Lipinski.cpp#L108
    if strict:
        # excludes amide/imide/ester/acid bonds
        rotatable_bond_pattern = Chem.MolFromSmarts(
            (
                "[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])("
                "[CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])&!$([#7,O,S!D1]-!@[CD3]="
                "[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])&!$([#7!D1]-!@[CD3]=[N+])]-,:;!@[!$"
                "(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])(["
                "CH3])[CH3])]"
            )
        )
    else:
        rotatable_bond_pattern = Chem.MolFromSmarts('[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]')
    rotatable_bonds = self.rdmol.GetSubstructMatches(rotatable_bond_pattern)
    torsion_angle_atom_indices = []

    # small rings (n=3 or 4)
    # ex. AtomRings() = [(1, 37, 35, 34, 3, 2), (29, 28, 30)]
    small_rings = [set(r) for r in self.rdmol.GetRingInfo().AtomRings() if len(r) < 5]

    forbidden_terminal_nuclei = {1, 9, 17, 35, 53} # H,F,Cl,Br,I

    def _pick_terminal(center_idx, partner_idx):
        # Terminal atom for the dihedral: heaviest allowed neighbor of
        # `center_idx` other than `partner_idx`; lowest index wins ties.
        candidates = []
        for neighbor in self.rdmol.GetAtomWithIdx(center_idx).GetNeighbors():
            neighbor_idx = neighbor.GetIdx()
            if neighbor_idx == partner_idx:
                continue
            neighbor_atomic_num = neighbor.GetAtomicNum()
            if neighbor_atomic_num not in forbidden_terminal_nuclei:
                candidates.append((neighbor_atomic_num, neighbor_idx))
        if not candidates:
            return None
        return max(candidates, key=lambda x: (x[0], -x[1]))[1]

    def _in_same_small_ring(i, j):
        # True if both atoms belong to one 3- or 4-membered ring.
        return any((i in ring) and (j in ring) for ring in small_rings)

    def _heavy_atom_count(atom_indices):
        # Number of non-hydrogen atoms among `atom_indices`.
        return sum(1 for i in atom_indices if self.rdmol.GetAtomWithIdx(i).GetAtomicNum() > 1)

    for (b_idx, c_idx) in rotatable_bonds:
        # determine an atom ``a`` that defines the dihedral angle
        a_idx = _pick_terminal(b_idx, c_idx)
        if a_idx is None or _in_same_small_ring(a_idx, b_idx):
            continue

        # determine an atom ``d`` that defines the dihedral angle
        d_idx = _pick_terminal(c_idx, b_idx)
        if d_idx is None or _in_same_small_ring(c_idx, d_idx):
            continue

        # determine a group of atoms to be rotated
        # https://ctr.fandom.com/wiki/Break_rotatable_bonds_and_report_the_fragments
        em = Chem.EditableMol(self.rdmol)
        em.RemoveBond(b_idx, c_idx)
        fragments = Chem.GetMolFrags(em.GetMol(), asMols=False) # returns tuple of tuple
        # Select the two sides of the broken bond explicitly: a molecule that
        # already has several disconnected components yields more than two fragments.
        frag_b = next(frag for frag in fragments if b_idx in frag)
        if c_idx in frag_b:
            # removing the bond did not separate b from c; nothing to rotate
            continue
        frag_c = next(frag for frag in fragments if c_idx in frag)
        hac_b = _heavy_atom_count(frag_b)
        hac_c = _heavy_atom_count(frag_c)

        # smaller fragment will be rotated and must contain at least three heavy atoms
        if min(hac_b, hac_c) >= 3:
            (frag_rot, frag_fix) = sorted([(hac_b, frag_b), (hac_c, frag_c)])
            torsion_angle_atom_indices.append((a_idx, b_idx, c_idx, d_idx, frag_rot[1], frag_fix[1]))

    return torsion_angle_atom_indices
|
979
|
+
|
980
|
+
|
981
|
+
def compute(self, **kwargs) -> Self:
|
982
|
+
"""Change settings for parallel computing.
|
983
|
+
|
984
|
+
Args:
|
985
|
+
max_workers (int): max number of workers.
|
986
|
+
chunksize (int): chunksize of splitted workload.
|
987
|
+
progress (bool): whether to show progress bar.
|
988
|
+
|
989
|
+
Returns:
|
990
|
+
Self: rdworks.MolLibr object.
|
991
|
+
"""
|
992
|
+
self.max_workers = kwargs.get('max_workers', self.max_workers)
|
993
|
+
self.chunksize = kwargs.get('chunksize', self.chunksize)
|
994
|
+
self.progress = kwargs.get('progress', self.progress)
|
995
|
+
return self
|
996
|
+
|
997
|
+
|
998
|
+
@staticmethod
def _map_optimize_conf(conf:Conf, targs:tuple) -> Conf:
    """Adapter that lets a map()-style call run Conf.optimize() on `conf`.

    `targs` is unpacked into the positional arguments of `conf.optimize()`,
    and whatever that call returns is handed back to the caller.

    Args:
        conf (Conf): subject rdworks.Conf object.
        targs (tuple): tuple of arguments to be passed to Conf.optimize().

    Returns:
        Conf: rdworks.Conf object
    """
    optimized = conf.optimize(*targs)
    return optimized
|
1014
|
+
|
1015
|
+
|
1016
|
+
def torsion_energies(self,
|
1017
|
+
calculator:str | Callable,
|
1018
|
+
fmax:float = 0.05,
|
1019
|
+
interval:float = 15.0,
|
1020
|
+
use_converged_only: bool = True,
|
1021
|
+
optimize_ref: bool = False,
|
1022
|
+
**kwargs,
|
1023
|
+
) -> Self:
|
1024
|
+
"""Calculates potential energy profiles for each torsion angle using ASE optimizer.
|
1025
|
+
|
1026
|
+
Args:
|
1027
|
+
calculator (str | Callable): 'MMFF', 'UFF', or ASE calculator.
|
1028
|
+
fmax (float, optional): fmax of ASE optimizer. Defaults to 0.05.
|
1029
|
+
interval (float, optional): interval of torsion angles in degree. Defaults to 15.0.
|
1030
|
+
use_converged_only (bool, optional): whether to use only converged data. Defaults to True.
|
1031
|
+
|
1032
|
+
Returns:
|
1033
|
+
list[dict]: [{'indices':list, 'angle':list, 'E_rel(kcal/mol)':list}, ...]
|
1034
|
+
"""
|
1035
|
+
self = self.compute(**kwargs)
|
1036
|
+
|
1037
|
+
torsion_atoms_indices = self.torsion_atoms()
|
1038
|
+
|
1039
|
+
ref_conf = self.confs[0].copy() # use the lowest energy conformer as a reference
|
1040
|
+
if optimize_ref:
|
1041
|
+
ref_conf = ref_conf.optimize(calculator, fmax)
|
1042
|
+
|
1043
|
+
# mol.confs will be populated with torsion conformers.
|
1044
|
+
# It is designed for a batch optimization in the future.
|
1045
|
+
mol = self.copy()
|
1046
|
+
mol.confs = []
|
1047
|
+
data = []
|
1048
|
+
|
1049
|
+
for k, (a, b, c, d, rot_indices, fix_indices) in enumerate(torsion_atoms_indices):
|
1050
|
+
data.append({'angle':[], 'init':[], 'final':[], 'Converged':[]})
|
1051
|
+
for angle in np.arange(-180.0, 180.0, interval):
|
1052
|
+
# Iterated numpy.ndarray does not contain the last 180: -180., ..., (180).
|
1053
|
+
x = ref_conf.copy()
|
1054
|
+
x.props.update({'torsion_index': k, 'angle': float(angle)})
|
1055
|
+
AllChem.SetDihedralDeg(x.rdmol.GetConformer(), a, b, c, d, angle)
|
1056
|
+
# All atoms bonded to atom d will move.
|
1057
|
+
mol.confs.append(x)
|
1058
|
+
|
1059
|
+
# Optimize
|
1060
|
+
# with ProcessPoolExecutor(max_workers=self.max_workers) as executor:
|
1061
|
+
# largs = [ (calculator, fmax,) ] * mol.count()
|
1062
|
+
# if self.progress:
|
1063
|
+
# lconfs = list(tqdm(
|
1064
|
+
# executor.map(Mol._map_optimize_conf, mol.confs, largs, chunksize=1),
|
1065
|
+
# desc="Optimize conformers",
|
1066
|
+
# total=mol.count()))
|
1067
|
+
# else:
|
1068
|
+
# lconfs = list(
|
1069
|
+
# executor.map(Mol._map_optimize_conf, mol.confs, largs, chunksize=1))
|
1070
|
+
# mol.confs = lconfs
|
1071
|
+
|
1072
|
+
# Calculate relaxation energies
|
1073
|
+
for conf in mol.confs:
|
1074
|
+
conf = conf.optimize(calculator, fmax)
|
1075
|
+
# conf.optimize() updates coordinates and conf.props:
|
1076
|
+
# `angle`, `E_tot_init(kcal/mol)`, `E_tot(kcal/mol)`, `Converged`.
|
1077
|
+
i = conf.props['torsion_index']
|
1078
|
+
data[i]['angle'].append(conf.props['angle'])
|
1079
|
+
data[i]['init'].append(conf.props['E_tot_init(kcal/mol)'])
|
1080
|
+
data[i]['final'].append(conf.props['E_tot(kcal/mol)'])
|
1081
|
+
data[i]['Converged'].append(conf.props['Converged'])
|
1082
|
+
|
1083
|
+
# Post-processing
|
1084
|
+
torsion_energy_profiles = []
|
1085
|
+
for indices, datadict in zip(torsion_atoms_indices, data):
|
1086
|
+
if use_converged_only:
|
1087
|
+
datadict['angle'] = list(itertools.compress(datadict['angle'], datadict['Converged']))
|
1088
|
+
datadict['init'] = list(itertools.compress(datadict['init'], datadict['Converged']))
|
1089
|
+
datadict['final'] = list(itertools.compress(datadict['final'], datadict['Converged']))
|
1090
|
+
relax = np.array(datadict['init']) - np.median(datadict['final'])
|
1091
|
+
E_rel = relax - np.min(relax)
|
1092
|
+
torsion_energy_profiles.append({
|
1093
|
+
'indices': indices, # (a, b, c, d, rot_indices, fix_indices)
|
1094
|
+
'angle': np.array(datadict['angle']).tolist(), # np.ndarray -> list for serialization
|
1095
|
+
'E_rel(kcal/mol)': E_rel.tolist(), # np.ndarray -> list for serialization
|
1096
|
+
})
|
1097
|
+
self.props['torsion'] = torsion_energy_profiles
|
1098
|
+
self.props['torsion_calculator'] = str(calculator)
|
1099
|
+
|
1100
|
+
return self
|
1101
|
+
|
1102
|
+
|
1103
|
+
|
1104
|
+
|
1105
|
+
def similarity(self, other:object) -> float:
    """Compute the Tanimoto similarity between this molecule and `other`.

    Fingerprints are generated on demand with each molecule's `MFP2`
    generator and stored in its `fp` attribute for later calls.

    Args:
        other (rdworks.Mol): other rdworks.Mol object.

    Raises:
        TypeError: if `other` is not rdworks.Mol object type.

    Returns:
        float: Tanimoto similarity.
    """
    if not isinstance(other, Mol):
        raise TypeError("Mol.is_similar() expects Mol object")
    # fill in whichever fingerprint has not been computed yet
    for mol in (self, other):
        if not mol.fp:
            mol.fp = mol.MFP2.GetFingerprint(mol.rdmol)
    return DataStructs.TanimotoSimilarity(self.fp, other.fp)
|
1124
|
+
|
1125
|
+
|
1126
|
+
def is_similar(self, other:object, threshold:float) -> bool:
    """Tell whether `other` reaches the given similarity threshold.

    Args:
        other (rdworks.Mol): other rdworks.Mol object to compare with.
        threshold (float): Tanimoto similarity threshold.

    Returns:
        bool: True if `similarity(other)` is at least `threshold`.
    """
    score = self.similarity(other)
    return score >= threshold
|
1137
|
+
|
1138
|
+
|
1139
|
+
def is_matching(self, terms: str | Path, invert:bool=False) -> bool:
    """Determines if the molecule matches the predefined substructure and/or descriptor ranges.

    invert | terms(~ or !) | effect
    ------ | ------------- | -------------
    True   |      ~        | No inversion
    True   |               | Inversion
    False  |      ~        | Inversion
    False  |               | No inversion

    Args:
        terms (str | Path):
            path to an xml file of terms, or the name of a predefined set of terms.
            A string may start with `~` or `!` to invert the result; only that
            leading marker is stripped.
        invert (bool, optional): whether to invert the result. Defaults to False.

    Returns:
        bool: True if matches. If `terms` is neither a string nor a path, the
            available predefined sets are printed and False is returned.
    """
    if isinstance(terms, pathlib.PurePath):
        # any pathlib flavour (Posix or Windows), not only PosixPath
        path = terms.as_posix()
    elif isinstance(terms, str):
        if terms.startswith(('~', '!')):
            terms = terms.lstrip('~!')
            invert = not invert
        # use `terms` as a file path if it points to an existing file,
        # otherwise look it up among the predefined xml definitions
        candidate = pathlib.Path(terms)
        try:
            is_file = candidate.is_file()
        except (OSError, ValueError):
            is_file = False
        path = candidate if is_file else get_predefined_xml(terms)
    else:
        print(list_predefined_xml())
        return False

    (lterms, combine) = parse_xml(path)
    combine = combine.lower()
    mask = []
    for (name, smarts, lb, ub) in lterms:
        if smarts:
            query = Chem.MolFromSmarts(smarts)
            mask.append(len(self.rdmol.GetSubstructMatches(query)) > 0)
        else: # descriptor lower and upper bounds
            if name not in self.props:
                # compute the descriptor once and cache it in props
                val = rd_descriptor_f[name](self.rdmol)
                self.props.update({name: val})
            else:
                val = self.props[name]
            # NOTE(review): a bound equal to 0 is treated as "no bound" by the
            # truthiness tests below - confirm how parse_xml encodes absent bounds.
            mask.append(bool(((not lb) or (val >= lb)) and ((not ub) or (val <= ub))))
        if combine == 'or' and mask[-1]:
            # early termination if any term is satisfied
            return not invert
    if combine == 'and' and all(mask):
        return not invert
    return invert
|
1198
|
+
|
1199
|
+
|
1200
|
+
def is_stereo_specified(self) -> bool:
    """Check if the molecule is stereo-specified at tetrahedral atom and double bond.

    Every element reported by `Chem.FindPotentialStereo()` is inspected:

    - tetrahedral atoms (`Chem.StereoType.Atom_Tetrahedral`) must be specified;
    - double bonds (`Chem.StereoType.Bond_Double`) must be specified, unless
      one end of the bond is a nitrogen, in which case the bond is ignored;
    - all other element types are ignored.

    An element counts as specified when `element.specified` equals
    `Chem.StereoSpecified.Specified`. `element.centeredOn` is the bond index
    used to look up a double bond.

    Returns:
        bool: True if stereo-specified. A molecule with no relevant
            stereo element also yields True.
    """
    for element in Chem.FindPotentialStereo(self.rdmol):
        if element.type == Chem.StereoType.Atom_Tetrahedral:
            pass
        elif element.type == Chem.StereoType.Bond_Double:
            bond = self.rdmol.GetBondWithIdx(element.centeredOn)
            end_symbols = (bond.GetBeginAtom().GetSymbol(), bond.GetEndAtom().GetSymbol())
            if 'N' in end_symbols:
                continue
        else:
            continue
        # first unspecified element settles the answer
        if not (element.specified == Chem.StereoSpecified.Specified):
            return False
    return True
|
1251
|
+
|
1252
|
+
|
1253
|
+
def get_ring_bond_stereo(self) -> list[tuple]:
    """Collect stereo information of the double bonds that are part of a ring.

    Returns:
        list[tuple]: [(element.centeredOn, element.descriptor), ...]
            for each potential-stereo double bond whose bond is in a ring.
    """
    return [
        (element.centeredOn, element.descriptor)
        for element in Chem.FindPotentialStereo(self.rdmol)
        if element.type == Chem.StereoType.Bond_Double
        and self.rdmol.GetBondWithIdx(element.centeredOn).IsInRing()
    ]
|
1266
|
+
|
1267
|
+
|
1268
|
+
def report_stereo(self) -> None:
    """Print a summary of the stereochemistry of the molecule.

    First the number of unspecified and total atomic stereocenters, then one
    line per element found by `Chem.FindPotentialStereo()`. Double bonds and
    tetrahedral atoms get a descriptive prefix; every line ends with the
    element's `specified` and `descriptor` values.
    """
    # total number of atomic stereocenters (specified and unspecified)
    total_centers = rdMolDescriptors.CalcNumAtomStereoCenters(self.rdmol)
    unspecified_centers = rdMolDescriptors.CalcNumUnspecifiedAtomStereoCenters(self.rdmol)
    print(f"chiral centers = unspecified {unspecified_centers} / total {total_centers}")
    print(f"stereogenic double bonds =")
    for element in Chem.FindPotentialStereo(self.rdmol):
        # element.type= Atom_Octahedral, Atom_SquarePlanar, Atom_Tetrahedral,
        #               Atom_TrigonalBipyramidal,
        #               Bond_Atropisomer, Bond_Cumulene_Even, Bond_Double,
        #               Unspecified
        fields = []
        if element.type == Chem.StereoType.Bond_Double:
            bond = self.rdmol.GetBondWithIdx(element.centeredOn)
            end_symbols = (bond.GetBeginAtom().GetSymbol(), bond.GetEndAtom().GetSymbol())
            is_nitrogen = (end_symbols[0] == 'N' or end_symbols[1] == 'N')
            fields.append(f' {element.type} bond: {element.centeredOn}')
            fields.append(f'ring: {bond.IsInRing()} N: {is_nitrogen}')
        elif element.type == Chem.StereoType.Atom_Tetrahedral:
            fields.append(f' {element.type} atom: {element.centeredOn}')
            fields.append(f'atoms {list(element.controllingAtoms)}')
        # element.descriptor is a Chem.StereoDescriptor
        fields.append(f'{element.specified} {element.descriptor}')
        print(' '.join(fields))
|
1292
|
+
|
1293
|
+
|
1294
|
+
def report_props(self) -> None:
    """Print the molecule properties, one `name value` pair per line.

    Names are padded with spaces to a common width so that the values line up.
    """
    if not self.props:
        print(f"Properties: None")
        return

    print(f"Properties({len(self.props)}):")
    # every name is padded to the longest name plus five characters
    padded_width = max(len(name) for name in self.props) + 5
    for name, value in self.props.items():
        label = name.ljust(padded_width)
        print(f" {label} {value}")
|
1306
|
+
|
1307
|
+
|
1308
|
+
def to_sdf(self, confs:bool=False, props:bool=True) -> str:
    """Write the molecule, or each of its conformers, as SDF text.

    Args:
        confs (bool, optional): whether to include conformers. When True, one
            record is written per conformer instead of one record for the
            molecule. Defaults to False.
        props (bool, optional): whether to include properties. Conformer
            records carry the molecule properties followed by the conformer
            properties. Defaults to True.

    Returns:
        str: strings of SDF output.
    """
    # (source rdmol, record name, property dicts applied in order)
    if confs:
        records = [(conf.rdmol, conf.name, (self.props, conf.props)) for conf in self.confs]
    else:
        records = [(self.rdmol, self.name, (self.props,))]

    buffer = io.StringIO()
    with Chem.SDWriter(buffer) as writer:
        for source_rdmol, record_name, prop_dicts in records:
            # work on a copy so the stored molecule is not decorated
            record = Chem.Mol(source_rdmol)
            record.SetProp('_Name', record_name)
            if props:
                for prop_dict in prop_dicts:
                    for key, value in prop_dict.items():
                        record.SetProp(key, str(value))
            writer.write(record)
    return buffer.getvalue()
|
1340
|
+
|
1341
|
+
|
1342
|
+
def to_image(self, width:int=300, height:int=300, index:bool=False, svg:bool=True) -> object:
    """Returns a depiction of the molecule as an image.

    For a PIL image, use .save(output_filename) method to save as an image file.

    Args:
        width (int, optional): width of image. Defaults to 300.
        height (int, optional): height of image. Defaults to 300.
        index (bool, optional): whether to annotate atoms with their 1-based
            atom index. Defaults to False.
        svg (bool, optional): whether to return in SVG format. Defaults to True.

    Returns:
        object: the SVG produced by `Draw.MolsToGridImage(..., useSVG=True)`
            when `svg` is True, otherwise a PIL image object.
    """
    # draw a copy so that the atom notes do not stay on self.rdmol
    rdmol = Chem.Mol(self.rdmol)
    if index:
        for a in rdmol.GetAtoms():
            a.SetProp("atomNote", str(a.GetIdx()+1))

    if svg:
        # Draw.MolToImage has no SVG output; a one-cell grid provides it
        return Draw.MolsToGridImage([rdmol],
                                    molsPerRow=1,
                                    subImgSize=(width,height),
                                    useSVG=True)

    # Draw.MolsToImage expects a sequence of molecules and has no `size`
    # parameter; the single-molecule API is Draw.MolToImage.
    return Draw.MolToImage(rdmol,
                           size=(width,height),
                           kekulize=True,
                           wedgeBonds=True, # draw wedge (stereo)
                           fitImage=False,
                           options=None,
                           canvas=None)
|
1368
|
+
|
1369
|
+
|
1370
|
+
def to_svg(self,
           width:int = 400,
           height:int = 400,
           legend:str = '',
           index:bool = False,
           highlight: list[int] | None = None,
           coordgen:bool = False) -> str:
    """Draw the molecule in 2D and return the drawing as SVG text.

    The depiction is computed on a copy of `self.rdmol`; all atom properties
    of the copy are cleared before drawing.

    Examples:
        For Jupyternotebook, wrap the output with SVG:

        >>> from IPython.display import SVG
        >>> SVG(libr[0].to_svg())

    Args:
        width (int): width (default:400)
        height (int): height (default:400)
        legend (str): legend
        index (bool): True/False whether to display atom index
        highlight (list): list of atom indices to highlight
        coordgen (bool): value passed to `rdDepictor.SetPreferCoordGen()`
            before the 2D coordinates are computed (default:False)

    Returns:
        str: SVG text
    """
    rdDepictor.SetPreferCoordGen(coordgen)

    depiction = Chem.Mol(self.rdmol)
    rdDepictor.Compute2DCoords(depiction)
    rdDepictor.StraightenDepiction(depiction)

    for atom in depiction.GetAtoms():
        for prop_name in atom.GetPropsAsDict():
            atom.ClearProp(prop_name)
        if index: # index hides polar hydrogens
            atom.SetProp("atomLabel", str(atom.GetIdx()))

    draw_kwargs = {'legend': legend}
    if highlight:
        draw_kwargs['highlightAtoms'] = highlight

    canvas = rdMolDraw2D.MolDraw2DSVG(width, height)
    canvas.DrawMolecule(depiction, **draw_kwargs)
    canvas.FinishDrawing()
    return canvas.GetDrawingText()
|
1418
|
+
|
1419
|
+
|
1420
|
+
def plot_energy(self, df:pd.DataFrame) -> str:
    """Plot a dihedral energy profile with Seaborn and return it as SVG text.

    Input pandas DataFrame must have columns: `angle` and `E_rel(kcal/mol)`.
    When an angle occurs more than once, 95% confidence interval error bars
    are drawn. The y axis is limited to (-1.5, 35.0) if the highest energy
    exceeds 35.0 and to (-1.5, 5.0) if it is below 5.0; otherwise it is left
    to the plotting defaults.

    Args:
        df (pd.DataFrame): input dataframe.

    Returns:
        str: Seaborn plot in strings.
    """
    sns.color_palette("tab10")
    sns.set_style("whitegrid")

    line_kwargs = dict(x="angle",
                       y="E_rel(kcal/mol)",
                       data=df,
                       marker='o',
                       markersize=10)
    has_repeated_angles = len(df['angle']) != len(df['angle'].drop_duplicates())
    if has_repeated_angles:
        line_kwargs.update(errorbar=('ci', 95), err_style='bars')
    g = sns.lineplot(**line_kwargs)

    g.xaxis.set_major_locator(ticker.MultipleLocator(30))
    g.xaxis.set_major_formatter(ticker.ScalarFormatter())

    axis_kwargs = dict(title=self.name,
                       xlabel='Dihedral Angle (degree)',
                       ylabel='Relative Energy (Kcal/mol)',
                       xlim=(-190, 190))
    highest_energy = df["E_rel(kcal/mol)"].max()
    if highest_energy > 35.0:
        axis_kwargs['ylim'] = (-1.5, 35.0)
    elif highest_energy < 5.0:
        axis_kwargs['ylim'] = (-1.5, 5.0)
    g.set(**axis_kwargs)

    g.tick_params(axis='x', rotation=30)
    svg_buffer = io.StringIO()
    plt.savefig(svg_buffer, format='svg', bbox_inches='tight')
    plt.clf()
    return svg_buffer.getvalue()
|
1473
|
+
|
1474
|
+
|
1475
|
+
def to_html(self, htmlbody:bool=False) -> str:
    """Render the torsion energy profiles stored in `props['torsion']` as HTML.

    The output is a heading with the molecule name followed by a table with
    one row per torsion: the a-b-c-d atom indices, the molecule drawing with
    those four atoms highlighted, and the energy plot.

    Args:
        htmlbody (bool, optional): whether to wrap around with `<html><body>`. Defaults to False.

    Returns:
        str: HTML text.
    """
    parts = []
    if htmlbody:
        parts.append("<html><body>")
    # start of content
    parts.append(f'<h1 style="text-align:left">{self.name}</h1>')
    parts.append("<table>")
    for profile in self.props['torsion']: # list of dict
        (a1, a2, a3, a4, _, _) = profile['indices']
        df = pd.DataFrame({column: profile[column] for column in ['angle', 'E_rel(kcal/mol)']})
        svg_rdmol = self.to_svg(highlight=[a1, a2, a3, a4], index=True)
        svg_energy_plot = self.plot_energy(df)
        parts.extend((
            f"<tr>",
            f"<td>{a1}-{a2}-{a3}-{a4}</td>",
            f"<td>{svg_rdmol}</td>",
            f"<td>{svg_energy_plot}</td>",
            f"</tr>",
        ))
    parts.append('</table>')
    parts.append('<hr style="height:2px;border-width:0;color:gray;background-color:gray">')
    # end of content
    if htmlbody:
        parts.append("</body></html>")
    return "".join(parts)
|
1507
|
+
|
1508
|
+
|
1509
|
+
def serialize(self, key: str | None = None, decimal_places:int=2) -> str:
    """Dump the molecule properties, or a single property, as JSON text.

    The properties are first passed through `fix_decimal_places_in_dict`
    with `decimal_places`.

    Args:
        key (str | None): key for a subset of properties. Defaults to None.
        decimal_places (int, optional): decimal places for float numbers. Defaults to 2.

    Returns:
        str: serialized JSON dumps.
    """
    rounded = fix_decimal_places_in_dict(self.props, decimal_places)
    payload = {key: rounded[key]} if key else rounded
    return json.dumps(payload)
|