rdworks 0.25.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdworks/__init__.py +35 -0
- rdworks/autograph/__init__.py +4 -0
- rdworks/autograph/autograph.py +184 -0
- rdworks/autograph/centroid.py +90 -0
- rdworks/autograph/dynamictreecut.py +135 -0
- rdworks/autograph/nmrclust.py +123 -0
- rdworks/autograph/rckmeans.py +74 -0
- rdworks/bitqt/__init__.py +1 -0
- rdworks/bitqt/bitqt.py +355 -0
- rdworks/conf.py +374 -0
- rdworks/descriptor.py +36 -0
- rdworks/display.py +206 -0
- rdworks/ionized.py +170 -0
- rdworks/matchedseries.py +260 -0
- rdworks/mol.py +1522 -0
- rdworks/mollibr.py +887 -0
- rdworks/pka.py +38 -0
- rdworks/predefined/Asinex_fragment.xml +20 -0
- rdworks/predefined/Astex_RO3.xml +16 -0
- rdworks/predefined/Baell2010_PAINS/Baell2010A.xml +52 -0
- rdworks/predefined/Baell2010_PAINS/Baell2010B.xml +169 -0
- rdworks/predefined/Baell2010_PAINS/Baell2010C.xml +1231 -0
- rdworks/predefined/Baell2010_PAINS/PAINS-less-than-015-hits.xml +2048 -0
- rdworks/predefined/Baell2010_PAINS/PAINS-less-than-150-hits.xml +278 -0
- rdworks/predefined/Baell2010_PAINS/PAINS-more-than-150-hits.xml +83 -0
- rdworks/predefined/Baell2010_PAINS/makexml.py +70 -0
- rdworks/predefined/Brenk2008_Dundee/makexml.py +21 -0
- rdworks/predefined/CNS.xml +18 -0
- rdworks/predefined/ChEMBL_Walters/BMS.xml +543 -0
- rdworks/predefined/ChEMBL_Walters/Dundee.xml +318 -0
- rdworks/predefined/ChEMBL_Walters/Glaxo.xml +168 -0
- rdworks/predefined/ChEMBL_Walters/Inpharmatica.xml +276 -0
- rdworks/predefined/ChEMBL_Walters/LINT.xml +174 -0
- rdworks/predefined/ChEMBL_Walters/MLSMR.xml +351 -0
- rdworks/predefined/ChEMBL_Walters/PAINS.xml +1446 -0
- rdworks/predefined/ChEMBL_Walters/SureChEMBL.xml +501 -0
- rdworks/predefined/ChEMBL_Walters/makexml.py +40 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999.xml +168 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999Acid.xml +102 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999Base.xml +6 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999ElPh.xml +6 -0
- rdworks/predefined/Hann1999_Glaxo/Hann1999NuPh.xml +6 -0
- rdworks/predefined/Hann1999_Glaxo/makexml.py +83 -0
- rdworks/predefined/Kazius2005/Kazius2005.xml +114 -0
- rdworks/predefined/Kazius2005/makexml.py +66 -0
- rdworks/predefined/ZINC_druglike.xml +24 -0
- rdworks/predefined/ZINC_fragment.xml +14 -0
- rdworks/predefined/ZINC_leadlike.xml +15 -0
- rdworks/predefined/fragment.xml +7 -0
- rdworks/predefined/ionized/simple_smarts_pattern.csv +57 -0
- rdworks/predefined/ionized/smarts_pattern.csv +107 -0
- rdworks/predefined/misc/makexml.py +119 -0
- rdworks/predefined/misc/reactive-part-2.xml +104 -0
- rdworks/predefined/misc/reactive-part-3.xml +74 -0
- rdworks/predefined/misc/reactive.xml +321 -0
- rdworks/readin.py +312 -0
- rdworks/rgroup.py +2173 -0
- rdworks/scaffold.py +520 -0
- rdworks/std.py +143 -0
- rdworks/stereoisomers.py +127 -0
- rdworks/tautomers.py +20 -0
- rdworks/units.py +63 -0
- rdworks/utils.py +495 -0
- rdworks/xml.py +260 -0
- rdworks-0.25.7.dist-info/METADATA +37 -0
- rdworks-0.25.7.dist-info/RECORD +69 -0
- rdworks-0.25.7.dist-info/WHEEL +5 -0
- rdworks-0.25.7.dist-info/licenses/LICENSE +21 -0
- rdworks-0.25.7.dist-info/top_level.txt +1 -0
rdworks/conf.py
ADDED
@@ -0,0 +1,374 @@
|
|
1
|
+
import io
|
2
|
+
import copy
|
3
|
+
import json
|
4
|
+
import types
|
5
|
+
import numpy as np
|
6
|
+
|
7
|
+
import ase
|
8
|
+
from ase.optimize import FIRE
|
9
|
+
|
10
|
+
from collections.abc import Callable
|
11
|
+
|
12
|
+
from rdkit import Chem
|
13
|
+
from rdkit.Chem import rdMolTransforms, AllChem, rdMolAlign
|
14
|
+
from rdkit.Chem.Draw import rdMolDraw2D
|
15
|
+
|
16
|
+
from typing import List, Optional, Union, Self
|
17
|
+
|
18
|
+
from .units import ev2kcalpermol
|
19
|
+
from .element import radii
|
20
|
+
|
21
|
+
class Conf:
|
22
|
+
"""Container for 3D conformers.
|
23
|
+
"""
|
24
|
+
|
25
|
+
def __init__(self, molecular_input:Chem.Mol, name:str='') -> None:
|
26
|
+
"""Create 3D conformers.
|
27
|
+
|
28
|
+
Args:
|
29
|
+
molecular_input (Chem.Mol): Molecule for conformer generation.
|
30
|
+
name (str, optional): Name prefix of the generated conformers. Defaults to ''.
|
31
|
+
|
32
|
+
Raises:
|
33
|
+
ValueError: if `molecular_input` is not rdkit.Chem.Mol object.
|
34
|
+
"""
|
35
|
+
self.rdmol = None # has only one rdkit conformer
|
36
|
+
self.name = name
|
37
|
+
self.props = {}
|
38
|
+
if isinstance(molecular_input, Chem.Mol):
|
39
|
+
self.rdmol = molecular_input
|
40
|
+
self.natoms = self.rdmol.GetNumAtoms()
|
41
|
+
self.props.update({'atoms': self.natoms})
|
42
|
+
else:
|
43
|
+
raise ValueError(f'rdworks.Conf() takes Chem.Mol object')
|
44
|
+
|
45
|
+
|
46
|
+
def __str__(self) -> str:
|
47
|
+
"""Returns a string representation.
|
48
|
+
|
49
|
+
Returns:
|
50
|
+
str: string representation.
|
51
|
+
"""
|
52
|
+
return f"<rdworks.Conf({self.rdmol} name={self.name} atoms={self.natoms})>"
|
53
|
+
|
54
|
+
|
55
|
+
##################################################
|
56
|
+
### Cascading methods
|
57
|
+
##################################################
|
58
|
+
|
59
|
+
|
60
|
+
def copy(self) -> Self:
|
61
|
+
"""Returns a copy of self.
|
62
|
+
|
63
|
+
Returns:
|
64
|
+
Self: `rdworks.Conf` object.
|
65
|
+
"""
|
66
|
+
return copy.deepcopy(self)
|
67
|
+
|
68
|
+
|
69
|
+
def rename(self, name:str) -> Self:
|
70
|
+
"""Rename and returns self.
|
71
|
+
|
72
|
+
Args:
|
73
|
+
name (str): a new name for conformers.
|
74
|
+
|
75
|
+
Raises:
|
76
|
+
ValueError: if `name` is not given.
|
77
|
+
|
78
|
+
Returns:
|
79
|
+
Self: `rdworks.Conf` object.
|
80
|
+
"""
|
81
|
+
if not name:
|
82
|
+
raise ValueError('rdworks.Conf.rename() expects a name')
|
83
|
+
self.name = name
|
84
|
+
self.rdmol.SetProp('_Name', name)
|
85
|
+
return self
|
86
|
+
|
87
|
+
|
88
|
+
def sync(self, coord:Union[np.ndarray, list]) -> Self:
|
89
|
+
"""Synchronize the conformer coordinates with the provided `coord`.
|
90
|
+
|
91
|
+
Args:
|
92
|
+
coord (np.array): 3D coordinates.
|
93
|
+
|
94
|
+
Raises:
|
95
|
+
ValueError: if `coord` does not have the correct shape (natoms, 3).
|
96
|
+
|
97
|
+
Returns:
|
98
|
+
Self: `rdworks.Conf` object.
|
99
|
+
"""
|
100
|
+
if isinstance(coord, np.ndarray) and coord.shape != (self.natoms, 3):
|
101
|
+
raise ValueError(f"`coord.shape` should be ({self.natoms},3)")
|
102
|
+
elif isinstance(coord, list) and len(coord) != self.natoms:
|
103
|
+
raise ValueError(f"`coord` should be length of {self.natoms}")
|
104
|
+
for i, a in enumerate(self.rdmol.GetAtoms()):
|
105
|
+
self.rdmol.GetConformer().SetAtomPosition(a.GetIdx(), coord[i])
|
106
|
+
|
107
|
+
return self
|
108
|
+
|
109
|
+
|
110
|
+
def get_potential_energy(self, calculator: str | Callable = 'MMFF94') -> float:
|
111
|
+
"""Get potential energy in kcal/mol.
|
112
|
+
|
113
|
+
Args:
|
114
|
+
calculator (str | Callable): MMFF94 (= MMFF), MMFF94s, UFF, or ASE calculator.
|
115
|
+
`MMFF94` or `MMFF` - Intended for general use, including organic molecules and proteins,
|
116
|
+
and primarily relies on data from quantum mechanical calculations.
|
117
|
+
It's often used in molecular dynamics simulations.
|
118
|
+
`MMFF94s` - A "static" variant of MMFF94, with adjusted parameters for out-of-plane
|
119
|
+
bending and dihedral torsions to favor planar geometries for specific nitrogen atoms.
|
120
|
+
This makes it better suited for geometry optimization studies where a static,
|
121
|
+
time-averaged structure is desired. The "s" stands for "static".
|
122
|
+
`UFF` - UFF refers to the "Universal Force Field," a force field model used for
|
123
|
+
molecular mechanics calculations. It's a tool for geometry optimization,
|
124
|
+
energy minimization, and exploring molecular conformations in 3D space.
|
125
|
+
UFF is often used to refine conformers generated by other methods,
|
126
|
+
such as random conformer generation, to produce more physically plausible
|
127
|
+
and stable structures.
|
128
|
+
|
129
|
+
Returns:
|
130
|
+
float: potential energy in kcal/mol.
|
131
|
+
"""
|
132
|
+
PE = None
|
133
|
+
if isinstance(calculator, str):
|
134
|
+
if calculator == 'MMFF94' or calculator == 'MMFF':
|
135
|
+
mp = AllChem.MMFFGetMoleculeProperties(self.rdmol, mmffVariant='MMFF94')
|
136
|
+
ff = AllChem.MMFFGetMoleculeForceField(self.rdmol, mp)
|
137
|
+
elif calculator == 'MMFF94s':
|
138
|
+
mp = AllChem.MMFFGetMoleculeProperties(self.rdmol, mmffVariant='MMFF94s')
|
139
|
+
ff = AllChem.MMFFGetMoleculeForceField(self.rdmol, mp)
|
140
|
+
elif calculator == 'UFF':
|
141
|
+
ff = AllChem.UFFGetMoleculeForceField(self.rdmol)
|
142
|
+
else:
|
143
|
+
raise ValueError("Unsupported calculator")
|
144
|
+
PE = ff.CalcEnergy()
|
145
|
+
self.props.update({'E_tot(kcal/mol)': PE})
|
146
|
+
else:
|
147
|
+
try:
|
148
|
+
ase_atoms = ase.Atoms(symbols=self.symbols(), positions=self.positions())
|
149
|
+
ase_atoms.calc = calculator
|
150
|
+
PE = ase_atoms.get_potential_energy() # np.array
|
151
|
+
PE = ev2kcalpermol * float(PE[0]) # np.float64 to float
|
152
|
+
self.props.update({'E_tot(kcal/mol)': PE})
|
153
|
+
except:
|
154
|
+
raise RuntimeError("ASE calculator error")
|
155
|
+
return PE
|
156
|
+
|
157
|
+
|
158
|
+
def optimize(self, calculator: str | Callable = 'MMFF94', fmax:float=0.05) -> Self:
|
159
|
+
"""Optimize conformation using a callable.
|
160
|
+
|
161
|
+
Args:
|
162
|
+
calculator (str | Callable): MMFF94 (= MMFF), MMFF94s, UFF, or ASE calculator.
|
163
|
+
`MMFF94` or `MMFF` - Intended for general use, including organic molecules and proteins,
|
164
|
+
and primarily relies on data from quantum mechanical calculations.
|
165
|
+
It's often used in molecular dynamics simulations.
|
166
|
+
`MMFF94s` - A "static" variant of MMFF94, with adjusted parameters for out-of-plane
|
167
|
+
bending and dihedral torsions to favor planar geometries for specific nitrogen atoms.
|
168
|
+
This makes it better suited for geometry optimization studies where a static,
|
169
|
+
time-averaged structure is desired. The "s" stands for "static".
|
170
|
+
`UFF` - UFF refers to the "Universal Force Field," a force field model used for
|
171
|
+
molecular mechanics calculations. It's a tool for geometry optimization,
|
172
|
+
energy minimization, and exploring molecular conformations in 3D space.
|
173
|
+
UFF is often used to refine conformers generated by other methods,
|
174
|
+
such as random conformer generation, to produce more physically plausible
|
175
|
+
and stable structures.
|
176
|
+
fmax (float, optional): fmax for the calculator. Defaults to 0.05.
|
177
|
+
|
178
|
+
Returns:
|
179
|
+
Self: self
|
180
|
+
"""
|
181
|
+
if isinstance(calculator, str) :
|
182
|
+
init = self.get_potential_energy(calculator)
|
183
|
+
if calculator == 'MMFF94' or calculator == 'MMFF':
|
184
|
+
retcode = AllChem.MMFFOptimizeMolecule(self.rdmol, mmffVariant='MMFF94')
|
185
|
+
# returns 0 if the optimization converged
|
186
|
+
elif calculator == 'MMFF94s':
|
187
|
+
retcode = AllChem.MMFFOptimizeMolecule(self.rdmol, mmffVariant='MMFF94s')
|
188
|
+
# returns 0 if the optimization converged
|
189
|
+
elif calculator == 'UFF':
|
190
|
+
retcode = AllChem.UFFOptimizeMolecule(self.rdmol)
|
191
|
+
# returns 0 if the optimization converged
|
192
|
+
final = self.get_potential_energy(calculator)
|
193
|
+
self.props.update({
|
194
|
+
'E_tot_init(kcal/mol)': init , # energy before optimization
|
195
|
+
'E_tot(kcal/mol)': final, # energy after optimization
|
196
|
+
'Converged' : retcode == 0, # True or False
|
197
|
+
})
|
198
|
+
return self
|
199
|
+
|
200
|
+
else:
|
201
|
+
with io.StringIO() as logfile:
|
202
|
+
ase_atoms = ase.Atoms(symbols=self.symbols(), positions=self.positions())
|
203
|
+
ase_atoms.calc = calculator
|
204
|
+
FIRE(ase_atoms, logfile=logfile).run(fmax=fmax)
|
205
|
+
lines = [l.strip().split()[1:] for l in logfile.getvalue().split('\n') if l.startswith('FIRE')]
|
206
|
+
data = [(float(e), float(f)) for (_, _, e, f) in lines]
|
207
|
+
self.props.update({
|
208
|
+
'E_tot_init(kcal/mol)': data[0][0] * ev2kcalpermol, # energy before optimization
|
209
|
+
'E_tot(kcal/mol)': data[-1][0] * ev2kcalpermol, # energy after optimization
|
210
|
+
'Converged' : data[-1][1] < fmax, # True or False
|
211
|
+
})
|
212
|
+
# update atomic coordinates
|
213
|
+
return self.sync(ase_atoms.get_positions())
|
214
|
+
|
215
|
+
|
216
|
+
##################################################
|
217
|
+
### Endpoint methods
|
218
|
+
##################################################
|
219
|
+
|
220
|
+
def check_bonds(self, tolerance:float=0.25) -> bool:
|
221
|
+
"""Check bond lengths.
|
222
|
+
|
223
|
+
Args:
|
224
|
+
tolerance (float, optional): tolerance from the sum of
|
225
|
+
van der Waals radii of bonded atoms. Defaults to 0.25 (A).
|
226
|
+
|
227
|
+
Returns:
|
228
|
+
bool: True if all bond lengths are accceptable.
|
229
|
+
"""
|
230
|
+
|
231
|
+
for bond in self.rdmol.GetBonds():
|
232
|
+
idx1 = bond.GetBeginAtomIdx()
|
233
|
+
idx2 = bond.GetEndAtomIdx()
|
234
|
+
nuc1 = self.rdmol.GetAtomWithIdx(idx1).GetAtomicNum()
|
235
|
+
nuc2 = self.rdmol.GetAtomWithIdx(idx2).GetAtomicNum()
|
236
|
+
sum_radii = (radii[nuc1] + radii[nuc2])
|
237
|
+
bond_length = rdMolTransforms.GetBondLength(self.rdmol.GetConformer(), idx1, idx2)
|
238
|
+
if abs(bond_length - sum_radii) > tolerance:
|
239
|
+
return False
|
240
|
+
|
241
|
+
return True
|
242
|
+
|
243
|
+
|
244
|
+
def positions(self) -> np.array:
|
245
|
+
"""Returns the coordinates.
|
246
|
+
|
247
|
+
Returns:
|
248
|
+
np.array: the coordinates.
|
249
|
+
"""
|
250
|
+
return np.array(self.rdmol.GetConformer().GetPositions().tolist())
|
251
|
+
|
252
|
+
|
253
|
+
def symbols(self) -> list[str]:
|
254
|
+
"""Returns the element symbols.
|
255
|
+
|
256
|
+
Returns:
|
257
|
+
list: list of element symbols.
|
258
|
+
"""
|
259
|
+
return [ a.GetSymbol() for a in self.rdmol.GetAtoms() ]
|
260
|
+
|
261
|
+
|
262
|
+
def numbers(self) -> list[int]:
|
263
|
+
"""Returns the atomic numbers.
|
264
|
+
|
265
|
+
Returns:
|
266
|
+
list: list of atomic numbers.
|
267
|
+
"""
|
268
|
+
return [ a.GetAtomicNum() for a in self.rdmol.GetAtoms() ]
|
269
|
+
|
270
|
+
|
271
|
+
def cog(self) -> np.array:
|
272
|
+
"""Returns the center of geometry (COG).
|
273
|
+
|
274
|
+
Returns:
|
275
|
+
np.array: the center of geometry (COG).
|
276
|
+
"""
|
277
|
+
xyz = []
|
278
|
+
for i in range(0, self.natoms):
|
279
|
+
pos = self.rdmol.GetConformer().GetAtomPositions(i)
|
280
|
+
xyz.append([pos.x, pos.y, pos.z])
|
281
|
+
return np.mean(xyz, axis=0)
|
282
|
+
|
283
|
+
|
284
|
+
def rg(self) -> float:
|
285
|
+
"""Returns the radius of gyration (Rg).
|
286
|
+
|
287
|
+
Returns:
|
288
|
+
float: the radius of gyration (Rg).
|
289
|
+
"""
|
290
|
+
xyz = []
|
291
|
+
for i in range(0, self.natoms):
|
292
|
+
pos = self.rdmol.GetConformer().GetAtomPositions(i)
|
293
|
+
xyz.append([pos.x, pos.y, pos.z])
|
294
|
+
xyz = np.array(xyz)
|
295
|
+
cog = np.mean(xyz, axis=0)
|
296
|
+
a = xyz-cog
|
297
|
+
b = np.einsum('ij,ij->i', a, a)
|
298
|
+
return np.sqrt(np.mean(b))
|
299
|
+
|
300
|
+
|
301
|
+
def serialize(self, key:str='') -> dict:
|
302
|
+
"""Returns JSON dumps of the `props`.
|
303
|
+
|
304
|
+
Args:
|
305
|
+
key (str): a key for the `props` dictionary. Defaults to '' (all).
|
306
|
+
|
307
|
+
Returns:
|
308
|
+
dict: JSON dumps.
|
309
|
+
"""
|
310
|
+
if key:
|
311
|
+
return json.dumps({key:self.props[key]})
|
312
|
+
else:
|
313
|
+
return json.dumps(self.props)
|
314
|
+
|
315
|
+
|
316
|
+
def to_sdf(self, props:bool=True) -> str:
|
317
|
+
"""Returns the SDF-formatted strings.
|
318
|
+
|
319
|
+
Args:
|
320
|
+
props (bool, optional): include `props as SDF properties. Defaults to True.
|
321
|
+
|
322
|
+
Returns:
|
323
|
+
str: strings in the SDF format.
|
324
|
+
"""
|
325
|
+
in_memory = io.StringIO()
|
326
|
+
with Chem.SDWriter(in_memory) as f:
|
327
|
+
rdmol = Chem.Mol(self.rdmol)
|
328
|
+
rdmol.SetProp('_Name', self.name)
|
329
|
+
if props:
|
330
|
+
for k,v in self.props.items():
|
331
|
+
rdmol.SetProp(k, str(v))
|
332
|
+
f.write(rdmol)
|
333
|
+
return in_memory.getvalue()
|
334
|
+
|
335
|
+
|
336
|
+
def to_svg(self,
|
337
|
+
width:int=400,
|
338
|
+
height:int=400,
|
339
|
+
legend:Optional[str]=None,
|
340
|
+
atom_index:bool=False,
|
341
|
+
highlight:Optional[List[int]]=None) -> str:
|
342
|
+
"""Returns 2D SVG depiction of the molecule.
|
343
|
+
|
344
|
+
Examples:
|
345
|
+
>>> from IPython.display import SVG
|
346
|
+
>>> SVG(libr[0].confs[0].to_svg(atom_index=True))
|
347
|
+
|
348
|
+
Args:
|
349
|
+
width (int): width (default:400)
|
350
|
+
height (int): height (default:400)
|
351
|
+
legend (str, optional): title or Mol.name if not given
|
352
|
+
atom_index (bool): True/False whether to display atom index
|
353
|
+
highlight (list): list of atom indices to highlight
|
354
|
+
|
355
|
+
Returns:
|
356
|
+
str: SVG text
|
357
|
+
"""
|
358
|
+
rdmol_3d = Chem.Mol(self.rdmol) # a copy of self.rdmol (3D, with hydrogens)
|
359
|
+
AllChem.Compute2DCoords(rdmol_3d) # 2D depiction
|
360
|
+
for atom in rdmol_3d.GetAtoms():
|
361
|
+
for key in atom.GetPropsAsDict():
|
362
|
+
atom.ClearProp(key)
|
363
|
+
drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
|
364
|
+
if not legend:
|
365
|
+
legend = self.name
|
366
|
+
if atom_index:
|
367
|
+
for atom in rdmol_3d.GetAtoms():
|
368
|
+
atom.SetProp("atomLabel", str(atom.GetIdx()))
|
369
|
+
if highlight:
|
370
|
+
drawer.DrawMolecule(rdmol_3d, legend=legend, highlightAtoms=highlight)
|
371
|
+
else:
|
372
|
+
drawer.DrawMolecule(rdmol_3d, legend=legend)
|
373
|
+
drawer.FinishDrawing()
|
374
|
+
return drawer.GetDrawingText()
|
rdworks/descriptor.py
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
from rdkit.Chem import Descriptors, rdMolDescriptors, QED
|
2
|
+
|
3
|
+
# Human-readable description for each supported descriptor name.
# The keys match those of the `rd_descriptor_f` lookup table.
rd_descriptor = {
    "QED": "Quantitative estimate of drug-likeness.",
    "MolWt": "Molecular weight",
    "LogP": "Predicted octanol/water partition coefficient",
    "TPSA": "Topological polar surface area",
    "HBD": "Number of hydrogen bonding donors",
    "HBA": "Number of hydrogen bonding acceptors",
    "RotBonds": "Number of rotatable bonds",
    "RingCount": "Number of rings",
    "FCsp3": "Fraction of SP3 carbons",
    "HAC": "Number of heavy atoms",
    "Hetero": "Number of hetero atoms (not H or C) [B,N,O,P,S,F,Cl,Br,I]",
    "LipinskiHBA": "Number of hydrogen bonding acceptors according to the Lipinski definition",
    "LipinskiHBD": "Number of hydrogen bonding donors according to the Lipinski definition",
}
|
18
|
+
|
19
|
+
# Descriptor name -> RDKit function that computes it from a molecule.
# Trailing comments name the equivalent function in `rdkit.Chem.Descriptors`
# (or related modules) where one exists.
#
# "StereoCenters" (rdMolDescriptors.CalcNumAtomStereoCenters) is deliberately
# left out: calling it as `rd_descriptor_f[k](self.rdmol)` failed with
# "ValueError: numStereoCenters called without stereo being assigned".
rd_descriptor_f = {
    "QED": QED.qed,
    "MolWt": Descriptors.MolWt,
    "HAC": Descriptors.HeavyAtomCount,
    "LogP": Descriptors.MolLogP,                         # == Crippen.MolLogP
    "TPSA": Descriptors.TPSA,                            # == MolSurf.TPSA
    "HBA": rdMolDescriptors.CalcNumHBA,                  # == Descriptors.NumHAcceptors
    "HBD": rdMolDescriptors.CalcNumHBD,                  # == Descriptors.NumHDonors
    "RotBonds": rdMolDescriptors.CalcNumRotatableBonds,  # == Descriptors.NumRotatableBonds
    "RingCount": rdMolDescriptors.CalcNumRings,          # == Descriptors.RingCount
    "FCsp3": rdMolDescriptors.CalcFractionCSP3,          # == Descriptors.FractionCSP3
    "Hetero": rdMolDescriptors.CalcNumHeteroatoms,       # not (H or C) [B,N,O,P,S,F,Cl,Br,I]
    "LipinskiHBA": rdMolDescriptors.CalcNumLipinskiHBA,
    "LipinskiHBD": rdMolDescriptors.CalcNumLipinskiHBD,
}
|
rdworks/display.py
ADDED
@@ -0,0 +1,206 @@
|
|
1
|
+
import io
|
2
|
+
import os
|
3
|
+
import numpy as np
|
4
|
+
from typing import Optional, List, Tuple
|
5
|
+
|
6
|
+
from PIL import Image, ImageChops
|
7
|
+
|
8
|
+
from rdkit import Chem, Geometry
|
9
|
+
from rdkit.Chem import AllChem, Draw, rdDepictor, rdMolTransforms
|
10
|
+
from rdkit.Chem.Draw import rdMolDraw2D
|
11
|
+
|
12
|
+
|
13
|
+
# https://greglandrum.github.io/rdkit-blog/posts/2023-05-26-drawing-options-explained.html
|
14
|
+
|
15
|
+
|
16
|
+
def twod_depictor(rdmol:Chem.Mol, index:bool=False, coordgen:bool=False) -> Chem.Mol:
    """Prepares a copy of a molecule for 2D drawing.

    The copy gets freshly computed, straightened 2D coordinates and has all
    of its atom properties cleared. The input molecule is left unchanged.

    Note:
        `rdDepictor.SetPreferCoordGen` is set on every call, according to
        `coordgen`.

    Args:
        rdmol (Chem.Mol): input molecule.
        index (bool, optional): label every atom with its index. Defaults to False.
        coordgen (bool, optional): value passed to rdDepictor.SetPreferCoordGen.
            Defaults to False.

    Returns:
        Chem.Mol: the prepared copy of `rdmol`.
    """
    rdDepictor.SetPreferCoordGen(True if coordgen else False)

    depicted = Chem.Mol(rdmol)
    rdDepictor.Compute2DCoords(depicted)
    rdDepictor.StraightenDepiction(depicted)

    for atom in depicted.GetAtoms():
        # wipe existing per-atom properties from the copy
        for prop_name in atom.GetPropsAsDict():
            atom.ClearProp(prop_name)
        if index:
            # an index label hides polar hydrogens
            atom.SetProp("atomLabel", str(atom.GetIdx()))

    return depicted
|
47
|
+
|
48
|
+
|
49
|
+
def svg(rdmol:Chem.Mol,
        width:int=300,
        height:int=300,
        legend:str='',
        index:bool=False,
        highlight:list[int] | None = None,
        coordgen:bool = False) -> str:
    """Renders a molecule as SVG text.

    Examples:
        >>> from IPython.display import SVG
        >>> SVG(libr[0].to_svg())

    Args:
        rdmol (Chem.Mol): input molecule.
        width (int): canvas width. Defaults to 300.
        height (int): canvas height. Defaults to 300.
        legend (str): title drawn with the molecule. Defaults to ''.
        index (bool): whether to show atom indexes. Defaults to False.
        highlight (list[int]): atom indices to highlight. Defaults to None.
        coordgen (bool): forwarded to `twod_depictor`. Defaults to False.

    Returns:
        str: SVG text
    """
    canvas = rdMolDraw2D.MolDraw2DSVG(width, height)
    depicted = twod_depictor(rdmol, index, coordgen)

    # highlightAtoms is only passed when there is something to highlight
    draw_options = {'legend': legend}
    if highlight:
        draw_options['highlightAtoms'] = highlight
    canvas.DrawMolecule(depicted, **draw_options)

    canvas.FinishDrawing()
    return canvas.GetDrawingText()
|
83
|
+
|
84
|
+
|
85
|
+
def png(rdmol:Chem.Mol, width:int=300, height:int=300, legend:str='',
        index:bool=False, highlight:Optional[List[int]]=None, coordgen:bool=False) -> Image.Image:
    """Returns a trimmed PIL Image object of a molecule.

    Args:
        rdmol (Chem.Mol): input molecule.
        width (int): width. Defaults to 300.
        height (int): height. Defaults to 300.
        legend (str): title of molecule. Defaults to ''.
        index (bool): whether to show atom indexes. Defaults to False.
        highlight (list): list of atom indices to highlight. Defaults to None.
        coordgen (bool): whether to use rdDepictor.SetPreferCoordGen. Defaults to False.

    Returns:
        Image.Image: output PIL Image object, cropped by `trim_png`.
    """
    rdmol_2d = twod_depictor(rdmol, index, coordgen)
    img = Draw.MolToImage(rdmol_2d,
                          size=(width, height),
                          legend=legend,  # previously accepted but never forwarded
                          highlightAtoms=highlight or [],
                          kekulize=True,
                          wedgeBonds=True,
                          fitImage=False,
                          )
    # Other keyword arguments understood by Draw.MolToImage:
    #   highlightBonds: list of bonds to highlight (default [])
    #   highlightColor: RGB color as tuple (default [1, 0, 0])

    return trim_png(img)
|
114
|
+
|
115
|
+
|
116
|
+
def trim_png(img:Image.Image) -> Image.Image:
    """Crops away the uniform margin around a molecular drawing.

    The color of the top-left pixel is taken as the background color.

    Args:
        img (Image.Image): input PIL Image object.

    Returns:
        Image.Image: the cropped image, or `img` itself when no bounding box
            of differing pixels is found.
    """
    background = Image.new(img.mode, img.size, img.getpixel((0,0)))
    delta = ImageChops.difference(img, background)
    # (delta + delta) / 2.0 - 100: drops small differences before measuring the box
    delta = ImageChops.add(delta, delta, 2.0, -100)
    box = delta.getbbox()
    return img.crop(box) if box else img
|
132
|
+
|
133
|
+
|
134
|
+
def rescale(rdmol:Chem.Mol, factor:float=1.5) -> Chem.Mol:
    """Returns a copy of `rdmol` with coordinates rescaled by a `factor`.

    The input molecule is not modified.

    Args:
        rdmol (Chem.Mol): input molecule (must have a conformer).
        factor (float): scaling factor.

    Returns:
        Chem.Mol: a copy of rescaled rdkit.Chem.Mol object.
    """
    transformed_rdmol = Chem.Mol(rdmol)
    center = AllChem.ComputeCentroid(transformed_rdmol.GetConformer())
    # `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
    tf = np.identity(4, float)
    # Affine matrix: scale x, y, z by `factor`, then shift x and y by the
    # negative centroid coordinates.
    # NOTE(review): the shift uses the unscaled centroid, so the result is
    # factor * p - center rather than factor * (p - center) — confirm intent.
    tf[0][3] -= center[0]
    tf[1][3] -= center[1]
    tf[0][0] = tf[1][1] = tf[2][2] = factor
    AllChem.TransformMol(transformed_rdmol, tf)
    return transformed_rdmol
|
152
|
+
|
153
|
+
|
154
|
+
def rotation_matrix(axis:str, degree:float) -> np.ndarray:
    """Returns a numpy rotation matrix of shape (4,4).

    The matrix is a homogeneous transform: the upper-left 3x3 block is the
    rotation about the chosen axis, with no translation.

    Args:
        axis (str): 'x' or 'y' or 'z' (case-insensitive).
        degree (float): degree of rotation.

    Raises:
        ValueError: if `axis` is not one of 'x', 'y', 'z'.

    Returns:
        np.ndarray: a numpy array of shape (4,4).
    """
    rad = (np.pi/180.0) * degree
    c = np.cos(rad)
    s = np.sin(rad)
    which = axis.lower()
    if which == 'x':
        return np.array([
            [1., 0., 0., 0.],
            [0., c, -s, 0.],
            [0., s,  c, 0.],
            [0., 0., 0., 1.],
        ])
    if which == 'y':
        return np.array([
            [ c, 0., s, 0.],
            [ 0., 1., 0., 0.],
            [-s, 0., c, 0.],
            [ 0., 0., 0., 1.],
        ])
    if which == 'z':
        return np.array([
            [c, -s, 0., 0.],
            [s,  c, 0., 0.],
            [0., 0., 1., 0.],
            [0., 0., 0., 1.],
        ])
    # Previously fell through and returned None, which only failed later
    # inside the caller with an unrelated error.
    raise ValueError(f"axis must be 'x', 'y' or 'z', got {axis!r}")
|
188
|
+
|
189
|
+
|
190
|
+
def rotate(rdmol:Chem.Mol, axis:str, degree:float) -> None:
    """Rotate `rdmol` around given axis and degree.

    Input `rdmol` will be modified. If it has no conformer yet, 2D
    coordinates are computed first and then rotated.

    Args:
        rdmol (Chem.Mol): input molecule.
        axis (str): axis of rotation, 'x' or 'y' or 'z'.
        degree (float): degree of rotation.
    """
    # Check explicitly instead of catching every exception from GetConformer():
    # a bare `except:` also hid unrelated errors and KeyboardInterrupt.
    if rdmol.GetNumConformers() == 0:
        AllChem.Compute2DCoords(rdmol)
    conf = rdmol.GetConformer()
    R = rotation_matrix(axis, degree)
    rdMolTransforms.TransformConformer(conf, R)
|