rdworks 0.25.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. rdworks/__init__.py +35 -0
  2. rdworks/autograph/__init__.py +4 -0
  3. rdworks/autograph/autograph.py +184 -0
  4. rdworks/autograph/centroid.py +90 -0
  5. rdworks/autograph/dynamictreecut.py +135 -0
  6. rdworks/autograph/nmrclust.py +123 -0
  7. rdworks/autograph/rckmeans.py +74 -0
  8. rdworks/bitqt/__init__.py +1 -0
  9. rdworks/bitqt/bitqt.py +355 -0
  10. rdworks/conf.py +374 -0
  11. rdworks/descriptor.py +36 -0
  12. rdworks/display.py +206 -0
  13. rdworks/ionized.py +170 -0
  14. rdworks/matchedseries.py +260 -0
  15. rdworks/mol.py +1522 -0
  16. rdworks/mollibr.py +887 -0
  17. rdworks/pka.py +38 -0
  18. rdworks/predefined/Asinex_fragment.xml +20 -0
  19. rdworks/predefined/Astex_RO3.xml +16 -0
  20. rdworks/predefined/Baell2010_PAINS/Baell2010A.xml +52 -0
  21. rdworks/predefined/Baell2010_PAINS/Baell2010B.xml +169 -0
  22. rdworks/predefined/Baell2010_PAINS/Baell2010C.xml +1231 -0
  23. rdworks/predefined/Baell2010_PAINS/PAINS-less-than-015-hits.xml +2048 -0
  24. rdworks/predefined/Baell2010_PAINS/PAINS-less-than-150-hits.xml +278 -0
  25. rdworks/predefined/Baell2010_PAINS/PAINS-more-than-150-hits.xml +83 -0
  26. rdworks/predefined/Baell2010_PAINS/makexml.py +70 -0
  27. rdworks/predefined/Brenk2008_Dundee/makexml.py +21 -0
  28. rdworks/predefined/CNS.xml +18 -0
  29. rdworks/predefined/ChEMBL_Walters/BMS.xml +543 -0
  30. rdworks/predefined/ChEMBL_Walters/Dundee.xml +318 -0
  31. rdworks/predefined/ChEMBL_Walters/Glaxo.xml +168 -0
  32. rdworks/predefined/ChEMBL_Walters/Inpharmatica.xml +276 -0
  33. rdworks/predefined/ChEMBL_Walters/LINT.xml +174 -0
  34. rdworks/predefined/ChEMBL_Walters/MLSMR.xml +351 -0
  35. rdworks/predefined/ChEMBL_Walters/PAINS.xml +1446 -0
  36. rdworks/predefined/ChEMBL_Walters/SureChEMBL.xml +501 -0
  37. rdworks/predefined/ChEMBL_Walters/makexml.py +40 -0
  38. rdworks/predefined/Hann1999_Glaxo/Hann1999.xml +168 -0
  39. rdworks/predefined/Hann1999_Glaxo/Hann1999Acid.xml +102 -0
  40. rdworks/predefined/Hann1999_Glaxo/Hann1999Base.xml +6 -0
  41. rdworks/predefined/Hann1999_Glaxo/Hann1999ElPh.xml +6 -0
  42. rdworks/predefined/Hann1999_Glaxo/Hann1999NuPh.xml +6 -0
  43. rdworks/predefined/Hann1999_Glaxo/makexml.py +83 -0
  44. rdworks/predefined/Kazius2005/Kazius2005.xml +114 -0
  45. rdworks/predefined/Kazius2005/makexml.py +66 -0
  46. rdworks/predefined/ZINC_druglike.xml +24 -0
  47. rdworks/predefined/ZINC_fragment.xml +14 -0
  48. rdworks/predefined/ZINC_leadlike.xml +15 -0
  49. rdworks/predefined/fragment.xml +7 -0
  50. rdworks/predefined/ionized/simple_smarts_pattern.csv +57 -0
  51. rdworks/predefined/ionized/smarts_pattern.csv +107 -0
  52. rdworks/predefined/misc/makexml.py +119 -0
  53. rdworks/predefined/misc/reactive-part-2.xml +104 -0
  54. rdworks/predefined/misc/reactive-part-3.xml +74 -0
  55. rdworks/predefined/misc/reactive.xml +321 -0
  56. rdworks/readin.py +312 -0
  57. rdworks/rgroup.py +2173 -0
  58. rdworks/scaffold.py +520 -0
  59. rdworks/std.py +143 -0
  60. rdworks/stereoisomers.py +127 -0
  61. rdworks/tautomers.py +20 -0
  62. rdworks/units.py +63 -0
  63. rdworks/utils.py +495 -0
  64. rdworks/xml.py +260 -0
  65. rdworks-0.25.7.dist-info/METADATA +37 -0
  66. rdworks-0.25.7.dist-info/RECORD +69 -0
  67. rdworks-0.25.7.dist-info/WHEEL +5 -0
  68. rdworks-0.25.7.dist-info/licenses/LICENSE +21 -0
  69. rdworks-0.25.7.dist-info/top_level.txt +1 -0
rdworks/conf.py ADDED
@@ -0,0 +1,374 @@
1
+ import io
2
+ import copy
3
+ import json
4
+ import types
5
+ import numpy as np
6
+
7
+ import ase
8
+ from ase.optimize import FIRE
9
+
10
+ from collections.abc import Callable
11
+
12
+ from rdkit import Chem
13
+ from rdkit.Chem import rdMolTransforms, AllChem, rdMolAlign
14
+ from rdkit.Chem.Draw import rdMolDraw2D
15
+
16
+ from typing import List, Optional, Union, Self
17
+
18
+ from .units import ev2kcalpermol
19
+ from .element import radii
20
+
21
+ class Conf:
22
+ """Container for 3D conformers.
23
+ """
24
+
25
def __init__(self, molecular_input:Chem.Mol, name:str='') -> None:
    """Wrap an RDKit molecule as a conformer container.

    Args:
        molecular_input (Chem.Mol): molecule to wrap. It is stored as-is
            (not copied) in `self.rdmol`.
        name (str, optional): name of the conformer. Defaults to ''.

    Raises:
        ValueError: if `molecular_input` is not rdkit.Chem.Mol object.
    """
    # Attributes are assigned before validation so that a failed construction
    # leaves the same partially-initialised state as before.
    self.rdmol = None  # intended to hold only one rdkit conformer
    self.name = name
    self.props = {}

    if not isinstance(molecular_input, Chem.Mol):
        raise ValueError('rdworks.Conf() takes Chem.Mol object')

    self.rdmol = molecular_input
    self.natoms = self.rdmol.GetNumAtoms()
    self.props['atoms'] = self.natoms
44
+
45
+
46
def __str__(self) -> str:
    """Return a short, human-readable description of the conformer.

    Returns:
        str: text containing the wrapped molecule, the name and the atom count.
    """
    details = f"{self.rdmol} name={self.name} atoms={self.natoms}"
    return f"<rdworks.Conf({details})>"
53
+
54
+
55
+ ##################################################
56
+ ### Cascading methods
57
+ ##################################################
58
+
59
+
60
+ def copy(self) -> Self:
61
+ """Returns a copy of self.
62
+
63
+ Returns:
64
+ Self: `rdworks.Conf` object.
65
+ """
66
+ return copy.deepcopy(self)
67
+
68
+
69
+ def rename(self, name:str) -> Self:
70
+ """Rename and returns self.
71
+
72
+ Args:
73
+ name (str): a new name for conformers.
74
+
75
+ Raises:
76
+ ValueError: if `name` is not given.
77
+
78
+ Returns:
79
+ Self: `rdworks.Conf` object.
80
+ """
81
+ if not name:
82
+ raise ValueError('rdworks.Conf.rename() expects a name')
83
+ self.name = name
84
+ self.rdmol.SetProp('_Name', name)
85
+ return self
86
+
87
+
88
+ def sync(self, coord:Union[np.ndarray, list]) -> Self:
89
+ """Synchronize the conformer coordinates with the provided `coord`.
90
+
91
+ Args:
92
+ coord (np.array): 3D coordinates.
93
+
94
+ Raises:
95
+ ValueError: if `coord` does not have the correct shape (natoms, 3).
96
+
97
+ Returns:
98
+ Self: `rdworks.Conf` object.
99
+ """
100
+ if isinstance(coord, np.ndarray) and coord.shape != (self.natoms, 3):
101
+ raise ValueError(f"`coord.shape` should be ({self.natoms},3)")
102
+ elif isinstance(coord, list) and len(coord) != self.natoms:
103
+ raise ValueError(f"`coord` should be length of {self.natoms}")
104
+ for i, a in enumerate(self.rdmol.GetAtoms()):
105
+ self.rdmol.GetConformer().SetAtomPosition(a.GetIdx(), coord[i])
106
+
107
+ return self
108
+
109
+
110
+ def get_potential_energy(self, calculator: str | Callable = 'MMFF94') -> float:
111
+ """Get potential energy in kcal/mol.
112
+
113
+ Args:
114
+ calculator (str | Callable): MMFF94 (= MMFF), MMFF94s, UFF, or ASE calculator.
115
+ `MMFF94` or `MMFF` - Intended for general use, including organic molecules and proteins,
116
+ and primarily relies on data from quantum mechanical calculations.
117
+ It's often used in molecular dynamics simulations.
118
+ `MMFF94s` - A "static" variant of MMFF94, with adjusted parameters for out-of-plane
119
+ bending and dihedral torsions to favor planar geometries for specific nitrogen atoms.
120
+ This makes it better suited for geometry optimization studies where a static,
121
+ time-averaged structure is desired. The "s" stands for "static".
122
+ `UFF` - UFF refers to the "Universal Force Field," a force field model used for
123
+ molecular mechanics calculations. It's a tool for geometry optimization,
124
+ energy minimization, and exploring molecular conformations in 3D space.
125
+ UFF is often used to refine conformers generated by other methods,
126
+ such as random conformer generation, to produce more physically plausible
127
+ and stable structures.
128
+
129
+ Returns:
130
+ float: potential energy in kcal/mol.
131
+ """
132
+ PE = None
133
+ if isinstance(calculator, str):
134
+ if calculator == 'MMFF94' or calculator == 'MMFF':
135
+ mp = AllChem.MMFFGetMoleculeProperties(self.rdmol, mmffVariant='MMFF94')
136
+ ff = AllChem.MMFFGetMoleculeForceField(self.rdmol, mp)
137
+ elif calculator == 'MMFF94s':
138
+ mp = AllChem.MMFFGetMoleculeProperties(self.rdmol, mmffVariant='MMFF94s')
139
+ ff = AllChem.MMFFGetMoleculeForceField(self.rdmol, mp)
140
+ elif calculator == 'UFF':
141
+ ff = AllChem.UFFGetMoleculeForceField(self.rdmol)
142
+ else:
143
+ raise ValueError("Unsupported calculator")
144
+ PE = ff.CalcEnergy()
145
+ self.props.update({'E_tot(kcal/mol)': PE})
146
+ else:
147
+ try:
148
+ ase_atoms = ase.Atoms(symbols=self.symbols(), positions=self.positions())
149
+ ase_atoms.calc = calculator
150
+ PE = ase_atoms.get_potential_energy() # np.array
151
+ PE = ev2kcalpermol * float(PE[0]) # np.float64 to float
152
+ self.props.update({'E_tot(kcal/mol)': PE})
153
+ except:
154
+ raise RuntimeError("ASE calculator error")
155
+ return PE
156
+
157
+
158
def optimize(self, calculator: str | Callable = 'MMFF94', fmax:float=0.05) -> Self:
    """Optimize the conformation with an RDKit force field or an ASE calculator.

    Updates `self.props` with 'E_tot_init(kcal/mol)' (before), 'E_tot(kcal/mol)'
    (after) and 'Converged'.

    Args:
        calculator (str | Callable): MMFF94 (= MMFF), MMFF94s, UFF, or ASE calculator.
            `MMFF94` or `MMFF` - general-purpose Merck force field.
            `MMFF94s` - the "static" MMFF94 variant aimed at geometry optimization.
            `UFF` - Universal Force Field.
            Any non-string value is used as the calculator of an `ase.Atoms`
            object that is relaxed with the ASE FIRE optimizer.
        fmax (float, optional): force convergence threshold passed to FIRE.
            Only used with an ASE calculator. Defaults to 0.05.

    Raises:
        ValueError: if `calculator` is an unsupported string.
        RuntimeError: if no FIRE step could be read from the optimizer log.

    Returns:
        Self: self
    """
    if isinstance(calculator, str):
        init = self.get_potential_energy(calculator)
        # RDKit optimizers return 0 when the optimization converged.
        if calculator in ('MMFF94', 'MMFF'):
            retcode = AllChem.MMFFOptimizeMolecule(self.rdmol, mmffVariant='MMFF94')
        elif calculator == 'MMFF94s':
            retcode = AllChem.MMFFOptimizeMolecule(self.rdmol, mmffVariant='MMFF94s')
        elif calculator == 'UFF':
            retcode = AllChem.UFFOptimizeMolecule(self.rdmol)
        else:
            raise ValueError("Unsupported calculator")
        final = self.get_potential_energy(calculator)
        self.props.update({
            'E_tot_init(kcal/mol)': init,    # energy before optimization
            'E_tot(kcal/mol)': final,        # energy after optimization
            'Converged': retcode == 0,       # True or False
        })
        return self

    with io.StringIO() as logfile:
        ase_atoms = ase.Atoms(symbols=self.symbols(), positions=self.positions())
        ase_atoms.calc = calculator
        FIRE(ase_atoms, logfile=logfile).run(fmax=fmax)
        log_text = logfile.getvalue()

    # Each step line looks like "FIRE: <step> <time> <energy> <fmax>".
    # The last two columns are read from the right so extra leading columns do
    # not matter, and a trailing '*' on the energy (written by ASE releases
    # that flag force-consistent energies) is tolerated.
    data = []
    for line in log_text.splitlines():
        if not line.startswith('FIRE'):
            continue
        fields = line.split()
        try:
            data.append((float(fields[-2].rstrip('*')), float(fields[-1])))
        except (IndexError, ValueError):
            continue
    if not data:
        raise RuntimeError("ASE FIRE log could not be parsed")

    self.props.update({
        'E_tot_init(kcal/mol)': data[0][0] * ev2kcalpermol,   # energy before optimization
        'E_tot(kcal/mol)': data[-1][0] * ev2kcalpermol,       # energy after optimization
        'Converged': data[-1][1] < fmax,                      # True or False
    })
    # update atomic coordinates
    return self.sync(ase_atoms.get_positions())
214
+
215
+
216
+ ##################################################
217
+ ### Endpoint methods
218
+ ##################################################
219
+
220
def check_bonds(self, tolerance:float=0.25) -> bool:
    """Check that every bond length is close to its reference length.

    The reference length of a bond is the sum of the `radii` entries of its
    two atoms (looked up by atomic number).
    NOTE(review): the `radii` table is defined outside this file; the previous
    docstring called it van der Waals radii - confirm which radii it holds.

    Args:
        tolerance (float, optional): largest accepted absolute deviation from
            the reference length. Defaults to 0.25 (A).

    Returns:
        bool: True if all bond lengths are acceptable.
    """
    mol = self.rdmol

    def is_acceptable(bond) -> bool:
        begin, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        reference = (radii[mol.GetAtomWithIdx(begin).GetAtomicNum()]
                     + radii[mol.GetAtomWithIdx(end).GetAtomicNum()])
        measured = rdMolTransforms.GetBondLength(mol.GetConformer(), begin, end)
        return not (abs(measured - reference) > tolerance)

    # all() stops at the first unacceptable bond, like an early return
    return all(is_acceptable(bond) for bond in mol.GetBonds())
242
+
243
+
244
def positions(self) -> np.array:
    """Return the atomic coordinates of the conformer.

    Returns:
        np.array: a new array built from the conformer positions.
    """
    conformer = self.rdmol.GetConformer()
    rows = conformer.GetPositions().tolist()
    return np.array(rows)
251
+
252
+
253
def symbols(self) -> list[str]:
    """Return the element symbol of every atom, in atom order.

    Returns:
        list: list of element symbols.
    """
    element_symbols = []
    for atom in self.rdmol.GetAtoms():
        element_symbols.append(atom.GetSymbol())
    return element_symbols
260
+
261
+
262
def numbers(self) -> list[int]:
    """Return the atomic number of every atom, in atom order.

    Returns:
        list: list of atomic numbers.
    """
    return list(atom.GetAtomicNum() for atom in self.rdmol.GetAtoms())
269
+
270
+
271
def cog(self) -> np.ndarray:
    """Return the center of geometry (COG).

    The COG is the unweighted mean of all atomic positions.

    Returns:
        np.ndarray: the center of geometry, shape (3,).
    """
    # The conformer exposes all coordinates through GetPositions(); it has no
    # `GetAtomPositions` method, which the per-atom loop used to call.
    xyz = np.asarray(self.rdmol.GetConformer().GetPositions(), dtype=float)
    return np.mean(xyz, axis=0)
282
+
283
+
284
def rg(self) -> float:
    """Return the radius of gyration (Rg).

    Computed without mass weighting: the root mean square distance of the
    atoms from their center of geometry.

    Returns:
        float: the radius of gyration (Rg).
    """
    # The conformer exposes all coordinates through GetPositions(); it has no
    # `GetAtomPositions` method, which the per-atom loop used to call.
    xyz = np.asarray(self.rdmol.GetConformer().GetPositions(), dtype=float)
    offsets = xyz - np.mean(xyz, axis=0)
    squared_distances = np.einsum('ij,ij->i', offsets, offsets)
    return float(np.sqrt(np.mean(squared_distances)))
299
+
300
+
301
def serialize(self, key:str='') -> str:
    """Return `props` (or a single entry of it) as a JSON string.

    Args:
        key (str): a key for the `props` dictionary. Defaults to '' (all).

    Raises:
        KeyError: if `key` is given but not present in `props`.

    Returns:
        str: JSON text produced by `json.dumps`, either of `{key: value}`
            or of the whole `props` dictionary.
    """
    if key:
        return json.dumps({key: self.props[key]})
    return json.dumps(self.props)
314
+
315
+
316
def to_sdf(self, props:bool=True) -> str:
    """Return the conformer as SDF-formatted text.

    Args:
        props (bool, optional): include `props` as SDF properties. Defaults to True.

    Returns:
        str: text in the SDF format.
    """
    buffer = io.StringIO()
    with Chem.SDWriter(buffer) as writer:
        # work on a copy so the stored molecule keeps its own properties
        record = Chem.Mol(self.rdmol)
        record.SetProp('_Name', self.name)
        entries = self.props.items() if props else ()
        for prop_name, prop_value in entries:
            record.SetProp(prop_name, str(prop_value))
        writer.write(record)
    return buffer.getvalue()
334
+
335
+
336
def to_svg(self,
           width:int=400,
           height:int=400,
           legend:Optional[str]=None,
           atom_index:bool=False,
           highlight:Optional[List[int]]=None) -> str:
    """Return a 2D SVG depiction of the molecule.

    Examples:
        >>> from IPython.display import SVG
        >>> SVG(libr[0].confs[0].to_svg(atom_index=True))

    Args:
        width (int): width (default:400)
        height (int): height (default:400)
        legend (str, optional): title; the conformer name is used if not given
        atom_index (bool): True/False whether to display atom index
        highlight (list): list of atom indices to highlight

    Returns:
        str: SVG text
    """
    # Draw from a copy: 2D coordinates are computed on it and the atom
    # properties returned by GetPropsAsDict() are cleared before drawing.
    depiction = Chem.Mol(self.rdmol)
    AllChem.Compute2DCoords(depiction)
    for atom in depiction.GetAtoms():
        for prop_name in atom.GetPropsAsDict():
            atom.ClearProp(prop_name)

    drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
    legend = legend or self.name

    if atom_index:
        for atom in depiction.GetAtoms():
            atom.SetProp("atomLabel", str(atom.GetIdx()))

    draw_options = {'legend': legend}
    if highlight:
        draw_options['highlightAtoms'] = highlight
    drawer.DrawMolecule(depiction, **draw_options)
    drawer.FinishDrawing()
    return drawer.GetDrawingText()
rdworks/descriptor.py ADDED
@@ -0,0 +1,36 @@
1
+ from rdkit.Chem import Descriptors, rdMolDescriptors, QED
2
+
3
# Human-readable description of each supported descriptor, keyed by the
# descriptor's short name.
rd_descriptor = {
    "QED": "Quantitative estimate of drug-likeness.",
    "MolWt": "Molecular weight",
    "LogP": "Predicted octanol/water partition coefficient",
    "TPSA": "Topological polar surface area",
    "HBD": "Number of hydrogen bonding donors",
    "HBA": "Number of hydrogen bonding acceptors",
    "RotBonds": "Number of rotatable bonds",
    "RingCount": "Number of rings",
    "FCsp3": "Fraction of SP3 carbons",
    "HAC": "Number of heavy atoms",
    "Hetero": "Number of hetero atoms (not H or C) [B,N,O,P,S,F,Cl,Br,I]",
    "LipinskiHBA": "Number of hydrogen bonding acceptors according to the Lipinski definition",
    "LipinskiHBD": "Number of hydrogen bonding donors according to the Lipinski definition",
}
18
+
19
# Callable that computes each descriptor, keyed by the descriptor's short name.
# Each callable comes from RDKit (QED, Descriptors or rdMolDescriptors).
rd_descriptor_f = {
    "QED" : QED.qed,
    "MolWt" : Descriptors.MolWt,
    "HAC" : Descriptors.HeavyAtomCount,
    "LogP" : Descriptors.MolLogP, # == Crippen.MolLogP
    "TPSA" : Descriptors.TPSA, # == MolSurf.TPSA
    "HBA" : rdMolDescriptors.CalcNumHBA, # == Descriptors.NumHAcceptors
    "HBD" : rdMolDescriptors.CalcNumHBD, # == Descriptors.NumHDonors
    "RotBonds" : rdMolDescriptors.CalcNumRotatableBonds, # == Descriptors.NumRotatableBonds
    "RingCount" : rdMolDescriptors.CalcNumRings, # == Descriptors.RingCount
    "FCsp3" : rdMolDescriptors.CalcFractionCSP3, # == Descriptors.FractionCSP3
    "Hetero" : rdMolDescriptors.CalcNumHeteroatoms, # not (H or C) [B,N,O,P,S,F,Cl,Br,I]
    "LipinskiHBA" : rdMolDescriptors.CalcNumLipinskiHBA,
    "LipinskiHBD" : rdMolDescriptors.CalcNumLipinskiHBD,
    # Disabled entry, kept for reference:
    # "StereoCenters" : rdMolDescriptors.CalcNumAtomStereoCenters,
    # Calling it as
    #   props_dict[k] = rd_descriptor_f[k](self.rdmol)
    # was recorded to fail with
    #   ValueError: numStereoCenters called without stereo being assigned
}
rdworks/display.py ADDED
@@ -0,0 +1,206 @@
1
+ import io
2
+ import os
3
+ import numpy as np
4
+ from typing import Optional, List, Tuple
5
+
6
+ from PIL import Image, ImageChops
7
+
8
+ from rdkit import Chem, Geometry
9
+ from rdkit.Chem import AllChem, Draw, rdDepictor, rdMolTransforms
10
+ from rdkit.Chem.Draw import rdMolDraw2D
11
+
12
+
13
+ # https://greglandrum.github.io/rdkit-blog/posts/2023-05-26-drawing-options-explained.html
14
+
15
+
16
def twod_depictor(rdmol:Chem.Mol, index:bool=False, coordgen:bool=False) -> Chem.Mol:
    """Prepare a copy of a molecule for 2D depiction.

    The copy gets 2D coordinates, is straightened, and has the atom properties
    returned by `GetPropsAsDict()` cleared. The CoordGen preference of
    `rdDepictor` is set on every call.

    Args:
        rdmol (Chem.Mol): input molecule.
        index (bool, optional): whether to show atom index. Defaults to False.
        coordgen (bool, optional): whether to set rdDepictor.SetPreferCoordGen. Defaults to False.

    Returns:
        Chem.Mol: a copy of rdkit.Chem.Mol object.
    """
    rdDepictor.SetPreferCoordGen(True if coordgen else False)

    depiction = Chem.Mol(rdmol)
    rdDepictor.Compute2DCoords(depiction)
    rdDepictor.StraightenDepiction(depiction)

    for atom in depiction.GetAtoms():
        for prop_name in atom.GetPropsAsDict():
            atom.ClearProp(prop_name)
        if index:
            # labelling atoms with their index hides polar hydrogens
            atom.SetProp("atomLabel", str(atom.GetIdx()))

    return depiction
47
+
48
+
49
def svg(rdmol:Chem.Mol,
        width:int=300,
        height:int=300,
        legend:str='',
        index:bool=False,
        highlight:list[int] | None = None,
        coordgen:bool = False) -> str:
    """Return the SVG text of a 2D drawing of a molecule.

    Examples:
        >>> from IPython.display import SVG
        >>> SVG(libr[0].to_svg())

    Args:
        rdmol (Chem.Mol): input molecule.
        width (int): width. Defaults to 300.
        height (int): height. Defaults to 300.
        legend (str): title of molecule. Defaults to ''.
        index (bool): whether to show atom indexes. Defaults to False.
        highlight (list[int]): list of atom indices to highlight. Defaults to None.
        coordgen (bool): whether to use rdDepictor.SetPreferCoordGen. Defaults to False.

    Returns:
        str: SVG text
    """
    canvas = rdMolDraw2D.MolDraw2DSVG(width, height)
    depiction = twod_depictor(rdmol, index, coordgen)

    draw_options = {'legend': legend}
    if highlight:
        # only forwarded when there is something to highlight
        draw_options['highlightAtoms'] = highlight
    canvas.DrawMolecule(depiction, **draw_options)

    canvas.FinishDrawing()
    return canvas.GetDrawingText()
83
+
84
+
85
def png(rdmol:Chem.Mol, width:int=300, height:int=300, legend:str='',
        index:bool=False, highlight:Optional[List[int]]=None, coordgen:bool=False) -> Image.Image:
    """Returns a trimmed PIL Image object of a molecule.

    Args:
        rdmol (Chem.Mol): input molecule.
        width (int): width. Defaults to 300.
        height (int): height. Defaults to 300.
        legend (str): title of molecule. Defaults to ''.
        index (bool): whether to show atom indexes. Defaults to False.
        highlight (list): list of atom indices to highlight. Defaults to None.
        coordgen (bool): whether to use rdDepictor.SetPreferCoordGen. Defaults to False.

    Returns:
        Image.Image: output PIL Image object, cropped by `trim_png`.
    """
    rdmol_2d = twod_depictor(rdmol, index, coordgen)
    img = Draw.MolToImage(
        rdmol_2d,
        size=(width, height),
        kekulize=True,
        wedgeBonds=True,
        fitImage=False,
        # `legend` was accepted by this function but never forwarded, so the
        # title was silently dropped; the default '' draws no legend.
        legend=legend,
        # an empty list (instead of None) means "nothing highlighted"
        highlightAtoms=highlight or [],
    )
    # Other keyword options understood by Draw.MolToImage:
    #   highlightBonds: list of bonds to highlight (default [])
    #   highlightColor: RGB color as tuple (default [1, 0, 0])

    return trim_png(img)
114
+
115
+
116
def trim_png(img:Image.Image) -> Image.Image:
    """Crop away the uniform margin around a molecular drawing.

    The colour of the top-left pixel is taken as the background colour.

    Args:
        img (Image.Image): input PIL Image object.

    Returns:
        Image.Image: the cropped image, or `img` itself when no bounding box
            of differing pixels is found.
    """
    corner_colour = img.getpixel((0,0))
    background = Image.new(img.mode, img.size, corner_colour)
    delta = ImageChops.difference(img, background)
    # amplify the difference and subtract an offset so faint deviations from
    # the background are ignored when the bounding box is computed
    delta = ImageChops.add(delta, delta, 2.0, -100)
    box = delta.getbbox()
    return img.crop(box) if box else img
132
+
133
+
134
def rescale(rdmol:Chem.Mol, factor:float=1.5) -> Chem.Mol:
    """Returns a copy of `rdmol` scaled by a `factor`.

    Args:
        rdmol (Chem.Mol): input molecule; it must have a conformer.
        factor (float): scaling factor.

    Returns:
        Chem.Mol: a copy of rescaled rdkit.Chem.Mol object.
    """
    transformed_rdmol = Chem.Mol(rdmol)
    center = AllChem.ComputeCentroid(transformed_rdmol.GetConformer())
    # `np.float` was removed from NumPy (1.24); the builtin float is the same dtype.
    tf = np.identity(4, dtype=float)
    # NOTE(review): only x and y are shifted, and by the unscaled centroid, so
    # the scaled molecule is not re-centred exactly - confirm this is intended.
    tf[0][3] -= center[0]
    tf[1][3] -= center[1]
    tf[0][0] = tf[1][1] = tf[2][2] = factor
    AllChem.TransformMol(transformed_rdmol, tf)
    return transformed_rdmol
152
+
153
+
154
def rotation_matrix(axis:str, degree:float) -> np.ndarray:
    """Returns a numpy rotation matrix of shape (4,4).

    The matrix is a homogeneous transform: the upper-left 3x3 block holds the
    rotation and the translation part is zero.

    Args:
        axis (str): 'x' or 'y' or 'z' (case-insensitive).
        degree (float): degree of rotation.

    Raises:
        ValueError: if `axis` is not one of 'x', 'y' or 'z'.

    Returns:
        np.ndarray: a numpy array of shape (4,4).
    """
    rad = (np.pi/180.0) * degree
    c = np.cos(rad)
    s = np.sin(rad)
    name = axis.lower()
    if name == 'x':
        return np.array([
            [1., 0., 0., 0.],
            [0., c, -s, 0.],
            [0., s, c, 0.],
            [0., 0., 0., 1.],
        ])
    if name == 'y':
        return np.array([
            [c, 0., s, 0.],
            [0., 1., 0., 0.],
            [-s, 0., c, 0.],
            [0., 0., 0., 1.],
        ])
    if name == 'z':
        return np.array([
            [c, -s, 0., 0.],
            [s, c, 0., 0.],
            [0., 0., 1., 0.],
            [0., 0., 0., 1.],
        ])
    # Falling through used to return None, which only failed later and
    # obscurely inside the caller's transform.
    raise ValueError(f"axis must be 'x', 'y' or 'z', got {axis!r}")
188
+
189
+
190
def rotate(rdmol:Chem.Mol, axis:str, degree:float) -> None:
    """Rotate `rdmol` around given axis and degree.

    Input `rdmol` will be modified. If it has no conformer yet, 2D coordinates
    are computed first and then rotated.

    Args:
        rdmol (Chem.Mol): input molecule.
        axis (str): axis of rotation, 'x' or 'y' or 'z'.
        degree (float): degree of rotation.
    """
    # Test for the missing conformer explicitly instead of catching every
    # exception, which would also hide unrelated errors.
    if rdmol.GetNumConformers() == 0:
        AllChem.Compute2DCoords(rdmol)
    conf = rdmol.GetConformer()
    R = rotation_matrix(axis, degree)
    rdMolTransforms.TransformConformer(conf, R)