rdworks 0.25.8__py3-none-any.whl → 0.36.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdworks/__init__.py +19 -20
- rdworks/conf.py +319 -118
- rdworks/display.py +244 -83
- rdworks/mol.py +620 -489
- rdworks/mollibr.py +336 -180
- rdworks/readin.py +2 -4
- rdworks/scaffold.py +1 -1
- rdworks/std.py +64 -24
- rdworks/torsion.py +477 -0
- rdworks/units.py +7 -58
- rdworks/utils.py +141 -258
- rdworks/xtb/__init__.py +0 -0
- rdworks/xtb/wrapper.py +304 -0
- {rdworks-0.25.8.dist-info → rdworks-0.36.1.dist-info}/METADATA +6 -9
- {rdworks-0.25.8.dist-info → rdworks-0.36.1.dist-info}/RECORD +18 -15
- {rdworks-0.25.8.dist-info → rdworks-0.36.1.dist-info}/WHEEL +1 -1
- {rdworks-0.25.8.dist-info → rdworks-0.36.1.dist-info}/licenses/LICENSE +0 -0
- {rdworks-0.25.8.dist-info → rdworks-0.36.1.dist-info}/top_level.txt +0 -0
rdworks/readin.py
CHANGED
@@ -1,11 +1,9 @@
|
|
1
1
|
from pathlib import Path
|
2
2
|
|
3
3
|
from rdkit import Chem
|
4
|
-
from rdkit.Chem import AllChem, rdmolfiles
|
4
|
+
from rdkit.Chem import AllChem, rdmolfiles
|
5
5
|
|
6
|
-
from rdworks
|
7
|
-
from rdworks.mollibr import MolLibr
|
8
|
-
from rdworks.conf import Conf
|
6
|
+
from rdworks import Conf, Mol, MolLibr
|
9
7
|
from rdworks.utils import compute, precheck_path, guess_mol_id
|
10
8
|
|
11
9
|
import pandas as pd
|
rdworks/scaffold.py
CHANGED
@@ -40,7 +40,7 @@ def remove_exocyclic(rdmol:Chem.Mol) -> Chem.Mol:
|
|
40
40
|
fg_mol = [Chem.MolFromSmiles(x) for x in fg_smi]
|
41
41
|
# ring count
|
42
42
|
fg_rc = [rdMolDescriptors.CalcNumRings(g) for g in fg_mol]
|
43
|
-
if 0 in fg_rc: # if one the fragmented parts has no ring system
|
43
|
+
if 0 in fg_rc: # if one of the fragmented parts has no ring system
|
44
44
|
xbs.append(b.GetIdx())
|
45
45
|
fg_smi = Chem.MolToSmiles(
|
46
46
|
Chem.FragmentOnBonds(rdmol,xbs,addDummies=False)).split(".")
|
rdworks/std.py
CHANGED
@@ -1,18 +1,19 @@
|
|
1
1
|
import operator
|
2
|
-
from typing import Tuple, Union
|
3
2
|
|
4
3
|
from rdkit import Chem
|
4
|
+
from rdkit.Chem import rdDepictor
|
5
5
|
from rdkit.Chem.MolStandardize import rdMolStandardize
|
6
6
|
|
7
7
|
|
8
|
-
|
9
|
-
|
8
|
+
|
9
|
+
def desalt_smiles(smiles: str) -> tuple[str, Chem.Mol]:
|
10
|
+
"""Remove salt(s) from SMILES.
|
10
11
|
|
11
12
|
Args:
|
12
|
-
smiles (str):
|
13
|
+
smiles (str): SMILES.
|
13
14
|
|
14
15
|
Returns:
|
15
|
-
|
16
|
+
(desalted SMILES, desalted Chem.Mol)
|
16
17
|
"""
|
17
18
|
mols = []
|
18
19
|
for smi in smiles.split("."):
|
@@ -22,18 +23,22 @@ def desalt_smiles(smiles:str) -> Tuple[Union[str, None], Union[Chem.Mol, None]]:
|
|
22
23
|
mols.append((n, smi, rdmol))
|
23
24
|
except:
|
24
25
|
pass
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
26
|
+
|
27
|
+
assert len(mols) > 0, "desalt_smiles() Error: invalid SMILES"
|
28
|
+
|
29
|
+
# `sorted` function compares the number of atoms first then smiles and rdmol.
|
30
|
+
# Comparing smiles string would be okay but comparison of rdmol objects will
|
31
|
+
# cause error because comparison operation for Chem.Mol is not supported.
|
32
|
+
# So we need to restrict the key to the number of atoms.
|
33
|
+
|
34
|
+
(n, desalted_smiles, desalted_rdmol) = sorted(mols,
|
35
|
+
key=operator.itemgetter(0),
|
36
|
+
reverse=True)[0]
|
34
37
|
|
38
|
+
return (desalted_smiles, desalted_rdmol)
|
39
|
+
|
35
40
|
|
36
|
-
def standardize_smiles(smiles:str) -> str:
|
41
|
+
def standardize_smiles(smiles: str) -> str:
|
37
42
|
"""Returns standardized SMILES string.
|
38
43
|
|
39
44
|
The rdMolStandardize.StandardizeSmiles() function performs the following steps:
|
@@ -62,7 +67,7 @@ def standardize_smiles(smiles:str) -> str:
|
|
62
67
|
return rdMolStandardize.StandardizeSmiles(smiles)
|
63
68
|
|
64
69
|
|
65
|
-
def standardize(smiles:str) -> Chem.Mol:
|
70
|
+
def standardize(smiles: str) -> Chem.Mol:
|
66
71
|
"""Returns standardized rdkit.Chem.Mol object.
|
67
72
|
|
68
73
|
Args:
|
@@ -97,7 +102,7 @@ def standardize(smiles:str) -> Chem.Mol:
|
|
97
102
|
return taut_uncharged_parent_clean_mol
|
98
103
|
|
99
104
|
|
100
|
-
def neutralize_atoms(rdmol:Chem.Mol) -> Chem.Mol:
|
105
|
+
def neutralize_atoms(rdmol: Chem.Mol) -> Chem.Mol:
|
101
106
|
"""Neutralizes atoms.
|
102
107
|
|
103
108
|
It is adapted from Noel O'Boyle's nocharge code:
|
@@ -122,22 +127,57 @@ def neutralize_atoms(rdmol:Chem.Mol) -> Chem.Mol:
|
|
122
127
|
charges even if the neutralization introduces an overall formal charge on the molecule.
|
123
128
|
|
124
129
|
Args:
|
125
|
-
rdmol (
|
130
|
+
rdmol (Chem.Mol) : molecule (not to be modified).
|
126
131
|
|
127
132
|
Returns:
|
128
|
-
Chem.Mol:
|
133
|
+
Chem.Mol: neutralized copy of molecule.
|
129
134
|
"""
|
130
|
-
|
131
|
-
rdmol_ = Chem.Mol(rdmol)
|
135
|
+
mol = Chem.Mol(rdmol)
|
132
136
|
pattern = Chem.MolFromSmarts("[+1!h0!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]")
|
133
|
-
at_matches =
|
137
|
+
at_matches = mol.GetSubstructMatches(pattern)
|
134
138
|
at_matches_list = [y[0] for y in at_matches]
|
139
|
+
|
135
140
|
if len(at_matches_list) > 0:
|
136
141
|
for at_idx in at_matches_list:
|
137
|
-
atom =
|
142
|
+
atom = mol.GetAtomWithIdx(at_idx)
|
138
143
|
chg = atom.GetFormalCharge()
|
139
144
|
hcount = atom.GetTotalNumHs()
|
140
145
|
atom.SetFormalCharge(0)
|
141
146
|
atom.SetNumExplicitHs(hcount - chg)
|
142
147
|
atom.UpdatePropertyCache()
|
143
|
-
|
148
|
+
|
149
|
+
return mol
|
150
|
+
|
151
|
+
|
152
|
+
def clean_2d(rdmol: Chem.Mol,
             reset_isotope: bool = True,
             remove_H: bool = True,
             ) -> tuple[Chem.Mol, list[Chem.Mol]]:
    """Prepare a copy of a molecule for 2D depiction.

    The input molecule is never touched; all changes are applied to a copy.

    Args:
        rdmol (Chem.Mol): source molecule (left unmodified).
        reset_isotope (bool, optional): set the isotope of every atom of the
            copy to 0. Defaults to True.
        remove_H (bool, optional): pass the copy through ``Chem.RemoveHs``.
            Defaults to True.

    Returns:
        (cleaned copy, conformers). ``conformers`` holds the conformer objects
        of the copy as they were before hydrogen removal, and is only filled
        when the copy has more than one conformer and its default conformer
        is flagged as 3D; otherwise it is an empty list.
    """
    work = Chem.Mol(rdmol)

    # A molecule built from SMILES has no conformers at all, and a single
    # conformer is not collected either: only a 3D multi-conformer ensemble is.
    conformers = []
    if work.GetNumConformers() > 1 and work.GetConformer().Is3D():
        conformers = list(work.GetConformers())

    if reset_isotope:
        for atom in work.GetAtoms():
            atom.SetIsotope(0)

    cleaned = Chem.RemoveHs(work) if remove_H else work

    return (cleaned, conformers)
|
rdworks/torsion.py
ADDED
@@ -0,0 +1,477 @@
|
|
1
|
+
import numpy as np
|
2
|
+
|
3
|
+
from rdkit import Chem
|
4
|
+
from rdworks.xtb.wrapper import GFN2xTB
|
5
|
+
|
6
|
+
|
7
|
+
def _select_dihedral_end(rdmol: Chem.Mol,
                         center_idx: int,
                         partner_idx: int,
                         excluded_nuclei: frozenset) -> int | None:
    """Pick the neighbor of ``center_idx`` that terminates the dihedral angle.

    Neighbors equal to ``partner_idx`` or whose atomic number is listed in
    ``excluded_nuclei`` are ignored. Among the remaining neighbors the highest
    atomic number wins and ties go to the lowest atom index. Returns None when
    no neighbor qualifies.
    """
    candidates = [
        (neighbor.GetAtomicNum(), neighbor.GetIdx())
        for neighbor in rdmol.GetAtomWithIdx(center_idx).GetNeighbors()
        if neighbor.GetIdx() != partner_idx
        and neighbor.GetAtomicNum() not in excluded_nuclei
    ]
    if not candidates:
        return None
    return max(candidates, key=lambda c: (c[0], -c[1]))[1]


def _share_small_ring(idx1: int, idx2: int, small_rings: list[tuple]) -> bool:
    """Return True when both atom indices belong to one of ``small_rings``."""
    return any((idx1 in ring) and (idx2 in ring) for ring in small_rings)


def get_torsion_atoms(rdmol: Chem.Mol, strict: bool = True) -> list[tuple]:
    """Determine dihedral angle atoms (a-b-c-d) and rotating group for each rotatable bond (b-c).

    A rotatable bond is skipped when no suitable ``a`` or ``d`` atom exists
    (H, F, Cl, Br and I neighbors are not accepted), when a-b or c-d lies in a
    3- or 4-membered ring, or when either side of the bond has fewer than
    three heavy atoms.

    Args:
        rdmol (Chem.Mol): molecule
        strict (bool): whether to exclude amide/imide/ester/acid bonds.

    Returns:
        [   (a, b, c, d, rot_atom_indices, fix_atom_indices),
            (a, b, c, d, rot_atom_indices, fix_atom_indices),
            ...,
        ]
        ``rot_atom_indices`` is the side of the b-c bond with fewer heavy
        atoms and ``fix_atom_indices`` is the other side. Atoms of components
        that are not connected to the b-c bond appear in neither tuple.
    """
    # https://github.com/rdkit/rdkit/blob/1bf6ef3d65f5c7b06b56862b3fb9116a3839b229/rdkit/Chem/Lipinski.py#L47%3E
    # https://github.com/rdkit/rdkit/blob/de602c88809ea6ceba1e8ed50fd543b6e406e9c4/Code/GraphMol/Descriptors/Lipinski.cpp#L108
    if strict:
        # excludes amide/imide/ester/acid bonds
        rotatable_bond_pattern = Chem.MolFromSmarts(
            (
                "[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])("
                "[CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])&!$([#7,O,S!D1]-!@[CD3]="
                "[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])&!$([#7!D1]-!@[CD3]=[N+])]-,:;!@[!$"
                "(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])(["
                "CH3])[CH3])]"
            )
        )
    else:
        rotatable_bond_pattern = Chem.MolFromSmarts('[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]')

    rotatable_bonds = rdmol.GetSubstructMatches(rotatable_bond_pattern)

    # small rings (n=3 or 4), ex. = [(29, 28, 30)]
    small_rings = [r for r in rdmol.GetRingInfo().AtomRings() if len(r) < 5]

    forbidden_terminal_nuclei = frozenset((1, 9, 17, 35, 53))  # H,F,Cl,Br,I

    torsion_angle_atom_indices = []

    for (b_idx, c_idx) in rotatable_bonds:
        # determine the atom ``a`` that defines the dihedral angle
        a_idx = _select_dihedral_end(rdmol, b_idx, c_idx, forbidden_terminal_nuclei)
        if a_idx is None or _share_small_ring(a_idx, b_idx, small_rings):
            continue

        # determine the atom ``d`` that defines the dihedral angle
        d_idx = _select_dihedral_end(rdmol, c_idx, b_idx, forbidden_terminal_nuclei)
        if d_idx is None or _share_small_ring(c_idx, d_idx, small_rings):
            continue

        # determine a group of atoms to be rotated
        # https://ctr.fandom.com/wiki/Break_rotatable_bonds_and_report_the_fragments
        em = Chem.EditableMol(rdmol)
        em.RemoveBond(b_idx, c_idx)
        fragments = Chem.GetMolFrags(em.GetMol(), asMols=False)  # tuple of tuple

        # Select the two sides of the broken bond explicitly. Unpacking the
        # result into exactly two fragments fails for multi-component input
        # (e.g. a salt), which yields more than two fragments here.
        frag_b = next(frag for frag in fragments if b_idx in frag)
        frag_c = next(frag for frag in fragments if c_idx in frag)
        if frag_b == frag_c:
            # defensive: the bond did not separate the molecule
            continue

        hac_b = sum(1 for idx in frag_b if rdmol.GetAtomWithIdx(idx).GetAtomicNum() > 1)
        hac_c = sum(1 for idx in frag_c if rdmol.GetAtomWithIdx(idx).GetAtomicNum() > 1)

        # smaller fragment will be rotated and must contain at least three heavy atoms
        if min(hac_b, hac_c) >= 3:
            (frag_rot, frag_fix) = sorted([(hac_b, frag_b), (hac_c, frag_c)])
            torsion_angle_atom_indices.append(
                (a_idx, b_idx, c_idx, d_idx, frag_rot[1], frag_fix[1]))

    return torsion_angle_atom_indices
|
112
|
+
|
113
|
+
|
114
|
+
|
115
|
+
|
116
|
+
def find_atoms_at_bond_distance(rdmol: Chem.Mol,
                                start_atom_idx: int,
                                distance: int) -> list[int]:
    """Collect the end atoms of every simple path of ``distance`` bonds.

    The molecular graph is walked depth-first from ``start_atom_idx`` without
    revisiting an atom that is already on the current path. Each time a path
    reaches exactly ``distance`` bonds its last atom is recorded, so an atom
    reachable through several such paths is listed more than once.

    NOTE(review): in ring systems this is not the same as the shortest-path
    (topological) distance -- confirm which one callers expect.

    Args:
        rdmol: An RDKit Mol object.
        start_atom_idx: The index of the starting atom.
        distance: The number of bonds each path must contain.

    Returns:
        Atom indices found at the end of those paths, in traversal order.
    """
    assert start_atom_idx < rdmol.GetNumAtoms(), "start_atom_idx out of range."

    def walk(atom_idx: int, depth: int, on_path: frozenset):
        # A path of the requested length ends here and is not extended.
        if depth == distance:
            yield atom_idx
            return

        # Each branch carries its own immutable path set, so no explicit
        # backtracking step is needed.
        trail = on_path | {atom_idx}
        for neighbor in rdmol.GetAtomWithIdx(atom_idx).GetNeighbors():
            neighbor_idx = neighbor.GetIdx()
            if neighbor_idx not in trail:
                yield from walk(neighbor_idx, depth + 1, trail)

    return list(walk(start_atom_idx, 0, frozenset()))
|
152
|
+
|
153
|
+
|
154
|
+
def get_bond_distance(rdmol: Chem.Mol, start_atom_idx: int) -> dict:
    """Group atoms by the length of the simple paths leading to them.

    The molecular graph is walked depth-first from ``start_atom_idx`` without
    revisiting an atom that is already on the current path. Every atom
    reached is recorded under the number of bonds of the path that led to it.
    An atom reachable through several paths therefore shows up under several
    distances, and possibly more than once under the same distance.

    Args:
        rdmol: An RDKit Mol object.
        start_atom_idx: The index of the starting atom.

    Returns:
        dict: { number_of_bonds : [atom_index, ...], ... } with the starting
        atom itself stored under key 0.
    """
    assert start_atom_idx < rdmol.GetNumAtoms(), "start_atom_idx out of range."

    def walk(atom_idx: int, depth: int, on_path: frozenset):
        # Report the atom first, then descend into the neighbors that are not
        # already part of the current path.
        yield depth, atom_idx
        trail = on_path | {atom_idx}
        for neighbor in rdmol.GetAtomWithIdx(atom_idx).GetNeighbors():
            neighbor_idx = neighbor.GetIdx()
            if neighbor_idx not in trail:
                yield from walk(neighbor_idx, depth + 1, trail)

    bond_distance = {}
    for depth, atom_idx in walk(start_atom_idx, 0, frozenset()):
        bond_distance.setdefault(depth, []).append(atom_idx)

    return bond_distance
|
189
|
+
|
190
|
+
|
191
|
+
def find_bonds_to_prune(rdmol: Chem.Mol,
                        torsion_indices: tuple,
                        bond_dist_threshold: int = 4,
                        bond_order_threshold: float = 1.75,
                        electronegative: list[int] = [7, 8, 9, 17, 35],
                        ) -> dict[int, list[int]]:
    """Find bonds that may be cut to build a smaller fragment around a torsion.

    Rules for a candidate bond to break:

    For (i-j-k-l) torsion,

    1. NOT (bond distance from j or k < ``bond_dist_threshold``)
    2. NOT (bond order > ``bond_order_threshold``)
    3. NOT (Pauling electronegativity of any of bond atoms > 2.9), which is
       implemented as a membership test against ``electronegative``

    In addition, ring bonds are never selected.

    Args:
        rdmol: An RDKit Mol object.
        torsion_indices: tuple whose first four items are the torsion atom
            indices (i, j, k, l); any further items are ignored here.
        bond_dist_threshold: search depth, counted in bonds from atom k, from
            which a bond may be selected; it is also the bound below which
            atoms around j and k are marked as too close.
        bond_order_threshold: bonds whose ``GetBondTypeAsDouble()`` exceeds
            this value are not selected.
        electronegative: atomic numbers that disqualify a bond when either of
            its atoms carries one of them. The default is a list shared
            between calls; it is only read in this function.

    Pauling electronegativity:
        ```py
        from mendeleev import element
        for i in range(1, 119): # 118 is the highest atomic number known
            el = element(i)
            if isinstance(el.en_pauling, float) and el.en_pauling > 2.9:
                print(f"Element {i}: {el.symbol} {el.atomic_number} {el.en_pauling}")
        ```
        Element 7: N 7 3.04
        Element 8: O 8 3.44
        Element 9: F 9 3.98
        Element 17: Cl 17 3.16
        Element 35: Br 35 2.96

    Returns:
        dict: { bond_index : [atom_index, atom_index], ... } where the two
        atoms of the bond are ordered by the nested ``ordered()`` helper
        (atom regarded as closer to the torsion first).
    """

    # Only j and k are used below; i and l are unpacked but not referenced.
    (i, j, k, l) = torsion_indices[:4]

    dist_from_j = get_bond_distance(rdmol, j)
    dist_from_k = get_bond_distance(rdmol, k)

    # sum(..., []) flattens a list of lists.
    # ``forbidden`` ends up holding every atom recorded at a distance below
    # the threshold from either j or k.
    forbidden = sum([v for d, v in dist_from_j.items() if d < bond_dist_threshold], [])
    forbidden += sum([v for d, v in dist_from_k.items() if d < bond_dist_threshold], [])
    forbidden = set(forbidden)

    start_atom_idx = k # either j or k yields the same result (per original author)

    found_bonds = {}
    visited = set()

    def ordered(p: int, q: int) -> list[int]:
        """Order two atom indices by their recorded distances to j and k.

        For each atom, every distance key under which it appears in
        ``dist_from_j`` and ``dist_from_k`` is summed up; the atom with the
        strictly smaller sum comes first. On a tie ``[q, p]`` is returned.

        Args:
            p (int): atom index
            q (int): atom index

        Returns:
            list[int]: [atom index closer to the torsion angle, the other]
        """
        dist_p = []
        dist_q = []
        for d, indices in dist_from_j.items():
            if p in indices:
                dist_p.append(d)
            if q in indices:
                dist_q.append(d)
        for d, indices in dist_from_k.items():
            if p in indices:
                dist_p.append(d)
            if q in indices:
                dist_q.append(d)
        if sum(dist_p) < sum(dist_q):
            return [p, q]
        else:
            return [q, p]

    def dfs(curr_atom_idx: int, bond_dist: int):
        """Depth-first recursive search of bonded atoms.

        Results are stored in the enclosing ``found_bonds`` dict.

        Args:
            curr_atom_idx (int): atom index.
            bond_dist (int): number of bonds walked from the start atom.
        """
        curr_atom = rdmol.GetAtomWithIdx(curr_atom_idx)
        visited.add(curr_atom_idx)
        for next_atom in curr_atom.GetNeighbors():
            next_atom_idx = next_atom.GetIdx()
            bond = rdmol.GetBondBetweenAtoms(curr_atom_idx, next_atom_idx)
            n1 = curr_atom.GetAtomicNum()
            n2 = next_atom.GetAtomicNum()
            # forbidden (rule 1): both atoms of the bond are near j or k
            too_close = (curr_atom_idx in forbidden) and (next_atom_idx in forbidden)
            # bond order (rule 2): True when the bond order is acceptable
            bond_order = not (bond.GetBondTypeAsDouble() > bond_order_threshold)
            # Pauling electronegativity (rule 3): True when neither atom is listed
            bond_pauling = not ((n1 in electronegative) or (n2 in electronegative))
            # The bond is tested before the visited check, so the bond leading
            # back to the atom we came from is tested as well.
            if (bond_dist >= bond_dist_threshold) and (not too_close) \
                and (not bond.IsInRing()) and bond_order and bond_pauling:
                # determine which atom has shorter bond distance to the torsion angle (j or k)
                found_bonds[bond.GetIdx()] = ordered(curr_atom_idx, next_atom_idx)
                # NOTE(review): this return leaves before the backtracking
                # step below, so curr_atom_idx stays in ``visited`` and its
                # remaining neighbors are not examined -- confirm intended.
                return
            if next_atom_idx not in visited:
                dfs(next_atom_idx, bond_dist + 1)
        # Backtrack
        visited.remove(curr_atom_idx)

    dfs(start_atom_idx, 0)

    return found_bonds
|
305
|
+
|
306
|
+
|
307
|
+
def get_fragment_idx(parent: Chem.Mol,
                     indices: tuple,
                     fragment: Chem.Mol) -> tuple:
    """Translate parent atom indices into fragment atom indices.

    Atoms are paired by exact equality of their 3D coordinates, read from the
    default conformer of each molecule. The original author recorded
    sub-millisecond run times per torsion for this approach, compared with
    the MCS-based `get_fragment_idx_with_mcs()`.

    Args:
        parent (Chem.Mol): rdkit Chem.Mol object.
        indices (tuple): parent atom indices to look up in the fragment.
        fragment (Chem.Mol): fragment originated from the parent.

    Returns:
        tuple: fragment atom indices, following the order of ``indices``.
        A parent atom without an identically placed fragment atom adds
        nothing to the result, and one with several identically placed
        fragment atoms adds all of them.
    """
    parent_coords = parent.GetConformer().GetPositions()  # numpy.ndarray
    fragment_coords = fragment.GetConformer().GetPositions()  # numpy.ndarray
    targets = [parent_coords[parent_idx] for parent_idx in indices]

    matched = []
    for target in targets:
        for fragment_idx, coord in enumerate(fragment_coords):
            if np.array_equal(coord, target):
                matched.append(fragment_idx)

    return tuple(matched)
|
340
|
+
|
341
|
+
|
342
|
+
|
343
|
+
def get_fragment_idx_with_mcs(parent: Chem.Mol,
                              indices: tuple,
                              fragment: Chem.Mol) -> tuple:
    """Get fragment atom indices corresponding to given parent indices.

    The maximum common substructure (MCS) of parent and fragment is matched
    against both molecules. A parent atom is translated through its position
    within the matched atom tuple.

    Warning:
        It uses MCS and can be extremely slow sometimes.
        For example, below are the elapsed times for 12 torsion angles of atorvastatin:
            0 elapsed=5.525973221054301 sec. **
            1 elapsed=1.9143556850031018 *
            2 elapsed=3.145250838017091 *
            3 elapsed=9.390580283012241 **
            4 elapsed=89.97735002799891 ***
            5 elapsed=0.19022215204313397
            6 elapsed=0.013428106089122593
            7 elapsed=0.023345661000348628
            8 elapsed=0.023358764010481536
            9 elapsed=0.0007965450640767813
            10 elapsed=0.0008196790004149079
            11 elapsed=0.04075543500948697

    Args:
        parent (Chem.Mol): rdkit Chem.Mol object.
        indices (tuple): parent atom indices to map within the MCS.
        fragment (Chem.Mol): fragment originated from the parent.

    Returns:
        tuple: fragment atom indices, one per entry of ``indices`` and in the
        same order.

    Raises:
        ValueError: if the MCS does not match the parent or the fragment, or
            if one of ``indices`` is not part of a parent match.
        AssertionError: if alternative matches disagree on the mapping.
    """
    # ``from rdkit import Chem`` alone does not guarantee that the rdFMCS
    # submodule has been loaded, so ``Chem.rdFMCS`` can fail with an
    # AttributeError. Import it explicitly.
    from rdkit.Chem import rdFMCS

    mcs_result = rdFMCS.FindMCS([parent, fragment])
    mcs_mol = Chem.MolFromSmarts(mcs_result.smartsString)

    parent_matches = parent.GetSubstructMatches(mcs_mol)
    frag_matches = fragment.GetSubstructMatches(mcs_mol)

    # Fail with a clear message instead of an accidental TypeError on None.
    if not parent_matches or not frag_matches:
        raise ValueError(
            "get_fragment_idx_with_mcs() Error: no MCS match between parent and fragment")

    # Position of each requested parent atom within the matched atom tuple.
    # It is possible to have more than one match (i.e. methyl rotation).
    # However, even if there are more than one match, the positions of the
    # requested parent atoms should be the same.
    first_parent_match, *other_parent_matches = parent_matches
    indices_idx = {x: first_parent_match.index(x) for x in indices}
    for parent_matched_indices in other_parent_matches:
        assert all(indices_idx[x] == parent_matched_indices.index(x) for x in indices)

    # The fragment atom sitting at the same position of the fragment match.
    # Again, alternative matches must agree for the requested atoms.
    first_frag_match, *other_frag_matches = frag_matches
    indices_map = {x: first_frag_match[indices_idx[x]] for x in indices}
    for frag_matched_indices in other_frag_matches:
        assert all(indices_map[x] == frag_matched_indices[indices_idx[x]] for x in indices)

    return tuple(indices_map[x] for x in indices)
|
397
|
+
|
398
|
+
|
399
|
+
|
400
|
+
def create_fragment_on_bonds(rdmol: Chem.Mol, bonds: dict, cap: bool = True) -> Chem.Mol | None:
    """Create a fragment that preserves defined atoms.

    The molecule is cut at every bond listed in ``bonds``. Among the
    resulting pieces, the first one that contains all preserved atoms is
    returned.

    Args:
        rdmol (Chem.Mol): input molecule (left unmodified).
        bonds (dict): {bond_index : (preserved_atom_index, removed_atom_index), ...}
            When empty, there is nothing to cut and a copy of ``rdmol`` is
            returned.
        cap (bool): whether to cap the dummy atom(s) with hydrogen(s)

    Returns:
        Chem.Mol: resulting fragment molecule, or None when no single piece
        contains all preserved atoms.
    """
    # Chem.FragmentOnBonds raises ValueError for an empty bond list, so
    # return the uncut molecule in that case.
    if not bonds:
        return Chem.Mol(rdmol)

    fragments = Chem.FragmentOnBonds(rdmol, list(bonds))
    preserved_atoms = {preserved for (preserved, _removed) in bonds.values()}

    # The same pieces are fetched twice, once as atom index tuples and once as
    # molecules, and walked in parallel.
    for fragment_indices, fragment_mol in zip(Chem.GetMolFrags(fragments),
                                              Chem.GetMolFrags(fragments, asMols=True)):
        if not preserved_atoms.issubset(fragment_indices):
            continue

        if cap:
            # cap dummy atoms with hydrogens
            # NOTE(review): only the atomic number is changed; any other
            # property set on the dummy atom is kept -- confirm acceptable.
            for atom in fragment_mol.GetAtoms():
                if atom.GetAtomicNum() == 0:
                    atom.SetAtomicNum(1)

        return fragment_mol

    return None
|
425
|
+
|
426
|
+
|
427
|
+
def create_torsion_fragment(rdmol: Chem.Mol,
                            torsion_indices: tuple,
                            wbo_tolerance: float = 0.03) -> tuple[Chem.Mol, list[int]]:
    """Build a smaller surrogate molecule for scanning one torsion angle.

    The aim is to keep the local chemical environment of the torsion while
    reducing the size of the system. Two steps are combined:
        - candidate bonds to cut are proposed by the empirical rules of
          `find_bonds_to_prune()`
        - when GFN2-xTB is available (``GFN2xTB().version()`` is not None),
          each candidate is cut on its own and kept only if the Wiberg bond
          order (WBO) of the central j-k bond differs from the parent value
          by less than ``wbo_tolerance``. Otherwise all candidates are used.

    The surviving bonds are then cut together to produce the final fragment.

    Args:
        rdmol (Chem.Mol): molecule.
        torsion_indices (tuple): (i, j, k, l, atoms to be rotated, atoms to be fixed)
        wbo_tolerance (float): largest accepted absolute WBO change of the
            j-k bond (exclusive). Defaults to 0.03.

    Returns:
        (fragment molecule, fragment atom indices corresponding to i, j, k, l
        as returned by `get_fragment_idx()`).

    References:
        https://pubs.acs.org/doi/10.1021/acs.jcim.2c01153
        https://www.biorxiv.org/content/10.1101/2020.08.27.270934v2
    """
    i, j, k, l = torsion_indices[:4]

    prunable = find_bonds_to_prune(rdmol, torsion_indices)

    if GFN2xTB().version() is None:
        # no xTB available: skip the WBO filter and cut every candidate
        selected = prunable
    else:
        # the WBO tables are indexed with the sorted atom index pair
        central = tuple(sorted([j, k]))
        selected = {}
        whole = GFN2xTB(rdmol).singlepoint()
        assert hasattr(whole, 'wbo'), "create_torsion_fragment() Error: no wbo for parent"
        for bond_idx, (p, q) in prunable.items():
            # cut this single bond and compare the central bond order
            trial = create_fragment_on_bonds(rdmol, {bond_idx: (p, q)})
            trial_result = GFN2xTB(trial).singlepoint()
            assert hasattr(trial_result, 'wbo'), "create_torsion_fragment() Error: no wbo for fragment"
            trial_central = tuple(sorted(get_fragment_idx(rdmol, central, trial)))
            wbo_change = abs(trial_result.wbo[trial_central] - whole.wbo[central])
            if wbo_change < wbo_tolerance:
                selected[bond_idx] = (p, q)

    frag_multi_breaks = create_fragment_on_bonds(rdmol, selected)
    frag_ijkl = get_fragment_idx(rdmol, (i, j, k, l), frag_multi_breaks)

    return frag_multi_breaks, frag_ijkl
|