rdworks 0.25.7__py3-none-any.whl → 0.35.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rdworks/readin.py CHANGED
@@ -1,11 +1,9 @@
1
1
  from pathlib import Path
2
2
 
3
3
  from rdkit import Chem
4
- from rdkit.Chem import AllChem, rdmolfiles, Draw
4
+ from rdkit.Chem import AllChem, rdmolfiles
5
5
 
6
- from rdworks.mol import Mol
7
- from rdworks.mollibr import MolLibr
8
- from rdworks.conf import Conf
6
+ from rdworks import Conf, Mol, MolLibr
9
7
  from rdworks.utils import compute, precheck_path, guess_mol_id
10
8
 
11
9
  import pandas as pd
rdworks/scaffold.py CHANGED
@@ -40,7 +40,7 @@ def remove_exocyclic(rdmol:Chem.Mol) -> Chem.Mol:
40
40
  fg_mol = [Chem.MolFromSmiles(x) for x in fg_smi]
41
41
  # ring count
42
42
  fg_rc = [rdMolDescriptors.CalcNumRings(g) for g in fg_mol]
43
- if 0 in fg_rc: # if one the fragmented parts has no ring system
43
+ if 0 in fg_rc: # if one of the fragmented parts has no ring system
44
44
  xbs.append(b.GetIdx())
45
45
  fg_smi = Chem.MolToSmiles(
46
46
  Chem.FragmentOnBonds(rdmol,xbs,addDummies=False)).split(".")
rdworks/std.py CHANGED
@@ -1,18 +1,19 @@
1
1
  import operator
2
- from typing import Tuple, Union
3
2
 
4
3
  from rdkit import Chem
4
+ from rdkit.Chem import rdDepictor
5
5
  from rdkit.Chem.MolStandardize import rdMolStandardize
6
6
 
7
7
 
8
- def desalt_smiles(smiles:str) -> Tuple[Union[str, None], Union[Chem.Mol, None]]:
9
- """Returns (desalted SMILES string, rdkit.Chem.Mol).
8
+
9
+ def desalt_smiles(smiles: str) -> tuple[str, Chem.Mol]:
10
+ """Remove salt(s) from SMILES.
10
11
 
11
12
  Args:
12
- smiles (str): input SMILES string.
13
+ smiles (str): SMILES.
13
14
 
14
15
  Returns:
15
- Tuple[Union[str, None], Union[Chem.Mol, None]]: (desalted SMILES, desalted rdkit.Chem.Mol)
16
+ (desalted SMILES, desalted Chem.Mol)
16
17
  """
17
18
  mols = []
18
19
  for smi in smiles.split("."):
@@ -22,18 +23,22 @@ def desalt_smiles(smiles:str) -> Tuple[Union[str, None], Union[Chem.Mol, None]]:
22
23
  mols.append((n, smi, rdmol))
23
24
  except:
24
25
  pass
25
- if len(mols) > 0:
26
- # `sorted` function compares the number of atoms first then smiles and rdmol.
27
- # Comparing smiles string would be okay but comparison of rdmol objects will
28
- # cause error because comparison operation for Chem.Mol is not supported.
29
- # So we need to restrict the key to the number of atoms.
30
- (n, desalted_smiles, desalted_rdmol) = sorted(mols, key=operator.itemgetter(0), reverse=True)[0]
31
- return (desalted_smiles, desalted_rdmol)
32
- else:
33
- return (None, None)
26
+
27
+ assert len(mols) > 0, "desalt_smiles() Error: invalid SMILES"
28
+
29
+ # `sorted` function compares the number of atoms first then smiles and rdmol.
30
+ # Comparing smiles string would be okay but comparison of rdmol objects will
31
+ # cause error because comparison operation for Chem.Mol is not supported.
32
+ # So we need to restrict the key to the number of atoms.
33
+
34
+ (n, desalted_smiles, desalted_rdmol) = sorted(mols,
35
+ key=operator.itemgetter(0),
36
+ reverse=True)[0]
34
37
 
38
+ return (desalted_smiles, desalted_rdmol)
39
+
35
40
 
36
- def standardize_smiles(smiles:str) -> str:
41
+ def standardize_smiles(smiles: str) -> str:
37
42
  """Returns standardized SMILES string.
38
43
 
39
44
  The rdMolStandardize.StandardizeSmiles() function performs the following steps:
@@ -62,7 +67,7 @@ def standardize_smiles(smiles:str) -> str:
62
67
  return rdMolStandardize.StandardizeSmiles(smiles)
63
68
 
64
69
 
65
- def standardize(smiles:str) -> Chem.Mol:
70
+ def standardize(smiles: str) -> Chem.Mol:
66
71
  """Returns standardized rdkit.Chem.Mol object.
67
72
 
68
73
  Args:
@@ -97,7 +102,7 @@ def standardize(smiles:str) -> Chem.Mol:
97
102
  return taut_uncharged_parent_clean_mol
98
103
 
99
104
 
100
- def neutralize_atoms(rdmol:Chem.Mol) -> Chem.Mol:
105
+ def neutralize_atoms(rdmol: Chem.Mol) -> Chem.Mol:
101
106
  """Neutralizes atoms.
102
107
 
103
108
  It is adapted from Noel O'Boyle's nocharge code:
@@ -122,22 +127,57 @@ def neutralize_atoms(rdmol:Chem.Mol) -> Chem.Mol:
122
127
  charges even if the neutralization introduces an overall formal charge on the molecule.
123
128
 
124
129
  Args:
125
- rdmol (rdkit.Chem.Mol) : input molecule.
130
+ rdmol (Chem.Mol) : molecule (not to be modified).
126
131
 
127
132
  Returns:
128
- Chem.Mol: a copy of neutralized rdkit.Chem.Mol object.
133
+ Chem.Mol: neutralized copy of molecule.
129
134
  """
130
-
131
- rdmol_ = Chem.Mol(rdmol)
135
+ mol = Chem.Mol(rdmol)
132
136
  pattern = Chem.MolFromSmarts("[+1!h0!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]")
133
- at_matches = rdmol_.GetSubstructMatches(pattern)
137
+ at_matches = mol.GetSubstructMatches(pattern)
134
138
  at_matches_list = [y[0] for y in at_matches]
139
+
135
140
  if len(at_matches_list) > 0:
136
141
  for at_idx in at_matches_list:
137
- atom = rdmol_.GetAtomWithIdx(at_idx)
142
+ atom = mol.GetAtomWithIdx(at_idx)
138
143
  chg = atom.GetFormalCharge()
139
144
  hcount = atom.GetTotalNumHs()
140
145
  atom.SetFormalCharge(0)
141
146
  atom.SetNumExplicitHs(hcount - chg)
142
147
  atom.UpdatePropertyCache()
143
- return rdmol_
148
+
149
+ return mol
150
+
151
+
152
def clean_2d(rdmol: Chem.Mol,
             reset_isotope: bool = True,
             remove_H: bool = True,
             ) -> tuple[Chem.Mol, list[Chem.Mol]]:
    """Prepare a copy of a molecule for 2D depiction.

    The input molecule is copied first and never modified.

    Args:
        rdmol (Chem.Mol): molecule (not to be modified).
        reset_isotope (bool, optional): whether to set the isotope of every
            atom to 0. Defaults to True.
        remove_H (bool, optional): whether to pass the copy through
            ``Chem.RemoveHs``. Defaults to True.

    Returns:
        (cleaned copy of molecule, list of conformers). The conformer list is
        only populated when the copy has more than one conformer and its
        default conformer is flagged as 3D; otherwise it is empty.
    """
    work = Chem.Mol(rdmol)
    n_conformers = work.GetNumConformers()

    # A molecule built from SMILES has no conformer at all; a single
    # conformer is likewise not collected.
    kept_conformers = []
    if n_conformers > 1 and work.GetConformer().Is3D():
        kept_conformers = list(work.GetConformers())

    if reset_isotope:
        for atom in work.GetAtoms():
            atom.SetIsotope(0)

    cleaned = Chem.RemoveHs(work) if remove_H else work

    return (cleaned, kept_conformers)
rdworks/torsion.py ADDED
@@ -0,0 +1,477 @@
1
+ import numpy as np
2
+
3
+ from rdkit import Chem
4
+ from rdworks.xtb.wrapper import GFN2xTB
5
+
6
+
7
def get_torsion_atoms(rdmol:Chem.Mol, strict:bool=True) -> list[tuple]:
    """Determine dihedral angle atoms (a-b-c-d) and rotating group for each rotatable bond (b-c).

    Rotatable bonds are located with a SMARTS pattern. For every matched bond
    b-c, atom ``a`` is picked among the neighbours of ``b`` (excluding ``c``)
    and atom ``d`` among the neighbours of ``c`` (excluding ``b``): the
    neighbour with the highest atomic number wins, the lowest atom index
    breaks ties, and H/F/Cl/Br/I are never picked.

    A bond is skipped when:
      - ``b`` or ``c`` has no eligible neighbour,
      - a-b or c-d both lie in one ring of fewer than five atoms,
      - the smaller side of the bond has fewer than three heavy atoms.

    Args:
        rdmol (Chem.Mol): molecule
        strict (bool): whether to exclude amide/imide/ester/acid bonds.

    Returns:
        [ (a, b, c, d, rot_atom_indices, fix_atom_indices),
          (a, b, c, d, rot_atom_indices, fix_atom_indices),
          ...,
        ]
        ``rot_atom_indices`` is the side of the bond with fewer heavy atoms
        and ``fix_atom_indices`` the other side (both tuples of atom indices).
    """
    # https://github.com/rdkit/rdkit/blob/1bf6ef3d65f5c7b06b56862b3fb9116a3839b229/rdkit/Chem/Lipinski.py#L47%3E
    # https://github.com/rdkit/rdkit/blob/de602c88809ea6ceba1e8ed50fd543b6e406e9c4/Code/GraphMol/Descriptors/Lipinski.cpp#L108
    if strict:
        # excludes amide/imide/ester/acid bonds
        rotatable_bond_pattern = Chem.MolFromSmarts(
            (
                "[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])("
                "[CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])&!$([#7,O,S!D1]-!@[CD3]="
                "[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])&!$([#7!D1]-!@[CD3]=[N+])]-,:;!@[!$"
                "(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])(["
                "CH3])[CH3])]"
            )
        )
    else:
        rotatable_bond_pattern = Chem.MolFromSmarts('[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]')

    rotatable_bonds = rdmol.GetSubstructMatches(rotatable_bond_pattern)

    # small rings (n=3 or 4), stored as sets for cheap membership tests.
    # e.g. AtomRings() == ((1, 37, 35, 34, 3, 2), (29, 28, 30)) keeps only
    # the three-membered ring.
    small_rings = [set(r) for r in rdmol.GetRingInfo().AtomRings() if len(r) < 5]

    forbidden_terminal_nuclei = {1, 9, 17, 35, 53}  # H,F,Cl,Br,I

    def pick_terminal(center_idx: int, partner_idx: int):
        """Return the index of the dihedral end atom on ``center_idx``, or None."""
        candidates = [
            (nbr.GetAtomicNum(), nbr.GetIdx())
            for nbr in rdmol.GetAtomWithIdx(center_idx).GetNeighbors()
            if nbr.GetIdx() != partner_idx
            and nbr.GetAtomicNum() not in forbidden_terminal_nuclei
        ]
        if not candidates:
            return None
        # highest atomic number first; lowest index on ties
        return max(candidates, key=lambda x: (x[0], -x[1]))[1]

    def in_same_small_ring(p: int, q: int) -> bool:
        """Tell whether atoms ``p`` and ``q`` share a 3- or 4-membered ring."""
        return any(p in ring and q in ring for ring in small_rings)

    def heavy_atom_count(atom_indices) -> int:
        """Count the non-hydrogen atoms among ``atom_indices``."""
        return sum(1 for i in atom_indices
                   if rdmol.GetAtomWithIdx(i).GetAtomicNum() > 1)

    torsion_angle_atom_indices = []

    for (b_idx, c_idx) in rotatable_bonds:
        # atom ``a`` that defines the dihedral angle on the b side
        a_idx = pick_terminal(b_idx, c_idx)
        if a_idx is None or in_same_small_ring(a_idx, b_idx):
            continue

        # atom ``d`` that defines the dihedral angle on the c side
        d_idx = pick_terminal(c_idx, b_idx)
        if d_idx is None or in_same_small_ring(c_idx, d_idx):
            continue

        # determine a group of atoms to be rotated
        # https://ctr.fandom.com/wiki/Break_rotatable_bonds_and_report_the_fragments
        em = Chem.EditableMol(rdmol)
        em.RemoveBond(b_idx, c_idx)
        frags = Chem.GetMolFrags(em.GetMol(), asMols=False)  # tuple of tuples

        # Select the two sides by membership instead of unpacking exactly two
        # fragments: a multi-component input (e.g. a salt) yields more than
        # two fragments and would otherwise raise ValueError.
        frag_b = next(f for f in frags if b_idx in f)
        frag_c = next(f for f in frags if c_idx in f)
        if frag_b == frag_c:
            # removing the bond did not split the component; nothing to rotate
            continue

        hac_b = heavy_atom_count(frag_b)
        hac_c = heavy_atom_count(frag_c)

        # smaller fragment will be rotated and must contain at least three heavy atoms
        if min(hac_b, hac_c) >= 3:
            (frag_rot, frag_fix) = sorted([(hac_b, frag_b), (hac_c, frag_c)])
            torsion_angle_atom_indices.append(
                (a_idx, b_idx, c_idx, d_idx, frag_rot[1], frag_fix[1]))

    return torsion_angle_atom_indices
112
+
113
+
114
+
115
+
116
def find_atoms_at_bond_distance(rdmol: Chem.Mol,
                                start_atom_idx: int,
                                distance: int) -> list[int]:
    """Finds atoms reachable from a starting atom by a path of exactly ``distance`` bonds.

    The search walks every simple path (no atom visited twice on the same
    path) that starts at ``start_atom_idx``. Each path of length ``distance``
    contributes its end atom, so in ring systems an atom reachable through
    several such paths appears once per path.

    Args:
        rdmol: An RDKit Mol object.
        start_atom_idx: The index of the starting atom.
        distance: The desired bond distance (number of bonds along the path).

    Returns:
        A list of atom indices at the specified distance. Empty when no path
        of that length exists or when ``distance`` is negative.

    Raises:
        AssertionError: if ``start_atom_idx`` is not a valid atom index.
    """
    # Reject negative indices as well as too-large ones.
    assert 0 <= start_atom_idx < rdmol.GetNumAtoms(), "start_atom_idx out of range."

    if distance < 0:
        # No path has a negative length; skip the exhaustive walk that could
        # never record anything.
        return []

    found_atoms: list[int] = []
    visited: set[int] = set()

    def dfs(curr_atom_idx: int, curr_bond_dist: int) -> None:
        if curr_bond_dist == distance:
            found_atoms.append(curr_atom_idx)
            return

        visited.add(curr_atom_idx)
        for next_atom in rdmol.GetAtomWithIdx(curr_atom_idx).GetNeighbors():
            next_atom_idx = next_atom.GetIdx()
            if next_atom_idx not in visited:
                dfs(next_atom_idx, curr_bond_dist + 1)

        # Backtrack so that other paths may pass through this atom
        visited.remove(curr_atom_idx)

    dfs(start_atom_idx, 0)

    return found_atoms
152
+
153
+
154
def get_bond_distance(rdmol: Chem.Mol, start_atom_idx: int) -> dict:
    """Group atoms by their path length (in bonds) from a given atom.

    Every simple path (no atom visited twice on the same path) starting at
    ``start_atom_idx`` is walked depth-first. Each atom reached is recorded
    under the length of the path that reached it, so in ring systems an atom
    can be listed under several distances, or more than once under the same
    distance.

    Args:
        rdmol: An RDKit Mol object.
        start_atom_idx: The index of the starting atom.

    Returns:
        dict: { bond_distance : [atom_index, ...], ... } where distance 0
        holds the starting atom itself.

    Raises:
        AssertionError: if ``start_atom_idx`` is not a valid atom index.
    """
    # Reject negative indices as well as too-large ones.
    assert 0 <= start_atom_idx < rdmol.GetNumAtoms(), "start_atom_idx out of range."

    bond_distance: dict[int, list[int]] = {}
    visited: set[int] = set()

    def dfs(curr_atom_idx: int, curr_bond_dist: int) -> None:
        bond_distance.setdefault(curr_bond_dist, []).append(curr_atom_idx)

        visited.add(curr_atom_idx)
        for next_atom in rdmol.GetAtomWithIdx(curr_atom_idx).GetNeighbors():
            next_atom_idx = next_atom.GetIdx()
            if next_atom_idx not in visited:
                dfs(next_atom_idx, curr_bond_dist + 1)

        # Backtrack so that other paths may pass through this atom
        visited.remove(curr_atom_idx)

    dfs(start_atom_idx, 0)

    return bond_distance
189
+
190
+
191
def find_bonds_to_prune(rdmol: Chem.Mol,
                        torsion_indices: tuple,
                        bond_dist_threshold: int = 4,
                        bond_order_threshold: float = 1.75,
                        electronegative: list[int] = [7, 8, 9, 17, 35],
                        ) -> dict[int, list[int]]:
    """Find pruning candidate bonds around a torsion to construct a fragment.

    Rules for a candidate bond to break:

    For (i-j-k-l) torsion,

    1. NOT (bond distance from j or k < ``bond_dist_threshold``)
    2. NOT (bond order > ``bond_order_threshold``)
    3. NOT (either bond atom is in ``electronegative``)

    In addition, ring bonds are never candidates.

    Args:
        rdmol: An RDKit Mol object.
        torsion_indices: tuple whose first four items are the torsion atom
            indices (i, j, k, l); any further items are ignored here.
        bond_dist_threshold: minimum bond distance from j/k before a bond may
            be broken (rule 1).
        bond_order_threshold: bonds whose order exceeds this value are kept
            (rule 2).
        electronegative: atomic numbers that protect a bond from being broken
            (rule 3). The default lists the elements with Pauling
            electronegativity above 2.9. The list is only read, never
            modified.

    Pauling electronegativity:
        ```py
        from mendeleev import element
        for i in range(1, 119): # 118 is the highest atomic number known
            el = element(i)
            if isinstance(el.en_pauling, float) and el.en_pauling > 2.9:
                print(f"Element {i}: {el.symbol} {el.atomic_number} {el.en_pauling}")
        ```
        Element 7: N 7 3.04
        Element 8: O 8 3.44
        Element 9: F 9 3.98
        Element 17: Cl 17 3.16
        Element 35: Br 35 2.96

    Returns:
        dict: { bond_index : [atom index closer to the torsion, the other], ... }
    """

    _, j, k, _ = torsion_indices[:4]

    dist_from_j = get_bond_distance(rdmol, j)
    dist_from_k = get_bond_distance(rdmol, k)
    distance_maps = (dist_from_j, dist_from_k)

    # every atom recorded at a distance below the threshold from j or from k
    forbidden = {
        atom_idx
        for dist_map in distance_maps
        for d, atom_indices in dist_map.items()
        if d < bond_dist_threshold
        for atom_idx in atom_indices
    }

    electronegative_nums = frozenset(electronegative)

    # the original author notes that starting from either j or k yields the
    # same result
    start_atom_idx = k

    found_bonds = {}
    visited = set()

    def distance_score(atom_idx: int) -> int:
        """Sum every distance (from j and from k) under which the atom is recorded."""
        return sum(d
                   for dist_map in distance_maps
                   for d, atom_indices in dist_map.items()
                   if atom_idx in atom_indices)

    def ordered(p: int, q: int) -> list[int]:
        """Returns a list of atom indices by bond distance.

        Args:
            p (int): atom index
            q (int): atom index

        Returns:
            list[int]: (atom index closer to the torsion angle, the other)
        """
        if distance_score(p) < distance_score(q):
            return [p, q]
        return [q, p]

    def dfs(curr_atom_idx: int, bond_dist: int):
        """Depth-first recursive search of bonded atoms.

        Args:
            curr_atom_idx (int): atom index.
            bond_dist (int): bond distance.
        """
        curr_atom = rdmol.GetAtomWithIdx(curr_atom_idx)
        visited.add(curr_atom_idx)
        n1 = curr_atom.GetAtomicNum()
        for next_atom in curr_atom.GetNeighbors():
            next_atom_idx = next_atom.GetIdx()
            bond = rdmol.GetBondBetweenAtoms(curr_atom_idx, next_atom_idx)
            n2 = next_atom.GetAtomicNum()
            # forbidden (rule 1)
            too_close = (curr_atom_idx in forbidden) and (next_atom_idx in forbidden)
            # bond order (rule 2)
            bond_order = not (bond.GetBondTypeAsDouble() > bond_order_threshold)
            # Pauling electronegativity (rule 3)
            bond_pauling = not ((n1 in electronegative_nums) or (n2 in electronegative_nums))
            if (bond_dist >= bond_dist_threshold) and (not too_close) \
                and (not bond.IsInRing()) and bond_order and bond_pauling:
                # determine which atom has shorter bond distance to the torsion angle (j or k)
                found_bonds[bond.GetIdx()] = ordered(curr_atom_idx, next_atom_idx)
                # NOTE(review): returning here skips the remaining neighbours
                # and leaves curr_atom_idx in `visited`; kept as in the
                # original, confirm this is intended.
                return
            if next_atom_idx not in visited:
                dfs(next_atom_idx, bond_dist + 1)
        # Backtrack
        visited.remove(curr_atom_idx)

    dfs(start_atom_idx, 0)

    return found_bonds
305
+
306
+
307
def get_fragment_idx(parent: Chem.Mol,
                     indices: tuple,
                     fragment: Chem.Mol) -> tuple:
    """Get fragment atom indices corresponding to given parent indices.

    Atoms are matched by exact equality of their 3D coordinates in the
    default conformers of ``parent`` and ``fragment``, so the fragment must
    have kept the parent's coordinates unchanged.

    In the original author's benchmark (12 calls) this coordinate-based
    lookup took about 0.25-0.67 ms per call, compared with the MCS-based
    method `get_fragment_idx_with_mcs()`.

    Args:
        parent (Chem.Mol): rdkit Chem.Mol object.
        indices (tuple): parent atom indices to look up in the fragment.
        fragment (Chem.Mol): fragment originated from the parent.

    Returns:
        tuple: fragment atom indices, in the order of ``indices``. A parent
        atom with no coordinate match in the fragment contributes nothing, and
        one matching several fragment atoms contributes all of them, so the
        length may differ from ``len(indices)``.
    """
    parent_xyz = parent.GetConformer().GetPositions()  # numpy.ndarray
    frag_xyz = fragment.GetConformer().GetPositions()  # numpy.ndarray
    query_positions = [parent_xyz[i] for i in indices]

    if len(frag_xyz) == 0:
        return ()

    matched: list[int] = []
    for q in query_positions:
        # compare the query position against every fragment row at once
        same_row = (frag_xyz == q).all(axis=1)
        # .tolist() converts numpy integers to plain Python ints
        matched.extend(np.flatnonzero(same_row).tolist())

    return tuple(matched)
340
+
341
+
342
+
343
def get_fragment_idx_with_mcs(parent: Chem.Mol,
                              indices: tuple,
                              fragment: Chem.Mol) -> tuple:
    """Get fragment atom indices corresponding to given parent indices.

    The maximum common substructure (MCS) of ``parent`` and ``fragment`` is
    matched against both molecules; the position of each parent index within
    the parent match is then used to read the corresponding atom from the
    fragment match.

    Warning:
        It uses MCS and can be extremely slow sometimes.
        For example, the original author's elapsed times for 12 torsion
        angles of atorvastatin ranged from under 1 ms to about 90 s
        (5.5, 1.9, 3.1, 9.4, 90.0, 0.19, 0.013, 0.023, 0.023, 0.0008,
        0.0008 and 0.041 sec.).

    Args:
        parent (Chem.Mol): rdkit Chem.Mol object.
        indices (tuple): parent atom indices to map within the MCS.
        fragment (Chem.Mol): fragment originated from the parent.

    Returns:
        tuple: fragment atom indices, one per entry of ``indices`` and in the
        same order.

    Raises:
        AssertionError: if alternative substructure matches disagree on the
            mapping of ``indices``.
        ValueError: if one of ``indices`` is not part of a parent match.
    """
    # rdFMCS is a submodule of rdkit.Chem; import it explicitly rather than
    # relying on `Chem.rdFMCS` having been loaded elsewhere.
    from rdkit.Chem import rdFMCS

    mcs_result = rdFMCS.FindMCS([parent, fragment])
    mcs_mol = Chem.MolFromSmarts(mcs_result.smartsString)

    parent_matches = parent.GetSubstructMatches(mcs_mol)
    frag_matches = fragment.GetSubstructMatches(mcs_mol)

    # position of every requested parent atom inside the MCS match
    indices_idx = None
    for parent_matched_indices in parent_matches:
        # It is possible to have more than one matches (i.e. methyl rotation).
        # However, even if there are more than one matches, the parent indices
        # should be the same.
        positions = {x: parent_matched_indices.index(x) for x in indices}
        if indices_idx is None:
            indices_idx = positions
        else:
            assert all([indices_idx[x] == positions[x] for x in indices])

    # fragment atom found at the same MCS position
    indices_map = None
    for frag_matched_indices in frag_matches:
        # it is possible to have more than one matches (i.e. methyl rotation)
        mapped = {x: frag_matched_indices[indices_idx[x]] for x in indices}
        if indices_map is None:
            indices_map = mapped
        else:
            assert all([indices_map[x] == mapped[x] for x in indices])

    return tuple([indices_map[x] for x in indices])
397
+
398
+
399
+
400
def create_fragment_on_bonds(rdmol: Chem.Mol, bonds: dict, cap: bool = True) -> Chem.Mol | None:
    """Create a fragment that preserves defined atoms.

    The molecule is cut at every bond in ``bonds`` and the piece containing
    all of the "preserved" atoms is returned.

    Args:
        rdmol (Chem.Mol): input molecule.
        bonds (dict): {bond_index : (preserved_atom_index, removed_atom_index), ...}
        cap (bool): whether to cap the dummy atom(s) with hydrogen(s)

    Returns:
        Chem.Mol: resulting fragment molecule, a copy of ``rdmol`` when
        ``bonds`` is empty, or None when no single piece contains every
        preserved atom.
    """
    if not bonds:
        # Nothing to cut: the "fragment" is the molecule itself. This avoids
        # handing an empty bond list to FragmentOnBonds.
        return Chem.Mol(rdmol)

    fragments = Chem.FragmentOnBonds(rdmol, list(bonds))
    preserved_atoms = {preserved for preserved, _removed in bonds.values()}

    piece_indices = Chem.GetMolFrags(fragments)
    piece_mols = Chem.GetMolFrags(fragments, asMols=True)

    for fragment_indices, fragment_mol in zip(piece_indices, piece_mols):
        if not preserved_atoms.issubset(fragment_indices):
            continue

        if cap:
            # cap dummy atoms with hydrogens
            for atom in fragment_mol.GetAtoms():
                if atom.GetAtomicNum() == 0:
                    atom.SetAtomicNum(1)
                    # FragmentOnBonds labels its dummy atoms through the
                    # isotope field; clear it so the cap is a plain hydrogen.
                    atom.SetIsotope(0)

        return fragment_mol

    return None
425
+
426
+
427
def create_torsion_fragment(rdmol: Chem.Mol,
                            torsion_indices: tuple,
                            wbo_tolerance: float = 0.03) -> tuple[Chem.Mol, list[int]]:
    """Create a close surrogate fragment that captures the PES of the intended torsion.

    Fragmentation aims to preserve the local chemical environment around the
    targeted torsion while increasing calculation speed and reducing potential
    complications. To avoid oversimplification and inaccurate approximation,
    two strategies are combined:
      - candidate bonds to break are generated by a set of empirical rules
        (see `find_bonds_to_prune()`),
      - when GFN2-xTB is available, each candidate is further filtered by the
        Wiberg bond order (WBO) from a semi-empirical QM single point. It has
        been shown that the WBO provides a fast and robust measure of whether
        a torsion profile has been disrupted by fragmentation. A candidate is
        kept only if breaking it alone changes the WBO of the central torsion
        bond by less than ``wbo_tolerance``.

    Args:
        rdmol (Chem.Mol): molecule.
        torsion_indices (tuple): (i, j, k, l, atoms to be rotated, atoms to be fixed)
        wbo_tolerance (float): largest accepted absolute WBO change of the
            j-k bond. Defaults to 0.03.

    Returns:
        (simplified fragment molecule, fragment atom indices matching i, j, k, l)

    References:
        https://pubs.acs.org/doi/10.1021/acs.jcim.2c01153
        https://www.biorxiv.org/content/10.1101/2020.08.27.270934v2
    """
    (i, j, k, l) = torsion_indices[:4]

    candidates = find_bonds_to_prune(rdmol, torsion_indices)

    if GFN2xTB().version() is None:
        # no xTB available: break every rule-based candidate
        bonds_to_break = candidates
    else:
        central_bond = tuple(sorted([j, k]))
        bonds_to_break = {}
        # filter candidate(s) by Wiberg bond order (WBO)
        parent = GFN2xTB(rdmol).singlepoint()
        assert hasattr(parent, 'wbo'), "create_torsion_fragment() Error: no wbo for parent"
        for bond_idx, (p, q) in candidates.items():
            trial_mol = create_fragment_on_bonds(rdmol, {bond_idx: (p, q)})
            trial = GFN2xTB(trial_mol).singlepoint()
            assert hasattr(trial, 'wbo'), "create_torsion_fragment() Error: no wbo for fragment"
            # WBO difference at the torsion angle bond
            trial_bond = tuple(sorted(get_fragment_idx(rdmol, central_bond, trial_mol)))
            wbo_shift = abs(trial.wbo[trial_bond] - parent.wbo[central_bond])
            if wbo_shift < wbo_tolerance:
                bonds_to_break[bond_idx] = (p, q)

    frag_multi_breaks = create_fragment_on_bonds(rdmol, bonds_to_break)
    frag_ijkl = get_fragment_idx(rdmol, (i, j, k, l), frag_multi_breaks)

    return frag_multi_breaks, frag_ijkl