prism-pruner 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of prism-pruner might be problematic. Click here for more details.

@@ -0,0 +1,468 @@
1
+ """PRISM - PRuning Interface for Similar Molecules."""
2
+
3
+ from copy import deepcopy
4
+ from dataclasses import dataclass
5
+ from typing import Iterable, Sequence
6
+
7
+ import numpy as np
8
+ from networkx import (
9
+ Graph,
10
+ connected_components,
11
+ has_path,
12
+ is_isomorphic,
13
+ minimum_spanning_tree,
14
+ shortest_path,
15
+ subgraph,
16
+ )
17
+
18
+ from prism_pruner.algebra import norm, norm_of, vec_angle
19
+ from prism_pruner.graph_manipulations import (
20
+ get_phenyl_ids,
21
+ get_sp_n,
22
+ is_amide_n,
23
+ is_ester_o,
24
+ )
25
+ from prism_pruner.rmsd import rmsd_and_max
26
+ from prism_pruner.typing import Array1D_bool, Array1D_int, Array2D_float, Array2D_int, F
27
+ from prism_pruner.utils import rotate_dihedral
28
+
29
+
30
+ @dataclass
31
+ class Torsion:
32
+ """Torsion class."""
33
+
34
+ i1: int
35
+ i2: int
36
+ i3: int
37
+ i4: int
38
+ mode: str | None = None
39
+
40
+ @property
41
+ def torsion(self) -> tuple[int, int, int, int]:
42
+ """Return tuple of indices defining the torsion."""
43
+ return (self.i1, self.i2, self.i3, self.i4)
44
+
45
+
46
def in_cycle(torsion: Torsion, graph: Graph) -> bool:
    """Return True if the torsion is part of a cycle.

    The central i2-i3 edge is temporarily removed from ``graph``; if i1 and
    i4 are still connected afterwards, the bond is considered cyclic.

    The graph is modified in place during the check, but the edge (together
    with any attributes stored on it) is always put back, even when the path
    search raises.
    """
    # remember the edge attributes so that re-adding the edge does not lose them
    edge_data = dict(graph.get_edge_data(torsion.i2, torsion.i3, default={}))

    graph.remove_edge(torsion.i2, torsion.i3)
    try:
        cyclical: bool = has_path(graph, torsion.i1, torsion.i4)
    finally:
        # restore the graph no matter what happened above
        graph.add_edges_from([(torsion.i2, torsion.i3, edge_data)])

    return cyclical
52
+
53
+
54
def is_rotable(
    torsion: Torsion,
    graph: Graph,
    hydrogen_bonds: list[list[int]],
    keepdummy: bool = False,
) -> bool:
    """Return True if the Torsion object is rotatable.

    hydrogen_bonds: list of sorted [index, index] pairs. The central bond is
    compared against it as a sorted list, so entries have to be lists too
    (a tuple entry never compares equal to a list).

    keepdummy: if True, skip the ``is_nondummy`` checks and accept the
    torsion as soon as one of the central atoms is free.
    """
    central_bond = sorted((torsion.i2, torsion.i3))

    # torsions around a hydrogen bond are never rotated
    if central_bond in hydrogen_bonds:
        return False

    # at least one of the two central atoms has to be free
    if not (_is_free(torsion.i2, graph) or _is_free(torsion.i3, graph)):
        return False

    if keepdummy:
        return True

    # both ends of the bond must carry distinguishable substituents
    return is_nondummy(torsion.i2, torsion.i3, graph) and is_nondummy(
        torsion.i3, torsion.i2, graph
    )
78
+
79
+
80
def get_n_fold(torsion: Torsion, graph: Graph) -> int:
    """Return the n-fold of the rotation around the central i2-i3 bond.

    The decision is based on the atomic numbers stored under the "atomnos"
    node attribute, on ``is_amide_n`` and on ``get_sp_n`` for the two central
    atoms, and, for bonds with exactly one side at sp_n == 3, on
    ``torsion.mode``.
    """
    central = (torsion.i2, torsion.i3)
    nums = tuple(graph.nodes[idx]["atomnos"] for idx in central)

    # a hydrogen on the central bond: H-N, H-O hydrogen bonds
    if 1 in nums:
        return 6

    # tertiary amides rotations are 2-fold
    if any(is_amide_n(idx, graph, mode=2) for idx in central):
        return 2

    # no C, N or S atom on the bond: O-O, S-S and everything else
    if not any(z in nums for z in (6, 7, 16)):
        return 4

    sp_n_i2 = get_sp_n(torsion.i2, graph)
    sp_n_i3 = get_sp_n(torsion.i3, graph)
    both = (sp_n_i2, sp_n_i3)

    if sp_n_i2 == 3 and sp_n_i3 == 3:
        return 3

    # only one side is sp3-like: Csp3-X, Nsp3-X, Ssulfone-X
    if 3 in both:
        if torsion.mode == "csearch":
            return 3

        if torsion.mode == "symmetry":
            # fall back to 2 when get_sp_n gave a falsy value for i3
            return sp_n_i3 or 2

    if 2 in both:
        return 2

    return 4  # Ar-Ar, Ar-CO, and everything else
109
+
110
+
111
def get_angles(torsion: Torsion, graph: Graph) -> tuple[int, ...]:
    """Return the rotation angles (in degrees) associated with the torsion.

    The angles are the n evenly spaced multiples of 360/n starting from 0,
    where n is the value returned by ``get_n_fold``. Only n in (2, 3, 4, 6)
    is supported; any other value raises KeyError.
    """
    n_fold = get_n_fold(torsion, graph)

    # 2 -> (0, 180), 3 -> (0, 120, 240), 4 -> (0, 90, 180, 270), 6 -> (0, 60, ..., 300)
    angles_by_fold = {n: tuple(range(0, 360, 360 // n)) for n in (2, 3, 4, 6)}

    return angles_by_fold[n_fold]
123
+
124
+
125
def _is_free(index: int, graph: Graph) -> bool:
    """Return whether the atom at ``index`` is free to rotate.

    Return True if the index specified
    satisfies all of the following:
    - Is not a carbon atom with get_sp_n == 2 that is bonded to an oxygen
      (i.e. a carbonyl-like carbon)
    - Is not flagged by is_amide_n(index, graph, mode=1)
    - Is not flagged by is_ester_o(index, graph)
    """
    # the three conditions are all evaluated up front, then combined
    atom_is_carbon = graph.nodes[index]["atomnos"] == 6
    atom_is_sp2 = get_sp_n(index, graph) == 2
    bonded_to_oxygen = 8 in [graph.nodes[n]["atomnos"] for n in graph.neighbors(index)]

    if atom_is_carbon and atom_is_sp2 and bonded_to_oxygen:
        return False

    return not (is_amide_n(index, graph, mode=1) or is_ester_o(index, graph))
150
+
151
+
152
def is_nondummy(i: int, root: int, graph: Graph) -> bool:
    """Return whether the torsion is not dummy.

    Checks that a molecular rotation along the dihedral
    angle (*, root, i, *) is non-dummy, that is the atom
    at index i, in the direction opposite to the one leading
    to root, has different substituents. i.e. methyl, CF3 and tBu
    rotations should return False.

    ``root`` has to be a neighbor of ``i`` in ``graph`` (ValueError
    otherwise). The input graph is not modified: all edge removals are
    done on a deep copy.
    """
    # for now, we only discard rotations around carbon
    # and nitrogen atoms, like methyl/tert-butyl/triphenyl
    # and flat symmetrical rings like phenyl, N-pyrrolyl...
    if graph.nodes[i]["atomnos"] not in (6, 7):
        return True

    def same_element(n1, n2):  # type: ignore[no-untyped-def]
        """Node matcher: two nodes are equivalent if they share the atomic number."""
        return n1["atomnos"] == n2["atomnos"]

    work = deepcopy(graph)
    substituents = list(work.neighbors(i))
    substituents.remove(root)
    n_substituents = len(substituents)

    # if node i has two bonds only (one with root and one with a)
    # and the other atom (a) has two bonds only (one with i)
    # the rotation is considered dummy: some other rotation
    # will account for its freedom (i.e. alkynes, hydrogen bonds)
    if n_substituents == 1 and len(list(work.neighbors(substituents[0]))) == 2:
        return False

    # check if it is a phenyl-like rotation
    if n_substituents == 2:
        # get the 6 indices of the aromatic atoms (i1-i6), or None
        ring = get_phenyl_ids(i, work)

        if ring is not None:
            i1, i2, i3, i4, i5, i6 = ring

            # cut the ring so that the i2-i3 and the i5-i6 regions become
            # separate fragments that can be compared to each other
            for a, b in ((i3, i4), (i4, i5), (i1, i2), (i1, i6)):
                work.remove_edge(a, b)

            halves = [
                subgraph(work, component)
                for component in connected_components(work)
                if i2 in component or i6 in component
            ]

            if len(halves) != 2:
                # We should not end up here, but if we do, rotation should not be dummy
                return True

            return not is_isomorphic(halves[0], halves[1], node_match=same_element)

    # if not, compare immediate neighbors of i
    for n in substituents:
        work.remove_edge(i, n)

    # collect the fragments around the chopped n-i bonds,
    # but only those that do not contain root nor are other random,
    # disconnected parts of the graph
    fragments = [
        component
        for component in connected_components(work)
        if root not in component and any(n in component for n in substituents)
    ]

    # a single fragment: the torsion is likely to be rotable
    # (tetramethylguanidyl alanine C(β)-N bond)
    if len(fragments) == 1:
        return True

    branches = [subgraph(work, component) for component in fragments]

    # Care should be taken because chiral centers are not taken into account: when
    # the substituents only differ by stereochemistry, and the rotation is not an
    # element of symmetry of the subsystem, the rotation is considered dummy even
    # if it would be more correct not to. For rotationally corrected RMSD this
    # should only cause small inefficiencies and not lead to discarding any good conformer.
    return any(
        not is_isomorphic(branches[0], other, node_match=same_element) for other in branches[1:]
    )
235
+
236
+
237
def get_hydrogen_bonds(
    coords: Array2D_float,
    atomnos: Array1D_int,
    graph: Graph,
    d_min: float = 2.5,
    d_max: float = 3.3,
    max_angle: int = 45,
    elements: Sequence[Sequence[int]] | None = None,
    fragments: Sequence[Sequence[int]] | None = None,
) -> list[list[int]]:
    """Return a list of sorted [index, index] pairs of hydrogen bonding partners.

    A pair is recorded for a donor atom Y and an acceptor atom X when:
    - the Y-X distance is strictly between d_min and d_max
    - an H atom bonded to Y (according to graph) gives an angle, as computed
      by vec_angle against the Y-X direction, below max_angle

    Only the first H of Y that satisfies the angle criterion is used for a
    given Y/X pair. The returned pair holds the H index and the index of the
    heteroatom that is farther from it.

    elements: iterable of two iterables with donor atomic numbers in the first
    element and acceptors in the second. default: ((7, 8), (7, 8, 9))

    If fragments is specified (iterable of iterable of indices for each fragment)
    the function only returns inter-fragment hydrogen bonds.
    """
    if elements is None:
        elements = ((7, 8), (7, 8, 9))

    # indices of the donor and acceptor atoms (N, O and F by default for acceptors)
    het_idx_from = np.array([i for i, a in enumerate(atomnos) if a in elements[0]], dtype=int)
    het_idx_to = np.array([i for i, a in enumerate(atomnos) if a in elements[1]], dtype=int)

    hbs = []

    for i1 in het_idx_from:
        for i2 in het_idx_to:
            # if inter-fragment HBs are requested, skip intra-HBs
            if fragments is not None and any((i1 in f and i2 in f) for f in fragments):
                continue

            # keep close pairs only
            if not d_min < norm_of(coords[i1] - coords[i2]) < d_max:
                continue

            # all H atoms attached to the donor
            hydrogens = [i for i in graph.neighbors(i1) if graph.nodes[i]["atomnos"] == 1]

            # versor connecting the two heteroatoms
            versor = norm(coords[i2] - coords[i1])

            for iH in hydrogens:
                # vectors connecting heteroatoms to H, and their lengths
                v1 = coords[iH] - coords[i1]
                v2 = coords[iH] - coords[i2]
                d1 = norm_of(v1)
                d2 = norm_of(v2)

                # scalar projections in the heteroatom-heteroatom direction
                l1 = v1 @ versor
                l2 = v2 @ -versor

                # angle between Het-H and Het-Het, measured from the heteroatom
                # with the smaller projection (presumably in degrees, as it is
                # compared with max_angle - confirm against vec_angle)
                if l1 < l2:
                    alfa = vec_angle(v1, versor)
                else:
                    alfa = vec_angle(v2, -versor)

                # skip H atoms too far from being in line with the heteroatoms
                if not alfa < max_angle:
                    continue

                # pair the H with the heteroatom it is farther from
                hbs.append(sorted((iH, i2 if d1 < d2 else i1)))
                break

    return hbs
312
+
313
+
314
def _get_rotation_mask(graph: Graph, torsion: Iterable[int]) -> Array1D_bool:
    """Return the rotation mask to be applied to coordinates before rotation.

    Get mask for the atoms that will rotate in a torsion:
    all the ones in the graph reachable from the last index
    of the torsion but not going through the central two
    atoms in the torsion quadruplet.

    The i2-i3 edge is temporarily removed from ``graph``; it is always put
    back (together with any attributes stored on it), even if the
    reachability search raises.

    The mask has one entry per node, in ``graph.nodes`` order.
    """
    _, i2, i3, i4 = torsion

    # remember the edge attributes so that re-adding the edge does not lose them
    edge_data = dict(graph.get_edge_data(i2, i3, default={}))

    graph.remove_edge(i2, i3)
    try:
        # get all indices reachable from i4 not going through i2-i3
        reachable_indices = shortest_path(graph, i4).keys()
    finally:
        # restore modified graph
        graph.add_edges_from([(i2, i3, edge_data)])

    # generate boolean mask
    mask = np.array([i in reachable_indices for i in graph.nodes], dtype=bool)

    # do not rotate i3: it would not move, since it lies on the rotation axis
    # NOTE(review): i3 is used as a position here, which assumes node labels
    # coincide with their position in graph.nodes - confirm against callers
    mask[i3] = False

    return mask
344
+
345
+
346
def _get_quadruplets(graph: Graph) -> Array2D_int:
    """Return an array of quadruplets that indicate potential torsions.

    One quadruplet (k, i, j, m) is generated for each edge (i, j) of a
    spanning tree of ``graph`` for which both i and j have at least one
    other neighbor in the original graph: k is the first such neighbor of i
    and m the first such neighbor of j.

    The result always has shape (n, 4); when no quadruplet can be built, an
    empty integer array of shape (0, 4) is returned.
    """
    # Step 1: Find spanning tree
    spanning_tree = minimum_spanning_tree(graph)

    # Step 2: Add dihedrals for spanning tree
    dihedrals = []

    for i, j in spanning_tree.edges():
        # first neighbor of each end (in the original graph) that is not part
        # of the edge itself; None means that end has no other neighbor
        k = next((n for n in graph.neighbors(i) if n not in (i, j)), None)
        m = next((n for n in graph.neighbors(j) if n not in (i, j)), None)

        # compare with None explicitly: index 0 is a valid neighbor
        if k is not None and m is not None:
            # Form dihedral: neighbor_of_i - i - j - neighbor_of_j
            dihedrals.append((k, i, j, m))

    if not dihedrals:
        # np.array([]) would be 1-D and float: keep the 2-D integer layout
        return np.empty((0, 4), dtype=int)

    return np.array(dihedrals)
370
+
371
+
372
def get_torsions(
    graph: Graph,
    hydrogen_bonds: list[list[int]],
    double_bonds: list[tuple[int, int]],
    keepdummy: bool = False,
    mode: str = "csearch",
) -> list[Torsion]:
    """Return list of Torsion objects for the rotable bonds of the graph.

    Candidates come from ``_get_quadruplets``. A candidate is discarded if
    its central bond, as a sorted tuple, is listed in ``double_bonds``, if
    it is part of a cycle, or if ``is_rotable`` rejects it. ``mode`` is
    stored on every returned Torsion.
    """
    torsions = []

    for quadruplet in _get_quadruplets(graph):
        _, i2, i3, _ = quadruplet
        central_bond = tuple(sorted((i2, i3)))

        # double bonds do not rotate
        if central_bond in double_bonds:
            continue

        candidate = Torsion(*quadruplet, mode=mode)

        # ring bonds do not rotate either
        if in_cycle(candidate, graph):
            continue

        # keep only torsions that represent a rotable bond
        if is_rotable(candidate, graph, hydrogen_bonds, keepdummy=keepdummy):
            torsions.append(candidate)

    return torsions
398
+
399
+
400
def rotationally_corrected_rmsd_and_max(
    ref: Array2D_float,
    coord: Array2D_float,
    atomnos: Array1D_int,
    torsions: Array2D_int,
    graph: Graph,
    angles: Sequence[Sequence[int]],
    heavy_atoms_only: bool = True,
    debugfunction: F | None = None,
    return_type: str = "rmsd",
) -> tuple[float, float] | Array2D_float:
    """Return RMSD and max deviation, corrected for degenerate torsions.

    For each torsion, every angle in the matching ``angles`` entry is tried
    on the last torsion atom only, and the one giving the lowest 4-atom RMSD
    against ``ref`` is then applied to the whole rotating fragment. The RMSD
    and maximum deviation between ``ref`` and the corrected ``coord`` are
    computed afterwards, on non-hydrogen atoms only if ``heavy_atoms_only``.

    return_type: "rmsd" returns the (rmsd, maxdev) tuple; "coords" returns
    the coordinates obtained after undoing all the applied rotations.

    debugfunction: optional callable receiving one message per torsion.
    """
    assert return_type in ("rmsd", "coords")

    torsion_corrections = [0] * len(torsions)

    # Rotate every dummy torsion by the appropriate increment until we minimize local RMSD
    for i, torsion in enumerate(torsions):
        best_rmsd = 1e10

        # Look for the rotational angle that minimizes the torsion RMSD and save it for later
        for angle in angles[i]:
            trial = rotate_dihedral(coord, torsion, angle, indices_to_be_moved=[torsion[3]])

            locally_corrected_rmsd, _ = rmsd_and_max(ref[torsion], trial[torsion])

            if locally_corrected_rmsd < best_rmsd:
                best_rmsd = locally_corrected_rmsd
                torsion_corrections[i] = angle

            # it is faster to undo the rotation rather than working with a copy of coords
            coord = rotate_dihedral(trial, torsion, -angle, indices_to_be_moved=[torsion[3]])

        # apply the best angle to the whole fragment before going to the next torsion
        if torsion_corrections[i] != 0:
            rotation_mask = _get_rotation_mask(graph, torsion)
            coord = rotate_dihedral(coord, torsion, torsion_corrections[i], mask=rotation_mask)

        if debugfunction is not None:
            heavy = atomnos != 1
            global_rmsd = rmsd_and_max(ref[heavy], coord[heavy])[0]
            debugfunction(
                f"Torsion {i + 1} - {torsion}: best corr = {torsion_corrections[i]}°, 4-atom RMSD: "
                + f"{best_rmsd:.3f} A, global RMSD: {global_rmsd:.3f}"
            )

    # we should have the optimal orientation on all torsions now:
    # calculate the RMSD
    if heavy_atoms_only:
        mask = atomnos != 1
    else:
        mask = np.ones(atomnos.shape, dtype=bool)
    rmsd, maxdev = rmsd_and_max(ref[mask], coord[mask])

    # since we could have segmented graphs, and therefore potentially only rotate
    # subsets of the graph where the torsion last two indices are,
    # we have to undo the final rotation too (would not be needed for connected graphs)
    for i in reversed(range(len(torsions))):
        coord = rotate_dihedral(
            coord,
            torsions[i],
            -torsion_corrections[i],
            mask=_get_rotation_mask(graph, torsions[i]),
        )

    return (rmsd, maxdev) if return_type == "rmsd" else coord
prism_pruner/typing.py ADDED
@@ -0,0 +1,15 @@
1
+ """PRISM - PRuning Interface for Similar Molecules."""
2
+
3
+ from typing import Annotated, Any, Callable, TypeVar, Union
4
+
5
+ import numpy as np
6
+ from numpy.typing import NDArray
7
+
8
# ndarray element types shared by the aliases below
_Float64Array = NDArray[np.float64]
_Int32Array = NDArray[np.int32]

# The string attached to each alias is a human-readable shape hint only:
# it is carried as Annotated metadata.
Array3D_float = Annotated[_Float64Array, "shape: (nconfs, natoms, 3)"]
Array2D_float = Annotated[_Float64Array, "shape: (natoms, 3)"]
Array2D_int = Annotated[_Int32Array, "shape: (a, b)"]
Array1D_float = Annotated[_Float64Array, "shape: (energy,)"]
Array1D_int = Annotated[_Int32Array, "shape: (natoms,)"]
Array1D_bool = Annotated[NDArray[np.bool_], "shape: (n,)"]

# generic callable type variable
F = TypeVar("F", bound=Callable[..., object])

# either a tuple of floats or a floating-point ndarray
FloatIterable = Union[tuple[float, ...], NDArray[np.floating[Any]]]