prism-pruner 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,472 @@
1
+ """PRISM - PRuning Interface for Similar Molecules."""
2
+
3
+ from copy import deepcopy
4
+ from dataclasses import dataclass
5
+ from typing import Callable, Iterable, Sequence
6
+
7
+ import numpy as np
8
+ from networkx import (
9
+ Graph,
10
+ connected_components,
11
+ has_path,
12
+ is_isomorphic,
13
+ minimum_spanning_tree,
14
+ shortest_path,
15
+ subgraph,
16
+ )
17
+
18
+ from prism_pruner.algebra import vec_angle
19
+ from prism_pruner.graph_manipulations import (
20
+ get_phenyl_ids,
21
+ get_sp_n,
22
+ is_amide_n,
23
+ is_ester_o,
24
+ )
25
+ from prism_pruner.rmsd import rmsd_and_max
26
+ from prism_pruner.typing import Array1D_bool, Array1D_str, Array2D_float, Array2D_int
27
+ from prism_pruner.utils import rotate_dihedral
28
+
29
+
30
+ @dataclass
31
+ class Torsion:
32
+ """Torsion class."""
33
+
34
+ i1: int
35
+ i2: int
36
+ i3: int
37
+ i4: int
38
+ mode: str | None = None
39
+
40
+ @property
41
+ def torsion(self) -> tuple[int, int, int, int]:
42
+ """Return tuple of indices defining the torsion."""
43
+ return (self.i1, self.i2, self.i3, self.i4)
44
+
45
+
46
def in_cycle(torsion: Torsion, graph: Graph) -> bool:
    """Return True if the torsion is part of a cycle.

    The central i2-i3 edge is temporarily removed from ``graph``: if i1 can
    still reach i4 without it, the bond belongs to a ring. The edge (with its
    attributes) is put back before returning, even if the path search raises.
    """
    i2, i3 = torsion.i2, torsion.i3

    # remember the edge attributes: a bare add_edge() would silently drop them
    edge_data = dict(graph.get_edge_data(i2, i3) or {})

    graph.remove_edge(i2, i3)
    try:
        return bool(has_path(graph, torsion.i1, torsion.i4))
    finally:
        # always restore the caller's graph
        graph.add_edge(i2, i3)
        graph.edges[i2, i3].update(edge_data)
52
+
53
+
54
def is_rotable(
    torsion: Torsion,
    graph: Graph,
    hydrogen_bonds: list[list[int]],
    keepdummy: bool = False,
) -> bool:
    """Return True if the Torsion object is rotatable.

    hydrogen_bonds: iterable with pairs of sorted atomic indices. A torsion
    whose central bond is one of these pairs is never considered rotatable.
    keepdummy: if True, skip the check that discards dummy rotations.
    """
    a, b = torsion.i2, torsion.i3

    # central bond is a hydrogen bond: do not rotate it
    if sorted((a, b)) in hydrogen_bonds:
        return False

    # at least one of the two central atoms must be free to rotate
    if not (_is_free(a, graph) or _is_free(b, graph)):
        return False

    if keepdummy:
        return True

    # both ends of the bond must carry non-equivalent substituents
    return bool(is_nondummy(a, b, graph) and is_nondummy(b, a, graph))
78
+
79
+
80
def get_n_fold(torsion: Torsion, graph: Graph) -> int:
    """Return the n-fold of the rotation around the central i2-i3 bond."""
    a, b = torsion.i2, torsion.i3
    atoms = (graph.nodes[a]["atoms"], graph.nodes[b]["atoms"])

    # H-N, H-O hydrogen bonds
    if "H" in atoms:
        return 6

    # tertiary amides rotations are 2-fold
    if is_amide_n(a, graph, mode=2) or is_amide_n(b, graph, mode=2):
        return 2

    # hybridization-based rules only apply if C, N or S atoms are involved
    if any(symbol in atoms for symbol in ("C", "N", "S")):
        sp_a = get_sp_n(a, graph)
        sp_b = get_sp_n(b, graph)

        if sp_a == 3 and sp_b == 3:
            return 3

        # Csp3-X, Nsp3-X, Ssulfone-X
        if sp_a == 3 or sp_b == 3:
            if torsion.mode == "csearch":
                return 3

            if torsion.mode == "symmetry":
                return sp_b or 2

        if sp_a == 2 or sp_b == 2:
            return 2

    # O-O, S-S, Ar-Ar, Ar-CO, and everything else
    return 4
109
+
110
+
111
def get_angles(torsion: Torsion, graph: Graph) -> tuple[int, ...]:
    """Return the rotation angles (degrees) matching the torsion's n-fold."""
    angles_by_fold = {
        2: (0, 180),
        3: (0, 120, 240),
        4: (0, 90, 180, 270),
        6: (0, 60, 120, 180, 240, 300),
    }

    # a KeyError is raised for any n-fold value not listed above
    return angles_by_fold[get_n_fold(torsion, graph)]
123
+
124
+
125
def _is_free(index: int, graph: Graph) -> bool:
    """Return whether the atom at ``index`` is free to rotate.

    True only if the atom is none of the following:
    - an sp2 carbon atom bonded to an oxygen (carbonyl-like)
    - the oxygen atom of an ester
    - an amide nitrogen as judged by ``is_amide_n(..., mode=1)``
    """
    # all three conditions are evaluated up front, as in a plain tuple
    is_carbon = graph.nodes[index]["atoms"] == "C"
    is_sp2 = 2 == get_sp_n(index, graph)
    has_oxygen = "O" in (graph.nodes[n]["atoms"] for n in graph.neighbors(index))

    if is_carbon and is_sp2 and has_oxygen:
        return False

    if is_amide_n(index, graph, mode=1):
        return False

    return not is_ester_o(index, graph)
150
+
151
+
152
def is_nondummy(i: int, root: int, graph: Graph) -> bool:
    """Return whether the torsion is not dummy.

    Checks that a molecular rotation along the dihedral
    angle (*, root, i, *) is non-dummy, that is the atom
    at index i, in the direction opposite to the one leading
    to root, has different substituents. i.e. methyl, CF3 and tBu
    rotations should return False.

    ``root`` must be a neighbor of ``i`` in ``graph`` (ValueError otherwise).
    ``graph`` itself is never modified: edges are removed from a copy.
    """
    if graph.nodes[i]["atoms"] not in ("C", "N"):
        return True
        # for now, we only discard rotations around carbon
        # and nitrogen atoms, like methyl/tert-butyl/triphenyl
        # and flat symmetrical rings like phenyl, N-pyrrolyl...

    def same_element(n1: dict, n2: dict) -> bool:
        """Node matcher for isomorphism tests: compare atomic symbols only."""
        return bool(n1["atoms"] == n2["atoms"])

    # Graph.copy() gives an independent node/edge structure, which is all that
    # is needed since only edges are removed below; a deepcopy is far costlier.
    G = graph.copy()
    nb = list(G.neighbors(i))
    nb.remove(root)

    if len(nb) == 1:
        if len(list(G.neighbors(nb[0]))) == 2:
            return False
    # if node i has two bonds only (one with root and one with a)
    # and the other atom (a) has two bonds only (one with i)
    # the rotation is considered dummy: some other rotation
    # will account for its freedom (i.e. alkynes, hydrogen bonds)

    # check if it is a phenyl-like rotation
    if len(nb) == 2:
        # get the 6 indices of the aromatic atoms (i1-i6)
        phenyl_indices = get_phenyl_ids(i, G)

        # compare the two halves of the 6-membered ring (indices i2-i3 region with i5-i6 region)
        if phenyl_indices is not None:
            i1, i2, i3, i4, i5, i6 = phenyl_indices
            G.remove_edge(i3, i4)
            G.remove_edge(i4, i5)
            G.remove_edge(i1, i2)
            G.remove_edge(i1, i6)

            subgraphs = [
                subgraph(G, _set) for _set in connected_components(G) if i2 in _set or i6 in _set
            ]

            if len(subgraphs) == 2:
                return not is_isomorphic(subgraphs[0], subgraphs[1], node_match=same_element)

            # We should not end up here, but if we do, rotation should not be dummy
            return True

    # if not, compare immediate neighbors of i
    for n in nb:
        G.remove_edge(i, n)

    # make a set of each fragment around the chopped n-i bonds,
    # but only for fragments that are not root nor contain other random,
    # disconnected parts of the graph
    subgraphs_nodes = [
        _set for _set in connected_components(G) if root not in _set and any(n in _set for n in nb)
    ]

    if len(subgraphs_nodes) == 1:
        return True
        # if not, the torsion is likely to be rotable
        # (tetramethylguanidyl alanine C(β)-N bond)

    subgraphs = [subgraph(G, s) for s in subgraphs_nodes]
    for sub in subgraphs[1:]:
        if not is_isomorphic(subgraphs[0], sub, node_match=same_element):
            return True
    # Care should be taken because chiral centers are not taken into account: a rotation
    # involving an index where substituents only differ by stereochemistry, and where a
    # rotation is not an element of symmetry of the subsystem, the rotation is considered
    # dummy even if it would be more correct not to. For rotationally corrected RMSD this
    # should only cause small inefficiencies and not lead to discarding any good conformer.

    return False
235
+
236
+
237
def get_hydrogen_bonds(
    coords: Array2D_float,
    atoms: Array1D_str,
    graph: Graph,
    d_min: float = 2.5,
    d_max: float = 3.3,
    max_angle: int = 45,
    elements: Sequence[Sequence[str]] | None = None,
    fragments: Sequence[Sequence[int]] | None = None,
) -> list[list[int]]:
    """Return a list of sorted ``[i, j]`` index pairs of hydrogen bonding partners.

    An HB is a pair of atoms:
        - with one H and one X (acceptor heteroatom) atom
        - with an Y-X distance strictly between d_min and d_max (i.e. N-O, Angstroms)
        - with an angle between Het-H and Het-Het below max_angle (degrees)

    elements: iterable of two iterables with donor atomic symbols in the first
    element and acceptors in the second. default: (("N", "O"), ("N", "O", "F"))

    If fragments is specified (iterable of iterable of indices for each fragment)
    the function only returns inter-fragment hydrogen bonds.

    At most one hydrogen bond is reported for each donor/acceptor pair.
    """
    hbs = []

    if elements is None:
        elements = (("N", "O"), ("N", "O", "F"))

    # indices where donor / acceptor (default or user-specified) atoms are present
    het_idx_from = np.array([i for i, a in enumerate(atoms) if a in elements[0]], dtype=int)
    het_idx_to = np.array([i for i, a in enumerate(atoms) if a in elements[1]], dtype=int)

    for i1 in het_idx_from:
        for i2 in het_idx_to:
            # if inter-fragment HBs are requested, skip intra-HBs
            if fragments is not None and any(i1 in f and i2 in f for f in fragments):
                continue

            # vector connecting the two heteroatoms, and its length (computed once)
            het_vector = coords[i2] - coords[i1]
            het_distance = np.linalg.norm(het_vector)

            # keep close pairs only
            if not d_min < het_distance < d_max:
                continue

            # indices of all H atoms attached to the donor
            Hs = [i for i in graph.neighbors(i1) if graph.nodes[i]["atoms"] == "H"]

            # versor connecting the two heteroatoms
            versor = het_vector / het_distance

            for iH in Hs:
                # vectors connecting heteroatoms to H
                v1 = coords[iH] - coords[i1]
                v2 = coords[iH] - coords[i2]

                # lengths of these vectors
                d1 = np.linalg.norm(v1)
                d2 = np.linalg.norm(v2)

                # scalar projection in the heteroatom direction
                l1 = v1 @ versor
                l2 = v2 @ -versor

                # largest planar angle between Het-H and Het-Het, in degrees
                alfa = vec_angle(v1, versor) if l1 < l2 else vec_angle(v2, -versor)

                # if the three atoms are not too far from being in line
                if alfa < max_angle:
                    # the H is paired with the heteroatom it is farther from
                    partner = i2 if d1 < d2 else i1
                    hbs.append(sorted((int(iH), int(partner))))

                    # one hydrogen bond per heteroatom pair is enough
                    break

    return hbs
313
+
314
+
315
def _get_rotation_mask(graph: Graph, torsion: Iterable[int]) -> Array1D_bool:
    """Return the rotation mask to be applied to coordinates before rotation.

    Get mask for the atoms that will rotate in a torsion:
    all the ones in the graph reachable from the last index
    of the torsion but not going through the central two
    atoms in the torsion quadruplet.

    The i2-i3 edge is removed only temporarily: it is restored, together with
    its attributes, before returning (also if the path search raises).
    The mask follows the iteration order of ``graph.nodes`` and is indexed
    with i3 directly, so node labels are assumed to be 0..n-1 in order.
    """
    _, i2, i3, i4 = torsion

    # remember the edge attributes: a bare add_edge() would silently drop them
    edge_data = dict(graph.get_edge_data(i2, i3) or {})

    graph.remove_edge(i2, i3)
    try:
        # get all indices reachable from i4 not going through i2-i3
        reachable_indices = set(shortest_path(graph, i4))
    finally:
        # restore modified graph
        graph.add_edge(i2, i3)
        graph.edges[i2, i3].update(edge_data)

    # generate boolean mask
    mask = np.array([i in reachable_indices for i in graph.nodes], dtype=bool)

    # do not rotate i3: it would not move,
    # since it lies on the rotation axis
    mask[i3] = False

    return mask
345
+
346
+
347
def _get_quadruplets(graph: Graph) -> Array2D_int:
    """Return an (n, 4) integer array of quadruplets that indicate potential torsions.

    One quadruplet (k, i, j, m) is built for every edge (i, j) of a spanning
    tree of ``graph`` whose two atoms each have at least one other neighbor
    in the original graph. The result has shape (0, 4) when none is found.
    """
    # Step 1: find a spanning tree
    spanning_tree = minimum_spanning_tree(graph)

    # Step 2: add one dihedral for each suitable edge of the spanning tree
    dihedrals = []

    for i, j in spanning_tree.edges():
        # neighbors of i and j in the original graph, excluding the bond itself
        i_neighbors = [n for n in graph.neighbors(i) if n not in (i, j)]
        j_neighbors = [n for n in graph.neighbors(j) if n not in (i, j)]

        # both ends need a substituent to form a 4-point dihedral
        if i_neighbors and j_neighbors:
            # form dihedral: neighbor_of_i - i - j - neighbor_of_j,
            # choosing the first available neighbor on each side
            dihedrals.append((i_neighbors[0], i, j, j_neighbors[0]))

    # explicit dtype/shape so that an empty result is still a 2D integer array
    return np.array(dihedrals, dtype=int).reshape(-1, 4)
371
+
372
+
373
def get_torsions(
    graph: Graph,
    hydrogen_bonds: list[list[int]],
    double_bonds: list[tuple[int, int]],
    keepdummy: bool = False,
    mode: str = "csearch",
) -> list[Torsion]:
    """Return list of Torsion objects for the rotatable bonds of ``graph``.

    A candidate quadruplet is kept only if its central bond is not listed in
    ``double_bonds`` (pairs are matched regardless of index order), is not
    part of a cycle, and is judged rotatable by ``is_rotable``.
    Every returned Torsion has its ``mode`` set to ``mode``.
    """
    # normalize once: O(1) lookups, insensitive to the order within each pair
    double_bond_set = {tuple(sorted(bond)) for bond in double_bonds}

    torsions = []
    for path in _get_quadruplets(graph):
        i1, i2, i3, i4 = (int(index) for index in path)

        if tuple(sorted((i2, i3))) in double_bond_set:
            continue

        t = Torsion(i1, i2, i3, i4, mode=mode)

        # reject torsions that do not represent a rotable bond
        if (not in_cycle(t, graph)) and is_rotable(t, graph, hydrogen_bonds, keepdummy=keepdummy):
            torsions.append(t)

    return torsions
399
+
400
+
401
def rotationally_corrected_rmsd_and_max(
    ref: Array2D_float,
    coord: Array2D_float,
    atoms: Array1D_str,
    torsions: Array2D_int,
    graph: Graph,
    angles: Sequence[Sequence[int]],
    heavy_atoms_only: bool = True,
    debugfunction: Callable[..., object] | None = None,
    return_type: str = "rmsd",
) -> tuple[float, float] | Array2D_float:
    """Return RMSD and max deviation, corrected for degenerate torsions.

    For each torsion, every angle in the matching entry of ``angles`` is tried
    on the fourth atom only; the one giving the lowest 4-atom RMSD against
    ``ref`` is then applied to the whole rotating fragment.

    ref, coord: reference and target coordinates. ``coord`` is not modified:
        all rotations are carried out on a private copy.
    atoms: atomic symbols, used to exclude "H" if heavy_atoms_only is True.
    torsions: one row of four indices per torsion.
    angles: for each torsion, the candidate rotation angles in degrees.
    debugfunction: optional callable receiving one message per torsion.
    return_type: "rmsd" returns the tuple (rmsd, max deviation) between ref
        and the corrected coordinates; "coords" returns the corrected
        coordinates themselves.
    """
    assert return_type in ("rmsd", "coords")

    # rotate_dihedral edits its input in place: work on a copy so that the
    # caller's array is left exactly as it was, with no rotate/undo round-off
    coord = np.array(coord, dtype=float)

    torsion_corrections = [0 for _ in torsions]

    mask = (
        np.array([a != "H" for a in atoms]) if heavy_atoms_only else np.ones(len(atoms), dtype=bool)
    )

    # Now rotate every dummy torsion by the appropriate increment until we minimize local RMSD
    for i, torsion in enumerate(torsions):
        best_rmsd = 1e10

        # Look for the rotational angle that minimizes the torsion RMSD and save it for later
        for angle in angles[i]:
            coord = rotate_dihedral(coord, torsion, angle, indices_to_be_moved=[torsion[3]])

            locally_corrected_rmsd, _ = rmsd_and_max(ref[torsion], coord[torsion])

            if locally_corrected_rmsd < best_rmsd:
                best_rmsd = locally_corrected_rmsd
                torsion_corrections[i] = angle

            # it is faster to undo the trial rotation than to copy coords for every angle
            coord = rotate_dihedral(coord, torsion, -angle, indices_to_be_moved=[torsion[3]])

        # now rotate the whole fragment to the best orientation before the next torsion
        if torsion_corrections[i] != 0:
            coord = rotate_dihedral(
                coord, torsion, torsion_corrections[i], mask=_get_rotation_mask(graph, torsion)
            )

        if debugfunction is not None:
            global_rmsd = rmsd_and_max(ref[mask], coord[mask])[0]
            debugfunction(
                f"    Torsion {i + 1} - {torsion}: best θ = {torsion_corrections[i]}°, "
                + f"4-atom RMSD: {best_rmsd:.3f} Å, global RMSD: {global_rmsd:.3f} Å"
            )

    if return_type == "coords":
        # the rotationally corrected coordinates (a new array)
        return coord

    # we should have the optimal orientation on all torsions now:
    # calculate the RMSD
    rmsd, maxdev = rmsd_and_max(ref[mask], coord[mask])
    return rmsd, maxdev
prism_pruner/typing.py ADDED
@@ -0,0 +1,15 @@
1
+ """PRISM - PRuning Interface for Similar Molecules."""
2
+
3
+ from typing import Annotated, Any, Union
4
+
5
+ import numpy as np
6
+ from numpy.typing import NDArray
7
+
8
# Type aliases for the NumPy arrays used across the package.
# The string given to Annotated is descriptive metadata only: it records the
# intended shape for readers and is not checked by Python itself.

# stack of conformers
Array3D_float = Annotated[NDArray[np.float64], "shape: (nconfs, natoms, 3)"]
# coordinates of a single structure
Array2D_float = Annotated[NDArray[np.float64], "shape: (natoms, 3)"]
# generic 2D integer array
Array2D_int = Annotated[NDArray[np.int32], "shape: (a, b)"]
# 1D float array (the shape note suggests energies)
Array1D_float = Annotated[NDArray[np.float64], "shape: (energy,)"]
# one integer per atom
Array1D_int = Annotated[NDArray[np.int32], "shape: (natoms,)"]
# one string per atom
Array1D_str = Annotated[NDArray[np.str_], "shape: (natoms,)"]
# generic 1D boolean mask
Array1D_bool = Annotated[NDArray[np.bool_], "shape: (n,)"]
# either a tuple of floats or a floating-point NumPy array
FloatIterable = Union[tuple[float, ...], NDArray[np.floating[Any]]]
prism_pruner/utils.py ADDED
@@ -0,0 +1,153 @@
1
+ """PRISM - PRuning Interface for Similar Molecules."""
2
+
3
+ from typing import Any, Sequence
4
+
5
+ import numpy as np
6
+ from numpy.linalg import LinAlgError
7
+ from numpy.typing import ArrayLike
8
+
9
+ from prism_pruner.algebra import get_alignment_matrix, rot_mat_from_pointer
10
+ from prism_pruner.typing import Array1D_bool, Array1D_int, Array1D_str, Array2D_float, Array3D_float
11
+
12
# Energy unit conversion factors.
# NOTE(review): from the names, presumably Hartree -> eV, Hartree -> kcal/mol
# and eV -> kcal/mol multipliers - confirm against the callers.
EH_TO_EV = 27.211399
EH_TO_KCAL = 627.5096080305927
EV_TO_KCAL = 23.060541945329334
15
+
16
+
17
def align_structures(
    structures: Array3D_float, indices: Array1D_int | None = None
) -> Array3D_float:
    """Align structures.

    Aligns molecules of a structure array (shape is (n_structures, n_atoms, 3))
    to the first one, based on the indices. If not provided, all atoms are used
    to get the best alignment. Return is the aligned array.

    Each structure is first translated so that the centroid of the selected
    atoms sits at the origin. The input array is left untouched: the work is
    done on a float copy, so integer input is accepted as well.
    """
    # private float copy: centering must not leak into the caller's data
    structures = np.array(structures, dtype=float)

    if indices is None:
        indices = np.arange(structures.shape[1])
    else:
        indices = np.asarray(indices)

    # center every structure on the centroid of the selected atoms
    structures -= structures[:, indices].mean(axis=1, keepdims=True)

    reference = structures[0]
    output = np.zeros(structures.shape)
    output[0] = reference

    for t, target in enumerate(structures[1:], start=1):
        try:
            matrix = get_alignment_matrix(reference[indices], target[indices])

        except LinAlgError:
            # it is actually possible for the kabsch alg not to converge:
            # fall back to leaving the (centered) structure unrotated
            matrix = np.eye(3)

        output[t] = (matrix @ target.T).T

    return output
52
+
53
+
54
def time_to_string(total_time: float, verbose: bool = False, digits: int = 1) -> str:
    """Convert total_time (seconds) to a string with days, hours, minutes and seconds.

    verbose: use full unit names ("days", "hours", ...) instead of "d", "h", ...
    digits: number of decimals the seconds are rounded to.

    Units that would be zero at the front are omitted; a time that is exactly
    one unit long is reported in that unit (60 s -> "1 m 0.0 s").
    """
    names = ("days", "hours", "minutes", "seconds") if verbose else ("d", "h", "m", "s")
    unit_lengths = (24 * 3600, 3600, 60)

    pieces = []
    for name, length in zip(names, unit_lengths):
        # >= so that exact multiples roll over into the larger unit
        if total_time >= length:
            count, total_time = divmod(total_time, length)
            pieces.append(f"{int(count)} {name} ")

    pieces.append(f"{round(total_time, digits):{2 + digits}} {names[3]}")

    return "".join(pieces)
78
+
79
+
80
# Maximum distance under which a pair of atoms is treated as double bonded,
# keyed by the two atomic symbols joined in alphabetical order.
double_bonds_thresholds_dict = {
    "CC": 1.4,
    "CN": 1.3,
}


def get_double_bonds_indices(coords: Array2D_float, atoms: Array1D_str) -> list[tuple[int, int]]:
    """Return a list containing 2-elements tuples of indices involved in any double bond.

    A pair of non-hydrogen atoms is reported when its element pair has an
    entry in ``double_bonds_thresholds_dict`` and the two atoms are closer
    than that threshold. Indices refer to the full (hydrogens included)
    numbering, are plain ints, and each pair is in increasing order.
    ``coords`` and ``atoms`` may be arrays or plain sequences.
    """
    from itertools import combinations

    coords = np.asarray(coords, dtype=float)
    atoms = np.asarray(atoms)

    heavy = atoms != "H"
    numbering = np.arange(len(coords))[heavy]
    heavy_coords = coords[heavy]
    heavy_atoms = atoms[heavy]

    output = []
    for a, b in combinations(range(len(heavy_coords)), 2):
        tag = "".join(sorted((str(heavy_atoms[a]), str(heavy_atoms[b]))))

        # element pairs without a threshold can never match: skip the distance
        threshold = double_bonds_thresholds_dict.get(tag)
        if threshold is None:
            continue

        if np.linalg.norm(heavy_coords[a] - heavy_coords[b]) < threshold:
            output.append((int(numbering[a]), int(numbering[b])))

    return output
104
+
105
+
106
def rotate_dihedral(
    coords: Array2D_float,
    dihedral: list[int] | tuple[int, ...],
    angle: float,
    mask: Array1D_bool | None = None,
    indices_to_be_moved: ArrayLike | None = None,
) -> Array2D_float:
    """Rotate part of a molecule around the bond between dihedral[1] and dihedral[2].

    The atoms that move are selected by indices_to_be_moved if given
    (it takes precedence over mask), otherwise by mask. If both are None,
    only the first index of the dihedral iterable is moved.

    angle: angle, in degrees

    ``coords`` is modified in place and also returned.
    """
    first, second, third, *_ = dihedral

    if indices_to_be_moved is not None:
        moving = np.isin(np.arange(len(coords)), indices_to_be_moved)
    elif mask is None:
        moving = np.zeros(len(coords), dtype=bool)
        moving[first] = True
    else:
        moving = mask

    # rotation matrix around the second-third atom axis
    rotation = rot_mat_from_pointer(coords[second] - coords[third], angle)

    # rotate about the third atom: shift to the origin, rotate, shift back
    pivot = coords[third]
    coords[moving] = (coords[moving] - pivot) @ rotation.T + pivot

    return coords
138
+
139
+
140
def flatten(array: Sequence[Any], typefunc: type = float) -> list[Any]:
    """Return the unraveled sequence, with items coerced into the typefunc type.

    Lists, tuples and NumPy arrays (subclasses included, e.g. namedtuples)
    are descended into at any depth; every other item, strings included,
    is passed to ``typefunc`` and appended to the flat output list.
    """
    out = []

    def rec(_l: Any) -> None:
        """Recursive unraveling function."""
        for e in _l:
            # isinstance, not type(): subclasses are containers too
            if isinstance(e, (list, tuple, np.ndarray)):
                rec(e)
            else:
                out.append(typefunc(e))

    rec(array)
    return out
@@ -0,0 +1,34 @@
1
+ Metadata-Version: 2.4
2
+ Name: prism_pruner
3
+ Version: 0.0.3
4
+ Summary: Prism Pruner
5
+ Author-email: Nicolò Tampellini <nicolo.tampellini@yale.edu>
6
+ License-Expression: MIT
7
+ Requires-Python: >=3.12
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: networkx>=3.0
11
+ Requires-Dist: numpy>=2.0
12
+ Requires-Dist: periodictable
13
+ Requires-Dist: scipy>=1.10
14
+ Requires-Dist: tqdm>=4
15
+ Dynamic: license-file
16
+
17
+ # Prism Pruner
18
+
19
+ [![License](https://img.shields.io/github/license/ntampellini/prism_pruner)](https://github.com/ntampellini/prism_pruner/blob/master/LICENSE)
20
+ [![Powered by: Pixi](https://img.shields.io/badge/Powered_by-Pixi-facc15)](https://pixi.sh)
21
+ [![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
22
+ [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/ntampellini/prism_pruner/test.yml?branch=master&logo=github-actions)](https://github.com/ntampellini/prism_pruner/actions/)
23
+ [![Codecov](https://img.shields.io/codecov/c/github/ntampellini/prism_pruner)](https://codecov.io/gh/ntampellini/prism_pruner)
24
+ [![PyPI - Version](https://img.shields.io/pypi/v/prism_pruner)](https://pypi.org/project/prism-pruner/)
25
+
26
+ PRISM (PRuning Interface for Similar Molecules) is the modular similarity pruning code from [FIRECODE](https://github.com/ntampellini/FIRECODE/tree/main), in a standalone package. It filters out duplicate structures from conformational ensembles, leaving behind non-redundant states.
27
+
28
+ The code implements a cached, iterative, divide-and-conquer approach on increasingly large subsets of the ensemble and removes duplicates as assessed by one of three metrics:
29
+ - Heavy-atom RMSD and maximum deviation
30
+ - Rotamer-corrected heavy-atom RMSD and maximum deviation
31
+ - Relative deviation of the moments of inertia on the principal axes
32
+
33
+ ## Credits
34
+ This package was created with [Cookiecutter](https://github.com/audreyr/cookiecutter) and the [jevandezande/pixi-cookiecutter](https://github.com/jevandezande/pixi-cookiecutter) project template.
@@ -0,0 +1,14 @@
1
+ prism_pruner/__init__.py,sha256=M3KYy269Z7PmWOgRhMcBZySZSmC3pjG7lRIG17eN-FQ,55
2
+ prism_pruner/algebra.py,sha256=4oKViTtGiTzpZs3sQQaiHX3fpQeAmlpdHFfTo2FPrGU,4951
3
+ prism_pruner/conformer_ensemble.py,sha256=9VYpRb0k-IhicAl1pYsOjX26WkrCqiqhH588_3ALB-k,1837
4
+ prism_pruner/graph_manipulations.py,sha256=PNAa2zEcPezFg-83dvtSU-UYCOYVcvR3AYBpdEzWMNY,6321
5
+ prism_pruner/pruner.py,sha256=JIoT2L8w3hqlAPsfNDGdUn2nNBG_AKEE-jH7u7L1T0Y,20493
6
+ prism_pruner/rmsd.py,sha256=09CHQy2-z3mWA6cQhoNRSuA_E5JZ7NEtCj1Al_Wjl6M,877
7
+ prism_pruner/torsion_module.py,sha256=LoFnvmb3OBMzfKxaHK54YtlpgSO6QMYiDbSb60jXwlc,16023
8
+ prism_pruner/typing.py,sha256=fBHZgLf38MlvIoOHaMZOP4thI-9OvhHK3AnjuqFPbfU,676
9
+ prism_pruner/utils.py,sha256=OdV9qX6XiocKzPMLL9UmLKj8poKnipJmaf8KdsGlNTs,4594
10
+ prism_pruner-0.0.3.dist-info/licenses/LICENSE,sha256=Im9pMXp0ignxYTY5QMacrME_3l6QVtQXO6QvO3bVriY,1075
11
+ prism_pruner-0.0.3.dist-info/METADATA,sha256=M4FgFBJfl_GbK3nSGpfrVuJpjsDMsn0byj4ChQ0BCsg,2045
12
+ prism_pruner-0.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ prism_pruner-0.0.3.dist-info/top_level.txt,sha256=GdtwtPlTsKhTsjMoj4bo6wJVoyzFX371HKQU32l6Q84,13
14
+ prism_pruner-0.0.3.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+