prism-pruner 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of prism-pruner might be problematic. Click here for more details.

prism_pruner/utils.py ADDED
@@ -0,0 +1,246 @@
1
+ """PRISM - PRuning Interface for Similar Molecules."""
2
+
3
+ from pathlib import Path
4
+ from typing import Any, Sequence, TextIO
5
+
6
+ import numpy as np
7
+ from numpy.linalg import LinAlgError
8
+ from numpy.typing import ArrayLike
9
+
10
+ from prism_pruner.algebra import get_alignment_matrix, norm_of, rot_mat_from_pointer
11
+ from prism_pruner.pt import pt
12
+ from prism_pruner.typing import Array1D_bool, Array1D_int, Array2D_float, Array3D_float
13
+
14
+
15
def align_structures(
    structures: Array3D_float, indices: Array1D_int | None = None
) -> Array3D_float:
    """Align every structure of an ensemble onto the first one.

    Each structure is first translated so that the centroid of the atoms
    selected by ``indices`` sits at the origin, then every structure after the
    first is multiplied by the matrix returned by ``get_alignment_matrix`` for
    the selected atoms of the reference and of that structure.

    Args:
        structures: array of shape (n_structures, n_atoms, 3). It is never
            modified: all the work is done on a float copy.
        indices: atoms used to compute centroids and alignment matrices
            (array, list or tuple). If None, all atoms are used.

    Returns
    -------
        A new float array with the same shape as ``structures`` holding the
        centered reference followed by the aligned structures.
    """
    # Work on a float copy: in-place subtraction on the caller's array would
    # silently re-center their data and fail outright on integer arrays.
    centered = np.array(structures, dtype=float)

    if indices is None:
        indices = np.arange(centered.shape[1])
    else:
        indices = np.asarray(indices)

    # Move the centroid of the selected atoms of every structure to the origin.
    centered -= centered[:, indices].mean(axis=1, keepdims=True)

    reference = centered[0]
    output = np.empty_like(centered)
    output[0] = reference

    for t, target in enumerate(centered[1:], start=1):
        try:
            matrix = get_alignment_matrix(reference[indices], target[indices])

        except LinAlgError:
            # it is actually possible for the kabsch alg not to converge:
            # in that case the structure is only centered, not rotated
            matrix = np.eye(3)

        output[t] = (matrix @ target.T).T

    return output
50
+
51
+
52
def write_xyz(
    coords: Array2D_float, atomnos: Array1D_int, output: TextIO, title: str = "temp"
) -> None:
    """Write one structure in xyz format to an open text stream.

    The emitted text is the atom count, the title line and then one
    ``symbol x y z`` line per atom with six decimals. Element symbols are
    looked up in the module-level ``pt`` table by atomic number.

    Args:
        coords: array of shape (n_atoms, 3) with the atomic coordinates.
        atomnos: array of shape (n_atoms,) with the atomic numbers.
        output: writable text stream; the whole block is written in one call.
        title: content of the comment (second) line.

    Raises
    ------
        ValueError: if ``atomnos`` and ``coords`` have different lengths or
            ``coords`` does not have three columns.
    """
    # Explicit checks instead of asserts: asserts vanish under ``python -O``.
    if atomnos.shape[0] != coords.shape[0]:
        raise ValueError(
            f"atomnos has {atomnos.shape[0]} entries but coords has {coords.shape[0]} rows"
        )
    if coords.shape[1] != 3:
        raise ValueError(f"coords must have 3 columns, got {coords.shape[1]}")

    chunks = [f"{len(coords)}\n{title}\n"]
    # The space flag in "{: .6f}" reserves the sign column so that positive
    # and negative numbers stay vertically aligned.
    chunks.extend(
        f"{pt[number].symbol} {x: .6f} {y: .6f} {z: .6f}\n"
        for number, (x, y, z) in zip(atomnos, coords)
    )
    output.write("".join(chunks))
64
+
65
+
66
class XYZParser:
    """cclib-like parser for .xyz multimolecular files.

    After construction the instance exposes:
      - ``atomcoords``: array built from all parsed frames,
      - ``atomnos``: atomic numbers of the first frame,
      - ``atomcoords_list`` / ``atomnos_list``: the per-frame raw data.
    """

    def __init__(self, filename: str, pt: Any):
        """Initialize XYZParser and parse the file.

        Args:
            filename (str): Path to the .xyz file
            pt: periodictable table instance for atomic number lookup
                (``getattr(pt, symbol).number`` must work for every symbol)

        Raises
        ------
            FileNotFoundError: If the specified file does not exist
            ValueError: If no structure could be read from the file
        """
        self.filename = filename
        self.pt = pt
        # One (n_atoms, 3) array and one (n_atoms,) array per frame.
        self.atomcoords_list: list[Array2D_float] = []
        self.atomnos_list: list[Array1D_int] = []

        self._parse_file()

        # Fail with a clear message instead of an IndexError on atomnos_list[0].
        if not self.atomnos_list:
            raise ValueError(f"No structures found in '{self.filename}'")

        self.atomcoords: Array3D_float = np.asarray(self.atomcoords_list)

        self.atomnos: Array1D_int = np.asarray(self.atomnos_list[0])

    def _parse_file(self) -> None:
        """Parse the .xyz file and populate atomcoords_list and atomnos_list.

        The parser is lenient: blank lines and lines that are not a valid atom
        count are skipped, atom lines with fewer than four fields are ignored,
        and a truncated last frame is stored with whatever atoms were read.
        """
        filepath = Path(self.filename)

        if not filepath.exists():
            raise FileNotFoundError(f"File '{self.filename}' not found")

        with open(filepath, "r") as f:
            lines = f.readlines()

        i = 0
        while i < len(lines):
            # Blank lines and any other non-integer line are skipped here:
            # int("") raises ValueError just like int("some text").
            try:
                natoms = int(lines[i].strip())
            except ValueError:
                i += 1
                continue

            # A negative count would move the cursor backwards below and could
            # loop forever: treat it like any other invalid header line.
            if natoms < 0:
                i += 1
                continue

            # Skip the count line and the comment line
            i += 2

            coords = []
            atomnos = []

            # Slicing clips at the end of the file, so truncated frames are safe
            for line in lines[i : i + natoms]:
                parts = line.split()
                if len(parts) < 4:
                    continue

                symbol = parts[0]
                x, y, z = map(float, parts[1:4])

                # Get atomic number from periodictable
                atomnos.append(getattr(self.pt, symbol).number)
                coords.append([x, y, z])

            if coords:
                self.atomcoords_list.append(np.array(coords))
                self.atomnos_list.append(np.array(atomnos))

            i += natoms
140
+
141
+
142
def read_xyz(filename: str) -> XYZParser:
    """Parse the .xyz file at ``filename`` into a cclib-like XYZParser object.

    The module-level ``pt`` table is handed to the parser for the
    symbol-to-atomic-number lookup.
    """
    return XYZParser(filename, pt)
146
+
147
+
148
def time_to_string(total_time: float, verbose: bool = False, digits: int = 1) -> str:
    """Convert a duration in seconds to a string with days, hours, minutes and seconds.

    Units larger than seconds are only printed when the remaining time reaches
    at least one of them; the seconds field is always printed.

    Args:
        total_time: duration, in seconds.
        verbose: use full unit names ("days", "hours", ...) instead of the
            one-letter abbreviations ("d", "h", "m", "s").
        digits: number of decimals the seconds are rounded to.

    Returns
    -------
        The formatted string, e.g. ``"1 h 1 m 1.5 s"``.
    """
    names = ("days", "hours", "minutes", "seconds") if verbose else ("d", "h", "m", "s")
    unit_lengths = (24 * 3600, 3600, 60)

    pieces = []
    for name, length in zip(names, unit_lengths):
        # ">=" so that exactly one minute/hour/day is promoted to the larger
        # unit instead of being printed as e.g. "3600.0 s".
        if total_time >= length:
            count, total_time = divmod(total_time, length)
            pieces.append(f"{int(count)} {name} ")

    pieces.append(f"{round(total_time, digits):{2 + digits}} {names[3]}")

    return "".join(pieces)
172
+
173
+
174
# Distance thresholds below which a pair of atoms is reported as double bonded.
# Keys are the two element symbols sorted alphabetically and joined.
# NOTE(review): values are presumably in Angstrom - confirm against callers.
double_bonds_thresholds_dict = {
    "CC": 1.4,
    "CN": 1.3,
}


def get_double_bonds_indices(coords: Array2D_float, atomnos: Array1D_int) -> list[tuple[int, int]]:
    """Return a list containing 2-elements tuples of indices involved in any double bond.

    Hydrogens (atomic number 1) are ignored. Every remaining pair of atoms
    whose element combination has an entry in ``double_bonds_thresholds_dict``
    and whose distance is below that threshold is reported.

    Args:
        coords: array of shape (n_atoms, 3) with the atomic coordinates.
        atomnos: array of shape (n_atoms,) with the atomic numbers.

    Returns
    -------
        List of ``(i, j)`` tuples of plain Python ints with ``i < j``, indexing
        into the original (hydrogen-containing) arrays.
    """
    heavy = atomnos != 1
    numbering = np.arange(len(coords))[heavy]
    coords = coords[heavy]
    # Look every symbol up once instead of once per pair.
    symbols = [pt[number].symbol for number in atomnos[heavy]]
    output = []

    for i1, symbol1 in enumerate(symbols):
        for i2 in range(i1 + 1, len(symbols)):
            tag = "".join(sorted((symbol1, symbols[i2])))

            threshold = double_bonds_thresholds_dict.get(tag)
            if threshold is None:
                # no double bond defined for this element pair: skip the distance
                continue

            if norm_of(coords[i1] - coords[i2]) < threshold:
                # int() so callers get Python ints, as the annotation promises
                output.append((int(numbering[i1]), int(numbering[i2])))

    return output
198
+
199
+
200
def rotate_dihedral(
    coords: Array2D_float,
    dihedral: list[int] | tuple[int, ...],
    angle: float,
    mask: Array1D_bool | None = None,
    indices_to_be_moved: ArrayLike | None = None,
) -> Array2D_float:
    """Rotate a molecule around a given bond.

    The rotation matrix is obtained from ``rot_mat_from_pointer`` for the
    vector going from the third to the second atom of the dihedral, and the
    rotation is centered on the third atom.

    Atoms that will move are the ones
    specified by mask or indices_to_be_moved
    (indices_to_be_moved takes precedence when both are given).
    If both are None, only the first index of
    the dihedral iterable is moved.

    Args:
        coords: array of shape (n_atoms, 3). It is modified in place.
        dihedral: the four atom indices defining the dihedral.
        angle: angle, in degrees
        mask: boolean mask (or index array) of the atoms to move.
        indices_to_be_moved: indices of the atoms to move.

    Returns
    -------
        The same ``coords`` array that was passed in, after the rotation.
    """
    i1, i2, i3, _ = dihedral

    if indices_to_be_moved is not None:
        mask = np.isin(np.arange(len(coords)), indices_to_be_moved)

    if mask is None:
        # Must be a 1D index: a 2D one makes coords[mask] three-dimensional
        # and the transposed matrix product below fails on the shape mismatch.
        mask = np.array([i1])

    axis = coords[i2] - coords[i3]
    mat = rot_mat_from_pointer(axis, angle)
    center = coords[i3]

    # translate to the rotation center, rotate, translate back
    coords[mask] = (mat @ (coords[mask] - center).T).T + center

    return coords
231
+
232
+
233
def flatten(array: Sequence[Any], typefunc: type = float) -> list[Any]:
    """Return the unraveled sequence, with items coerced into the typefunc type.

    Lists, tuples and numpy arrays (including their subclasses) are unraveled
    recursively, depth first; anything else, strings included, is treated as
    a leaf and passed through ``typefunc``.

    Args:
        array: arbitrarily nested sequence to unravel.
        typefunc: callable applied to every leaf item.

    Returns
    -------
        Flat list of the converted leaf items, in traversal order.
    """
    out: list[Any] = []

    def rec(_l: Any) -> None:
        """Recursive unraveling function."""
        for e in _l:
            # isinstance rather than an exact type match, so that subclasses
            # of the container types are unraveled as well
            if isinstance(e, (list, tuple, np.ndarray)):
                rec(e)
            else:
                out.append(typefunc(e))

    rec(array)
    return out
@@ -0,0 +1,28 @@
1
+ Metadata-Version: 2.4
2
+ Name: prism_pruner
3
+ Version: 0.0.1
4
+ Summary: Prism Pruner
5
+ Author-email: Nicolò Tampellini <nicolo.tampellini@yale.edu>
6
+ License-Expression: MIT
7
+ Requires-Python: >=3.12
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Dynamic: license-file
11
+
12
+ # Prism Pruner
13
+
14
+ [![License](https://img.shields.io/github/license/ntampellini/prism_pruner)](https://github.com/ntampellini/prism_pruner/blob/master/LICENSE)
15
+ [![Powered by: Pixi](https://img.shields.io/badge/Powered_by-Pixi-facc15)](https://pixi.sh)
16
+ [![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
17
+ [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/ntampellini/prism_pruner/test.yml?branch=master&logo=github-actions)](https://github.com/ntampellini/prism_pruner/actions/)
18
+ [![Codecov](https://img.shields.io/codecov/c/github/ntampellini/prism_pruner)](https://codecov.io/gh/ntampellini/prism_pruner)
19
+
20
+ PRISM (PRuning Interface for Similar Molecules) is the modular similarity pruning code from [FIRECODE](https://github.com/ntampellini/FIRECODE/tree/main), in a standalone package. It filters out duplicate structures from conformational ensembles, leaving behind non-redundant states.
21
+
22
+ The code implements a cached, iterative, divide-and-conquer approach on increasingly large subsets of the ensemble and removes duplicates as assessed by one of three metrics:
23
+ - Heavy-atom RMSD and maximum deviation
24
+ - Rotamer-corrected heavy-atom RMSD and maximum deviation
25
+ - Relative deviation of the moments of inertia on the principal axes
26
+
27
+ ## Credits
28
+ This package was created with [Cookiecutter](https://github.com/audreyr/cookiecutter) and the [jevandezande/pixi-cookiecutter](https://github.com/jevandezande/pixi-cookiecutter) project template.
@@ -0,0 +1,14 @@
1
+ prism_pruner/__init__.py,sha256=M3KYy269Z7PmWOgRhMcBZySZSmC3pjG7lRIG17eN-FQ,55
2
+ prism_pruner/algebra.py,sha256=cGGMFur0F4NM9OxBucfzpBvXBa4dScSk8VDaU6x_254,6010
3
+ prism_pruner/graph_manipulations.py,sha256=-8uEOCWezayx2k4eVQVU61Y5-Nv5hBVjXdKSsoYEwoI,6342
4
+ prism_pruner/pruner.py,sha256=JA5Vn_PrmNdKZPOC8YE5Wteq17FApa2UF0C1v-EpU_c,18744
5
+ prism_pruner/pt.py,sha256=XoF7uksmZgJlqZb8t_efEaLGS2RqA67rENR4Hx_YBHg,302
6
+ prism_pruner/rmsd.py,sha256=v11h_xTWF7Ea-MDnQXHGa9XwkvEEMJvDZgCC8bOmdKA,931
7
+ prism_pruner/torsion_module.py,sha256=YNjhVqS0wdlqdYXHYonU8munCEfRijcTz-s5Dn_SKwc,15905
8
+ prism_pruner/typing.py,sha256=WyBF38NsM34bT-uT8EvzEXfDCf2jdATRSX8WtQeYCes,679
9
+ prism_pruner/utils.py,sha256=6T5rNpawscDw4xfkH5Ua11IZUKJXjoJ3ANZV-vyO1vI,7395
10
+ prism_pruner-0.0.1.dist-info/licenses/LICENSE,sha256=Im9pMXp0ignxYTY5QMacrME_3l6QVtQXO6QvO3bVriY,1075
11
+ prism_pruner-0.0.1.dist-info/METADATA,sha256=QgcgRkYkY7WBqMnSQDChM5B4nR-x7QtH2oOVZiV7mGg,1807
12
+ prism_pruner-0.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ prism_pruner-0.0.1.dist-info/top_level.txt,sha256=GdtwtPlTsKhTsjMoj4bo6wJVoyzFX371HKQU32l6Q84,13
14
+ prism_pruner-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Nicolò Tampellini
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ prism_pruner