prism-pruner 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prism_pruner-0.0.1/LICENSE +21 -0
- prism_pruner-0.0.1/PKG-INFO +28 -0
- prism_pruner-0.0.1/README.md +17 -0
- prism_pruner-0.0.1/prism_pruner/__init__.py +1 -0
- prism_pruner-0.0.1/prism_pruner/algebra.py +190 -0
- prism_pruner-0.0.1/prism_pruner/graph_manipulations.py +194 -0
- prism_pruner-0.0.1/prism_pruner/pruner.py +571 -0
- prism_pruner-0.0.1/prism_pruner/pt.py +12 -0
- prism_pruner-0.0.1/prism_pruner/rmsd.py +39 -0
- prism_pruner-0.0.1/prism_pruner/torsion_module.py +468 -0
- prism_pruner-0.0.1/prism_pruner/typing.py +15 -0
- prism_pruner-0.0.1/prism_pruner/utils.py +246 -0
- prism_pruner-0.0.1/prism_pruner.egg-info/PKG-INFO +28 -0
- prism_pruner-0.0.1/prism_pruner.egg-info/SOURCES.txt +17 -0
- prism_pruner-0.0.1/prism_pruner.egg-info/dependency_links.txt +1 -0
- prism_pruner-0.0.1/prism_pruner.egg-info/top_level.txt +1 -0
- prism_pruner-0.0.1/pyproject.toml +102 -0
- prism_pruner-0.0.1/setup.cfg +4 -0
- prism_pruner-0.0.1/tests/test_suite.py +121 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Nicolò Tampellini
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: prism_pruner
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Prism Pruner
|
|
5
|
+
Author-email: Nicolò Tampellini <nicolo.tampellini@yale.edu>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Dynamic: license-file
|
|
11
|
+
|
|
12
|
+
# Prism Pruner
|
|
13
|
+
|
|
14
|
+
[](https://github.com/ntampellini/prism_pruner/blob/master/LICENSE)
|
|
15
|
+
[](https://pixi.sh)
|
|
16
|
+
[](https://github.com/astral-sh/ruff)
|
|
17
|
+
[](https://github.com/ntampellini/prism_pruner/actions/)
|
|
18
|
+
[](https://codecov.io/gh/ntampellini/prism_pruner)
|
|
19
|
+
|
|
20
|
+
PRISM (PRuning Interface for Similar Molecules) is the modular similarity pruning code from [FIRECODE](https://github.com/ntampellini/FIRECODE/tree/main), in a standalone package. It filters out duplicate structures from conformational ensembles, leaving behind non-redundant states.
|
|
21
|
+
|
|
22
|
+
The code implements a cached, iterative, divide-and-conquer approach on increasingly large subsets of the ensemble and removes duplicates as assessed by one of three metrics:
|
|
23
|
+
- Heavy-atom RMSD and maximum deviation
|
|
24
|
+
- Rotamer-corrected heavy-atom RMSD and maximum deviation
|
|
25
|
+
- Relative deviation of the moments of inertia on the principal axes
|
|
26
|
+
|
|
27
|
+
## Credits
|
|
28
|
+
This package was created with [Cookiecutter](https://github.com/audreyr/cookiecutter) and the [jevandezande/pixi-cookiecutter](https://github.com/jevandezande/pixi-cookiecutter) project template.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Prism Pruner
|
|
2
|
+
|
|
3
|
+
[](https://github.com/ntampellini/prism_pruner/blob/master/LICENSE)
|
|
4
|
+
[](https://pixi.sh)
|
|
5
|
+
[](https://github.com/astral-sh/ruff)
|
|
6
|
+
[](https://github.com/ntampellini/prism_pruner/actions/)
|
|
7
|
+
[](https://codecov.io/gh/ntampellini/prism_pruner)
|
|
8
|
+
|
|
9
|
+
PRISM (PRuning Interface for Similar Molecules) is the modular similarity pruning code from [FIRECODE](https://github.com/ntampellini/FIRECODE/tree/main), in a standalone package. It filters out duplicate structures from conformational ensembles, leaving behind non-redundant states.
|
|
10
|
+
|
|
11
|
+
The code implements a cached, iterative, divide-and-conquer approach on increasingly large subsets of the ensemble and removes duplicates as assessed by one of three metrics:
|
|
12
|
+
- Heavy-atom RMSD and maximum deviation
|
|
13
|
+
- Rotamer-corrected heavy-atom RMSD and maximum deviation
|
|
14
|
+
- Relative deviation of the moments of inertia on the principal axes
|
|
15
|
+
|
|
16
|
+
## Credits
|
|
17
|
+
This package was created with [Cookiecutter](https://github.com/audreyr/cookiecutter) and the [jevandezande/pixi-cookiecutter](https://github.com/jevandezande/pixi-cookiecutter) project template.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""PRISM - PRuning Interface for Similar Molecules."""
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
"""Algebra utilities."""
|
|
2
|
+
|
|
3
|
+
from typing import Sequence
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from prism_pruner.typing import Array1D_float, Array1D_int, Array2D_float, Array3D_float
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def norm(vec: Array1D_int) -> Array1D_int:
    """
    Scale a 3D vector to unit length.

    Only the first three components are used to compute the length; no check
    is made for a zero-length input.
    """
    x, y, z = vec[0], vec[1], vec[2]
    length = np.sqrt(x * x + y * y + z * z)
    return vec / length  # type: ignore[no-any-return]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def norm_of(vec: Array1D_int) -> float:
    """Return the Euclidean length of a 3D vector as a plain Python float."""
    x, y, z = vec[0], vec[1], vec[2]
    squared_length = x * x + y * y + z * z
    return float(np.sqrt(squared_length))
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def vec_angle(v1: Array1D_int, v2: Array1D_int) -> float:
    """Return the angle, in degrees, between two 3D vectors."""
    cosine = np.dot(norm(v1), norm(v2))
    # Rounding can push the cosine marginally outside [-1, 1], where arccos is undefined.
    bounded_cosine = np.clip(cosine, -1.0, 1.0)
    return float(np.degrees(np.arccos(bounded_cosine)))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def dihedral(p: Array2D_float) -> float:
    """
    Compute the signed dihedral angle, in degrees, defined by four 3D points.

    Praxeolitic formula: 1 sqrt, 1 cross product.

    :param p: four points, unpacked in order as p0, p1, p2, p3
    :return: torsion around the p1-p2 axis, as produced by ``arctan2``
    """
    first, second, third, fourth = p

    back_bond = -1.0 * (second - first)
    axis = third - second
    front_bond = fourth - third

    # Use a unit axis so it does not scale the vector rejections computed below.
    axis /= norm_of(axis)

    # Vector rejections: remove from each outer bond its component along the axis,
    # i.e. project it onto the plane perpendicular to the axis.
    back_rejection = back_bond - np.dot(back_bond, axis) * axis
    front_rejection = front_bond - np.dot(front_bond, axis) * axis

    # The torsion is the in-plane angle between the two rejections. They need not
    # be normalized because arctan2 only depends on the ratio y/x.
    cos_part = np.dot(back_rejection, front_rejection)
    sin_part = np.dot(np.cross(axis, back_rejection), front_rejection)

    return float(np.degrees(np.arctan2(sin_part, cos_part)))
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def rot_mat_from_pointer(pointer: Array1D_int, angle: float) -> Array2D_float:
    """
    Build the matrix for a rotation of ``angle`` degrees around ``pointer``.

    The axis is normalized, turned into a scalar-last quaternion and then
    converted with ``quaternion_to_rotation_matrix``.

    :param pointer: 3D vector representing the rotation pivot
    :param angle: rotation angle in degrees
    :return rotation_matrix: matrix that applied to a point, rotates it along the pointer
    """
    assert pointer.shape[0] == 3

    half_angle = np.radians(angle) / 2
    sin_half = np.sin(half_angle)
    axis = norm(pointer)

    # Quaternion in (x, y, z, w) order, which is what the converter unpacks.
    quaternion = [
        sin_half * axis[0],
        sin_half * axis[1],
        sin_half * axis[2],
        np.cos(half_angle),
    ]
    return quaternion_to_rotation_matrix(quaternion)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def quaternion_to_rotation_matrix(quat: Array1D_float | Sequence[float]) -> Array2D_float:
    """
    Convert a quaternion into a full three-dimensional rotation matrix.

    The input is read scalar-last: ``quat`` unpacks as (x, y, z, w), where w is
    the scalar part. No normalization is performed here, so a unit quaternion
    is expected from the caller.

    :param quat: 4-element array or sequence representing the quaternion (x, y, z, w)
    :return: 3x3 element array representing the full 3D rotation matrix
    """
    x, y, z, w = quat

    row_0 = [2 * (w * w + x * x) - 1, 2 * (x * y - w * z), 2 * (x * z + w * y)]
    row_1 = [2 * (x * y + w * z), 2 * (w * w + y * y) - 1, 2 * (y * z - w * x)]
    row_2 = [2 * (x * z - w * y), 2 * (y * z + w * x), 2 * (w * w + z * z) - 1]

    return np.array([row_0, row_1, row_2])
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def kronecker_delta(i: int, j: int) -> int:
    """Return 1 when the two indices are equal, 0 otherwise."""
    return 1 if i == j else 0
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def get_inertia_moments(coords: Array3D_float, masses: Array1D_float) -> Array1D_float:
    """
    Find the moments of inertia of the three principal axes.

    The coordinates are shifted to the center of mass on a private copy, so the
    caller's array is never modified.

    :param coords: atomic coordinates, one 3D point per row
    :param masses: atomic masses; only the first ``len(coords)`` entries are used
    :return: diagonal of the diagonalized inertia tensor, that is
        a shape (3,) array with the moments of inertia along the main axes,
        in ascending order (largest last)
    """
    # Copy on purpose: recentering in place would silently move the caller's structure.
    xyz = np.array(coords, dtype=float)
    weights = np.asarray(masses, dtype=float)[: len(xyz)]

    # Shift the origin to the center of mass.
    xyz -= weights @ xyz / weights.sum()

    # I = sum_n m_n * (|r_n|^2 * Identity - r_n (outer) r_n)
    weighted_xyz = xyz * weights[:, None]
    inertia_tensor = np.sum(weighted_xyz * xyz) * np.eye(3) - weighted_xyz.T @ xyz

    # The tensor is real and symmetric: eigvalsh returns real eigenvalues, ascending.
    return np.linalg.eigvalsh(inertia_tensor)  # type: ignore[no-any-return]
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def diagonalize(a: Array2D_float) -> Array2D_float:
    """
    Express the matrix ``a`` in the basis of its own eigenvectors.

    The eigenvectors are ordered by increasing absolute eigenvalue before the
    similarity transform ``inv(B) @ a @ B`` is applied.
    """
    eigenvalues, eigenvectors = np.linalg.eig(a)
    order = np.argsort(np.abs(eigenvalues))
    basis = eigenvectors[:, order]
    return np.linalg.inv(basis) @ (a @ basis)  # type: ignore[no-any-return]
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def center_of_mass(coords: Array3D_float, masses: Array1D_float) -> Array1D_float:
    """
    Find the center of mass for the atomic system.

    :param coords: atomic coordinates, one 3D point per row
    :param masses: atomic masses; only the first ``len(coords)`` entries are used
    :return: mass-weighted mean of the coordinates
    """
    points = np.asarray(coords, dtype=float)
    # Keep accepting a mass array longer than the coordinate array.
    weights = np.asarray(masses, dtype=float)[: len(points)]
    return weights @ points / weights.sum()  # type: ignore[no-any-return]
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def get_moi_deviation_vec(
    coords1: Array2D_float, coords2: Array2D_float, masses: Array1D_float
) -> Array1D_float:
    """
    Determine the relative difference of the three principal axes moments of inertia.

    :return: ``|I1 - I2| / I1`` element-wise, where I1 and I2 are the inertia
        moments of ``coords1`` and ``coords2`` computed with the same masses
    """
    reference_moments = get_inertia_moments(coords1, masses)
    other_moments = get_inertia_moments(coords2, masses)

    absolute_gap = np.abs(reference_moments - other_moments)
    return absolute_gap / reference_moments
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def get_alignment_matrix(p: Array1D_float, q: Array1D_float) -> Array2D_float:
    """
    Build the rotation matrix that aligns vectors q to p (Kabsch algorithm).

    Assumes centered vector sets (i.e. their mean is the origin); no centering
    is done here.
    """
    covariance = np.ascontiguousarray(p.T) @ q

    left, _, right = np.linalg.svd(covariance)

    # A negative determinant product means the product left @ right would be a
    # reflection; flipping the last column of the left factor prevents that.
    if np.linalg.det(left) * np.linalg.det(right) < 0.0:
        left[:, -1] = -left[:, -1]

    return left @ right  # type: ignore[no-any-return]
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""Graph manipulation utilities for molecular structures."""
|
|
2
|
+
|
|
3
|
+
from functools import lru_cache
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
from networkx import Graph, all_simple_paths, from_numpy_array, set_node_attributes
|
|
7
|
+
from scipy.spatial.distance import cdist
|
|
8
|
+
|
|
9
|
+
from prism_pruner.algebra import dihedral, norm_of
|
|
10
|
+
from prism_pruner.pt import pt
|
|
11
|
+
from prism_pruner.typing import Array1D_bool, Array1D_int, Array2D_float
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@lru_cache()
def d_min_bond(a1: int, a2: int, factor: float = 1.2) -> float:
    """
    Return the distance below which two atoms are treated as bonded.

    :param a1: key of the first atom in the periodic table ``pt``
    :param a2: key of the second atom in the periodic table ``pt``
    :param factor: multiplier applied to the sum of the two covalent radii
    :return: ``factor`` times the sum of the covalent radii
    """
    radii_sum = pt[a1].covalent_radius + pt[a2].covalent_radius
    return factor * radii_sum  # type: ignore [no-any-return]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def graphize(
    atoms: Array1D_int,
    coords: Array2D_float,
    mask: Array1D_bool | None = None,
) -> Graph:
    """
    Return a NetworkX undirected graph of molecular connectivity.

    Two unmasked atoms are connected when their distance is below
    ``d_min_bond`` for that pair of elements. Every node gets an ``"atomnos"``
    attribute holding its entry of ``atoms``.

    :param atoms: atomic numbers
    :param coords: atomic coordinates as 3D vectors
    :param mask: bool array, with False for atoms to be excluded in the bond evaluation
    :return: connectivity graph
    """
    if mask is None:
        mask = np.ones(len(atoms), dtype=bool)
    assert len(coords) == len(atoms)
    assert len(coords) == len(mask)

    n_atoms = len(coords)
    adjacency = np.zeros((n_atoms, n_atoms))

    # Only atoms flagged True in the mask take part in the bond search.
    active = [idx for idx, keep in enumerate(mask) if keep]

    # Fill the upper triangle only: each pair is evaluated once, with i < j.
    for position, i in enumerate(active):
        for j in active[position + 1 :]:
            if norm_of(coords[i] - coords[j]) < d_min_bond(atoms[i], atoms[j]):
                adjacency[i][j] = 1

    graph = from_numpy_array(adjacency)
    set_node_attributes(graph, dict(enumerate(atoms)), "atomnos")

    return graph
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_sp_n(index: int, graph: Graph) -> int | None:
    """
    Get hybridization of selected atom.

    Return n, that is the apex of sp^n hybridization for CONPS atoms.
    This is just an assimilation to the carbon geometry in relation to sp^n:
    - sp¹ is linear
    - sp² is planar
    - sp³ is tetrahedral
    This is mainly used to understand if a torsion is to be rotated or not.

    :return: n, or None when the element is not C/N/O/P/S or when the number
        of neighbors has no (unambiguous) entry in the lookup table
    """
    # element -> {number of neighbors -> n of sp^n}
    hybridization_table: dict[int, dict[int, int | None]] = {
        6: {2: 1, 3: 2, 4: 3},  # C - 2 neighbors means sp, 3 nb means sp2, 4 nb sp3
        7: {2: 2, 3: None, 4: 3},  # N - 2 neighbors means sp2, 3 nb could mean sp3 or sp2, 4 nb sp3
        8: {1: 2, 2: 3, 3: 3, 4: 3},  # O
        15: {2: 2, 3: 3, 4: 3},  # P - like N
        16: {2: 2, 3: 3, 4: 3},  # S
    }

    element = graph.nodes[index]["atomnos"]
    by_neighbor_count = hybridization_table.get(element)
    if by_neighbor_count is None:
        return None

    neighbor_count = len(set(graph.neighbors(index)))
    return by_neighbor_count.get(neighbor_count)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def is_amide_n(index: int, graph: Graph, mode: int = -1) -> bool:
    """
    Assess if the atom is an amide-like nitrogen.

    The atom must be a nitrogen bonded to at least one carbon that has exactly
    three neighbors, one of which is an oxygen.

    Note: carbamates and ureas are considered amides.

    mode:
    -1 - any amide
    0 - primary amide (CONH2)
    1 - secondary amide (CONHR)
    2 - tertiary amide (CONR2)
    """
    # Must be a nitrogen atom
    if graph.nodes[index]["atomnos"] != 7:
        return False

    neighbors = set(graph.neighbors(index))
    neighbor_elements = [graph.nodes[j]["atomnos"] for j in neighbors]

    if mode != -1:
        # Hydrogens required on N: two for primary, one for secondary, none for tertiary
        required_hydrogens = (2, 1, 0)[mode]
        if neighbor_elements.count(1) != required_hydrogens:
            return False

    for neighbor in neighbors:
        # Only carbon neighbors can be the carbonyl-like carbon
        if graph.nodes[neighbor]["atomnos"] != 6:
            continue

        carbon_neighbors = set(graph.neighbors(neighbor))
        # The carbon must be bonded to three atoms, at least one of them an oxygen
        if len(carbon_neighbors) == 3:
            if 8 in {graph.nodes[i]["atomnos"] for i in carbon_neighbors}:
                return True

    return False
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def is_ester_o(index: int, graph: Graph) -> bool:
    """
    Assess if the index is an ester-like oxygen.

    The atom must be an oxygen with no hydrogen attached, bonded to a carbon
    that has exactly three neighbors, at least two of which are oxygens.

    Note: carbamates and carbonates return True, carboxylic acids return False.

    :param index: node index of the atom to test
    :param graph: connectivity graph whose nodes carry an ``"atomnos"`` attribute
    :return: True if the atom looks like an ester oxygen
    """
    if graph.nodes[index]["atomnos"] != 8:
        return False

    neighbors = set(graph.neighbors(index))

    # An O-H oxygen is not an ester oxygen. The test must look at the neighbors'
    # elements: neighbors are node indices, so checking for the value 1 among
    # them would ask "is node 1 bonded?" rather than "is a hydrogen bonded?".
    if any(graph.nodes[n]["atomnos"] == 1 for n in neighbors):
        return False

    for n in neighbors:
        if graph.nodes[n]["atomnos"] != 6:
            continue

        carbon_neighbors = set(graph.neighbors(n))
        if len(carbon_neighbors) == 3:
            carbon_neighbor_elements = [graph.nodes[i]["atomnos"] for i in carbon_neighbors]
            # Two oxygens on the same three-coordinate carbon: O=C-O pattern
            if carbon_neighbor_elements.count(8) > 1:
                return True

    return False
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def is_phenyl(coords: Array2D_float) -> bool:
    """
    Assess if the six atomic coords refer to a phenyl-like ring.

    Two purely geometric checks are made: no pair of points may be more than
    3 apart (Å, per the original comment), and the dihedral of the first four points must be
    within 10 degrees of planar (close to 0 or 180).

    Note: quinones evaluate to True

    :param coords: six coordinates of C/N atoms
    :return: bool indicating if the six atoms look like part of a
        phenyl/naphthyl/pyridine system
    """
    # if any atomic couple is more than 3 A away from each other, this is not a Ph
    pairwise_distances = cdist(coords, coords)
    if np.max(pairwise_distances) > 3:
        return False

    torsion = dihedral(coords[[0, 1, 2, 3]])

    # Deviation of |cos(torsion)| from 1, compared against the value for 10 degrees.
    flat_delta: float = 1 - np.abs(np.cos(torsion * np.pi / 180))
    threshold_delta: float = 1 - np.cos(10 * np.pi / 180)

    return flat_delta < threshold_delta
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def get_phenyl_ids(index: int, graph: Graph) -> list[int] | None:
    """
    If index is part of a phenyl, return the six heavy atoms ids associated with the ring.

    A ring is accepted when a six-node simple path links ``index`` to one of its
    own neighbors, contains no hydrogens, and every node on it has exactly
    three neighbors. The first such path found is returned, otherwise None.
    """
    for neighbor in graph.neighbors(index):
        for ring in all_simple_paths(graph, source=index, target=neighbor, cutoff=6):
            six_heavy_atoms = len(ring) == 6 and not any(
                graph.nodes[atom]["atomnos"] == 1 for atom in ring
            )
            if six_heavy_atoms and all(len(set(graph.neighbors(atom))) == 3 for atom in ring):
                return ring  # type: ignore [no-any-return]

    return None
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def find_paths(
    graph: Graph,
    u: int,
    n: int,
    exclude_set: set[int] | None = None,
) -> list[list[int]]:
    """
    Find paths in graph.

    Recursively collect every simple path made of n steps (n + 1 nodes) that
    starts from node u and avoids the excluded nodes.

    :param graph: NetworkX graph
    :param u: starting node
    :param n: path length
    :param exclude_set: set of nodes to exclude from the paths
    :return: list of paths (each path is a list of node indices)
    """
    # Build a new set so the caller's exclude_set is never modified.
    visited = (exclude_set or set()) | {u}

    if n == 0:
        return [[u]]

    paths = []
    for neighbor in graph.neighbors(u):
        if neighbor in visited:
            continue
        for tail in find_paths(graph, neighbor, n - 1, visited):
            paths.append([u, *tail])
    return paths
|