prism-pruner 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of prism-pruner might be problematic. Click here for more details.
- prism_pruner/__init__.py +1 -0
- prism_pruner/algebra.py +190 -0
- prism_pruner/graph_manipulations.py +194 -0
- prism_pruner/pruner.py +571 -0
- prism_pruner/pt.py +12 -0
- prism_pruner/rmsd.py +39 -0
- prism_pruner/torsion_module.py +468 -0
- prism_pruner/typing.py +15 -0
- prism_pruner/utils.py +246 -0
- prism_pruner-0.0.1.dist-info/METADATA +28 -0
- prism_pruner-0.0.1.dist-info/RECORD +14 -0
- prism_pruner-0.0.1.dist-info/WHEEL +5 -0
- prism_pruner-0.0.1.dist-info/licenses/LICENSE +21 -0
- prism_pruner-0.0.1.dist-info/top_level.txt +1 -0
prism_pruner/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""PRISM - PRuning Interface for Similar Molecules."""
|
prism_pruner/algebra.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
"""Algebra utilities."""
|
|
2
|
+
|
|
3
|
+
from typing import Sequence
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from prism_pruner.typing import Array1D_float, Array1D_int, Array2D_float, Array3D_float
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def norm(vec: Array1D_int) -> Array1D_int:
    """
    Scale a 3D vector to unit length.

    Only the first three components are used to compute the length.

    :param vec: 3D vector to normalize
    :return: ``vec`` divided by its Euclidean length
    """
    x, y, z = vec[0], vec[1], vec[2]
    length = np.sqrt(x * x + y * y + z * z)
    return vec / length  # type: ignore[no-any-return]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def norm_of(vec: Array1D_int) -> float:
    """
    Return the Euclidean length of a 3D vector.

    Only the first three components are read.

    :param vec: 3D vector
    :return: length of ``vec`` as a Python float
    """
    x, y, z = vec[0], vec[1], vec[2]
    squared_length = x * x + y * y + z * z
    return float(np.sqrt(squared_length))
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def vec_angle(v1: Array1D_int, v2: Array1D_int) -> float:
    """
    Return the planar angle, in degrees, between two 3D vectors.

    :param v1: first 3D vector
    :param v2: second 3D vector
    :return: angle between the two vectors in degrees
    """
    cosine = np.dot(norm(v1), norm(v2))
    # clamp to the domain accepted by arccos
    cosine = np.clip(cosine, -1.0, 1.0)
    angle_rad = np.arccos(cosine)
    return float(np.degrees(angle_rad))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def dihedral(p: Array2D_float) -> float:
    """
    Find the dihedral angle in degrees defined by four 3D points.

    Praxeolitic formula: 1 sqrt, 1 cross product.

    :param p: four 3D points (anything convertible to a (4, 3) float array,
        integer coordinates included)
    :return: signed dihedral angle in degrees, in the range [-180, 180]
    """
    # work on floats so that integer coordinates do not break the division below
    p0, p1, p2, p3 = np.asarray(p, dtype=float)

    b0 = -1.0 * (p1 - p0)
    b1 = p2 - p1
    b2 = p3 - p2

    # normalize b1 so that it does not influence the magnitude of the vector
    # rejections that come next (out-of-place: never writes into a view of p)
    b1 = b1 / np.linalg.norm(b1)

    # vector rejections
    # v = projection of b0 onto plane perpendicular to b1
    #   = b0 minus component that aligns with b1
    # w = projection of b2 onto plane perpendicular to b1
    #   = b2 minus component that aligns with b1
    v = b0 - np.dot(b0, b1) * b1
    w = b2 - np.dot(b2, b1) * b1

    # angle between v and w in a plane is the torsion angle
    # v and w may not be normalized but that's fine since tan is y/x
    x = np.dot(v, w)
    y = np.dot(np.cross(b1, v), w)

    return float(np.degrees(np.arctan2(y, x)))
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def rot_mat_from_pointer(pointer: Array1D_int, angle: float) -> Array2D_float:
    """
    Build the matrix for a rotation of ``angle`` degrees about ``pointer``.

    The rotation is first written as a quaternion (vector part first, scalar
    part last) and then converted with ``quaternion_to_rotation_matrix``.

    :param pointer: 3D vector representing the rotation pivot
    :param angle: rotation angle in degrees
    :return rotation_matrix: matrix that applied to a point, rotates it along the pointer
    """
    assert pointer.shape[0] == 3

    half_angle = np.radians(angle) / 2
    sin_half = np.sin(half_angle)
    cos_half = np.cos(half_angle)
    axis = norm(pointer)

    quaternion = [
        sin_half * axis[0],
        sin_half * axis[1],
        sin_half * axis[2],
        cos_half,
    ]
    return quaternion_to_rotation_matrix(quaternion)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def quaternion_to_rotation_matrix(quat: Array1D_float | Sequence[float]) -> Array2D_float:
    """
    Convert a quaternion into a full three-dimensional rotation matrix.

    This rotation matrix converts a point in the local reference frame to a
    point in the global reference frame.

    :param quat: 4-element quaternion given scalar-last, i.e. the three vector
        components followed by the scalar component
    :return: 3x3 element array representing the full 3D rotation matrix
    """
    # scalar-last input: vector part (x, y, z) first, scalar part (w) last
    x, y, z, w = quat

    ww = w * w
    first_row = [2 * (ww + x * x) - 1, 2 * (x * y - w * z), 2 * (x * z + w * y)]
    second_row = [2 * (x * y + w * z), 2 * (ww + y * y) - 1, 2 * (y * z - w * x)]
    third_row = [2 * (x * z - w * y), 2 * (y * z + w * x), 2 * (ww + z * z) - 1]

    return np.array([first_row, second_row, third_row])
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def kronecker_delta(i: int, j: int) -> int:
    """
    Kronecker delta.

    :param i: first index
    :param j: second index
    :return: 1 when the two indices are equal, 0 otherwise
    """
    return 1 if i == j else 0
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def get_inertia_moments(coords: Array3D_float, masses: Array1D_float) -> Array1D_float:
    """
    Find the moments of inertia of the three principal axes.

    The coordinates are shifted to the center-of-mass frame on a private
    copy, so the array passed in by the caller is never modified.

    :param coords: atomic coordinates, one 3D vector per atom
    :param masses: atomic masses; only the first ``len(coords)`` entries are used
    :return: diagonal of the diagonalized inertia tensor, that is
        a shape (3,) array with the moments of inertia along the main axes,
        ordered by increasing magnitude (I_x, I_y and largest I_z last)
    """
    # np.array copies: centering below must not leak into the caller's data
    centered = np.array(coords, dtype=float)
    weights = np.asarray(masses, dtype=float)[: len(centered)]

    # move the origin to the center of mass
    centered -= weights @ centered / weights.sum()

    # I = sum_n m_n * (|r_n|^2 * Id - r_n r_n^T)
    squared_radii = np.einsum("ij,ij->i", centered, centered)
    weighted_outer = (centered * weights[:, None]).T @ centered
    inertia_tensor = np.dot(weights, squared_radii) * np.eye(3) - weighted_outer

    # the tensor is symmetric, so eigvalsh yields its real eigenvalues directly
    moments = np.linalg.eigvalsh(inertia_tensor)

    return moments[np.abs(moments).argsort()]
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def diagonalize(a: Array2D_float) -> Array2D_float:
    """
    Build the diagonalized matrix.

    The eigenvectors of ``a`` are ordered by increasing absolute eigenvalue
    and used as the basis of a similarity transform.

    :param a: square matrix to diagonalize
    :return: ``inv(b) . a . b``, with ``b`` holding the sorted eigenvectors as columns
    """
    eigenvalues, eigenvectors = np.linalg.eig(a)
    order = np.abs(eigenvalues).argsort()
    basis = eigenvectors[:, order]

    inverse_basis = np.linalg.inv(basis)
    a_in_basis = np.dot(a, basis)
    return np.dot(inverse_basis, a_in_basis)  # type: ignore[no-any-return]
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def center_of_mass(coords: Array3D_float, masses: Array1D_float) -> Array1D_float:
    """
    Find the center of mass for the atomic system.

    :param coords: atomic coordinates, one 3D vector per atom
    :param masses: atomic masses; only the first ``len(coords)`` entries are used
    :return: mass-weighted mean position of the atoms
    """
    positions = np.asarray(coords, dtype=float)
    # masses may be longer than coords: pair them up by position
    weights = np.asarray(masses, dtype=float)[: len(positions)]
    return weights @ positions / weights.sum()  # type: ignore[no-any-return]
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def get_moi_deviation_vec(
    coords1: Array2D_float, coords2: Array2D_float, masses: Array1D_float
) -> Array1D_float:
    """
    Determine the relative difference of the three principal axes moments of inertia.

    :param coords1: coordinates of the first structure, used as the reference
    :param coords2: coordinates of the second structure
    :param masses: atomic masses, used for both structures
    :return: element-wise ``|I_1 - I_2| / I_1`` of the principal moments
    """
    reference_moments = get_inertia_moments(coords1, masses)
    compared_moments = get_inertia_moments(coords2, masses)

    absolute_deviation = np.abs(reference_moments - compared_moments)
    return absolute_deviation / reference_moments
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def get_alignment_matrix(p: Array1D_float, q: Array1D_float) -> Array2D_float:
    """
    Build the rotation matrix that aligns vectors q to p (Kabsch algorithm).

    Assumes centered vector sets (i.e. their mean is the origin).

    :param p: first set of vectors, one vector per row
    :param q: second set of vectors, one vector per row
    :return: product of the (reflection-corrected) left singular vectors and
        the transposed right singular vectors of the covariance matrix
    """
    covariance = np.ascontiguousarray(p.T) @ q

    left, _, right_t = np.linalg.svd(covariance)

    # a negative determinant product means the product below would be a
    # reflection: flip the last left singular vector to get a proper rotation
    handedness = np.linalg.det(left) * np.linalg.det(right_t)
    if handedness < 0.0:
        left[:, -1] *= -1.0

    return np.dot(left, right_t)  # type: ignore[no-any-return]
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""Graph manipulation utilities for molecular structures."""
|
|
2
|
+
|
|
3
|
+
from functools import lru_cache
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
from networkx import Graph, all_simple_paths, from_numpy_array, set_node_attributes
|
|
7
|
+
from scipy.spatial.distance import cdist
|
|
8
|
+
|
|
9
|
+
from prism_pruner.algebra import dihedral, norm_of
|
|
10
|
+
from prism_pruner.pt import pt
|
|
11
|
+
from prism_pruner.typing import Array1D_bool, Array1D_int, Array2D_float
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@lru_cache()
def d_min_bond(a1: int, a2: int, factor: float = 1.2) -> float:
    """
    Return the bond distance between two atoms.

    The value is the sum of the two covalent radii looked up in ``pt``,
    scaled by ``factor``.

    :param a1: key of the first atom in ``pt``
    :param a2: key of the second atom in ``pt``
    :param factor: multiplier applied to the sum of the covalent radii
    :return: scaled sum of the two covalent radii
    """
    radii_sum = pt[a1].covalent_radius + pt[a2].covalent_radius
    return factor * radii_sum  # type: ignore [no-any-return]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def graphize(
    atoms: Array1D_int,
    coords: Array2D_float,
    mask: Array1D_bool | None = None,
) -> Graph:
    """
    Return a NetworkX undirected graph of molecular connectivity.

    Two unmasked atoms are connected when their distance is below the
    value returned by ``d_min_bond`` for that pair. Each node stores its
    entry of ``atoms`` under the "atomnos" attribute.

    :param atoms: atomic numbers
    :param coords: atomic coordinates as 3D vectors
    :param mask: bool array, with False for atoms to be excluded in the bond evaluation
    :return: connectivity graph
    """
    if mask is None:
        mask = np.ones(len(atoms), dtype=bool)
    assert len(coords) == len(atoms)
    assert len(coords) == len(mask)

    n_atoms = len(coords)
    adjacency = np.zeros((n_atoms, n_atoms))

    # indices of the atoms taking part in the bond evaluation, in order
    active = [idx for idx, keep in enumerate(mask) if keep]

    # visit every unordered pair once: only the upper triangle gets filled
    for position, i in enumerate(active):
        for j in active[position + 1 :]:
            distance = norm_of(coords[i] - coords[j])
            if distance < d_min_bond(atoms[i], atoms[j]):
                adjacency[i, j] = 1

    graph = from_numpy_array(adjacency)
    set_node_attributes(graph, dict(enumerate(atoms)), "atomnos")

    return graph
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_sp_n(index: int, graph: Graph) -> int | None:
    """
    Get hybridization of selected atom.

    Return n, that is the apex of sp^n hybridization for CONPS atoms.
    This is just an assimilation to the carbon geometry in relation to sp^n:
    - sp¹ is linear
    - sp² is planar
    - sp³ is tetrahedral
    This is mainly used to understand if a torsion is to be rotated or not.

    :param index: node index of the atom in ``graph``
    :param graph: connectivity graph whose nodes carry an "atomnos" attribute
    :return: n of sp^n, or None when the element is not C/N/O/P/S or the
        neighbor count has no (unambiguous) entry in the table
    """
    # atomic number -> {number of neighbors: n of sp^n}
    hybridization_table: dict[int, dict[int, int | None]] = {
        6: {2: 1, 3: 2, 4: 3},  # C - 2 neighbors means sp, 3 nb means sp2, 4 nb sp3
        7: {2: 2, 3: None, 4: 3},  # N - 2 neighbors means sp2, 3 nb could mean sp3 or sp2, 4 nb sp3
        8: {1: 2, 2: 3, 3: 3, 4: 3},  # O
        15: {2: 2, 3: 3, 4: 3},  # P - like N
        16: {2: 2, 3: 3, 4: 3},  # S
    }

    by_neighbor_count = hybridization_table.get(graph.nodes[index]["atomnos"])
    if by_neighbor_count is None:
        return None

    neighbor_count = len(set(graph.neighbors(index)))
    return by_neighbor_count.get(neighbor_count)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def is_amide_n(index: int, graph: Graph, mode: int = -1) -> bool:
    """
    Assess if the atom is an amide-like nitrogen.

    Note: carbamates and ureas are considered amides.

    mode:
    -1 - any amide
    0 - primary amide (CONH2)
    1 - secondary amide (CONHR)
    2 - tertiary amide (CONR2)

    :param index: node index of the atom in ``graph``
    :param graph: connectivity graph whose nodes carry an "atomnos" attribute
    :param mode: amide class to look for, see above
    :return: True if the atom is a nitrogen bonded to a three-coordinate
        carbon that bears an oxygen (and has the hydrogen count required by ``mode``)
    """
    # Must be a nitrogen atom
    if graph.nodes[index]["atomnos"] != 7:
        return False

    neighbors = set(graph.neighbors(index))
    neighbor_numbers = [graph.nodes[j]["atomnos"] for j in neighbors]

    # Primary amides carry two H on nitrogen, secondary one, tertiary none
    if mode != -1 and neighbor_numbers.count(1) != (2, 1, 0)[mode]:
        return False

    for neighbor in neighbors:
        # There must be at least one carbon atom next to N
        if graph.nodes[neighbor]["atomnos"] != 6:
            continue

        # ... bonded to three atoms
        carbon_neighbors = set(graph.neighbors(neighbor))
        if len(carbon_neighbors) != 3:
            continue

        # ... and at least one of them has to be an oxygen
        if any(graph.nodes[k]["atomnos"] == 8 for k in carbon_neighbors):
            return True

    return False
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def is_ester_o(index: int, graph: Graph) -> bool:
    """
    Assess if the index is an ester-like oxygen.

    Note: carbamates and carbonates return True, carboxylic acids return False.

    :param index: node index of the atom in ``graph``
    :param graph: connectivity graph whose nodes carry an "atomnos" attribute
    :return: True if the atom is an oxygen without hydrogens that is bonded
        to a three-coordinate carbon bearing at least two oxygens
    """
    if graph.nodes[index]["atomnos"] != 8:
        return False

    neighbors = set(graph.neighbors(index))

    # An oxygen carrying a hydrogen is a hydroxyl, not an ester oxygen.
    # The check must look at atomic numbers: `neighbors` holds node indices.
    if any(graph.nodes[n]["atomnos"] == 1 for n in neighbors):
        return False

    for n in neighbors:
        if graph.nodes[n]["atomnos"] != 6:
            continue

        carbon_neighbors = set(graph.neighbors(n))
        if len(carbon_neighbors) != 3:
            continue

        # the carbon must bear a second oxygen besides the one being tested
        carbon_neighbor_numbers = [graph.nodes[i]["atomnos"] for i in carbon_neighbors]
        if carbon_neighbor_numbers.count(8) > 1:
            return True

    return False
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def is_phenyl(coords: Array2D_float) -> bool:
    """
    Assess if the six atomic coords refer to a phenyl-like ring.

    Two geometric checks are made: no pair of atoms may be more than 3 A
    apart, and the dihedral defined by the first four atoms must be within
    10 degrees of planar (0 or 180 degrees).

    Note: quinones evaluate to True

    :params coords: six coordinates of C/N atoms
    :return: bool indicating if the six atoms look like part of a phenyl/naphtyl/pyridine
        system
    """
    # if any atomic couple is more than 3 A away from each other, this is not a Ph
    largest_distance = np.max(cdist(coords, coords))
    if largest_distance > 3:
        return False

    # 1 - |cos| is zero for a perfectly flat torsion and grows with the deviation
    torsion_rad = dihedral(coords[[0, 1, 2, 3]]) * np.pi / 180
    planarity_error: float = 1 - np.abs(np.cos(torsion_rad))
    tolerance: float = 1 - np.cos(10 * np.pi / 180)

    return planarity_error < tolerance
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def get_phenyl_ids(index: int, graph: Graph) -> list[int] | None:
    """
    If index is part of a phenyl, return the six heavy atoms ids associated with the ring.

    A ring is accepted when a simple path of six nodes leads from ``index``
    to one of its neighbors, none of the six nodes is a hydrogen and each of
    them has exactly three neighbors.

    :param index: node index of the atom in ``graph``
    :param graph: connectivity graph whose nodes carry an "atomnos" attribute
    :return: node indices of the first matching six-membered path, starting
        at ``index``, or None if there is none
    """
    for neighbor in graph.neighbors(index):
        # a six-node path has five edges: a deeper search would only produce
        # longer paths that are discarded by the length check below
        for path in all_simple_paths(graph, source=index, target=neighbor, cutoff=5):
            if len(path) != 6:
                continue
            if any(graph.nodes[i]["atomnos"] == 1 for i in path):
                continue
            if all(len(set(graph.neighbors(i))) == 3 for i in path):
                return path  # type: ignore [no-any-return]

    return None
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def find_paths(
    graph: Graph,
    u: int,
    n: int,
    exclude_set: set[int] | None = None,
) -> list[list[int]]:
    """
    Find paths in graph.

    Recursively find all paths of a NetworkX graph with length = n, starting from node u.
    The length is counted in steps, so every returned path holds n + 1 nodes.

    :param graph: NetworkX graph
    :param u: starting node
    :param n: path length
    :param exclude_set: set of nodes to exclude from the paths
    :return: list of paths (each path is a list of node indices)
    """
    # the current node may not be visited again further down the path
    visited = (exclude_set or set()) | {u}

    if n == 0:
        return [[u]]

    paths: list[list[int]] = []
    for neighbor in graph.neighbors(u):
        if neighbor in visited:
            continue
        for tail in find_paths(graph, neighbor, n - 1, visited):
            paths.append([u, *tail])
    return paths
|