RNApolis 0.9.2__py3-none-any.whl → 0.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rnapolis/distiller.py +1119 -0
- rnapolis/parser.py +7 -0
- rnapolis/tertiary_v2.py +482 -18
- {rnapolis-0.9.2.dist-info → rnapolis-0.10.1.dist-info}/METADATA +4 -1
- {rnapolis-0.9.2.dist-info → rnapolis-0.10.1.dist-info}/RECORD +9 -8
- {rnapolis-0.9.2.dist-info → rnapolis-0.10.1.dist-info}/entry_points.txt +1 -0
- {rnapolis-0.9.2.dist-info → rnapolis-0.10.1.dist-info}/WHEEL +0 -0
- {rnapolis-0.9.2.dist-info → rnapolis-0.10.1.dist-info}/licenses/LICENSE +0 -0
- {rnapolis-0.9.2.dist-info → rnapolis-0.10.1.dist-info}/top_level.txt +0 -0
rnapolis/parser.py
CHANGED
@@ -18,6 +18,9 @@ def read_3d_structure(
|
|
18
18
|
atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity = (
|
19
19
|
parse_cif(cif_or_pdb) if is_cif(cif_or_pdb) else parse_pdb(cif_or_pdb)
|
20
20
|
)
|
21
|
+
if not atoms:
|
22
|
+
logger.warning("No atoms parsed from file, returning empty Structure3D.")
|
23
|
+
return Structure3D([])
|
21
24
|
available_models = {atom.model: None for atom in atoms}
|
22
25
|
atoms_by_model = {
|
23
26
|
model: list(filter(lambda atom: atom.model == model, atoms))
|
@@ -439,6 +442,10 @@ def filter_clashing_atoms(atoms: List[Atom], clash_distance: float = 0.5) -> Lis
|
|
439
442
|
|
440
443
|
unique_atoms_list = list(unique_atoms.values())
|
441
444
|
|
445
|
+
# If there are zero or one atoms, no clashes can occur
|
446
|
+
if len(unique_atoms_list) <= 1:
|
447
|
+
return unique_atoms_list
|
448
|
+
|
442
449
|
# Now handle clashing atoms
|
443
450
|
coords = np.array([(atom.x, atom.y, atom.z) for atom in unique_atoms_list])
|
444
451
|
tree = KDTree(coords)
|
rnapolis/tertiary_v2.py
CHANGED
@@ -1,13 +1,68 @@
|
|
1
1
|
import string
|
2
2
|
from functools import cached_property
|
3
|
-
from typing import List, Optional
|
3
|
+
from typing import List, Optional, Tuple
|
4
4
|
|
5
5
|
import numpy as np
|
6
6
|
import pandas as pd
|
7
7
|
|
8
|
+
from rnapolis.parser_v2 import parse_cif_atoms, write_cif
|
9
|
+
|
8
10
|
# Constants
|
9
11
|
AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT = 1.6
|
10
12
|
|
13
|
+
# Atom-name sets describing which atoms each standard nucleotide contains.

# Sugar-phosphate backbone of a DNA nucleotide.
BACKBONE_DEOXYRIBOSE_ATOMS = {
    "P",
    "O5'",
    "C5'",
    "C4'",
    "O4'",
    "C3'",
    "O3'",
    "C2'",
    "C1'",
}
# The RNA backbone is the DNA backbone plus the 2'-hydroxyl oxygen.
BACKBONE_RIBOSE_ATOMS = BACKBONE_DEOXYRIBOSE_ATOMS | {"O2'"}

# Ring atoms shared by every purine / every pyrimidine base.
PURINE_CORE_ATOMS = {"N9", "C8", "N7", "C5", "C6", "N1", "C2", "N3", "C4"}
PYRIMIDINE_CORE_ATOMS = {"N1", "C2", "N3", "C4", "C5", "C6"}

# RNA nucleotides: ribose backbone + base ring + base-specific substituents
ATOMS_A = BACKBONE_RIBOSE_ATOMS | PURINE_CORE_ATOMS | {"N6"}
ATOMS_G = BACKBONE_RIBOSE_ATOMS | PURINE_CORE_ATOMS | {"O6"}
ATOMS_C = BACKBONE_RIBOSE_ATOMS | PYRIMIDINE_CORE_ATOMS | {"N4", "O2"}
ATOMS_U = BACKBONE_RIBOSE_ATOMS | PYRIMIDINE_CORE_ATOMS | {"O4", "O2"}

# DNA nucleotides: deoxyribose backbone + base ring + base-specific substituents
ATOMS_DA = BACKBONE_DEOXYRIBOSE_ATOMS | PURINE_CORE_ATOMS | {"N6"}
ATOMS_DG = BACKBONE_DEOXYRIBOSE_ATOMS | PURINE_CORE_ATOMS | {"O6"}
ATOMS_DC = BACKBONE_DEOXYRIBOSE_ATOMS | PYRIMIDINE_CORE_ATOMS | {"N4", "O2"}
ATOMS_DT = BACKBONE_DEOXYRIBOSE_ATOMS | PYRIMIDINE_CORE_ATOMS | {"O4", "O2", "C7"}

# Residue names grouped by base class
PURINES = {"A", "G", "DA", "DG"}
PYRIMIDINES = {"C", "U", "DC", "DT"}

# Lookup from residue name to its full atom-name set
RESIDUE_ATOMS_MAP = {
    "A": ATOMS_A,
    "G": ATOMS_G,
    "C": ATOMS_C,
    "U": ATOMS_U,
    "DA": ATOMS_DA,
    "DG": ATOMS_DG,
    "DC": ATOMS_DC,
    "DT": ATOMS_DT,
}
|
65
|
+
|
11
66
|
|
12
67
|
def calculate_torsion_angle(
|
13
68
|
a1: np.ndarray, a2: np.ndarray, a3: np.ndarray, a4: np.ndarray
|
@@ -56,6 +111,407 @@ def calculate_torsion_angle(
|
|
56
111
|
return angle
|
57
112
|
|
58
113
|
|
114
|
+
def _select_atoms_to_match(res_name1: str, res_name2: str) -> set:
    """Return the atom names that may be compared for a pair of residue names."""
    if res_name1 == res_name2:
        atoms = RESIDUE_ATOMS_MAP.get(res_name1)
        # Residue names missing from the map fall back to the minimal backbone
        return BACKBONE_DEOXYRIBOSE_ATOMS if atoms is None else atoms

    # If at least one residue is DNA, O2' cannot be matched
    if res_name1.startswith("D") or res_name2.startswith("D"):
        backbone = BACKBONE_DEOXYRIBOSE_ATOMS
    else:
        backbone = BACKBONE_RIBOSE_ATOMS

    if res_name1 in PURINES and res_name2 in PURINES:
        return backbone | PURINE_CORE_ATOMS
    if res_name1 in PYRIMIDINES and res_name2 in PYRIMIDINES:
        return backbone | PYRIMIDINE_CORE_ATOMS
    # Different base classes (or unknown residues): only the backbone is common
    return backbone


def _residue_atoms_as_cif(residue: "Residue") -> pd.DataFrame:
    """Return the residue's atom table with mmCIF columns, converting if needed."""
    if residue.format == "mmCIF":
        return residue.atoms
    return parse_cif_atoms(write_cif(residue.atoms))


def find_paired_coordinates(
    residues1: List["Residue"], residues2: List["Residue"]
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Find paired coordinates of matching atoms between two lists of residues.

    Residues are paired positionally (zip), so residues beyond the length of
    the shorter list are ignored. For each pair, the atoms to compare are
    chosen from the residue names: the full atom set for identical names,
    backbone plus base ring for two purines or two pyrimidines, and backbone
    only otherwise. Atoms are then matched by name with an inner merge.

    Parameters:
    -----------
    residues1 : List[Residue]
        List of residues from the first structure
    residues2 : List[Residue]
        List of residues from the second structure

    Returns:
    --------
    Tuple[np.ndarray, np.ndarray]
        Tuple of two numpy arrays containing coordinates of matching atom pairs

    Raises:
    -------
    ValueError
        If a residue pair has no common atom identifier column, or if no
        matching atom pairs were found at all
    """
    coordinate_columns = ["Cartn_x", "Cartn_y", "Cartn_z"]
    all_paired_dfs = []

    for residue1, residue2 in zip(residues1, residues2):
        atoms_to_match = _select_atoms_to_match(
            residue1.residue_name, residue2.residue_name
        )

        df1 = _residue_atoms_as_cif(residue1)
        df2 = _residue_atoms_as_cif(residue2)

        if "auth_atom_id" in df1.columns and "auth_atom_id" in df2.columns:
            atom_column = "auth_atom_id"
        elif "label_atom_id" in df1.columns and "label_atom_id" in df2.columns:
            atom_column = "label_atom_id"
        else:
            raise ValueError(
                "No suitable atom identifier column found in the provided residues."
            )

        wanted_columns = [atom_column] + coordinate_columns
        df1_filtered = df1.loc[df1[atom_column].isin(atoms_to_match), wanted_columns]
        df2_filtered = df2.loc[df2[atom_column].isin(atoms_to_match), wanted_columns]

        paired_df = pd.merge(
            df1_filtered,
            df2_filtered,
            on=atom_column,
            suffixes=("_1", "_2"),
        )

        if not paired_df.empty:
            all_paired_dfs.append(paired_df)

    if not all_paired_dfs:
        # pd.concat would raise an opaque "No objects to concatenate" here
        raise ValueError(
            "No matching atom pairs found between the provided residues."
        )

    final_df = pd.concat(all_paired_dfs, ignore_index=True)
    coords_1 = final_df[["Cartn_x_1", "Cartn_y_1", "Cartn_z_1"]].to_numpy()
    coords_2 = final_df[["Cartn_x_2", "Cartn_y_2", "Cartn_z_2"]].to_numpy()
    return coords_1, coords_2
|
206
|
+
|
207
|
+
|
208
|
+
def rmsd_quaternions(coords1: np.ndarray, coords2: np.ndarray) -> float:
    """
    Calculates the minimal RMSD between two point sets using the Quaternion method.

    Both sets are centered on their centroids, a symmetric 4x4 matrix is built
    from their 3x3 covariance matrix, and the RMSD is derived from the largest
    eigenvalue of that matrix.

    Parameters:
    -----------
    coords1 : np.ndarray
        Nx3 array of coordinates for the first structure
    coords2 : np.ndarray
        Nx3 array of coordinates for the second structure

    Returns:
    --------
    float
        RMSD after optimal superposition
    """
    # Move both point sets so that their centroids sit at the origin
    first = coords1 - np.mean(coords1, axis=0)
    second = coords2 - np.mean(coords2, axis=0)

    # 3x3 covariance matrix between the centered sets
    cov = first.T @ second
    Sxx, Sxy, Sxz = cov[0, 0], cov[0, 1], cov[0, 2]
    Syx, Syy, Syz = cov[1, 0], cov[1, 1], cov[1, 2]
    Szx, Szy, Szz = cov[2, 0], cov[2, 1], cov[2, 2]

    # Symmetric 4x4 matrix written out row by row
    key_matrix = np.array(
        [
            [Sxx + Syy + Szz, Syz - Szy, Szx - Sxz, Sxy - Syx],
            [Syz - Szy, Sxx - Syy - Szz, Sxy + Syx, Sxz + Szx],
            [Szx - Sxz, Sxy + Syx, -Sxx + Syy - Szz, Syz + Szy],
            [Sxy - Syx, Sxz + Szx, Syz + Szy, -Sxx - Syy + Szz],
        ],
        dtype=float,
    )

    spectrum, _ = np.linalg.eigh(key_matrix)
    largest = np.max(spectrum)

    # Sum of squared distances of all points from their centroids
    total_spread = np.sum(first**2) + np.sum(second**2)

    # Minimal squared RMSD is (total_spread - 2 * largest eigenvalue) / N
    count = coords1.shape[0]
    squared = (total_spread - 2 * largest) / count

    # Clamp tiny negative values caused by floating point round-off
    return np.sqrt(max(0.0, squared))
|
255
|
+
|
256
|
+
|
257
|
+
def rmsd_svd(coords1: np.ndarray, coords2: np.ndarray) -> float:
    """
    Calculates the minimal RMSD between two point sets using SVD (Kabsch algorithm).

    The first set is centered, rotated onto the centered second set with the
    rotation obtained from the SVD of their cross-covariance matrix, and the
    RMSD of the remaining differences is returned.

    Parameters:
    -----------
    coords1 : np.ndarray
        Nx3 array of coordinates for the first structure
    coords2 : np.ndarray
        Nx3 array of coordinates for the second structure

    Returns:
    --------
    float
        RMSD after optimal superposition
    """
    # Center both point sets on their centroids
    moving = coords1 - np.mean(coords1, axis=0)
    target = coords2 - np.mean(coords2, axis=0)

    # Decompose the 3x3 cross-covariance matrix
    U, _, Vt = np.linalg.svd(moving.T @ target)

    # Candidate rotation; a negative determinant means it contains a reflection
    rotation = Vt.T @ U.T
    if np.linalg.det(rotation) < 0:
        # Flip the last right-singular vector to get a proper rotation
        Vt[-1, :] *= -1
        rotation = Vt.T @ U.T

    # Rotate the first set and measure what is left
    residual = moving @ rotation.T - target
    mean_square = np.sum(residual**2) / coords1.shape[0]
    return np.sqrt(mean_square)
|
298
|
+
|
299
|
+
|
300
|
+
def rmsd_qcp(coords1: np.ndarray, coords2: np.ndarray) -> float:
    """
    Calculates RMSD using the QCP (Quaternion Characteristic Polynomial) method.

    This implementation follows the BioPython QCP algorithm but uses np.linalg.eigh
    instead of Newton-Raphson for simplicity.

    Parameters:
    -----------
    coords1 : np.ndarray
        Nx3 array of coordinates for the first structure
    coords2 : np.ndarray
        Nx3 array of coordinates for the second structure

    Returns:
    --------
    float
        RMSD after optimal superposition
    """
    # Center coordinates at origin
    coords1_centered = coords1 - np.mean(coords1, axis=0)
    coords2_centered = coords2 - np.mean(coords2, axis=0)

    # Inner products of each centered set with itself. The sum of squared
    # entries equals trace(X @ X.T) but avoids building an N x N matrix.
    G1 = np.sum(coords2_centered * coords2_centered)
    G2 = np.sum(coords1_centered * coords1_centered)
    E0 = (G1 + G2) * 0.5

    # 3x3 cross-covariance matrix
    A = coords2_centered.T @ coords1_centered
    Sxx, Sxy, Sxz = A[0, 0], A[0, 1], A[0, 2]
    Syx, Syy, Syz = A[1, 0], A[1, 1], A[1, 2]
    Szx, Szy, Szz = A[2, 0], A[2, 1], A[2, 2]

    # Build the symmetric 4x4 K matrix (quaternion matrix)
    K = np.zeros((4, 4))
    K[0, 0] = Sxx + Syy + Szz
    K[0, 1] = K[1, 0] = Syz - Szy
    K[0, 2] = K[2, 0] = Szx - Sxz
    K[0, 3] = K[3, 0] = Sxy - Syx
    K[1, 1] = Sxx - Syy - Szz
    K[1, 2] = K[2, 1] = Sxy + Syx
    K[1, 3] = K[3, 1] = Szx + Sxz
    K[2, 2] = -Sxx + Syy - Szz
    K[2, 3] = K[3, 2] = Syz + Szy
    K[3, 3] = -Sxx - Syy + Szz

    # Largest eigenvalue of K
    eigenvalues, _ = np.linalg.eigh(K)
    max_eigenvalue = np.max(eigenvalues)

    # abs() guards against a tiny negative difference from round-off
    natoms = coords1.shape[0]
    rmsd_sq = (2.0 * abs(E0 - max_eigenvalue)) / natoms
    return np.sqrt(rmsd_sq)
|
354
|
+
|
355
|
+
|
356
|
+
def rmsd_to_nrmsd(rmsd: float, num_atoms: int) -> float:
    """
    Convert RMSD to normalized RMSD (nRMSD) by dividing by sqrt(num_atoms).

    Parameters:
    -----------
    rmsd : float
        Root Mean Square Deviation value
    num_atoms : int
        Number of atoms used in the RMSD calculation

    Returns:
    --------
    float
        Normalized RMSD value
    """
    normalizer = np.sqrt(num_atoms)
    return rmsd / normalizer
|
373
|
+
|
374
|
+
|
375
|
+
def nrmsd_quaternions(coords1: np.ndarray, coords2: np.ndarray) -> float:
    """
    Calculates nRMSD using the Quaternion method.

    The RMSD from rmsd_quaternions is normalized by the number of rows
    in coords1.

    Parameters:
    -----------
    coords1 : np.ndarray
        Nx3 array of coordinates for the first structure
    coords2 : np.ndarray
        Nx3 array of coordinates for the second structure

    Returns:
    --------
    float
        Normalized RMSD value
    """
    return rmsd_to_nrmsd(rmsd_quaternions(coords1, coords2), coords1.shape[0])
|
388
|
+
|
389
|
+
|
390
|
+
def nrmsd_svd(coords1: np.ndarray, coords2: np.ndarray) -> float:
    """
    Calculates nRMSD using SVD decomposition (Kabsch algorithm).

    The RMSD from rmsd_svd is normalized by the number of rows in coords1.

    Parameters:
    -----------
    coords1 : np.ndarray
        Nx3 array of coordinates for the first structure
    coords2 : np.ndarray
        Nx3 array of coordinates for the second structure

    Returns:
    --------
    float
        Normalized RMSD value
    """
    return rmsd_to_nrmsd(rmsd_svd(coords1, coords2), coords1.shape[0])
|
403
|
+
|
404
|
+
|
405
|
+
def nrmsd_qcp(coords1: np.ndarray, coords2: np.ndarray) -> float:
    """
    Calculates nRMSD using the QCP (Quaternion Characteristic Polynomial) method.

    The RMSD from rmsd_qcp is normalized by the number of rows in coords1.

    Parameters:
    -----------
    coords1 : np.ndarray
        Nx3 array of coordinates for the first structure
    coords2 : np.ndarray
        Nx3 array of coordinates for the second structure

    Returns:
    --------
    float
        Normalized RMSD value
    """
    return rmsd_to_nrmsd(rmsd_qcp(coords1, coords2), coords1.shape[0])
|
418
|
+
|
419
|
+
|
420
|
+
def nrmsd_validate(coords1: np.ndarray, coords2: np.ndarray) -> float:
    """
    Computes nRMSD with all three methods and checks that they agree.

    The quaternions, SVD and QCP results are compared pairwise against an
    absolute tolerance of 1e-6. If every pair agrees, the quaternions result
    is returned.

    Parameters:
    -----------
    coords1 : np.ndarray
        Nx3 array of coordinates for the first structure
    coords2 : np.ndarray
        Nx3 array of coordinates for the second structure

    Returns:
    --------
    float
        nRMSD value (from quaternions method after validation)

    Raises:
    -------
    ValueError
        If any two methods differ by more than the tolerance
    """
    tolerance = 1e-6

    # Run every method up front, before any comparison is made
    quaternions = ("quaternions", nrmsd_quaternions(coords1, coords2))
    svd = ("svd", nrmsd_svd(coords1, coords2))
    qcp = ("qcp", nrmsd_qcp(coords1, coords2))

    # Compare in a fixed order: quaternions/SVD, quaternions/QCP, SVD/QCP
    for (label_a, value_a), (label_b, value_b) in (
        (quaternions, svd),
        (quaternions, qcp),
        (svd, qcp),
    ):
        if abs(value_a - value_b) > tolerance:
            raise ValueError(
                f"nRMSD methods disagree: {label_a}={value_a:.8f}, "
                f"{label_b}={value_b:.8f}, difference={abs(value_a - value_b):.8f}"
            )

    # The quaternions result is the validated value
    return quaternions[1]
|
473
|
+
|
474
|
+
|
475
|
+
def nrmsd_quaternions_residues(
    residues1: List["Residue"], residues2: List["Residue"]
) -> float:
    """
    Calculates nRMSD using the Quaternion method from residue lists.

    Matching atom coordinates are extracted with find_paired_coordinates and
    passed to nrmsd_quaternions.
    residues1 and residues2 are lists of Residue objects.
    """
    paired_first, paired_second = find_paired_coordinates(residues1, residues2)
    return nrmsd_quaternions(paired_first, paired_second)
|
484
|
+
|
485
|
+
|
486
|
+
def nrmsd_svd_residues(residues1: List["Residue"], residues2: List["Residue"]) -> float:
    """
    Calculates nRMSD using SVD decomposition from residue lists.

    Matching atom coordinates are extracted with find_paired_coordinates and
    passed to nrmsd_svd.
    residues1 and residues2 are lists of Residue objects.
    """
    paired_first, paired_second = find_paired_coordinates(residues1, residues2)
    return nrmsd_svd(paired_first, paired_second)
|
493
|
+
|
494
|
+
|
495
|
+
def nrmsd_qcp_residues(residues1: List["Residue"], residues2: List["Residue"]) -> float:
    """
    Calculates nRMSD using the QCP method from residue lists.

    Matching atom coordinates are extracted with find_paired_coordinates and
    passed to nrmsd_qcp.
    residues1 and residues2 are lists of Residue objects.
    """
    paired_first, paired_second = find_paired_coordinates(residues1, residues2)
    return nrmsd_qcp(paired_first, paired_second)
|
502
|
+
|
503
|
+
|
504
|
+
def nrmsd_validate_residues(
    residues1: List["Residue"], residues2: List["Residue"]
) -> float:
    """
    Validates that all nRMSD methods produce the same result from residue lists.

    Matching atom coordinates are extracted with find_paired_coordinates and
    passed to nrmsd_validate.
    residues1 and residues2 are lists of Residue objects.
    """
    paired_first, paired_second = find_paired_coordinates(residues1, residues2)
    return nrmsd_validate(paired_first, paired_second)
|
513
|
+
|
514
|
+
|
59
515
|
class Structure:
|
60
516
|
"""
|
61
517
|
A class representing a molecular structure parsed from PDB or mmCIF format.
|
@@ -465,6 +921,30 @@ class Residue:
|
|
465
921
|
"""Get a list of all atoms in this residue."""
|
466
922
|
return [Atom(self.atoms.iloc[i], self.format) for i in range(len(self.atoms))]
|
467
923
|
|
924
|
+
@cached_property
def _atom_dict(self) -> dict[str, "Atom"]:
    """
    Build (once per instance) a mapping from atom name to Atom.

    The name is read from the "name" column for PDB data and from
    "auth_atom_id" (or "label_atom_id" when that column is absent) for
    mmCIF data. For any other format the mapping stays empty. When several
    rows share a name, the last row wins.
    """
    table = self.atoms
    fmt = self.format

    # Pick the column that holds the atom name for this format
    if fmt == "PDB":
        name_column = "name"
    elif fmt == "mmCIF":
        if "auth_atom_id" in table.columns:
            name_column = "auth_atom_id"
        else:
            name_column = "label_atom_id"
    else:
        name_column = None

    by_name = {}
    for position in range(len(table)):
        row = table.iloc[position]
        atom = Atom(row, fmt)
        if name_column is None:
            # Unknown format: nothing to index this atom by
            continue
        by_name[row[name_column]] = atom

    return by_name
|
947
|
+
|
468
948
|
def find_atom(self, atom_name: str) -> Optional["Atom"]:
|
469
949
|
"""
|
470
950
|
Find an atom by name in this residue.
|
@@ -479,23 +959,7 @@ class Residue:
|
|
479
959
|
Optional[Atom]
|
480
960
|
The Atom object, or None if not found
|
481
961
|
"""
|
482
|
-
|
483
|
-
mask = self.atoms["name"] == atom_name
|
484
|
-
atoms_df = self.atoms[mask]
|
485
|
-
if len(atoms_df) > 0:
|
486
|
-
return Atom(atoms_df.iloc[0], self.format)
|
487
|
-
elif self.format == "mmCIF":
|
488
|
-
if "auth_atom_id" in self.atoms.columns:
|
489
|
-
mask = self.atoms["auth_atom_id"] == atom_name
|
490
|
-
atoms_df = self.atoms[mask]
|
491
|
-
if len(atoms_df) > 0:
|
492
|
-
return Atom(atoms_df.iloc[0], self.format)
|
493
|
-
else:
|
494
|
-
mask = self.atoms["label_atom_id"] == atom_name
|
495
|
-
atoms_df = self.atoms[mask]
|
496
|
-
if len(atoms_df) > 0:
|
497
|
-
return Atom(atoms_df.iloc[0], self.format)
|
498
|
-
return None
|
962
|
+
return self._atom_dict.get(atom_name)
|
499
963
|
|
500
964
|
@cached_property
|
501
965
|
def is_nucleotide(self) -> bool:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: RNApolis
|
3
|
-
Version: 0.9.2
|
3
|
+
Version: 0.10.1
|
4
4
|
Summary: A Python library containing RNA-related bioinformatics functions and classes
|
5
5
|
Home-page: https://github.com/tzok/rnapolis-py
|
6
6
|
Author: Tomasz Zok
|
@@ -16,6 +16,7 @@ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
16
16
|
Description-Content-Type: text/markdown
|
17
17
|
License-File: LICENSE
|
18
18
|
Requires-Dist: appdirs
|
19
|
+
Requires-Dist: faiss-cpu
|
19
20
|
Requires-Dist: graphviz
|
20
21
|
Requires-Dist: mmcif
|
21
22
|
Requires-Dist: numpy
|
@@ -24,7 +25,9 @@ Requires-Dist: orjson
|
|
24
25
|
Requires-Dist: pandas
|
25
26
|
Requires-Dist: pulp
|
26
27
|
Requires-Dist: requests
|
28
|
+
Requires-Dist: scikit-learn
|
27
29
|
Requires-Dist: scipy
|
30
|
+
Requires-Dist: tqdm
|
28
31
|
Requires-Dist: viennarna
|
29
32
|
Dynamic: author
|
30
33
|
Dynamic: author-email
|
@@ -7,23 +7,24 @@ rnapolis/component_A.csv,sha256=koirS-AwUZwoYGItT8yn3wS6Idvmh2FANfTQcOS_xh8,2897
|
|
7
7
|
rnapolis/component_C.csv,sha256=NtvsAu_YrUgTjzZm3j4poW4IZ99x3dPARB09XVIiMCc,2803
|
8
8
|
rnapolis/component_G.csv,sha256=Z5wl8OnHRyx4XhTyBiWgRZiEvmZXhoxtVRH8bn6Vxf0,2898
|
9
9
|
rnapolis/component_U.csv,sha256=8BUoU1m2YzGmi8_kw1xdpf3pucszHjFEtTex87CuXiE,2645
|
10
|
+
rnapolis/distiller.py,sha256=QFFSwiCFftrb4tW3YhaECKEePg5pJAqJalrabPPQVJk,36817
|
10
11
|
rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
|
11
12
|
rnapolis/mmcif_pdbx_v50.dic,sha256=5QFx1ssDaehR4_DQ-tS9VQux262SiLXaqcwmwwejF5c,5744659
|
12
13
|
rnapolis/molecule_filter.py,sha256=jgcpJxx_oXEBX0d30v4k_FdwRouRUPUsEtCYWgLGpD4,7310
|
13
14
|
rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
|
14
|
-
rnapolis/parser.py,sha256=
|
15
|
+
rnapolis/parser.py,sha256=hdcBSyX-CJTVPeLy7h9aKlGLmVwLo73T-Tvi3ivqO-0,16600
|
15
16
|
rnapolis/parser_v2.py,sha256=qG6CO3or7zmuJu368g9Nzokiqdeip4yjD14F163uH6w,40618
|
16
17
|
rnapolis/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
18
|
rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
|
18
19
|
rnapolis/splitter.py,sha256=x-Zn21mkiMgvYPptUFD9BbdNIvoaM6b8GzGf6uYXEwE,4052
|
19
20
|
rnapolis/tertiary.py,sha256=zGRhza-GJLI_gmKGjnvPksvm3EjOi7O263ngckvE4rs,39408
|
20
|
-
rnapolis/tertiary_v2.py,sha256=
|
21
|
+
rnapolis/tertiary_v2.py,sha256=SgijTv0bPqMJwsMqyQk0O8QAnS2Ozk45vk8igxt9hRs,38001
|
21
22
|
rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
|
22
23
|
rnapolis/unifier.py,sha256=2ge7IB9FdRgzSAiVD39U_ciwtdDJ2fGzf8mUIudbrqY,5820
|
23
24
|
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
24
|
-
rnapolis-0.
|
25
|
-
rnapolis-0.
|
26
|
-
rnapolis-0.
|
27
|
-
rnapolis-0.
|
28
|
-
rnapolis-0.
|
29
|
-
rnapolis-0.
|
25
|
+
rnapolis-0.10.1.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
26
|
+
rnapolis-0.10.1.dist-info/METADATA,sha256=rjPTfIJ666l8ZmlocJWBO2_5OSem_9r-AyCe-zFR7as,54611
|
27
|
+
rnapolis-0.10.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
28
|
+
rnapolis-0.10.1.dist-info/entry_points.txt,sha256=MZMWnYBUYnis-zWDmFfuA5yXtU3W5YdQrm5HA5LrkeM,474
|
29
|
+
rnapolis-0.10.1.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
30
|
+
rnapolis-0.10.1.dist-info/RECORD,,
|
@@ -3,6 +3,7 @@ adapter = rnapolis.adapter:main
|
|
3
3
|
aligner = rnapolis.aligner:main
|
4
4
|
annotator = rnapolis.annotator:main
|
5
5
|
clashfinder = rnapolis.clashfinder:main
|
6
|
+
distiller = rnapolis.distiller:main
|
6
7
|
metareader = rnapolis.metareader:main
|
7
8
|
molecule-filter = rnapolis.molecule_filter:main
|
8
9
|
motif-extractor = rnapolis.motif_extractor:main
|
File without changes
|
File without changes
|
File without changes
|