RNApolis 0.4.4__py3-none-any.whl → 0.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {RNApolis-0.4.4.dist-info → RNApolis-0.4.6.dist-info}/METADATA +1 -1
- {RNApolis-0.4.4.dist-info → RNApolis-0.4.6.dist-info}/RECORD +7 -7
- {RNApolis-0.4.4.dist-info → RNApolis-0.4.6.dist-info}/WHEEL +1 -1
- rnapolis/parser.py +47 -9
- {RNApolis-0.4.4.dist-info → RNApolis-0.4.6.dist-info}/LICENSE +0 -0
- {RNApolis-0.4.4.dist-info → RNApolis-0.4.6.dist-info}/entry_points.txt +0 -0
- {RNApolis-0.4.4.dist-info → RNApolis-0.4.6.dist-info}/top_level.txt +0 -0
@@ -4,14 +4,14 @@ rnapolis/common.py,sha256=PUYF01P2vevhyImhZjGYE0jJlsxWHX6GQmsxI4W7S-E,30255
|
|
4
4
|
rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
|
5
5
|
rnapolis/molecule_filter.py,sha256=hB6-nXgjmw7FAsQ3bj0cZ2FvuW2I1PXunEfcdwEUB1o,7389
|
6
6
|
rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
|
7
|
-
rnapolis/parser.py,sha256=
|
7
|
+
rnapolis/parser.py,sha256=2pQYy0sh8TCpeluMmmSJ7C5dudK_bsfstTWCdpwwpNU,15193
|
8
8
|
rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
|
9
9
|
rnapolis/tertiary.py,sha256=SQyiYWA0RJhAK70f88CKZvS4EzGKHQ2RoL1s4MueEDQ,21657
|
10
10
|
rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
|
11
11
|
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
12
|
-
RNApolis-0.4.
|
13
|
-
RNApolis-0.4.
|
14
|
-
RNApolis-0.4.
|
15
|
-
RNApolis-0.4.
|
16
|
-
RNApolis-0.4.
|
17
|
-
RNApolis-0.4.
|
12
|
+
RNApolis-0.4.6.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
13
|
+
RNApolis-0.4.6.dist-info/METADATA,sha256=1Zd34oJFXeLnKDzG7HG-6jSjsAnIimsiWPbTlvDerO4,54322
|
14
|
+
RNApolis-0.4.6.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
15
|
+
RNApolis-0.4.6.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
|
16
|
+
RNApolis-0.4.6.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
17
|
+
RNApolis-0.4.6.dist-info/RECORD,,
|
rnapolis/parser.py
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
import logging
|
2
2
|
from typing import IO, Dict, List, Optional, Tuple, Union
|
3
3
|
|
4
|
+
import numpy as np
|
4
5
|
from mmcif.io.IoAdapterPy import IoAdapterPy
|
6
|
+
from scipy.spatial import KDTree
|
7
|
+
|
5
8
|
from rnapolis.common import ResidueAuth, ResidueLabel
|
6
9
|
from rnapolis.tertiary import BASE_ATOMS, Atom, Residue3D, Structure3D
|
7
10
|
|
@@ -53,10 +56,10 @@ def parse_cif(
|
|
53
56
|
|
54
57
|
io_adapter = IoAdapterPy()
|
55
58
|
data = io_adapter.readFile(cif.name)
|
56
|
-
|
59
|
+
atoms_to_process: List[Atom] = []
|
57
60
|
modified: Dict[Union[ResidueLabel, ResidueAuth], str] = {}
|
58
|
-
sequence_by_entity = {}
|
59
|
-
is_nucleic_acid_by_entity = {}
|
61
|
+
sequence_by_entity: Dict[str, str] = {}
|
62
|
+
is_nucleic_acid_by_entity: Dict[str, bool] = {}
|
60
63
|
|
61
64
|
if data:
|
62
65
|
atom_site = data[0].getObj("atom_site")
|
@@ -136,7 +139,7 @@ def parse_cif(
|
|
136
139
|
else None
|
137
140
|
)
|
138
141
|
|
139
|
-
|
142
|
+
atoms_to_process.append(
|
140
143
|
Atom(
|
141
144
|
label_entity_id,
|
142
145
|
label,
|
@@ -216,6 +219,7 @@ def parse_cif(
|
|
216
219
|
if entity_id and pdbx_seq_one_letter_code_can:
|
217
220
|
sequence_by_entity[entity_id] = pdbx_seq_one_letter_code_can
|
218
221
|
|
222
|
+
atoms = filter_clashing_atoms(atoms_to_process)
|
219
223
|
return atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity
|
220
224
|
|
221
225
|
|
@@ -228,7 +232,7 @@ def parse_pdb(
|
|
228
232
|
Dict[str, bool],
|
229
233
|
]:
|
230
234
|
pdb.seek(0)
|
231
|
-
|
235
|
+
atoms_to_process: List[Atom] = []
|
232
236
|
modified: Dict[Union[ResidueLabel, ResidueAuth], str] = {}
|
233
237
|
model = 1
|
234
238
|
|
@@ -236,9 +240,6 @@ def parse_pdb(
|
|
236
240
|
if line.startswith("MODEL"):
|
237
241
|
model = int(line[10:14].strip())
|
238
242
|
elif line.startswith("ATOM") or line.startswith("HETATM"):
|
239
|
-
alternate_location = line[16]
|
240
|
-
if alternate_location != " ":
|
241
|
-
continue
|
242
243
|
atom_name = line[12:16].strip()
|
243
244
|
residue_name = line[17:20].strip()
|
244
245
|
chain_identifier = line[21]
|
@@ -251,7 +252,10 @@ def parse_pdb(
|
|
251
252
|
auth = ResidueAuth(
|
252
253
|
chain_identifier, residue_number, insertion_code, residue_name
|
253
254
|
)
|
254
|
-
|
255
|
+
|
256
|
+
atoms_to_process.append(
|
257
|
+
Atom(None, None, auth, model, atom_name, x, y, z, occupancy)
|
258
|
+
)
|
255
259
|
elif line.startswith("MODRES"):
|
256
260
|
original_name = line[12:15]
|
257
261
|
chain_identifier = line[16]
|
@@ -263,6 +267,7 @@ def parse_pdb(
|
|
263
267
|
)
|
264
268
|
modified[auth] = standard_residue_name
|
265
269
|
|
270
|
+
atoms = filter_clashing_atoms(atoms_to_process)
|
266
271
|
return atoms, modified, {}, {}
|
267
272
|
|
268
273
|
|
@@ -392,3 +397,36 @@ def try_parse_int(s: str) -> Optional[int]:
|
|
392
397
|
return int(s)
|
393
398
|
except ValueError:
|
394
399
|
return None
|
400
|
+
|
401
|
+
|
402
|
+
def filter_clashing_atoms(atoms: List[Atom], clash_distance: float = 0.5) -> List[Atom]:
    """Drop duplicated atoms and atoms that clash with a better-occupied one.

    The filtering runs in two stages:

    1. De-duplication: atoms sharing the same (label, auth, model, name) key
       are reduced to the one with the highest occupancy. A missing
       occupancy (``None``) ranks below any numeric value, and the
       first-seen atom wins ties.
    2. Clash removal: for every pair of remaining atoms of the same model
       that lie within ``clash_distance`` of each other, the atom with the
       lower occupancy is dropped. Pairs in which either occupancy is
       ``None`` are left untouched.

    Args:
        atoms: Atoms to filter; each must expose ``label``, ``auth``,
            ``model``, ``name``, ``x``, ``y``, ``z`` and ``occupancy``.
        clash_distance: Maximum distance between two atoms for them to be
            treated as clashing, in the units of the atom coordinates.

    Returns:
        The surviving atoms, in the order in which their keys first appeared
        in ``atoms``.
    """
    # KDTree cannot be built from an empty coordinate array.
    if not atoms:
        return []

    def occupancy_rank(atom):
        # None must be comparable: rank it below every real occupancy.
        return float("-inf") if atom.occupancy is None else atom.occupancy

    # Stage 1: keep a single atom per key. The model is part of the key so
    # that the same atom appearing in several models is not collapsed.
    unique_atoms = {}
    for atom in atoms:
        key = (atom.label, atom.auth, atom.model, atom.name)
        if key not in unique_atoms or (
            occupancy_rank(atom) > occupancy_rank(unique_atoms[key])
        ):
            unique_atoms[key] = atom

    unique_atoms_list = list(unique_atoms.values())

    # Stage 2: find all pairs closer than clash_distance.
    coords = np.array([(atom.x, atom.y, atom.z) for atom in unique_atoms_list])
    tree = KDTree(coords)

    discarded = set()
    for i, j in tree.query_pairs(r=clash_distance):
        first = unique_atoms_list[i]
        second = unique_atoms_list[j]
        # Atoms of different models legitimately occupy the same space.
        if first.model != second.model:
            continue
        # Without both occupancies there is no basis for picking a loser.
        if first.occupancy is None or second.occupancy is None:
            continue
        if first.occupancy > second.occupancy:
            discarded.add(j)
        else:
            # query_pairs yields i < j, so on a tie the earlier atom is dropped.
            discarded.add(i)

    # Walk the list rather than a set so the output order is deterministic.
    return [
        atom
        for index, atom in enumerate(unique_atoms_list)
        if index not in discarded
    ]
|
File without changes
|
File without changes
|
File without changes
|