RNApolis 0.4.3__tar.gz → 0.4.6__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {rnapolis-0.4.3/src/RNApolis.egg-info → rnapolis-0.4.6}/PKG-INFO +1 -1
- {rnapolis-0.4.3 → rnapolis-0.4.6}/setup.py +1 -1
- {rnapolis-0.4.3 → rnapolis-0.4.6/src/RNApolis.egg-info}/PKG-INFO +1 -1
- {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/parser.py +47 -9
- {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/tertiary.py +43 -7
- {rnapolis-0.4.3 → rnapolis-0.4.6}/tests/test_annotator.py +11 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/tests/test_common.py +44 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/tests/test_parser.py +15 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/LICENSE +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/README.md +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/pyproject.toml +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/setup.cfg +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/src/RNApolis.egg-info/SOURCES.txt +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/src/RNApolis.egg-info/dependency_links.txt +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/src/RNApolis.egg-info/entry_points.txt +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/src/RNApolis.egg-info/requires.txt +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/src/RNApolis.egg-info/top_level.txt +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/annotator.py +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/clashfinder.py +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/common.py +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/metareader.py +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/molecule_filter.py +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/motif_extractor.py +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/rfam_folder.py +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/transformer.py +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/util.py +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/tests/test_bugfixes.py +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/tests/test_metareader.py +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/tests/test_quadruplexes.py +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/tests/test_rfam_folder.py +0 -0
- {rnapolis-0.4.3 → rnapolis-0.4.6}/tests/test_tertiary.py +0 -0
@@ -1,7 +1,10 @@
|
|
1
1
|
import logging
|
2
2
|
from typing import IO, Dict, List, Optional, Tuple, Union
|
3
3
|
|
4
|
+
import numpy as np
|
4
5
|
from mmcif.io.IoAdapterPy import IoAdapterPy
|
6
|
+
from scipy.spatial import KDTree
|
7
|
+
|
5
8
|
from rnapolis.common import ResidueAuth, ResidueLabel
|
6
9
|
from rnapolis.tertiary import BASE_ATOMS, Atom, Residue3D, Structure3D
|
7
10
|
|
@@ -53,10 +56,10 @@ def parse_cif(
|
|
53
56
|
|
54
57
|
io_adapter = IoAdapterPy()
|
55
58
|
data = io_adapter.readFile(cif.name)
|
56
|
-
|
59
|
+
atoms_to_process: List[Atom] = []
|
57
60
|
modified: Dict[Union[ResidueLabel, ResidueAuth], str] = {}
|
58
|
-
sequence_by_entity = {}
|
59
|
-
is_nucleic_acid_by_entity = {}
|
61
|
+
sequence_by_entity: Dict[str, str] = {}
|
62
|
+
is_nucleic_acid_by_entity: Dict[str, bool] = {}
|
60
63
|
|
61
64
|
if data:
|
62
65
|
atom_site = data[0].getObj("atom_site")
|
@@ -136,7 +139,7 @@ def parse_cif(
|
|
136
139
|
else None
|
137
140
|
)
|
138
141
|
|
139
|
-
|
142
|
+
atoms_to_process.append(
|
140
143
|
Atom(
|
141
144
|
label_entity_id,
|
142
145
|
label,
|
@@ -216,6 +219,7 @@ def parse_cif(
|
|
216
219
|
if entity_id and pdbx_seq_one_letter_code_can:
|
217
220
|
sequence_by_entity[entity_id] = pdbx_seq_one_letter_code_can
|
218
221
|
|
222
|
+
atoms = filter_clashing_atoms(atoms_to_process)
|
219
223
|
return atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity
|
220
224
|
|
221
225
|
|
@@ -228,7 +232,7 @@ def parse_pdb(
|
|
228
232
|
Dict[str, bool],
|
229
233
|
]:
|
230
234
|
pdb.seek(0)
|
231
|
-
|
235
|
+
atoms_to_process: List[Atom] = []
|
232
236
|
modified: Dict[Union[ResidueLabel, ResidueAuth], str] = {}
|
233
237
|
model = 1
|
234
238
|
|
@@ -236,9 +240,6 @@ def parse_pdb(
|
|
236
240
|
if line.startswith("MODEL"):
|
237
241
|
model = int(line[10:14].strip())
|
238
242
|
elif line.startswith("ATOM") or line.startswith("HETATM"):
|
239
|
-
alternate_location = line[16]
|
240
|
-
if alternate_location != " ":
|
241
|
-
continue
|
242
243
|
atom_name = line[12:16].strip()
|
243
244
|
residue_name = line[17:20].strip()
|
244
245
|
chain_identifier = line[21]
|
@@ -251,7 +252,10 @@ def parse_pdb(
|
|
251
252
|
auth = ResidueAuth(
|
252
253
|
chain_identifier, residue_number, insertion_code, residue_name
|
253
254
|
)
|
254
|
-
|
255
|
+
|
256
|
+
atoms_to_process.append(
|
257
|
+
Atom(None, None, auth, model, atom_name, x, y, z, occupancy)
|
258
|
+
)
|
255
259
|
elif line.startswith("MODRES"):
|
256
260
|
original_name = line[12:15]
|
257
261
|
chain_identifier = line[16]
|
@@ -263,6 +267,7 @@ def parse_pdb(
|
|
263
267
|
)
|
264
268
|
modified[auth] = standard_residue_name
|
265
269
|
|
270
|
+
atoms = filter_clashing_atoms(atoms_to_process)
|
266
271
|
return atoms, modified, {}, {}
|
267
272
|
|
268
273
|
|
@@ -392,3 +397,36 @@ def try_parse_int(s: str) -> Optional[int]:
|
|
392
397
|
return int(s)
|
393
398
|
except ValueError:
|
394
399
|
return None
|
400
|
+
|
401
|
+
|
402
|
+
def filter_clashing_atoms(atoms: List["Atom"], clash_distance: float = 0.5) -> List["Atom"]:
    """
    Drop duplicated and clashing atoms, preferring those with higher occupancy.

    The filtering is done in two passes and atoms of different models are
    never played against each other:

    1. Atoms sharing the same (label, auth, model, name) identity are reduced
       to one atom: the one with the highest occupancy. The first atom seen
       wins ties and any comparison in which an occupancy is unknown.
    2. Among the survivors, for every pair of atoms of the same model that
       lie at most ``clash_distance`` apart, the atom with the lower
       occupancy is dropped (the earlier one when occupancies are equal).
       Pairs in which either occupancy is unknown are left untouched.

    :param atoms: atoms in the order in which they were read
    :param clash_distance: maximum distance (in coordinate units) at which
        two atoms are treated as clashing
    :return: retained atoms, in their original relative order
    """
    # NOTE(review): assumes Atom exposes its model number as `.model`
    # (fourth positional field in the Atom(...) calls) — confirm.

    # First, remove duplicate atoms. The model is part of the identity,
    # otherwise every model after the first would lose all of its atoms.
    unique_atoms = {}

    for atom in atoms:
        key = (atom.label, atom.auth, atom.model, atom.name)
        current = unique_atoms.get(key)
        if current is None:
            unique_atoms[key] = atom
        elif (
            atom.occupancy is not None
            and current.occupancy is not None
            and atom.occupancy > current.occupancy
        ):
            # Only replace when both occupancies are known; comparing None
            # with a float would raise TypeError.
            unique_atoms[key] = atom

    unique_atoms_list = list(unique_atoms.values())

    # No pair can clash with fewer than two atoms, and KDTree rejects an
    # empty coordinate array.
    if len(unique_atoms_list) < 2:
        return unique_atoms_list

    # Now handle clashing atoms
    coords = np.array([(atom.x, atom.y, atom.z) for atom in unique_atoms_list])
    tree = KDTree(coords)
    pairs = tree.query_pairs(r=clash_distance)

    atoms_to_discard = set()

    for i, j in pairs:
        first, second = unique_atoms_list[i], unique_atoms_list[j]
        if first.model != second.model:
            # Models are alternative versions of the same structure, so
            # overlapping coordinates between them are expected.
            continue
        if first.occupancy is None or second.occupancy is None:
            continue
        if first.occupancy > second.occupancy:
            atoms_to_discard.add(j)
        else:
            atoms_to_discard.add(i)

    # Walk the list (not the index set) so the input order is preserved.
    return [
        atom
        for index, atom in enumerate(unique_atoms_list)
        if index not in atoms_to_discard
    ]
|
@@ -7,6 +7,7 @@ from typing import Dict, List, Optional, Set, Tuple, Union
|
|
7
7
|
|
8
8
|
import numpy
|
9
9
|
import numpy.typing
|
10
|
+
|
10
11
|
from rnapolis.common import (
|
11
12
|
BasePair,
|
12
13
|
BpSeq,
|
@@ -16,6 +17,7 @@ from rnapolis.common import (
|
|
16
17
|
Residue,
|
17
18
|
ResidueAuth,
|
18
19
|
ResidueLabel,
|
20
|
+
Saenger,
|
19
21
|
Stacking,
|
20
22
|
)
|
21
23
|
|
@@ -521,13 +523,47 @@ class Mapping2D3D:
|
|
521
523
|
|
522
524
|
@cached_property
def bpseq(self) -> BpSeq:
    """
    Build the BPSEQ representation from canonical base pairs only.

    A residue may take part in several canonical pairs of
    ``self.base_pairs``. Such conflicts are resolved one at a time: for the
    first residue found with more than one pair, the pair ranked last by
    ``pair_scoring_function`` is dropped, and the search starts over until
    every residue is involved in at most one pair.

    :return: result of ``__generate_bpseq`` for the conflict-free pairs
    """

    def pair_scoring_function(pair: BasePair3D) -> Tuple[int, Residue, Residue]:
        """
        Return a sort key; the pair with the highest key is dropped.

        The first element is 0 for Saenger classes XIX and XX or, when no
        Saenger class is assigned, for A-U, A-T and C-G pairs; it is 1
        otherwise. The residues themselves break ties.
        """
        # NOTE(review): the annotation assumes pair.nt1 / pair.nt2 are
        # Residue instances — confirm against BasePair3D.
        if pair.saenger is not None:
            score = 0 if pair.saenger in (Saenger.XIX, Saenger.XX) else 1
            return score, pair.nt1, pair.nt2

        # Sorting the two letters makes the check independent of which
        # residue comes first in the pair.
        sequence = "".join(
            sorted(
                [
                    pair.nt1_3d.one_letter_name.upper(),
                    pair.nt2_3d.one_letter_name.upper(),
                ]
            )
        )
        score = 0 if sequence in ("AU", "AT", "CG") else 1
        return score, pair.nt1, pair.nt2

    # Keep each canonical pair once (only the nt1 < nt2 orientation).
    canonical = [
        base_pair
        for base_pair in self.base_pairs
        if base_pair.is_canonical and base_pair.nt1 < base_pair.nt2
    ]

    while True:
        # Index the remaining pairs by every residue they involve.
        matches = defaultdict(set)

        for base_pair in canonical:
            matches[base_pair.nt1_3d].add(base_pair)
            matches[base_pair.nt2_3d].add(base_pair)

        for pairs in matches.values():
            if len(pairs) > 1:
                # Drop the worst-ranked pair, then rebuild the index since
                # the removal affects both of its residues.
                canonical.remove(max(pairs, key=pair_scoring_function))
                break
        else:
            # No residue is paired more than once: conflicts are resolved.
            break

    return self.__generate_bpseq(canonical)
|
531
567
|
|
532
568
|
def __generate_bpseq(self, base_pairs):
|
533
569
|
result: Dict[int, List] = {}
|
@@ -43,3 +43,14 @@ def test_8btk():
|
|
43
43
|
with open("tests/8btk_B7.cif") as f:
|
44
44
|
structure3d = read_3d_structure(f, 1)
|
45
45
|
assert extract_secondary_structure(structure3d, 1) is not None
|
46
|
+
|
47
|
+
|
48
|
+
def test_488d():
    """
    Residues 151 of chains B and D clash in this structure because their
    occupancy factors are less than 1. Base interactions must still be
    extractable from it.
    """
    with open("tests/488d.pdb") as pdb_file:
        structure = read_3d_structure(pdb_file)

    assert extract_base_interactions(structure) is not None
|
@@ -1,7 +1,9 @@
|
|
1
1
|
from collections import Counter
|
2
2
|
|
3
|
+
import orjson
|
3
4
|
from hypothesis import given, settings
|
4
5
|
from hypothesis import strategies as st
|
6
|
+
|
5
7
|
from rnapolis.common import (
|
6
8
|
BaseInteractions,
|
7
9
|
BasePair,
|
@@ -10,13 +12,17 @@ from rnapolis.common import (
|
|
10
12
|
BpSeq,
|
11
13
|
DotBracket,
|
12
14
|
Interaction,
|
15
|
+
LeontisWesthof,
|
13
16
|
MultiStrandDotBracket,
|
14
17
|
OtherInteraction,
|
15
18
|
Residue,
|
16
19
|
ResidueAuth,
|
17
20
|
ResidueLabel,
|
21
|
+
Saenger,
|
18
22
|
Stacking,
|
19
23
|
)
|
24
|
+
from rnapolis.parser import read_3d_structure
|
25
|
+
from rnapolis.tertiary import Mapping2D3D
|
20
26
|
|
21
27
|
|
22
28
|
@given(st.from_type(ResidueLabel))
|
@@ -136,3 +142,41 @@ def test_multi_strand_dot_bracket():
|
|
136
142
|
assert dot_bracket.strands[1].structure == (
|
137
143
|
")))))))))))..(((...[[[[[[...)))......)))))...]]]]]][[[[[.((((((]]]]].....((((((......((((((....)))))).......))))))..))))))."
|
138
144
|
)
|
145
|
+
|
146
|
+
|
147
|
+
def test_conflicted_base_pairs():
    """
    Build base pairs from the JSON fixture and check the dot-bracket that
    Mapping2D3D produces for the matching mmCIF structure.
    """

    def residue_from(entry):
        # Only the author-assigned identifiers are used; the label part of
        # the residue is left as None.
        auth = entry["auth"]
        return Residue(
            None,
            ResidueAuth(auth["chain"], auth["number"], auth["icode"], auth["name"]),
        )

    with open("tests/1A1T_1_B-rnaview.json", "rb") as json_file:
        annotation = orjson.loads(json_file.read())

    base_pairs = [
        BasePair(
            residue_from(obj["nt1"]),
            residue_from(obj["nt2"]),
            LeontisWesthof(obj["lw"]),
            Saenger(obj["saenger"]) if obj["saenger"] else None,
        )
        for obj in annotation.get("basePairs", [])
    ]

    with open("tests/1A1T_1_B.cif") as cif_file:
        structure3d = read_3d_structure(cif_file)

    expected = ">strand_B\nGGACUAGCGGAGGCUAGUCC\n((((((((....))))))))"
    mapping = Mapping2D3D(structure3d, base_pairs, [], True)
    assert mapping.dot_bracket == expected
|
@@ -16,3 +16,18 @@ def test_1ato():
|
|
16
16
|
structure3d = read_3d_structure(f)
|
17
17
|
sequence = "".join([residue.one_letter_name for residue in structure3d.residues])
|
18
18
|
assert sequence == "GGCACCUCCUCGCGGUGCC"
|
19
|
+
|
20
|
+
|
21
|
+
def test_4qln_no_duplicate_atoms():
    """
    Residues 18, 19 and 20 of chain A must not list any atom name twice,
    whether the structure is read from the PDB or the mmCIF file.
    """
    for ext in (".pdb", ".cif"):
        with open(f"tests/4qln{ext}") as handle:
            structure3d = read_3d_structure(handle)

        # The checks run once per file format.
        for residue in structure3d.residues:
            if residue.auth.chain != "A" or residue.auth.number not in (18, 19, 20):
                continue

            names = [atom.name for atom in residue.atoms]
            assert len(names) == len(
                set(names)
            ), f"Duplicate atoms found in residue {residue.auth}"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|