RNApolis 0.4.4__py3-none-any.whl → 0.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {RNApolis-0.4.4.dist-info → RNApolis-0.4.7.dist-info}/METADATA +1 -1
- {RNApolis-0.4.4.dist-info → RNApolis-0.4.7.dist-info}/RECORD +9 -9
- {RNApolis-0.4.4.dist-info → RNApolis-0.4.7.dist-info}/WHEEL +1 -1
- rnapolis/common.py +4 -1
- rnapolis/parser.py +47 -9
- rnapolis/tertiary.py +59 -29
- {RNApolis-0.4.4.dist-info → RNApolis-0.4.7.dist-info}/LICENSE +0 -0
- {RNApolis-0.4.4.dist-info → RNApolis-0.4.7.dist-info}/entry_points.txt +0 -0
- {RNApolis-0.4.4.dist-info → RNApolis-0.4.7.dist-info}/top_level.txt +0 -0
@@ -1,17 +1,17 @@
|
|
1
1
|
rnapolis/annotator.py,sha256=7U3f0gchKdIGc6FwJx0UAc_95HJI5SgECj-b7-1yBhc,22086
|
2
2
|
rnapolis/clashfinder.py,sha256=i95kp0o6OWNqmJDBr-PbsZd7RY2iJtBDr7QqolJSuAQ,8513
|
3
|
-
rnapolis/common.py,sha256=
|
3
|
+
rnapolis/common.py,sha256=NWhlPwT521jCSWcDcm_TNoYENjoZWpllf9sS-WuTEmA,30361
|
4
4
|
rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
|
5
5
|
rnapolis/molecule_filter.py,sha256=hB6-nXgjmw7FAsQ3bj0cZ2FvuW2I1PXunEfcdwEUB1o,7389
|
6
6
|
rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
|
7
|
-
rnapolis/parser.py,sha256=
|
7
|
+
rnapolis/parser.py,sha256=2pQYy0sh8TCpeluMmmSJ7C5dudK_bsfstTWCdpwwpNU,15193
|
8
8
|
rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
|
9
|
-
rnapolis/tertiary.py,sha256=
|
9
|
+
rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
|
10
10
|
rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
|
11
11
|
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
12
|
-
RNApolis-0.4.
|
13
|
-
RNApolis-0.4.
|
14
|
-
RNApolis-0.4.
|
15
|
-
RNApolis-0.4.
|
16
|
-
RNApolis-0.4.
|
17
|
-
RNApolis-0.4.
|
12
|
+
RNApolis-0.4.7.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
13
|
+
RNApolis-0.4.7.dist-info/METADATA,sha256=551L8oU_7CdBw7v0jezfHQX7YzF9Fo83E6NVbLVfA50,54322
|
14
|
+
RNApolis-0.4.7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
15
|
+
RNApolis-0.4.7.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
|
16
|
+
RNApolis-0.4.7.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
17
|
+
RNApolis-0.4.7.dist-info/RECORD,,
|
rnapolis/common.py
CHANGED
@@ -338,6 +338,9 @@ class Entry(Sequence):
|
|
338
338
|
return self.pair
|
339
339
|
raise IndexError()
|
340
340
|
|
341
|
+
def __lt__(self, other):
|
342
|
+
return self.index_ < other.index_
|
343
|
+
|
341
344
|
def __len__(self) -> int:
|
342
345
|
return 3
|
343
346
|
|
@@ -838,7 +841,7 @@ class BpSeq:
|
|
838
841
|
|
839
842
|
for i in range(1, len(regions)):
|
840
843
|
k, l, _ = regions[i]
|
841
|
-
available = [True for
|
844
|
+
available = [True for _ in range(len("([{<" + string.ascii_uppercase))]
|
842
845
|
|
843
846
|
for j in range(i):
|
844
847
|
m, n, _ = regions[j]
|
rnapolis/parser.py
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
import logging
|
2
2
|
from typing import IO, Dict, List, Optional, Tuple, Union
|
3
3
|
|
4
|
+
import numpy as np
|
4
5
|
from mmcif.io.IoAdapterPy import IoAdapterPy
|
6
|
+
from scipy.spatial import KDTree
|
7
|
+
|
5
8
|
from rnapolis.common import ResidueAuth, ResidueLabel
|
6
9
|
from rnapolis.tertiary import BASE_ATOMS, Atom, Residue3D, Structure3D
|
7
10
|
|
@@ -53,10 +56,10 @@ def parse_cif(
|
|
53
56
|
|
54
57
|
io_adapter = IoAdapterPy()
|
55
58
|
data = io_adapter.readFile(cif.name)
|
56
|
-
|
59
|
+
atoms_to_process: List[Atom] = []
|
57
60
|
modified: Dict[Union[ResidueLabel, ResidueAuth], str] = {}
|
58
|
-
sequence_by_entity = {}
|
59
|
-
is_nucleic_acid_by_entity = {}
|
61
|
+
sequence_by_entity: Dict[str, str] = {}
|
62
|
+
is_nucleic_acid_by_entity: Dict[str, bool] = {}
|
60
63
|
|
61
64
|
if data:
|
62
65
|
atom_site = data[0].getObj("atom_site")
|
@@ -136,7 +139,7 @@ def parse_cif(
|
|
136
139
|
else None
|
137
140
|
)
|
138
141
|
|
139
|
-
|
142
|
+
atoms_to_process.append(
|
140
143
|
Atom(
|
141
144
|
label_entity_id,
|
142
145
|
label,
|
@@ -216,6 +219,7 @@ def parse_cif(
|
|
216
219
|
if entity_id and pdbx_seq_one_letter_code_can:
|
217
220
|
sequence_by_entity[entity_id] = pdbx_seq_one_letter_code_can
|
218
221
|
|
222
|
+
atoms = filter_clashing_atoms(atoms_to_process)
|
219
223
|
return atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity
|
220
224
|
|
221
225
|
|
@@ -228,7 +232,7 @@ def parse_pdb(
|
|
228
232
|
Dict[str, bool],
|
229
233
|
]:
|
230
234
|
pdb.seek(0)
|
231
|
-
|
235
|
+
atoms_to_process: List[Atom] = []
|
232
236
|
modified: Dict[Union[ResidueLabel, ResidueAuth], str] = {}
|
233
237
|
model = 1
|
234
238
|
|
@@ -236,9 +240,6 @@ def parse_pdb(
|
|
236
240
|
if line.startswith("MODEL"):
|
237
241
|
model = int(line[10:14].strip())
|
238
242
|
elif line.startswith("ATOM") or line.startswith("HETATM"):
|
239
|
-
alternate_location = line[16]
|
240
|
-
if alternate_location != " ":
|
241
|
-
continue
|
242
243
|
atom_name = line[12:16].strip()
|
243
244
|
residue_name = line[17:20].strip()
|
244
245
|
chain_identifier = line[21]
|
@@ -251,7 +252,10 @@ def parse_pdb(
|
|
251
252
|
auth = ResidueAuth(
|
252
253
|
chain_identifier, residue_number, insertion_code, residue_name
|
253
254
|
)
|
254
|
-
|
255
|
+
|
256
|
+
atoms_to_process.append(
|
257
|
+
Atom(None, None, auth, model, atom_name, x, y, z, occupancy)
|
258
|
+
)
|
255
259
|
elif line.startswith("MODRES"):
|
256
260
|
original_name = line[12:15]
|
257
261
|
chain_identifier = line[16]
|
@@ -263,6 +267,7 @@ def parse_pdb(
|
|
263
267
|
)
|
264
268
|
modified[auth] = standard_residue_name
|
265
269
|
|
270
|
+
atoms = filter_clashing_atoms(atoms_to_process)
|
266
271
|
return atoms, modified, {}, {}
|
267
272
|
|
268
273
|
|
@@ -392,3 +397,36 @@ def try_parse_int(s: str) -> Optional[int]:
|
|
392
397
|
return int(s)
|
393
398
|
except ValueError:
|
394
399
|
return None
|
400
|
+
|
401
|
+
|
402
|
+
def filter_clashing_atoms(atoms: List[Atom], clash_distance: float = 0.5) -> List[Atom]:
|
403
|
+
# First, remove duplicate atoms
|
404
|
+
unique_atoms = {}
|
405
|
+
|
406
|
+
for i, atom in enumerate(atoms):
|
407
|
+
key = (atom.label, atom.auth, atom.name)
|
408
|
+
if key not in unique_atoms or atom.occupancy > unique_atoms[key].occupancy:
|
409
|
+
unique_atoms[key] = atom
|
410
|
+
|
411
|
+
unique_atoms_list = list(unique_atoms.values())
|
412
|
+
|
413
|
+
# Now handle clashing atoms
|
414
|
+
coords = np.array([(atom.x, atom.y, atom.z) for atom in unique_atoms_list])
|
415
|
+
tree = KDTree(coords)
|
416
|
+
|
417
|
+
pairs = tree.query_pairs(r=clash_distance)
|
418
|
+
|
419
|
+
atoms_to_keep = set(range(len(unique_atoms_list)))
|
420
|
+
|
421
|
+
for i, j in pairs:
|
422
|
+
if (
|
423
|
+
unique_atoms_list[i].occupancy is None
|
424
|
+
or unique_atoms_list[j].occupancy is None
|
425
|
+
):
|
426
|
+
continue
|
427
|
+
if unique_atoms_list[i].occupancy > unique_atoms_list[j].occupancy:
|
428
|
+
atoms_to_keep.discard(j)
|
429
|
+
else:
|
430
|
+
atoms_to_keep.discard(i)
|
431
|
+
|
432
|
+
return [unique_atoms_list[i] for i in atoms_to_keep]
|
rnapolis/tertiary.py
CHANGED
@@ -124,36 +124,17 @@ class Residue3D(Residue):
|
|
124
124
|
outermost_atoms = {"A": "N9", "G": "N9", "C": "N1", "U": "N1", "T": "N1"}
|
125
125
|
# Dict representing expected name of atom closest to the tetrad center
|
126
126
|
innermost_atoms = {"A": "N6", "G": "O6", "C": "N4", "U": "O4", "T": "O4"}
|
127
|
+
# Heavy atoms in phosphate and ribose
|
128
|
+
phosphate_atoms = {"P", "OP1", "OP2", "O3'", "O5'"}
|
129
|
+
sugar_atoms = {"C1'", "C2'", "C3'", "C4'", "C5'", "O4'"}
|
127
130
|
# Heavy atoms for each main nucleobase
|
128
131
|
nucleobase_heavy_atoms = {
|
129
132
|
"A": set(["N1", "C2", "N3", "C4", "C5", "C6", "N6", "N7", "C8", "N9"]),
|
130
133
|
"G": set(["N1", "C2", "N2", "N3", "C4", "C5", "C6", "O6", "N7", "C8", "N9"]),
|
131
134
|
"C": set(["N1", "C2", "O2", "N3", "C4", "N4", "C5", "C6"]),
|
132
135
|
"U": set(["N1", "C2", "O2", "N3", "C4", "O4", "C5", "C6"]),
|
136
|
+
"T": set(["N1", "C2", "O2", "N3", "C4", "O4", "C5", "C5M", "C6"]),
|
133
137
|
}
|
134
|
-
# Heavy atoms in nucleotide
|
135
|
-
nucleotide_heavy_atoms = (
|
136
|
-
set(
|
137
|
-
[
|
138
|
-
"P",
|
139
|
-
"OP1",
|
140
|
-
"OP2",
|
141
|
-
"O5'",
|
142
|
-
"C5'",
|
143
|
-
"C4'",
|
144
|
-
"O4'",
|
145
|
-
"C3'",
|
146
|
-
"O3'",
|
147
|
-
"C2'",
|
148
|
-
"O2'",
|
149
|
-
"C1'",
|
150
|
-
]
|
151
|
-
)
|
152
|
-
.union(nucleobase_heavy_atoms["A"])
|
153
|
-
.union(nucleobase_heavy_atoms["G"])
|
154
|
-
.union(nucleobase_heavy_atoms["C"])
|
155
|
-
.union(nucleobase_heavy_atoms["U"])
|
156
|
-
)
|
157
138
|
|
158
139
|
def __lt__(self, other):
|
159
140
|
return (self.model, self.chain, self.number, self.icode or " ") < (
|
@@ -202,9 +183,59 @@ class Residue3D(Residue):
|
|
202
183
|
|
203
184
|
@cached_property
|
204
185
|
def is_nucleotide(self) -> bool:
|
205
|
-
|
206
|
-
|
186
|
+
scores = {"phosphate": 0.0, "sugar": 0.0, "base": 0.0, "connections": 0.0}
|
187
|
+
weights = {"phosphate": 0.25, "sugar": 0.25, "base": 0.25, "connections": 0.25}
|
188
|
+
|
189
|
+
residue_atoms = {atom.name for atom in self.atoms}
|
190
|
+
|
191
|
+
phosphate_match = len(residue_atoms.intersection(self.phosphate_atoms))
|
192
|
+
scores["phosphate"] = phosphate_match / len(self.phosphate_atoms)
|
193
|
+
|
194
|
+
sugar_match = len(residue_atoms.intersection(self.sugar_atoms))
|
195
|
+
scores["sugar"] = sugar_match / len(self.sugar_atoms)
|
196
|
+
|
197
|
+
nucleobase_atoms = {
|
198
|
+
key: self.nucleobase_heavy_atoms[key] for key in self.nucleobase_heavy_atoms
|
199
|
+
}
|
200
|
+
matches = {
|
201
|
+
key: len(residue_atoms.intersection(nucleobase_atoms[key]))
|
202
|
+
/ len(nucleobase_atoms[key])
|
203
|
+
for key in nucleobase_atoms
|
204
|
+
}
|
205
|
+
best_match = max(matches.items(), key=lambda x: x[1])
|
206
|
+
scores["base"] = best_match[1]
|
207
|
+
|
208
|
+
connection_score = 0.0
|
209
|
+
distance_threshold = 2.0
|
210
|
+
|
211
|
+
if "P" in residue_atoms and "O5'" in residue_atoms:
|
212
|
+
p_atom = next(atom for atom in self.atoms if atom.name == "P")
|
213
|
+
o5_atom = next(atom for atom in self.atoms if atom.name == "O5'")
|
214
|
+
if (
|
215
|
+
numpy.linalg.norm(p_atom.coordinates - o5_atom.coordinates)
|
216
|
+
<= distance_threshold
|
217
|
+
):
|
218
|
+
connection_score += 0.5
|
219
|
+
if "C1'" in residue_atoms:
|
220
|
+
c1_atom = next(atom for atom in self.atoms if atom.name == "C1'")
|
221
|
+
for base_connection in ["N9", "N1"]:
|
222
|
+
if base_connection in residue_atoms:
|
223
|
+
base_atom = next(
|
224
|
+
atom for atom in self.atoms if atom.name == base_connection
|
225
|
+
)
|
226
|
+
if (
|
227
|
+
numpy.linalg.norm(c1_atom.coordinates - base_atom.coordinates)
|
228
|
+
<= distance_threshold
|
229
|
+
):
|
230
|
+
connection_score += 0.5
|
231
|
+
break
|
232
|
+
|
233
|
+
scores["connections"] = connection_score
|
234
|
+
|
235
|
+
probability = sum(
|
236
|
+
scores[component] * weights[component] for component in scores.keys()
|
207
237
|
)
|
238
|
+
return probability > 0.5
|
208
239
|
|
209
240
|
@cached_property
|
210
241
|
def base_normal_vector(self) -> Optional[numpy.typing.NDArray[numpy.floating]]:
|
@@ -566,15 +597,14 @@ class Mapping2D3D:
|
|
566
597
|
return self.__generate_bpseq(canonical)
|
567
598
|
|
568
599
|
def __generate_bpseq(self, base_pairs):
|
600
|
+
nucleotides = list(filter(lambda r: r.is_nucleotide, self.structure3d.residues))
|
569
601
|
result: Dict[int, List] = {}
|
570
602
|
residue_map: Dict[Residue3D, int] = {}
|
571
603
|
i = 1
|
572
604
|
|
573
|
-
for j, residue in enumerate(
|
574
|
-
filter(lambda r: r.is_nucleotide, self.structure3d.residues)
|
575
|
-
):
|
605
|
+
for j, residue in enumerate(nucleotides):
|
576
606
|
if self.find_gaps and j > 0:
|
577
|
-
previous =
|
607
|
+
previous = nucleotides[j - 1]
|
578
608
|
|
579
609
|
if (
|
580
610
|
not previous.is_connected(residue)
|
File without changes
|
File without changes
|
File without changes
|