RNApolis 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {RNApolis-0.3.10.dist-info → RNApolis-0.3.11.dist-info}/METADATA +1 -1
- {RNApolis-0.3.10.dist-info → RNApolis-0.3.11.dist-info}/RECORD +8 -9
- rnapolis/annotator.py +1 -1
- rnapolis/common.py +32 -0
- rnapolis/annotator_ml.py +0 -278
- {RNApolis-0.3.10.dist-info → RNApolis-0.3.11.dist-info}/LICENSE +0 -0
- {RNApolis-0.3.10.dist-info → RNApolis-0.3.11.dist-info}/WHEEL +0 -0
- {RNApolis-0.3.10.dist-info → RNApolis-0.3.11.dist-info}/entry_points.txt +0 -0
- {RNApolis-0.3.10.dist-info → RNApolis-0.3.11.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,6 @@
|
|
1
|
-
rnapolis/annotator.py,sha256=
|
2
|
-
rnapolis/annotator_ml.py,sha256=eZYDIaUcAVWvUHKvA0lx6lWEaATU1THNYbfVKvVa9mQ,8226
|
1
|
+
rnapolis/annotator.py,sha256=bcyqmUSSRyl0ejA3548K4czSElBMX3EpCKAfJ2tYjsw,21476
|
3
2
|
rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
|
4
|
-
rnapolis/common.py,sha256=
|
3
|
+
rnapolis/common.py,sha256=owupPG9oylz4Ed4DqVYJqWIKpovLJ3EIIApgca6tuhg,27344
|
5
4
|
rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
|
6
5
|
rnapolis/molecule_filter.py,sha256=NhjuqdCRnXgPefWZPeTq77tifmnAzamQtA0ODqPPG9k,6918
|
7
6
|
rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
|
@@ -10,9 +9,9 @@ rnapolis/rfam_folder.py,sha256=3rgXEJR16uPFy_BOo8qkdClOAOQDVOkidnLE-yoRbeI,11112
|
|
10
9
|
rnapolis/tertiary.py,sha256=iWMPD9c21rjMPpEdBd7mPCQgds65IbOr4_Fy06s0NoU,18957
|
11
10
|
rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
|
12
11
|
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
13
|
-
RNApolis-0.3.10.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
14
|
-
RNApolis-0.3.10.dist-info/METADATA,sha256=
|
15
|
-
RNApolis-0.3.10.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
16
|
-
RNApolis-0.3.10.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
|
17
|
-
RNApolis-0.3.10.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
18
|
-
RNApolis-0.3.10.dist-info/RECORD,,
|
12
|
+
RNApolis-0.3.11.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
13
|
+
RNApolis-0.3.11.dist-info/METADATA,sha256=J0a3wmvQoWPVFgSgvIxMkMFSBCz3KFHB8BHKFNtIdKw,54301
|
14
|
+
RNApolis-0.3.11.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
15
|
+
RNApolis-0.3.11.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
|
16
|
+
RNApolis-0.3.11.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
17
|
+
RNApolis-0.3.11.dist-info/RECORD,,
|
rnapolis/annotator.py
CHANGED
@@ -486,7 +486,7 @@ def extract_secondary_structure(
|
|
486
486
|
tertiary_structure: Structure3D,
|
487
487
|
model: Optional[int] = None,
|
488
488
|
find_gaps: bool = False,
|
489
|
-
) -> BaseInteractions:
|
489
|
+
) -> Structure2D:
|
490
490
|
base_interactions = extract_base_interactions(tertiary_structure, model)
|
491
491
|
mapping = Mapping2D3D(
|
492
492
|
tertiary_structure,
|
rnapolis/common.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
import itertools
|
2
2
|
import logging
|
3
3
|
import os
|
4
|
+
import re
|
4
5
|
import string
|
5
6
|
from collections import defaultdict
|
6
7
|
from collections.abc import Sequence
|
@@ -896,6 +897,37 @@ class DotBracket:
|
|
896
897
|
return f"{self.sequence}\n{self.structure}"
|
897
898
|
|
898
899
|
|
900
|
+
@dataclass
|
901
|
+
class MultiStrandDotBracket(DotBracket):
|
902
|
+
strands: List[Strand]
|
903
|
+
|
904
|
+
@staticmethod
|
905
|
+
def from_string(input: str):
|
906
|
+
strands = []
|
907
|
+
first = 1
|
908
|
+
|
909
|
+
for match in re.finditer(
|
910
|
+
r"((>.*?\n)?([ACGUNacgun]+)\n([.()\[\]{}<>A-Za-z]+))", input
|
911
|
+
):
|
912
|
+
sequence = match.group(3)
|
913
|
+
structure = match.group(4)
|
914
|
+
assert len(sequence) == len(structure)
|
915
|
+
last = first + len(sequence) - 1
|
916
|
+
strands.append(Strand(first, last, sequence, structure))
|
917
|
+
first = last + 1
|
918
|
+
|
919
|
+
return MultiStrandDotBracket(
|
920
|
+
"".join(strand.sequence for strand in strands),
|
921
|
+
"".join(strand.structure for strand in strands),
|
922
|
+
strands,
|
923
|
+
)
|
924
|
+
|
925
|
+
@staticmethod
|
926
|
+
def from_file(path: str):
|
927
|
+
with open(path) as f:
|
928
|
+
return MultiStrandDotBracket.from_string(f.read())
|
929
|
+
|
930
|
+
|
899
931
|
@dataclass(frozen=True, order=True)
|
900
932
|
class BaseInteractions:
|
901
933
|
basePairs: List[BasePair]
|
rnapolis/annotator_ml.py
DELETED
@@ -1,278 +0,0 @@
|
|
1
|
-
#! /usr/bin/env python
|
2
|
-
import argparse
|
3
|
-
import csv
|
4
|
-
import logging
|
5
|
-
import math
|
6
|
-
import os
|
7
|
-
from collections import defaultdict
|
8
|
-
from typing import Dict, List, Optional, Tuple
|
9
|
-
|
10
|
-
import numpy
|
11
|
-
import numpy.typing
|
12
|
-
import orjson
|
13
|
-
from ordered_set import OrderedSet
|
14
|
-
from scipy.spatial import KDTree
|
15
|
-
|
16
|
-
from rnapolis.common import (
|
17
|
-
BaseInteractions,
|
18
|
-
BasePair,
|
19
|
-
BpSeq,
|
20
|
-
LeontisWesthof,
|
21
|
-
Residue,
|
22
|
-
Saenger,
|
23
|
-
Stacking,
|
24
|
-
Structure2D,
|
25
|
-
)
|
26
|
-
from rnapolis.parser import read_3d_structure
|
27
|
-
from rnapolis.tertiary import (
|
28
|
-
AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT,
|
29
|
-
Atom,
|
30
|
-
Mapping2D3D,
|
31
|
-
Residue3D,
|
32
|
-
Structure3D,
|
33
|
-
torsion_angle,
|
34
|
-
)
|
35
|
-
from rnapolis.util import handle_input_file
|
36
|
-
|
37
|
-
C1P_MAX_DISTANCE = 10.0
|
38
|
-
|
39
|
-
logging.basicConfig(level=os.getenv("LOGLEVEL", "INFO").upper())
|
40
|
-
|
41
|
-
|
42
|
-
# TODO: implement this function
|
43
|
-
def is_base_pair(residue_i: Residue3D, residue_j: Residue3D) -> bool:
|
44
|
-
return False
|
45
|
-
|
46
|
-
|
47
|
-
# TODO: implement this function
|
48
|
-
def classify_lw(residue_i: Residue3D, residue_j: Residue3D) -> Optional[LeontisWesthof]:
|
49
|
-
return None
|
50
|
-
|
51
|
-
|
52
|
-
# TODO: implement this function
|
53
|
-
def classify_saenger(residue_i: Residue3D, residue_j: Residue3D) -> Optional[Saenger]:
|
54
|
-
return None
|
55
|
-
|
56
|
-
|
57
|
-
# TODO: implement this function
|
58
|
-
def is_stacking(residue_i: Residue3D, residue_j: Residue3D) -> bool:
|
59
|
-
return False
|
60
|
-
|
61
|
-
|
62
|
-
def find_candidates(
|
63
|
-
structure: Structure3D, model: Optional[int] = None
|
64
|
-
) -> List[Tuple[Residue3D, Residue3D]]:
|
65
|
-
residue_map = {}
|
66
|
-
coordinates = []
|
67
|
-
|
68
|
-
for residue in structure.residues:
|
69
|
-
if model is not None and residue.model != model:
|
70
|
-
continue
|
71
|
-
|
72
|
-
atom = residue.find_atom("C1'")
|
73
|
-
|
74
|
-
if atom is not None:
|
75
|
-
atom_xyz = (atom.x, atom.y, atom.z)
|
76
|
-
residue_map[atom_xyz] = residue
|
77
|
-
coordinates.append(atom_xyz)
|
78
|
-
|
79
|
-
kdtree = KDTree(coordinates)
|
80
|
-
candidates = []
|
81
|
-
|
82
|
-
for i, j in kdtree.query_pairs(C1P_MAX_DISTANCE):
|
83
|
-
residue_i = residue_map[coordinates[i]]
|
84
|
-
residue_j = residue_map[coordinates[j]]
|
85
|
-
candidates.append((residue_i, residue_j))
|
86
|
-
|
87
|
-
return candidates
|
88
|
-
|
89
|
-
|
90
|
-
def find_pairs(structure: Structure3D, model: Optional[int] = None) -> List[BasePair]:
|
91
|
-
base_pairs = []
|
92
|
-
|
93
|
-
for residue_i, residue_j in find_candidates(structure, model):
|
94
|
-
if is_base_pair(residue_i, residue_j):
|
95
|
-
lw = classify_lw(residue_i, residue_j)
|
96
|
-
saenger = classify_saenger(residue_i, residue_j)
|
97
|
-
base_pairs.append(
|
98
|
-
BasePair(
|
99
|
-
Residue(residue_i.label, residue_i.auth),
|
100
|
-
Residue(residue_j.label, residue_j.auth),
|
101
|
-
lw,
|
102
|
-
saenger,
|
103
|
-
)
|
104
|
-
)
|
105
|
-
|
106
|
-
return base_pairs
|
107
|
-
|
108
|
-
|
109
|
-
def find_stackings(
|
110
|
-
structure: Structure3D, model: Optional[int] = None
|
111
|
-
) -> List[Stacking]:
|
112
|
-
stackings = []
|
113
|
-
|
114
|
-
for residue_i, residue_j in find_candidates(structure, model):
|
115
|
-
if is_stacking(residue_i, residue_j):
|
116
|
-
stackings.append(
|
117
|
-
Stacking(
|
118
|
-
Residue(residue_i.label, residue_i.auth),
|
119
|
-
Residue(residue_j.label, residue_j.auth),
|
120
|
-
None,
|
121
|
-
)
|
122
|
-
)
|
123
|
-
|
124
|
-
return stackings
|
125
|
-
|
126
|
-
|
127
|
-
def extract_base_interactions(
|
128
|
-
tertiary_structure: Structure3D, model: Optional[int] = None
|
129
|
-
) -> BaseInteractions:
|
130
|
-
base_pairs = find_pairs(tertiary_structure, model)
|
131
|
-
stackings = find_stackings(tertiary_structure, model)
|
132
|
-
return BaseInteractions(base_pairs, stackings, [], [], [])
|
133
|
-
|
134
|
-
|
135
|
-
def extract_secondary_structure(
|
136
|
-
tertiary_structure: Structure3D,
|
137
|
-
model: Optional[int] = None,
|
138
|
-
find_gaps: bool = False,
|
139
|
-
) -> BaseInteractions:
|
140
|
-
base_interactions = extract_base_interactions(tertiary_structure, model)
|
141
|
-
mapping = Mapping2D3D(
|
142
|
-
tertiary_structure,
|
143
|
-
base_interactions.basePairs,
|
144
|
-
base_interactions.stackings,
|
145
|
-
find_gaps,
|
146
|
-
)
|
147
|
-
stems, single_strands, hairpins, loops = mapping.bpseq.elements
|
148
|
-
return Structure2D(
|
149
|
-
base_interactions,
|
150
|
-
str(mapping.bpseq),
|
151
|
-
mapping.dot_bracket,
|
152
|
-
mapping.extended_dot_bracket,
|
153
|
-
stems,
|
154
|
-
single_strands,
|
155
|
-
hairpins,
|
156
|
-
loops,
|
157
|
-
)
|
158
|
-
|
159
|
-
|
160
|
-
def write_json(path: str, structure2d: BaseInteractions):
|
161
|
-
with open(path, "wb") as f:
|
162
|
-
f.write(orjson.dumps(structure2d))
|
163
|
-
|
164
|
-
|
165
|
-
def write_csv(path: str, structure2d: Structure2D):
|
166
|
-
with open(path, "w") as f:
|
167
|
-
writer = csv.writer(f)
|
168
|
-
writer.writerow(["nt1", "nt2", "type", "classification-1", "classification-2"])
|
169
|
-
for base_pair in structure2d.baseInteractions.basePairs:
|
170
|
-
writer.writerow(
|
171
|
-
[
|
172
|
-
base_pair.nt1.full_name,
|
173
|
-
base_pair.nt2.full_name,
|
174
|
-
"base pair",
|
175
|
-
base_pair.lw.value,
|
176
|
-
(
|
177
|
-
base_pair.saenger.value or ""
|
178
|
-
if base_pair.saenger is not None
|
179
|
-
else ""
|
180
|
-
),
|
181
|
-
]
|
182
|
-
)
|
183
|
-
for stacking in structure2d.baseInteractions.stackings:
|
184
|
-
writer.writerow(
|
185
|
-
[
|
186
|
-
stacking.nt1.full_name,
|
187
|
-
stacking.nt2.full_name,
|
188
|
-
"stacking",
|
189
|
-
stacking.topology.value if stacking.topology is not None else "",
|
190
|
-
"",
|
191
|
-
]
|
192
|
-
)
|
193
|
-
for base_phosphate in structure2d.baseInteractions.basePhosphateInteractions:
|
194
|
-
writer.writerow(
|
195
|
-
[
|
196
|
-
base_phosphate.nt1.full_name,
|
197
|
-
base_phosphate.nt2.full_name,
|
198
|
-
"base-phosphate interaction",
|
199
|
-
base_phosphate.bph.value if base_phosphate.bph is not None else "",
|
200
|
-
"",
|
201
|
-
]
|
202
|
-
)
|
203
|
-
for base_ribose in structure2d.baseInteractions.basePhosphateInteractions:
|
204
|
-
writer.writerow(
|
205
|
-
[
|
206
|
-
base_ribose.nt1.full_name,
|
207
|
-
base_ribose.nt2.full_name,
|
208
|
-
"base-ribose interaction",
|
209
|
-
base_ribose.bph.value if base_ribose.bph is not None else "",
|
210
|
-
"",
|
211
|
-
]
|
212
|
-
)
|
213
|
-
for other in structure2d.baseInteractions.otherInteractions:
|
214
|
-
writer.writerow(
|
215
|
-
[
|
216
|
-
other.nt1.full_name,
|
217
|
-
other.nt2.full_name,
|
218
|
-
"other interaction",
|
219
|
-
"",
|
220
|
-
"",
|
221
|
-
]
|
222
|
-
)
|
223
|
-
|
224
|
-
|
225
|
-
def write_bpseq(path: str, bpseq: BpSeq):
|
226
|
-
with open(path, "w") as f:
|
227
|
-
f.write(str(bpseq))
|
228
|
-
|
229
|
-
|
230
|
-
def main():
|
231
|
-
parser = argparse.ArgumentParser()
|
232
|
-
parser.add_argument("input", help="Path to PDB or mmCIF file")
|
233
|
-
parser.add_argument("--bpseq", help="(optional) path to output BPSEQ file")
|
234
|
-
parser.add_argument("--csv", help="(optional) path to output CSV file")
|
235
|
-
parser.add_argument(
|
236
|
-
"--json",
|
237
|
-
help="(optional) path to output JSON file",
|
238
|
-
)
|
239
|
-
parser.add_argument(
|
240
|
-
"--extended",
|
241
|
-
action="store_true",
|
242
|
-
help="(optional) if set, the program will print extended secondary structure to the standard output",
|
243
|
-
)
|
244
|
-
parser.add_argument(
|
245
|
-
"--find-gaps",
|
246
|
-
action="store_true",
|
247
|
-
help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
|
248
|
-
f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
|
249
|
-
)
|
250
|
-
parser.add_argument("--dot", help="(optional) path to output DOT file")
|
251
|
-
args = parser.parse_args()
|
252
|
-
|
253
|
-
breakpoint()
|
254
|
-
|
255
|
-
file = handle_input_file(args.input)
|
256
|
-
structure3d = read_3d_structure(file, None)
|
257
|
-
structure2d = extract_secondary_structure(structure3d, None, args.find_gaps)
|
258
|
-
|
259
|
-
if args.csv:
|
260
|
-
write_csv(args.csv, structure2d)
|
261
|
-
|
262
|
-
if args.json:
|
263
|
-
write_json(args.json, structure2d)
|
264
|
-
|
265
|
-
if args.bpseq:
|
266
|
-
write_bpseq(args.bpseq, structure2d.bpseq)
|
267
|
-
|
268
|
-
if args.extended:
|
269
|
-
print(structure2d.extendedDotBracket)
|
270
|
-
else:
|
271
|
-
print(structure2d.dotBracket)
|
272
|
-
|
273
|
-
if args.dot:
|
274
|
-
print(BpSeq.from_string(structure2d.bpseq).graphviz)
|
275
|
-
|
276
|
-
|
277
|
-
if __name__ == "__main__":
|
278
|
-
main()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|