RNApolis 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- {RNApolis-0.3.10.dist-info → RNApolis-0.3.11.dist-info}/METADATA +1 -1
- {RNApolis-0.3.10.dist-info → RNApolis-0.3.11.dist-info}/RECORD +8 -9
- rnapolis/annotator.py +1 -1
- rnapolis/common.py +32 -0
- rnapolis/annotator_ml.py +0 -278
- {RNApolis-0.3.10.dist-info → RNApolis-0.3.11.dist-info}/LICENSE +0 -0
- {RNApolis-0.3.10.dist-info → RNApolis-0.3.11.dist-info}/WHEEL +0 -0
- {RNApolis-0.3.10.dist-info → RNApolis-0.3.11.dist-info}/entry_points.txt +0 -0
- {RNApolis-0.3.10.dist-info → RNApolis-0.3.11.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,6 @@
|
|
1
|
-
rnapolis/annotator.py,sha256=
|
2
|
-
rnapolis/annotator_ml.py,sha256=eZYDIaUcAVWvUHKvA0lx6lWEaATU1THNYbfVKvVa9mQ,8226
|
1
|
+
rnapolis/annotator.py,sha256=bcyqmUSSRyl0ejA3548K4czSElBMX3EpCKAfJ2tYjsw,21476
|
3
2
|
rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
|
4
|
-
rnapolis/common.py,sha256=
|
3
|
+
rnapolis/common.py,sha256=owupPG9oylz4Ed4DqVYJqWIKpovLJ3EIIApgca6tuhg,27344
|
5
4
|
rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
|
6
5
|
rnapolis/molecule_filter.py,sha256=NhjuqdCRnXgPefWZPeTq77tifmnAzamQtA0ODqPPG9k,6918
|
7
6
|
rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
|
@@ -10,9 +9,9 @@ rnapolis/rfam_folder.py,sha256=3rgXEJR16uPFy_BOo8qkdClOAOQDVOkidnLE-yoRbeI,11112
|
|
10
9
|
rnapolis/tertiary.py,sha256=iWMPD9c21rjMPpEdBd7mPCQgds65IbOr4_Fy06s0NoU,18957
|
11
10
|
rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
|
12
11
|
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
13
|
-
RNApolis-0.3.
|
14
|
-
RNApolis-0.3.
|
15
|
-
RNApolis-0.3.
|
16
|
-
RNApolis-0.3.
|
17
|
-
RNApolis-0.3.
|
18
|
-
RNApolis-0.3.
|
12
|
+
RNApolis-0.3.11.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
13
|
+
RNApolis-0.3.11.dist-info/METADATA,sha256=J0a3wmvQoWPVFgSgvIxMkMFSBCz3KFHB8BHKFNtIdKw,54301
|
14
|
+
RNApolis-0.3.11.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
15
|
+
RNApolis-0.3.11.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
|
16
|
+
RNApolis-0.3.11.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
17
|
+
RNApolis-0.3.11.dist-info/RECORD,,
|
rnapolis/annotator.py
CHANGED
@@ -486,7 +486,7 @@ def extract_secondary_structure(
|
|
486
486
|
tertiary_structure: Structure3D,
|
487
487
|
model: Optional[int] = None,
|
488
488
|
find_gaps: bool = False,
|
489
|
-
) ->
|
489
|
+
) -> Structure2D:
|
490
490
|
base_interactions = extract_base_interactions(tertiary_structure, model)
|
491
491
|
mapping = Mapping2D3D(
|
492
492
|
tertiary_structure,
|
rnapolis/common.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
import itertools
|
2
2
|
import logging
|
3
3
|
import os
|
4
|
+
import re
|
4
5
|
import string
|
5
6
|
from collections import defaultdict
|
6
7
|
from collections.abc import Sequence
|
@@ -896,6 +897,37 @@ class DotBracket:
|
|
896
897
|
return f"{self.sequence}\n{self.structure}"
|
897
898
|
|
898
899
|
|
900
|
+
@dataclass
class MultiStrandDotBracket(DotBracket):
    """Dot-bracket notation that also keeps the individual strands it was built from.

    The sequence and structure passed to the ``DotBracket`` base are the
    concatenation of all parsed strands, in input order.
    """

    # Parsed strands in input order; residue numbers run consecutively from 1.
    strands: List[Strand]

    @staticmethod
    def from_string(input: str) -> "MultiStrandDotBracket":
        """Build an instance from text holding one or more strands.

        A strand is a sequence line (``ACGUN``, upper or lower case) immediately
        followed by a structure line. An optional ``>`` header line may precede
        the sequence; it is matched but not stored. Text between matches is
        ignored by the regular-expression scan.

        Args:
            input: Text to parse.

        Returns:
            A ``MultiStrandDotBracket`` whose ``strands`` are numbered
            consecutively, the first strand starting at 1.

        Raises:
            ValueError: If a sequence and its structure have different lengths.
        """
        strands = []
        first = 1

        for match in re.finditer(
            r"((>.*?\n)?([ACGUNacgun]+)\n([.()\[\]{}<>A-Za-z]+))", input
        ):
            sequence = match.group(3)
            structure = match.group(4)
            # Explicit check rather than ``assert``: assertions are removed under
            # ``python -O`` and a mismatch would then corrupt strand numbering.
            if len(sequence) != len(structure):
                raise ValueError(
                    "Sequence and structure lengths differ: "
                    f"{len(sequence)} != {len(structure)} for strand starting at {first}"
                )
            last = first + len(sequence) - 1
            strands.append(Strand(first, last, sequence, structure))
            first = last + 1

        return MultiStrandDotBracket(
            "".join(strand.sequence for strand in strands),
            "".join(strand.structure for strand in strands),
            strands,
        )

    @staticmethod
    def from_file(path: str) -> "MultiStrandDotBracket":
        """Read ``path`` as text and parse it with :meth:`from_string`."""
        with open(path) as f:
            return MultiStrandDotBracket.from_string(f.read())
|
929
|
+
|
930
|
+
|
899
931
|
@dataclass(frozen=True, order=True)
|
900
932
|
class BaseInteractions:
|
901
933
|
basePairs: List[BasePair]
|
rnapolis/annotator_ml.py
DELETED
@@ -1,278 +0,0 @@
|
|
1
|
-
#! /usr/bin/env python
|
2
|
-
import argparse
|
3
|
-
import csv
|
4
|
-
import logging
|
5
|
-
import math
|
6
|
-
import os
|
7
|
-
from collections import defaultdict
|
8
|
-
from typing import Dict, List, Optional, Tuple
|
9
|
-
|
10
|
-
import numpy
|
11
|
-
import numpy.typing
|
12
|
-
import orjson
|
13
|
-
from ordered_set import OrderedSet
|
14
|
-
from scipy.spatial import KDTree
|
15
|
-
|
16
|
-
from rnapolis.common import (
|
17
|
-
BaseInteractions,
|
18
|
-
BasePair,
|
19
|
-
BpSeq,
|
20
|
-
LeontisWesthof,
|
21
|
-
Residue,
|
22
|
-
Saenger,
|
23
|
-
Stacking,
|
24
|
-
Structure2D,
|
25
|
-
)
|
26
|
-
from rnapolis.parser import read_3d_structure
|
27
|
-
from rnapolis.tertiary import (
|
28
|
-
AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT,
|
29
|
-
Atom,
|
30
|
-
Mapping2D3D,
|
31
|
-
Residue3D,
|
32
|
-
Structure3D,
|
33
|
-
torsion_angle,
|
34
|
-
)
|
35
|
-
from rnapolis.util import handle_input_file
|
36
|
-
|
37
|
-
# Search radius handed to KDTree.query_pairs() in find_candidates(); it is
# compared against distances between C1' atom coordinates.
C1P_MAX_DISTANCE = 10.0

# Log level comes from the LOGLEVEL environment variable, defaulting to INFO.
logging.basicConfig(level=os.getenv("LOGLEVEL", "INFO").upper())
|
40
|
-
|
41
|
-
|
42
|
-
# TODO: implement this function
|
43
|
-
def is_base_pair(residue_i: Residue3D, residue_j: Residue3D) -> bool:
    """Placeholder: report whether two residues form a base pair.

    Not implemented yet; both arguments are ignored and the result is
    always ``False``.
    """
    return False
|
45
|
-
|
46
|
-
|
47
|
-
# TODO: implement this function
|
48
|
-
def classify_lw(residue_i: Residue3D, residue_j: Residue3D) -> Optional[LeontisWesthof]:
    """Placeholder: Leontis-Westhof classification of a residue pair.

    Not implemented yet; both arguments are ignored and the result is
    always ``None``.
    """
    return None
|
50
|
-
|
51
|
-
|
52
|
-
# TODO: implement this function
|
53
|
-
def classify_saenger(residue_i: Residue3D, residue_j: Residue3D) -> Optional[Saenger]:
    """Placeholder: Saenger classification of a residue pair.

    Not implemented yet; both arguments are ignored and the result is
    always ``None``.
    """
    return None
|
55
|
-
|
56
|
-
|
57
|
-
# TODO: implement this function
|
58
|
-
def is_stacking(residue_i: Residue3D, residue_j: Residue3D) -> bool:
    """Placeholder: report whether two residues stack on each other.

    Not implemented yet; both arguments are ignored and the result is
    always ``False``.
    """
    return False
|
60
|
-
|
61
|
-
|
62
|
-
def find_candidates(
    structure: Structure3D, model: Optional[int] = None
) -> List[Tuple[Residue3D, Residue3D]]:
    """Return residue pairs whose C1' atoms lie within ``C1P_MAX_DISTANCE``.

    Args:
        structure: Structure whose ``residues`` are scanned.
        model: When given, only residues with this ``model`` value are used.

    Returns:
        Pairs of residues, ordered by their position in ``structure.residues``.
        Residues without a C1' atom never appear. The list is empty when fewer
        than two residues qualify.
    """
    # Residues and coordinates are kept in parallel lists so that KDTree
    # indices map straight back to residues. Keying a dict by coordinates
    # would merge residues that share identical C1' coordinates.
    residues = []
    coordinates = []

    for residue in structure.residues:
        if model is not None and residue.model != model:
            continue

        atom = residue.find_atom("C1'")

        if atom is not None:
            residues.append(residue)
            coordinates.append((atom.x, atom.y, atom.z))

    # A pair needs two points; this also avoids building a KDTree from an
    # empty list, which scipy rejects.
    if len(coordinates) < 2:
        return []

    kdtree = KDTree(coordinates)

    # query_pairs() returns a set of (i, j) with i < j; sorting makes the
    # output order reproducible.
    return [
        (residues[i], residues[j])
        for i, j in sorted(kdtree.query_pairs(C1P_MAX_DISTANCE))
    ]
|
88
|
-
|
89
|
-
|
90
|
-
def find_pairs(structure: Structure3D, model: Optional[int] = None) -> List[BasePair]:
    """Collect a ``BasePair`` for every candidate pair accepted by ``is_base_pair``.

    Each accepted pair is classified with ``classify_lw`` and
    ``classify_saenger``; candidates come from ``find_candidates``.
    """
    found = []

    for first, second in find_candidates(structure, model):
        if not is_base_pair(first, second):
            continue

        leontis_westhof = classify_lw(first, second)
        saenger_class = classify_saenger(first, second)
        nt1 = Residue(first.label, first.auth)
        nt2 = Residue(second.label, second.auth)
        found.append(BasePair(nt1, nt2, leontis_westhof, saenger_class))

    return found
|
107
|
-
|
108
|
-
|
109
|
-
def find_stackings(
    structure: Structure3D, model: Optional[int] = None
) -> List[Stacking]:
    """Collect a ``Stacking`` for every candidate pair accepted by ``is_stacking``.

    The third ``Stacking`` argument is always ``None``; candidates come from
    ``find_candidates``.
    """
    return [
        Stacking(
            Residue(first.label, first.auth),
            Residue(second.label, second.auth),
            None,
        )
        for first, second in find_candidates(structure, model)
        if is_stacking(first, second)
    ]
|
125
|
-
|
126
|
-
|
127
|
-
def extract_base_interactions(
    tertiary_structure: Structure3D, model: Optional[int] = None
) -> BaseInteractions:
    """Bundle detected base pairs and stackings into a ``BaseInteractions``.

    The three remaining interaction lists are left empty.
    """
    return BaseInteractions(
        find_pairs(tertiary_structure, model),
        find_stackings(tertiary_structure, model),
        [],
        [],
        [],
    )
|
133
|
-
|
134
|
-
|
135
|
-
def extract_secondary_structure(
    tertiary_structure: Structure3D,
    model: Optional[int] = None,
    find_gaps: bool = False,
) -> Structure2D:
    """Derive the secondary structure of a 3D structure.

    Args:
        tertiary_structure: Structure to analyse.
        model: Passed to ``extract_base_interactions`` to restrict the model.
        find_gaps: Passed to ``Mapping2D3D`` unchanged.

    Returns:
        A ``Structure2D`` holding the base interactions, the BPSEQ text, both
        dot-bracket forms, and the stems, single strands, hairpins and loops
        taken from the mapping's BPSEQ.
    """
    base_interactions = extract_base_interactions(tertiary_structure, model)
    mapping = Mapping2D3D(
        tertiary_structure,
        base_interactions.basePairs,
        base_interactions.stackings,
        find_gaps,
    )
    stems, single_strands, hairpins, loops = mapping.bpseq.elements
    # The return annotation used to say BaseInteractions, but a Structure2D
    # is what is built here.
    return Structure2D(
        base_interactions,
        str(mapping.bpseq),
        mapping.dot_bracket,
        mapping.extended_dot_bracket,
        stems,
        single_strands,
        hairpins,
        loops,
    )
|
158
|
-
|
159
|
-
|
160
|
-
def write_json(path: str, structure2d: Structure2D):
    """Serialize ``structure2d`` with ``orjson.dumps`` and write the bytes to ``path``.

    ``main`` passes the ``Structure2D`` returned by
    ``extract_secondary_structure``, so the parameter is annotated accordingly
    (it used to say ``BaseInteractions``).
    """
    # orjson.dumps() returns bytes, hence the binary mode.
    with open(path, "wb") as f:
        f.write(orjson.dumps(structure2d))
|
163
|
-
|
164
|
-
|
165
|
-
def write_csv(path: str, structure2d: Structure2D):
    """Write every interaction in ``structure2d.baseInteractions`` to a CSV file.

    Columns are ``nt1, nt2, type, classification-1, classification-2``. Rows
    are written in this order: base pairs, stackings, base-phosphate
    interactions, base-ribose interactions, other interactions. A missing
    classification becomes an empty cell.
    """

    def value_or_blank(classification):
        # Classifications are optional; None becomes an empty cell.
        return classification.value if classification is not None else ""

    interactions = structure2d.baseInteractions

    # newline="" lets the csv module control line endings itself; without it
    # platforms that translate "\n" produce blank lines between rows.
    with open(path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["nt1", "nt2", "type", "classification-1", "classification-2"])
        for base_pair in interactions.basePairs:
            writer.writerow(
                [
                    base_pair.nt1.full_name,
                    base_pair.nt2.full_name,
                    "base pair",
                    # lw may be None, so it is guarded like the other columns.
                    value_or_blank(base_pair.lw),
                    value_or_blank(base_pair.saenger) or "",
                ]
            )
        for stacking in interactions.stackings:
            writer.writerow(
                [
                    stacking.nt1.full_name,
                    stacking.nt2.full_name,
                    "stacking",
                    value_or_blank(stacking.topology),
                    "",
                ]
            )
        for base_phosphate in interactions.basePhosphateInteractions:
            writer.writerow(
                [
                    base_phosphate.nt1.full_name,
                    base_phosphate.nt2.full_name,
                    "base-phosphate interaction",
                    value_or_blank(base_phosphate.bph),
                    "",
                ]
            )
        # This loop used to iterate basePhosphateInteractions again and read
        # .bph, so base-phosphate rows were written twice under the wrong label.
        # NOTE(review): baseRiboseInteractions / .br are assumed to be the
        # field names in rnapolis.common - confirm.
        for base_ribose in interactions.baseRiboseInteractions:
            writer.writerow(
                [
                    base_ribose.nt1.full_name,
                    base_ribose.nt2.full_name,
                    "base-ribose interaction",
                    value_or_blank(base_ribose.br),
                    "",
                ]
            )
        for other in interactions.otherInteractions:
            writer.writerow(
                [
                    other.nt1.full_name,
                    other.nt2.full_name,
                    "other interaction",
                    "",
                    "",
                ]
            )
|
223
|
-
|
224
|
-
|
225
|
-
def write_bpseq(path: str, bpseq: BpSeq):
    """Write the string form of ``bpseq`` to ``path``, replacing any existing file."""
    with open(path, "w") as handle:
        # print() applies str() to its argument; end="" adds no trailing newline.
        print(bpseq, end="", file=handle)
|
228
|
-
|
229
|
-
|
230
|
-
def main():
    """Command-line entry point: annotate a PDB/mmCIF file and emit the results.

    Reads the input structure, extracts its secondary structure, writes the
    optional CSV / JSON / BPSEQ outputs, and prints the dot-bracket (or the
    extended dot-bracket with ``--extended``) to standard output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="Path to PDB or mmCIF file")
    parser.add_argument("--bpseq", help="(optional) path to output BPSEQ file")
    parser.add_argument("--csv", help="(optional) path to output CSV file")
    parser.add_argument(
        "--json",
        help="(optional) path to output JSON file",
    )
    parser.add_argument(
        "--extended",
        action="store_true",
        help="(optional) if set, the program will print extended secondary structure to the standard output",
    )
    parser.add_argument(
        "--find-gaps",
        action="store_true",
        help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
        f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
    )
    parser.add_argument("--dot", help="(optional) path to output DOT file")
    args = parser.parse_args()

    # A leftover breakpoint() call was removed here; it dropped every run
    # into the debugger.

    file = handle_input_file(args.input)
    structure3d = read_3d_structure(file, None)
    structure2d = extract_secondary_structure(structure3d, None, args.find_gaps)

    if args.csv:
        write_csv(args.csv, structure2d)

    if args.json:
        write_json(args.json, structure2d)

    if args.bpseq:
        write_bpseq(args.bpseq, structure2d.bpseq)

    if args.extended:
        print(structure2d.extendedDotBracket)
    else:
        print(structure2d.dotBracket)

    # NOTE(review): the --dot help text promises an output file, but the
    # graph is printed to standard output and args.dot is only used as a
    # flag. Behaviour is kept as is - confirm which one is intended.
    if args.dot:
        print(BpSeq.from_string(structure2d.bpseq).graphviz)


if __name__ == "__main__":
    main()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|