RNApolis 0.3.9__py3-none-any.whl → 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {RNApolis-0.3.9.dist-info → RNApolis-0.3.10.dist-info}/METADATA +1 -1
- {RNApolis-0.3.9.dist-info → RNApolis-0.3.10.dist-info}/RECORD +8 -7
- rnapolis/annotator_ml.py +278 -0
- rnapolis/parser.py +9 -2
- {RNApolis-0.3.9.dist-info → RNApolis-0.3.10.dist-info}/LICENSE +0 -0
- {RNApolis-0.3.9.dist-info → RNApolis-0.3.10.dist-info}/WHEEL +0 -0
- {RNApolis-0.3.9.dist-info → RNApolis-0.3.10.dist-info}/entry_points.txt +0 -0
- {RNApolis-0.3.9.dist-info → RNApolis-0.3.10.dist-info}/top_level.txt +0 -0
@@ -1,17 +1,18 @@
|
|
1
1
|
rnapolis/annotator.py,sha256=8AwrCKy_5CKU3HsRgqj5U2aQrAXiysoAnqjsDdvCAEA,21481
|
2
|
+
rnapolis/annotator_ml.py,sha256=eZYDIaUcAVWvUHKvA0lx6lWEaATU1THNYbfVKvVa9mQ,8226
|
2
3
|
rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
|
3
4
|
rnapolis/common.py,sha256=DPmRpNkMaxuIai3vfLzSlP6IN0zpj6kmT3LoRjnJUWE,26440
|
4
5
|
rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
|
5
6
|
rnapolis/molecule_filter.py,sha256=NhjuqdCRnXgPefWZPeTq77tifmnAzamQtA0ODqPPG9k,6918
|
6
7
|
rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
|
7
|
-
rnapolis/parser.py,sha256=
|
8
|
+
rnapolis/parser.py,sha256=0uNKPnKiv5uaFVFGIzP8xbGLokimBkjs1XdlV0JmKIw,12217
|
8
9
|
rnapolis/rfam_folder.py,sha256=3rgXEJR16uPFy_BOo8qkdClOAOQDVOkidnLE-yoRbeI,11112
|
9
10
|
rnapolis/tertiary.py,sha256=iWMPD9c21rjMPpEdBd7mPCQgds65IbOr4_Fy06s0NoU,18957
|
10
11
|
rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
|
11
12
|
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
12
|
-
RNApolis-0.3.
|
13
|
-
RNApolis-0.3.
|
14
|
-
RNApolis-0.3.
|
15
|
-
RNApolis-0.3.
|
16
|
-
RNApolis-0.3.
|
17
|
-
RNApolis-0.3.
|
13
|
+
RNApolis-0.3.10.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
14
|
+
RNApolis-0.3.10.dist-info/METADATA,sha256=qfgkAg8MMlyGyOkWsl8y5JOffR_9AbsQgYOXE3L5XrA,54301
|
15
|
+
RNApolis-0.3.10.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
16
|
+
RNApolis-0.3.10.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
|
17
|
+
RNApolis-0.3.10.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
18
|
+
RNApolis-0.3.10.dist-info/RECORD,,
|
rnapolis/annotator_ml.py
ADDED
@@ -0,0 +1,278 @@
|
|
1
|
+
#! /usr/bin/env python
|
2
|
+
import argparse
|
3
|
+
import csv
|
4
|
+
import logging
|
5
|
+
import math
|
6
|
+
import os
|
7
|
+
from collections import defaultdict
|
8
|
+
from typing import Dict, List, Optional, Tuple
|
9
|
+
|
10
|
+
import numpy
|
11
|
+
import numpy.typing
|
12
|
+
import orjson
|
13
|
+
from ordered_set import OrderedSet
|
14
|
+
from scipy.spatial import KDTree
|
15
|
+
|
16
|
+
from rnapolis.common import (
|
17
|
+
BaseInteractions,
|
18
|
+
BasePair,
|
19
|
+
BpSeq,
|
20
|
+
LeontisWesthof,
|
21
|
+
Residue,
|
22
|
+
Saenger,
|
23
|
+
Stacking,
|
24
|
+
Structure2D,
|
25
|
+
)
|
26
|
+
from rnapolis.parser import read_3d_structure
|
27
|
+
from rnapolis.tertiary import (
|
28
|
+
AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT,
|
29
|
+
Atom,
|
30
|
+
Mapping2D3D,
|
31
|
+
Residue3D,
|
32
|
+
Structure3D,
|
33
|
+
torsion_angle,
|
34
|
+
)
|
35
|
+
from rnapolis.util import handle_input_file
|
36
|
+
|
37
|
+
# Search radius passed to KDTree.query_pairs() in find_candidates(): two
# residues are interaction candidates when their C1' atoms are at most this
# far apart.
C1P_MAX_DISTANCE = 10.0

# The log level comes from the LOGLEVEL environment variable (default: INFO).
logging.basicConfig(level=os.getenv("LOGLEVEL", "INFO").upper())
|
40
|
+
|
41
|
+
|
42
|
+
# TODO: implement this function
|
43
|
+
def is_base_pair(residue_i: Residue3D, residue_j: Residue3D) -> bool:
    """Tell whether two residues form a base pair.

    Placeholder: the detection logic is not implemented yet, so the arguments
    are ignored and every candidate is rejected.
    """
    return False
|
45
|
+
|
46
|
+
|
47
|
+
# TODO: implement this function
|
48
|
+
def classify_lw(residue_i: Residue3D, residue_j: Residue3D) -> Optional[LeontisWesthof]:
    """Return the Leontis-Westhof class of a base pair.

    Placeholder: the classification is not implemented yet, so the arguments
    are ignored and ``None`` is always returned.
    """
    return None
|
50
|
+
|
51
|
+
|
52
|
+
# TODO: implement this function
|
53
|
+
def classify_saenger(residue_i: Residue3D, residue_j: Residue3D) -> Optional[Saenger]:
    """Return the Saenger class of a base pair.

    Placeholder: the classification is not implemented yet, so the arguments
    are ignored and ``None`` is always returned.
    """
    return None
|
55
|
+
|
56
|
+
|
57
|
+
# TODO: implement this function
|
58
|
+
def is_stacking(residue_i: Residue3D, residue_j: Residue3D) -> bool:
    """Tell whether two residues stack on each other.

    Placeholder: the detection logic is not implemented yet, so the arguments
    are ignored and every candidate is rejected.
    """
    return False
|
60
|
+
|
61
|
+
|
62
|
+
def find_candidates(
    structure: Structure3D, model: Optional[int] = None
) -> List[Tuple[Residue3D, Residue3D]]:
    """Find pairs of residues close enough to be interaction candidates.

    Every residue that has a C1' atom contributes that atom's coordinates to
    a KD-tree; all pairs whose C1' atoms lie within ``C1P_MAX_DISTANCE`` of
    each other are reported.

    Args:
        structure: Tertiary structure whose ``residues`` are searched.
        model: If given, only residues whose ``model`` equals this value are
            considered; ``None`` keeps residues of every model.

    Returns:
        A list of ``(residue_i, residue_j)`` tuples. ``residue_i`` always
        precedes ``residue_j`` in ``structure.residues`` and the list is
        sorted by those positions, so the result is reproducible.
    """
    # Parallel lists: residues[k] owns coordinates[k]. Looking residues up by
    # index (rather than by coordinate tuple) keeps residues with identical
    # C1' coordinates distinct instead of collapsing them into one entry.
    residues = []
    coordinates = []

    for residue in structure.residues:
        if model is not None and residue.model != model:
            continue

        atom = residue.find_atom("C1'")

        if atom is not None:
            residues.append(residue)
            coordinates.append((atom.x, atom.y, atom.z))

    # No pair can exist with fewer than two points, and KDTree rejects an
    # empty point set, so bail out before building the tree.
    if len(coordinates) < 2:
        return []

    kdtree = KDTree(coordinates)

    # query_pairs() returns an unordered set of (i, j) with i < j.
    return [
        (residues[i], residues[j])
        for i, j in sorted(kdtree.query_pairs(C1P_MAX_DISTANCE))
    ]
|
88
|
+
|
89
|
+
|
90
|
+
def find_pairs(structure: Structure3D, model: Optional[int] = None) -> List[BasePair]:
    """Collect base pairs among the candidate residue pairs of a structure.

    Each candidate from ``find_candidates`` is tested with ``is_base_pair``;
    accepted pairs are classified with ``classify_lw`` and
    ``classify_saenger`` and wrapped into ``BasePair`` objects.

    Args:
        structure: Tertiary structure to analyse.
        model: Optional model number forwarded to ``find_candidates``.

    Returns:
        List of detected base pairs, in candidate order.
    """
    found = []

    for first, second in find_candidates(structure, model):
        if not is_base_pair(first, second):
            continue

        leontis_westhof = classify_lw(first, second)
        saenger_class = classify_saenger(first, second)
        nt1 = Residue(first.label, first.auth)
        nt2 = Residue(second.label, second.auth)
        found.append(BasePair(nt1, nt2, leontis_westhof, saenger_class))

    return found
|
107
|
+
|
108
|
+
|
109
|
+
def find_stackings(
    structure: Structure3D, model: Optional[int] = None
) -> List[Stacking]:
    """Collect stacking interactions among the candidate residue pairs.

    Each candidate from ``find_candidates`` is tested with ``is_stacking``;
    accepted pairs become ``Stacking`` objects whose third argument (read
    back as ``topology`` by ``write_csv``) is ``None``.

    Args:
        structure: Tertiary structure to analyse.
        model: Optional model number forwarded to ``find_candidates``.

    Returns:
        List of detected stackings, in candidate order.
    """
    return [
        Stacking(
            Residue(first.label, first.auth),
            Residue(second.label, second.auth),
            None,
        )
        for first, second in find_candidates(structure, model)
        if is_stacking(first, second)
    ]
|
125
|
+
|
126
|
+
|
127
|
+
def extract_base_interactions(
    tertiary_structure: Structure3D, model: Optional[int] = None
) -> BaseInteractions:
    """Detect base pairs and stackings in a tertiary structure.

    Args:
        tertiary_structure: Structure to analyse.
        model: Optional model number restricting the analysed residues.

    Returns:
        ``BaseInteractions`` holding the detected base pairs and stackings;
        the three remaining interaction lists are left empty.
    """
    return BaseInteractions(
        find_pairs(tertiary_structure, model),
        find_stackings(tertiary_structure, model),
        [],
        [],
        [],
    )
|
133
|
+
|
134
|
+
|
135
|
+
def extract_secondary_structure(
    tertiary_structure: Structure3D,
    model: Optional[int] = None,
    find_gaps: bool = False,
) -> Structure2D:
    """Derive the secondary structure of a tertiary structure.

    Base interactions are extracted first, then mapped onto the sequence
    with ``Mapping2D3D`` to obtain the BPSEQ, dot-bracket and structural
    elements.

    Args:
        tertiary_structure: Structure to analyse.
        model: Optional model number restricting the analysed residues.
        find_gaps: Forwarded to ``Mapping2D3D`` as its gap-detection switch.

    Returns:
        A ``Structure2D`` built from the interactions, the BPSEQ text, both
        dot-bracket notations, and the stems, single strands, hairpins and
        loops. (The annotation previously claimed ``BaseInteractions``,
        which did not match the returned object.)
    """
    base_interactions = extract_base_interactions(tertiary_structure, model)
    mapping = Mapping2D3D(
        tertiary_structure,
        base_interactions.basePairs,
        base_interactions.stackings,
        find_gaps,
    )
    stems, single_strands, hairpins, loops = mapping.bpseq.elements

    return Structure2D(
        base_interactions,
        str(mapping.bpseq),
        mapping.dot_bracket,
        mapping.extended_dot_bracket,
        stems,
        single_strands,
        hairpins,
        loops,
    )
|
158
|
+
|
159
|
+
|
160
|
+
def write_json(path: str, structure2d: BaseInteractions):
    """Serialize ``structure2d`` with orjson and write the bytes to ``path``.

    Args:
        path: Destination file; it is opened in binary mode and overwritten.
        structure2d: Object to serialize. NOTE(review): ``main`` passes a
            ``Structure2D`` here although the annotation says
            ``BaseInteractions`` - confirm which one is intended.
    """
    with open(path, "wb") as handle:
        payload = orjson.dumps(structure2d)
        handle.write(payload)
|
163
|
+
|
164
|
+
|
165
|
+
def write_csv(path: str, structure2d: Structure2D):
    """Write all base interactions of ``structure2d`` to a CSV file.

    The file starts with the header
    ``nt1,nt2,type,classification-1,classification-2`` followed by one row
    per base pair, stacking, base-phosphate, base-ribose and other
    interaction, in that order. Missing classifications are written as empty
    fields.

    Args:
        path: Destination file; it is overwritten.
        structure2d: Secondary structure whose ``baseInteractions`` are
            written.
    """

    def enum_value(item):
        # Classifications are optional; an absent one becomes an empty field.
        return item.value if item is not None else ""

    interactions = structure2d.baseInteractions

    # newline="" is what the csv module requires so that the writer fully
    # controls line endings (avoids "\r\r\n" on Windows).
    with open(path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["nt1", "nt2", "type", "classification-1", "classification-2"])

        for base_pair in interactions.basePairs:
            writer.writerow(
                [
                    base_pair.nt1.full_name,
                    base_pair.nt2.full_name,
                    "base pair",
                    # lw may be None (see classify_lw), so guard it like the
                    # other optional classifications.
                    enum_value(base_pair.lw),
                    enum_value(base_pair.saenger) or "",
                ]
            )

        for stacking in interactions.stackings:
            writer.writerow(
                [
                    stacking.nt1.full_name,
                    stacking.nt2.full_name,
                    "stacking",
                    enum_value(stacking.topology),
                    "",
                ]
            )

        for base_phosphate in interactions.basePhosphateInteractions:
            writer.writerow(
                [
                    base_phosphate.nt1.full_name,
                    base_phosphate.nt2.full_name,
                    "base-phosphate interaction",
                    enum_value(base_phosphate.bph),
                    "",
                ]
            )

        # Base-ribose rows must come from the base-ribose list and its own
        # classification field; iterating the base-phosphate list here
        # duplicated those rows under the wrong label.
        for base_ribose in interactions.baseRiboseInteractions:
            writer.writerow(
                [
                    base_ribose.nt1.full_name,
                    base_ribose.nt2.full_name,
                    "base-ribose interaction",
                    enum_value(base_ribose.br),
                    "",
                ]
            )

        for other in interactions.otherInteractions:
            writer.writerow(
                [
                    other.nt1.full_name,
                    other.nt2.full_name,
                    "other interaction",
                    "",
                    "",
                ]
            )
|
223
|
+
|
224
|
+
|
225
|
+
def write_bpseq(path: str, bpseq: BpSeq):
    """Write the string form of ``bpseq`` to the text file at ``path``.

    Args:
        path: Destination file; it is overwritten.
        bpseq: Object whose ``str()`` representation is written.
    """
    with open(path, "w") as handle:
        text = str(bpseq)
        handle.write(text)
|
228
|
+
|
229
|
+
|
230
|
+
def main():
    """Command-line entry point: annotate a PDB/mmCIF file.

    Parses the arguments, reads the 3D structure, extracts its secondary
    structure and writes the requested CSV/JSON/BPSEQ outputs. The
    dot-bracket (or extended dot-bracket with ``--extended``) is always
    printed to standard output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="Path to PDB or mmCIF file")
    parser.add_argument("--bpseq", help="(optional) path to output BPSEQ file")
    parser.add_argument("--csv", help="(optional) path to output CSV file")
    parser.add_argument(
        "--json",
        help="(optional) path to output JSON file",
    )
    parser.add_argument(
        "--extended",
        action="store_true",
        help="(optional) if set, the program will print extended secondary structure to the standard output",
    )
    parser.add_argument(
        "--find-gaps",
        action="store_true",
        help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
        f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
    )
    parser.add_argument("--dot", help="(optional) path to output DOT file")
    args = parser.parse_args()

    # A leftover breakpoint() used to sit here; it dropped every invocation
    # into the debugger (and hung non-interactive runs), so it was removed.

    file = handle_input_file(args.input)
    structure3d = read_3d_structure(file, None)
    structure2d = extract_secondary_structure(structure3d, None, args.find_gaps)

    if args.csv:
        write_csv(args.csv, structure2d)

    if args.json:
        write_json(args.json, structure2d)

    if args.bpseq:
        write_bpseq(args.bpseq, structure2d.bpseq)

    if args.extended:
        print(structure2d.extendedDotBracket)
    else:
        print(structure2d.dotBracket)

    # NOTE(review): the help text calls --dot a path to an output file, but
    # the graph is printed to stdout and the path is ignored - confirm the
    # intended behaviour before changing it.
    if args.dot:
        print(BpSeq.from_string(structure2d.bpseq).graphviz)


if __name__ == "__main__":
    main()
|
rnapolis/parser.py
CHANGED
@@ -12,8 +12,15 @@ def read_3d_structure(
|
|
12
12
|
atoms, modified, sequence = (
|
13
13
|
parse_cif(cif_or_pdb) if is_cif(cif_or_pdb) else parse_pdb(cif_or_pdb)
|
14
14
|
)
|
15
|
-
|
16
|
-
|
15
|
+
available_models = {atom.model: None for atom in atoms}
|
16
|
+
atoms_by_model = {
|
17
|
+
model: list(filter(lambda atom: atom.model == model, atoms))
|
18
|
+
for model in available_models
|
19
|
+
}
|
20
|
+
if model is not None and model in available_models:
|
21
|
+
atoms = atoms_by_model[model]
|
22
|
+
else:
|
23
|
+
atoms = atoms_by_model[list(available_models.keys())[0]]
|
17
24
|
return group_atoms(atoms, modified, sequence, nucleic_acid_only)
|
18
25
|
|
19
26
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|