RNApolis 0.3.9__py3-none-any.whl → 0.3.10__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.3.9
3
+ Version: 0.3.10
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -1,17 +1,18 @@
1
1
  rnapolis/annotator.py,sha256=8AwrCKy_5CKU3HsRgqj5U2aQrAXiysoAnqjsDdvCAEA,21481
2
+ rnapolis/annotator_ml.py,sha256=eZYDIaUcAVWvUHKvA0lx6lWEaATU1THNYbfVKvVa9mQ,8226
2
3
  rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
3
4
  rnapolis/common.py,sha256=DPmRpNkMaxuIai3vfLzSlP6IN0zpj6kmT3LoRjnJUWE,26440
4
5
  rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
5
6
  rnapolis/molecule_filter.py,sha256=NhjuqdCRnXgPefWZPeTq77tifmnAzamQtA0ODqPPG9k,6918
6
7
  rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
7
- rnapolis/parser.py,sha256=D2PwAFurTjZDP8WBV2AOxFHPLAFE8U1G_vC_5Y7BE4U,11948
8
+ rnapolis/parser.py,sha256=0uNKPnKiv5uaFVFGIzP8xbGLokimBkjs1XdlV0JmKIw,12217
8
9
  rnapolis/rfam_folder.py,sha256=3rgXEJR16uPFy_BOo8qkdClOAOQDVOkidnLE-yoRbeI,11112
9
10
  rnapolis/tertiary.py,sha256=iWMPD9c21rjMPpEdBd7mPCQgds65IbOr4_Fy06s0NoU,18957
10
11
  rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
11
12
  rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
12
- RNApolis-0.3.9.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
- RNApolis-0.3.9.dist-info/METADATA,sha256=E-6_MxMx9qQZK18G7EzHgwuxwYhchfkVXusFdBpG6nY,54300
14
- RNApolis-0.3.9.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15
- RNApolis-0.3.9.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
- RNApolis-0.3.9.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
- RNApolis-0.3.9.dist-info/RECORD,,
13
+ RNApolis-0.3.10.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
14
+ RNApolis-0.3.10.dist-info/METADATA,sha256=qfgkAg8MMlyGyOkWsl8y5JOffR_9AbsQgYOXE3L5XrA,54301
15
+ RNApolis-0.3.10.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
16
+ RNApolis-0.3.10.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
17
+ RNApolis-0.3.10.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
18
+ RNApolis-0.3.10.dist-info/RECORD,,
@@ -0,0 +1,278 @@
1
+ #! /usr/bin/env python
2
+ import argparse
3
+ import csv
4
+ import logging
5
+ import math
6
+ import os
7
+ from collections import defaultdict
8
+ from typing import Dict, List, Optional, Tuple
9
+
10
+ import numpy
11
+ import numpy.typing
12
+ import orjson
13
+ from ordered_set import OrderedSet
14
+ from scipy.spatial import KDTree
15
+
16
+ from rnapolis.common import (
17
+ BaseInteractions,
18
+ BasePair,
19
+ BpSeq,
20
+ LeontisWesthof,
21
+ Residue,
22
+ Saenger,
23
+ Stacking,
24
+ Structure2D,
25
+ )
26
+ from rnapolis.parser import read_3d_structure
27
+ from rnapolis.tertiary import (
28
+ AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT,
29
+ Atom,
30
+ Mapping2D3D,
31
+ Residue3D,
32
+ Structure3D,
33
+ torsion_angle,
34
+ )
35
+ from rnapolis.util import handle_input_file
36
+
37
# Search radius handed to KDTree.query_pairs() in find_candidates(): two
# residues become interaction candidates when their C1' atoms are at most this
# far apart.
# NOTE(review): presumably in angstroms, like the input coordinates - confirm.
C1P_MAX_DISTANCE = 10.0

# Configure logging at import time; the level is taken from the LOGLEVEL
# environment variable and falls back to INFO when it is unset.
logging.basicConfig(level=os.getenv("LOGLEVEL", "INFO").upper())
40
+
41
+
42
# TODO: implement this function
def is_base_pair(residue_i: Residue3D, residue_j: Residue3D) -> bool:
    """Tell whether two candidate residues form a base pair.

    Placeholder: no detection logic exists yet, so every candidate is rejected
    and find_pairs() currently produces an empty list.
    """
    return False
45
+
46
+
47
# TODO: implement this function
def classify_lw(residue_i: Residue3D, residue_j: Residue3D) -> Optional[LeontisWesthof]:
    """Return the Leontis-Westhof class of a base pair, or None if unknown.

    Placeholder: no classification logic exists yet, so None is always
    returned.
    """
    return None
50
+
51
+
52
# TODO: implement this function
def classify_saenger(residue_i: Residue3D, residue_j: Residue3D) -> Optional[Saenger]:
    """Return the Saenger class of a base pair, or None if unknown.

    Placeholder: no classification logic exists yet, so None is always
    returned.
    """
    return None
55
+
56
+
57
# TODO: implement this function
def is_stacking(residue_i: Residue3D, residue_j: Residue3D) -> bool:
    """Tell whether two candidate residues stack on each other.

    Placeholder: no detection logic exists yet, so every candidate is rejected
    and find_stackings() currently produces an empty list.
    """
    return False
60
+
61
+
62
def find_candidates(
    structure: Structure3D, model: Optional[int] = None
) -> List[Tuple[Residue3D, Residue3D]]:
    """Collect residue pairs whose C1' atoms are within C1P_MAX_DISTANCE.

    Args:
        structure: tertiary structure whose ``residues`` are scanned.
        model: when given, only residues with ``residue.model == model`` are
            considered; ``None`` keeps residues of every model.

    Returns:
        A list of ``(residue_i, residue_j)`` tuples, ordered by the positions
        of the residues in ``structure.residues``. Residues without a C1'
        atom are skipped. The list is empty when fewer than two residues
        qualify.
    """
    # Residues and their C1' coordinates are kept in two parallel lists so the
    # indices returned by the KD-tree map back to residues unambiguously, even
    # when two residues share identical coordinates.
    residues = []
    coordinates = []

    for residue in structure.residues:
        if model is not None and residue.model != model:
            continue

        atom = residue.find_atom("C1'")
        if atom is None:
            continue

        residues.append(residue)
        coordinates.append((atom.x, atom.y, atom.z))

    # KDTree rejects an empty point set, and a single point cannot form a pair.
    if len(coordinates) < 2:
        return []

    kdtree = KDTree(coordinates)

    # query_pairs() returns a set of (i, j) index pairs with i < j; sorting
    # makes the output order reproducible between runs.
    return [
        (residues[i], residues[j])
        for i, j in sorted(kdtree.query_pairs(C1P_MAX_DISTANCE))
    ]
88
+
89
+
90
def find_pairs(structure: Structure3D, model: Optional[int] = None) -> List[BasePair]:
    """Build BasePair records for every candidate accepted by is_base_pair().

    Candidates come from find_candidates(structure, model). Each accepted pair
    is annotated with the results of classify_lw() and classify_saenger().
    """
    found = []

    for first, second in find_candidates(structure, model):
        if not is_base_pair(first, second):
            continue

        leontis_westhof = classify_lw(first, second)
        saenger_class = classify_saenger(first, second)
        nt1 = Residue(first.label, first.auth)
        nt2 = Residue(second.label, second.auth)
        found.append(BasePair(nt1, nt2, leontis_westhof, saenger_class))

    return found
107
+
108
+
109
def find_stackings(
    structure: Structure3D, model: Optional[int] = None
) -> List[Stacking]:
    """Build Stacking records for every candidate accepted by is_stacking().

    Candidates come from find_candidates(structure, model). The stacking
    topology is not determined here and is always stored as None.
    """
    return [
        Stacking(
            Residue(first.label, first.auth),
            Residue(second.label, second.auth),
            None,
        )
        for first, second in find_candidates(structure, model)
        if is_stacking(first, second)
    ]
125
+
126
+
127
def extract_base_interactions(
    tertiary_structure: Structure3D, model: Optional[int] = None
) -> BaseInteractions:
    """Gather base pairs and stackings of a structure into BaseInteractions.

    Only the first two fields are populated (from find_pairs() and
    find_stackings()); the remaining three interaction lists are left empty.
    """
    pairs = find_pairs(tertiary_structure, model)
    stacks = find_stackings(tertiary_structure, model)
    no_interactions_a, no_interactions_b, no_interactions_c = [], [], []
    return BaseInteractions(
        pairs,
        stacks,
        no_interactions_a,
        no_interactions_b,
        no_interactions_c,
    )
133
+
134
+
135
def extract_secondary_structure(
    tertiary_structure: Structure3D,
    model: Optional[int] = None,
    find_gaps: bool = False,
) -> Structure2D:
    """Derive the secondary structure description of a 3D structure.

    Args:
        tertiary_structure: structure to annotate.
        model: forwarded to extract_base_interactions() to restrict the
            residues taken into account; ``None`` means no restriction.
        find_gaps: forwarded unchanged to Mapping2D3D.

    Returns:
        A Structure2D holding the base interactions, the BPSEQ text, the
        dot-bracket and extended dot-bracket strings, and the stems, single
        strands, hairpins and loops reported by ``mapping.bpseq.elements``.
    """
    base_interactions = extract_base_interactions(tertiary_structure, model)
    mapping = Mapping2D3D(
        tertiary_structure,
        base_interactions.basePairs,
        base_interactions.stackings,
        find_gaps,
    )
    stems, single_strands, hairpins, loops = mapping.bpseq.elements
    return Structure2D(
        base_interactions,
        str(mapping.bpseq),  # stored as text, not as a BpSeq object
        mapping.dot_bracket,
        mapping.extended_dot_bracket,
        stems,
        single_strands,
        hairpins,
        loops,
    )
158
+
159
+
160
def write_json(path: str, structure2d: Structure2D):
    """Serialize ``structure2d`` with orjson and write the bytes to ``path``.

    The file is opened in binary mode because orjson.dumps() returns bytes.
    An existing file at ``path`` is overwritten.
    """
    with open(path, "wb") as f:
        f.write(orjson.dumps(structure2d))
163
+
164
+
165
def write_csv(path: str, structure2d: Structure2D):
    """Write all interactions of ``structure2d`` to a CSV file at ``path``.

    The file starts with the header
    ``nt1,nt2,type,classification-1,classification-2``, followed by one row
    per base pair, stacking, base-phosphate, base-ribose and other
    interaction, in that order. Missing classifications are written as empty
    fields. An existing file at ``path`` is overwritten.
    """

    def classification_value(classification):
        # Enum-like classification -> its value; a missing one -> empty field.
        return classification.value if classification is not None else ""

    interactions = structure2d.baseInteractions

    # newline="" is what the csv module requires so that the writer controls
    # line endings itself (no doubled carriage returns on Windows).
    with open(path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["nt1", "nt2", "type", "classification-1", "classification-2"])

        for base_pair in interactions.basePairs:
            writer.writerow(
                [
                    base_pair.nt1.full_name,
                    base_pair.nt2.full_name,
                    "base pair",
                    # lw may be None when the pair could not be classified
                    classification_value(base_pair.lw),
                    classification_value(base_pair.saenger) or "",
                ]
            )

        for stacking in interactions.stackings:
            writer.writerow(
                [
                    stacking.nt1.full_name,
                    stacking.nt2.full_name,
                    "stacking",
                    classification_value(stacking.topology),
                    "",
                ]
            )

        for base_phosphate in interactions.basePhosphateInteractions:
            writer.writerow(
                [
                    base_phosphate.nt1.full_name,
                    base_phosphate.nt2.full_name,
                    "base-phosphate interaction",
                    classification_value(base_phosphate.bph),
                    "",
                ]
            )

        # Base-ribose rows must come from the base-ribose list; reading the
        # base-phosphate list here would duplicate the rows written above.
        # NOTE(review): the names baseRiboseInteractions / br are assumed from
        # rnapolis.common - confirm against BaseInteractions and BaseRibose.
        for base_ribose in interactions.baseRiboseInteractions:
            writer.writerow(
                [
                    base_ribose.nt1.full_name,
                    base_ribose.nt2.full_name,
                    "base-ribose interaction",
                    classification_value(base_ribose.br),
                    "",
                ]
            )

        for other in interactions.otherInteractions:
            writer.writerow(
                [
                    other.nt1.full_name,
                    other.nt2.full_name,
                    "other interaction",
                    "",
                    "",
                ]
            )
223
+
224
+
225
def write_bpseq(path: str, bpseq: BpSeq):
    """Write the string form of ``bpseq`` to a text file at ``path``.

    The argument is converted with str(), and an existing file at ``path`` is
    overwritten.
    """
    with open(path, "w") as output:
        text = str(bpseq)
        output.write(text)
228
+
229
+
230
def main():
    """Command-line entry point: annotate a PDB/mmCIF file and export results.

    Reads the input structure, extracts its secondary structure and writes the
    optional CSV / JSON / BPSEQ outputs. The dot-bracket string (or the
    extended one with --extended) is always printed to standard output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="Path to PDB or mmCIF file")
    parser.add_argument("--bpseq", help="(optional) path to output BPSEQ file")
    parser.add_argument("--csv", help="(optional) path to output CSV file")
    parser.add_argument(
        "--json",
        help="(optional) path to output JSON file",
    )
    parser.add_argument(
        "--extended",
        action="store_true",
        help="(optional) if set, the program will print extended secondary structure to the standard output",
    )
    parser.add_argument(
        "--find-gaps",
        action="store_true",
        help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
        f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
    )
    parser.add_argument("--dot", help="(optional) path to output DOT file")
    args = parser.parse_args()

    # No breakpoint() here: a debugger trap in the entry point would stop every
    # run (and hang non-interactive ones) before any work is done.

    file = handle_input_file(args.input)
    structure3d = read_3d_structure(file, None)
    structure2d = extract_secondary_structure(structure3d, None, args.find_gaps)

    if args.csv:
        write_csv(args.csv, structure2d)

    if args.json:
        write_json(args.json, structure2d)

    if args.bpseq:
        write_bpseq(args.bpseq, structure2d.bpseq)

    if args.extended:
        print(structure2d.extendedDotBracket)
    else:
        print(structure2d.dotBracket)

    # NOTE(review): --dot is documented as an output path, yet the graph is
    # printed to standard output and the path itself is unused - confirm which
    # behaviour is intended before changing it.
    if args.dot:
        print(BpSeq.from_string(structure2d.bpseq).graphviz)


if __name__ == "__main__":
    main()
rnapolis/parser.py CHANGED
@@ -12,8 +12,15 @@ def read_3d_structure(
12
12
  atoms, modified, sequence = (
13
13
  parse_cif(cif_or_pdb) if is_cif(cif_or_pdb) else parse_pdb(cif_or_pdb)
14
14
  )
15
- if model is not None:
16
- atoms = list(filter(lambda atom: atom.model == model, atoms))
15
+ available_models = {atom.model: None for atom in atoms}
16
+ atoms_by_model = {
17
+ model: list(filter(lambda atom: atom.model == model, atoms))
18
+ for model in available_models
19
+ }
20
+ if model is not None and model in available_models:
21
+ atoms = atoms_by_model[model]
22
+ else:
23
+ atoms = atoms_by_model[list(available_models.keys())[0]]
17
24
  return group_atoms(atoms, modified, sequence, nucleic_acid_only)
18
25
 
19
26