RNApolis 0.3.9__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.3.9
3
+ Version: 0.3.10
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -1,17 +1,18 @@
1
1
  rnapolis/annotator.py,sha256=8AwrCKy_5CKU3HsRgqj5U2aQrAXiysoAnqjsDdvCAEA,21481
2
+ rnapolis/annotator_ml.py,sha256=eZYDIaUcAVWvUHKvA0lx6lWEaATU1THNYbfVKvVa9mQ,8226
2
3
  rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
3
4
  rnapolis/common.py,sha256=DPmRpNkMaxuIai3vfLzSlP6IN0zpj6kmT3LoRjnJUWE,26440
4
5
  rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
5
6
  rnapolis/molecule_filter.py,sha256=NhjuqdCRnXgPefWZPeTq77tifmnAzamQtA0ODqPPG9k,6918
6
7
  rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
7
- rnapolis/parser.py,sha256=D2PwAFurTjZDP8WBV2AOxFHPLAFE8U1G_vC_5Y7BE4U,11948
8
+ rnapolis/parser.py,sha256=0uNKPnKiv5uaFVFGIzP8xbGLokimBkjs1XdlV0JmKIw,12217
8
9
  rnapolis/rfam_folder.py,sha256=3rgXEJR16uPFy_BOo8qkdClOAOQDVOkidnLE-yoRbeI,11112
9
10
  rnapolis/tertiary.py,sha256=iWMPD9c21rjMPpEdBd7mPCQgds65IbOr4_Fy06s0NoU,18957
10
11
  rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
11
12
  rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
12
- RNApolis-0.3.9.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
- RNApolis-0.3.9.dist-info/METADATA,sha256=E-6_MxMx9qQZK18G7EzHgwuxwYhchfkVXusFdBpG6nY,54300
14
- RNApolis-0.3.9.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15
- RNApolis-0.3.9.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
- RNApolis-0.3.9.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
- RNApolis-0.3.9.dist-info/RECORD,,
13
+ RNApolis-0.3.10.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
14
+ RNApolis-0.3.10.dist-info/METADATA,sha256=qfgkAg8MMlyGyOkWsl8y5JOffR_9AbsQgYOXE3L5XrA,54301
15
+ RNApolis-0.3.10.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
16
+ RNApolis-0.3.10.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
17
+ RNApolis-0.3.10.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
18
+ RNApolis-0.3.10.dist-info/RECORD,,
@@ -0,0 +1,278 @@
1
+ #! /usr/bin/env python
2
+ import argparse
3
+ import csv
4
+ import logging
5
+ import math
6
+ import os
7
+ from collections import defaultdict
8
+ from typing import Dict, List, Optional, Tuple
9
+
10
+ import numpy
11
+ import numpy.typing
12
+ import orjson
13
+ from ordered_set import OrderedSet
14
+ from scipy.spatial import KDTree
15
+
16
+ from rnapolis.common import (
17
+ BaseInteractions,
18
+ BasePair,
19
+ BpSeq,
20
+ LeontisWesthof,
21
+ Residue,
22
+ Saenger,
23
+ Stacking,
24
+ Structure2D,
25
+ )
26
+ from rnapolis.parser import read_3d_structure
27
+ from rnapolis.tertiary import (
28
+ AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT,
29
+ Atom,
30
+ Mapping2D3D,
31
+ Residue3D,
32
+ Structure3D,
33
+ torsion_angle,
34
+ )
35
+ from rnapolis.util import handle_input_file
36
+
37
# Radius handed to KDTree.query_pairs: two residues become an interaction
# candidate only when their C1' atoms lie within this distance of each other.
C1P_MAX_DISTANCE = 10.0

# Logging verbosity is read from the LOGLEVEL environment variable and falls
# back to INFO when the variable is not set.
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO").upper())
40
+
41
+
42
+ # TODO: implement this function
43
def is_base_pair(residue_i: Residue3D, residue_j: Residue3D) -> bool:
    """Decide whether the two residues form a base pair.

    Placeholder implementation: no geometric test is performed yet, so every
    candidate is rejected and ``False`` is always returned.
    """
    return False
45
+
46
+
47
+ # TODO: implement this function
48
def classify_lw(residue_i: Residue3D, residue_j: Residue3D) -> Optional[LeontisWesthof]:
    """Return the Leontis-Westhof class of the pair formed by the two residues.

    Placeholder implementation: classification is not available yet, so
    ``None`` is always returned.
    """
    return None
50
+
51
+
52
+ # TODO: implement this function
53
def classify_saenger(residue_i: Residue3D, residue_j: Residue3D) -> Optional[Saenger]:
    """Return the Saenger class of the pair formed by the two residues.

    Placeholder implementation: classification is not available yet, so
    ``None`` is always returned.
    """
    return None
55
+
56
+
57
+ # TODO: implement this function
58
def is_stacking(residue_i: Residue3D, residue_j: Residue3D) -> bool:
    """Decide whether the two residues stack on each other.

    Placeholder implementation: no geometric test is performed yet, so every
    candidate is rejected and ``False`` is always returned.
    """
    return False
60
+
61
+
62
def find_candidates(
    structure: Structure3D, model: Optional[int] = None
) -> List[Tuple[Residue3D, Residue3D]]:
    """Collect residue pairs whose C1' atoms are close enough to interact.

    Args:
        structure: 3D structure whose ``residues`` are scanned.
        model: when given, only residues with ``residue.model == model`` are
            considered; ``None`` keeps residues from every model.

    Returns:
        A list of ``(residue_i, residue_j)`` tuples for every pair of residues
        whose C1' atoms are at most ``C1P_MAX_DISTANCE`` apart. Residues
        without a C1' atom are ignored. Pairs are ordered by the position of
        the residues in ``structure.residues``. The list is empty when no
        residue has a C1' atom.
    """
    # Parallel lists: coordinates[k] is the C1' position of residues[k].
    # Indexing by position (rather than by coordinate tuple) keeps residues
    # apart even if two of them happen to share identical coordinates.
    residues = []
    coordinates = []

    for residue in structure.residues:
        if model is not None and residue.model != model:
            continue

        atom = residue.find_atom("C1'")

        if atom is not None:
            residues.append(residue)
            coordinates.append((atom.x, atom.y, atom.z))

    # KDTree rejects an empty point set, so bail out before building it.
    if not coordinates:
        return []

    kdtree = KDTree(coordinates)

    # query_pairs returns a set of index pairs; sort it so that the result
    # order is reproducible from run to run.
    return [
        (residues[i], residues[j])
        for i, j in sorted(kdtree.query_pairs(C1P_MAX_DISTANCE))
    ]
88
+
89
+
90
def find_pairs(structure: Structure3D, model: Optional[int] = None) -> List[BasePair]:
    """Build a ``BasePair`` for every candidate accepted by ``is_base_pair``.

    Candidates come from ``find_candidates(structure, model)``. Each accepted
    pair is classified with ``classify_lw`` and ``classify_saenger`` before
    being wrapped in a ``BasePair``.
    """
    pairs = []

    for first, second in find_candidates(structure, model):
        if not is_base_pair(first, second):
            continue

        lw_class = classify_lw(first, second)
        saenger_class = classify_saenger(first, second)
        nt1 = Residue(first.label, first.auth)
        nt2 = Residue(second.label, second.auth)
        pairs.append(BasePair(nt1, nt2, lw_class, saenger_class))

    return pairs
107
+
108
+
109
def find_stackings(
    structure: Structure3D, model: Optional[int] = None
) -> List[Stacking]:
    """Build a ``Stacking`` for every candidate accepted by ``is_stacking``.

    Candidates come from ``find_candidates(structure, model)``. The third
    ``Stacking`` argument is always passed as ``None``.
    """
    return [
        Stacking(
            Residue(first.label, first.auth),
            Residue(second.label, second.auth),
            None,
        )
        for first, second in find_candidates(structure, model)
        if is_stacking(first, second)
    ]
125
+
126
+
127
def extract_base_interactions(
    tertiary_structure: Structure3D, model: Optional[int] = None
) -> BaseInteractions:
    """Detect base pairs and stackings and bundle them as ``BaseInteractions``.

    Only the first two fields are populated (from ``find_pairs`` and
    ``find_stackings``); the remaining three interaction lists are passed
    as empty lists.
    """
    return BaseInteractions(
        find_pairs(tertiary_structure, model),
        find_stackings(tertiary_structure, model),
        [],
        [],
        [],
    )
133
+
134
+
135
def extract_secondary_structure(
    tertiary_structure: Structure3D,
    model: Optional[int] = None,
    find_gaps: bool = False,
) -> Structure2D:
    """Derive the secondary-structure description of a 3D structure.

    Args:
        tertiary_structure: the parsed 3D structure.
        model: forwarded to ``extract_base_interactions`` to restrict the
            interaction search to one model; ``None`` applies no restriction.
        find_gaps: forwarded unchanged to ``Mapping2D3D``.

    Returns:
        A ``Structure2D`` holding the detected base interactions, the BPSEQ
        text, the dot-bracket and extended dot-bracket strings, and the
        structural elements (stems, single strands, hairpins, loops) read
        from the mapping's BPSEQ object.
    """
    base_interactions = extract_base_interactions(tertiary_structure, model)
    mapping = Mapping2D3D(
        tertiary_structure,
        base_interactions.basePairs,
        base_interactions.stackings,
        find_gaps,
    )
    stems, single_strands, hairpins, loops = mapping.bpseq.elements
    return Structure2D(
        base_interactions,
        # Structure2D receives the BPSEQ as text, not as the BpSeq object.
        str(mapping.bpseq),
        mapping.dot_bracket,
        mapping.extended_dot_bracket,
        stems,
        single_strands,
        hairpins,
        loops,
    )
158
+
159
+
160
def write_json(path: str, structure2d: BaseInteractions):
    """Serialize ``structure2d`` with ``orjson`` and write the bytes to ``path``.

    The file is opened in binary mode because ``orjson.dumps`` returns bytes.
    """
    # NOTE(review): main() passes a Structure2D here although the parameter is
    # annotated as BaseInteractions - confirm which type is intended.
    with open(path, "wb") as output:
        serialized = orjson.dumps(structure2d)
        output.write(serialized)
163
+
164
+
165
def write_csv(path: str, structure2d: Structure2D):
    """Write every interaction of ``structure2d`` as one CSV row.

    Columns are ``nt1``, ``nt2``, ``type``, ``classification-1`` and
    ``classification-2``. Rows are emitted in this order: base pairs,
    stackings, base-phosphate, base-ribose and other interactions. A missing
    classification is written as an empty cell.

    Args:
        path: destination file; it is created or overwritten.
        structure2d: structure whose ``baseInteractions`` are exported.
    """

    def enum_value(member):
        # Classifications are optional enum members; absent ones become "".
        return member.value if member is not None else ""

    interactions = structure2d.baseInteractions

    # newline="" lets the csv module control line endings itself, as the csv
    # documentation requires for files passed to csv.writer.
    with open(path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["nt1", "nt2", "type", "classification-1", "classification-2"])

        for base_pair in interactions.basePairs:
            writer.writerow(
                [
                    base_pair.nt1.full_name,
                    base_pair.nt2.full_name,
                    "base pair",
                    # lw may be None when the pair could not be classified.
                    enum_value(base_pair.lw),
                    enum_value(base_pair.saenger) or "",
                ]
            )

        for stacking in interactions.stackings:
            writer.writerow(
                [
                    stacking.nt1.full_name,
                    stacking.nt2.full_name,
                    "stacking",
                    enum_value(stacking.topology),
                    "",
                ]
            )

        for base_phosphate in interactions.basePhosphateInteractions:
            writer.writerow(
                [
                    base_phosphate.nt1.full_name,
                    base_phosphate.nt2.full_name,
                    "base-phosphate interaction",
                    enum_value(base_phosphate.bph),
                    "",
                ]
            )

        # Base-ribose rows come from their own list; iterating the
        # base-phosphate list again would duplicate those rows under the
        # wrong label and drop the real base-ribose interactions.
        for base_ribose in interactions.baseRiboseInteractions:
            writer.writerow(
                [
                    base_ribose.nt1.full_name,
                    base_ribose.nt2.full_name,
                    "base-ribose interaction",
                    enum_value(base_ribose.br),
                    "",
                ]
            )

        for other in interactions.otherInteractions:
            writer.writerow(
                [
                    other.nt1.full_name,
                    other.nt2.full_name,
                    "other interaction",
                    "",
                    "",
                ]
            )
223
+
224
+
225
def write_bpseq(path: str, bpseq: BpSeq):
    """Write the string form of ``bpseq`` to the text file at ``path``.

    The file is created or overwritten.
    """
    with open(path, "w") as output:
        text = str(bpseq)
        output.write(text)
228
+
229
+
230
def main():
    """Command-line entry point.

    Reads the structure given as the positional ``input`` argument, extracts
    its secondary structure and then:

    * writes CSV / JSON / BPSEQ files when ``--csv`` / ``--json`` /
      ``--bpseq`` paths are given,
    * prints the dot-bracket to standard output, or the extended dot-bracket
      when ``--extended`` is set,
    * prints the Graphviz representation when ``--dot`` is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="Path to PDB or mmCIF file")
    parser.add_argument("--bpseq", help="(optional) path to output BPSEQ file")
    parser.add_argument("--csv", help="(optional) path to output CSV file")
    parser.add_argument(
        "--json",
        help="(optional) path to output JSON file",
    )
    parser.add_argument(
        "--extended",
        action="store_true",
        help="(optional) if set, the program will print extended secondary structure to the standard output",
    )
    parser.add_argument(
        "--find-gaps",
        action="store_true",
        help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
        f"the gap is defined as O3'-P distance greater than {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
    )
    parser.add_argument("--dot", help="(optional) path to output DOT file")
    args = parser.parse_args()

    # No debugger hook here: a breakpoint() call would halt every run of the
    # command-line tool in an interactive pdb session.
    file = handle_input_file(args.input)
    structure3d = read_3d_structure(file, None)
    structure2d = extract_secondary_structure(structure3d, None, args.find_gaps)

    if args.csv:
        write_csv(args.csv, structure2d)

    if args.json:
        write_json(args.json, structure2d)

    if args.bpseq:
        write_bpseq(args.bpseq, structure2d.bpseq)

    if args.extended:
        print(structure2d.extendedDotBracket)
    else:
        print(structure2d.dotBracket)

    if args.dot:
        # NOTE(review): the help text promises a DOT output file, but the
        # graph is printed to standard output and the given path is unused -
        # confirm the intended behaviour before changing it.
        print(BpSeq.from_string(structure2d.bpseq).graphviz)


if __name__ == "__main__":
    main()
rnapolis/parser.py CHANGED
@@ -12,8 +12,15 @@ def read_3d_structure(
12
12
  atoms, modified, sequence = (
13
13
  parse_cif(cif_or_pdb) if is_cif(cif_or_pdb) else parse_pdb(cif_or_pdb)
14
14
  )
15
- if model is not None:
16
- atoms = list(filter(lambda atom: atom.model == model, atoms))
15
+ available_models = {atom.model: None for atom in atoms}
16
+ atoms_by_model = {
17
+ model: list(filter(lambda atom: atom.model == model, atoms))
18
+ for model in available_models
19
+ }
20
+ if model is not None and model in available_models:
21
+ atoms = atoms_by_model[model]
22
+ else:
23
+ atoms = atoms_by_model[list(available_models.keys())[0]]
17
24
  return group_atoms(atoms, modified, sequence, nucleic_acid_only)
18
25
 
19
26