RNApolis 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.3.10
3
+ Version: 0.3.11
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -1,7 +1,6 @@
1
- rnapolis/annotator.py,sha256=8AwrCKy_5CKU3HsRgqj5U2aQrAXiysoAnqjsDdvCAEA,21481
2
- rnapolis/annotator_ml.py,sha256=eZYDIaUcAVWvUHKvA0lx6lWEaATU1THNYbfVKvVa9mQ,8226
1
+ rnapolis/annotator.py,sha256=bcyqmUSSRyl0ejA3548K4czSElBMX3EpCKAfJ2tYjsw,21476
3
2
  rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
4
- rnapolis/common.py,sha256=DPmRpNkMaxuIai3vfLzSlP6IN0zpj6kmT3LoRjnJUWE,26440
3
+ rnapolis/common.py,sha256=owupPG9oylz4Ed4DqVYJqWIKpovLJ3EIIApgca6tuhg,27344
5
4
  rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
6
5
  rnapolis/molecule_filter.py,sha256=NhjuqdCRnXgPefWZPeTq77tifmnAzamQtA0ODqPPG9k,6918
7
6
  rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
@@ -10,9 +9,9 @@ rnapolis/rfam_folder.py,sha256=3rgXEJR16uPFy_BOo8qkdClOAOQDVOkidnLE-yoRbeI,11112
10
9
  rnapolis/tertiary.py,sha256=iWMPD9c21rjMPpEdBd7mPCQgds65IbOr4_Fy06s0NoU,18957
11
10
  rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
12
11
  rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
13
- RNApolis-0.3.10.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
14
- RNApolis-0.3.10.dist-info/METADATA,sha256=qfgkAg8MMlyGyOkWsl8y5JOffR_9AbsQgYOXE3L5XrA,54301
15
- RNApolis-0.3.10.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
16
- RNApolis-0.3.10.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
17
- RNApolis-0.3.10.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
18
- RNApolis-0.3.10.dist-info/RECORD,,
12
+ RNApolis-0.3.11.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
+ RNApolis-0.3.11.dist-info/METADATA,sha256=J0a3wmvQoWPVFgSgvIxMkMFSBCz3KFHB8BHKFNtIdKw,54301
14
+ RNApolis-0.3.11.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15
+ RNApolis-0.3.11.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
+ RNApolis-0.3.11.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
+ RNApolis-0.3.11.dist-info/RECORD,,
rnapolis/annotator.py CHANGED
@@ -486,7 +486,7 @@ def extract_secondary_structure(
486
486
  tertiary_structure: Structure3D,
487
487
  model: Optional[int] = None,
488
488
  find_gaps: bool = False,
489
- ) -> BaseInteractions:
489
+ ) -> Structure2D:
490
490
  base_interactions = extract_base_interactions(tertiary_structure, model)
491
491
  mapping = Mapping2D3D(
492
492
  tertiary_structure,
rnapolis/common.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import itertools
2
2
  import logging
3
3
  import os
4
+ import re
4
5
  import string
5
6
  from collections import defaultdict
6
7
  from collections.abc import Sequence
@@ -896,6 +897,37 @@ class DotBracket:
896
897
  return f"{self.sequence}\n{self.structure}"
897
898
 
898
899
 
900
@dataclass
class MultiStrandDotBracket(DotBracket):
    """Dot-bracket structure that also records its individual strands.

    The two values passed to the ``DotBracket`` constructor are the
    concatenated sequences and the concatenated structures of all strands;
    ``strands`` keeps the pieces themselves.
    """

    # Strands in input order; positions are numbered consecutively from 1
    # across the whole input.
    strands: List[Strand]

    @staticmethod
    def from_string(input: str) -> "MultiStrandDotBracket":
        """Parse one or more strands from dot-bracket text.

        Each strand is an optional header line starting with ``>``, a
        sequence line made of ``ACGUN`` letters (either case) and a structure
        line made of dots, brackets and letters. Text that does not match
        this pattern is skipped.

        Raises:
            ValueError: if a strand's sequence and structure differ in length.
        """
        strands = []
        first = 1

        for match in re.finditer(
            r"((>.*?\n)?([ACGUNacgun]+)\n([.()\[\]{}<>A-Za-z]+))", input
        ):
            sequence = match.group(3)
            structure = match.group(4)

            # Raised explicitly (not asserted) so the check survives `python -O`.
            if len(sequence) != len(structure):
                raise ValueError(
                    "Sequence and structure lengths differ "
                    f"({len(sequence)} != {len(structure)}) "
                    f"for the strand starting at position {first}"
                )

            last = first + len(sequence) - 1
            strands.append(Strand(first, last, sequence, structure))
            # The next strand continues the numbering right after this one.
            first = last + 1

        return MultiStrandDotBracket(
            "".join(strand.sequence for strand in strands),
            "".join(strand.structure for strand in strands),
            strands,
        )

    @staticmethod
    def from_file(path: str) -> "MultiStrandDotBracket":
        """Read the file at ``path`` and parse it with :meth:`from_string`."""
        with open(path) as f:
            return MultiStrandDotBracket.from_string(f.read())
929
+
930
+
899
931
  @dataclass(frozen=True, order=True)
900
932
  class BaseInteractions:
901
933
  basePairs: List[BasePair]
rnapolis/annotator_ml.py DELETED
@@ -1,278 +0,0 @@
1
- #! /usr/bin/env python
2
- import argparse
3
- import csv
4
- import logging
5
- import math
6
- import os
7
- from collections import defaultdict
8
- from typing import Dict, List, Optional, Tuple
9
-
10
- import numpy
11
- import numpy.typing
12
- import orjson
13
- from ordered_set import OrderedSet
14
- from scipy.spatial import KDTree
15
-
16
- from rnapolis.common import (
17
- BaseInteractions,
18
- BasePair,
19
- BpSeq,
20
- LeontisWesthof,
21
- Residue,
22
- Saenger,
23
- Stacking,
24
- Structure2D,
25
- )
26
- from rnapolis.parser import read_3d_structure
27
- from rnapolis.tertiary import (
28
- AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT,
29
- Atom,
30
- Mapping2D3D,
31
- Residue3D,
32
- Structure3D,
33
- torsion_angle,
34
- )
35
- from rnapolis.util import handle_input_file
36
-
37
# Radius handed to the k-d tree pair query when looking for residue pairs
# whose C1' atoms lie close to each other.
C1P_MAX_DISTANCE = 10.0

# Log level comes from the LOGLEVEL environment variable; INFO when unset.
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO").upper())
40
-
41
-
42
def is_base_pair(residue_i: Residue3D, residue_j: Residue3D) -> bool:
    """Tell whether the two residues form a base pair.

    Placeholder: currently rejects every pair.
    """
    # TODO: implement this function
    return False
45
-
46
-
47
def classify_lw(residue_i: Residue3D, residue_j: Residue3D) -> Optional[LeontisWesthof]:
    """Return the Leontis-Westhof class of the pair formed by the two residues.

    Placeholder: currently yields no classification for any pair.
    """
    # TODO: implement this function
    return None
50
-
51
-
52
def classify_saenger(residue_i: Residue3D, residue_j: Residue3D) -> Optional[Saenger]:
    """Return the Saenger class of the pair formed by the two residues.

    Placeholder: currently yields no classification for any pair.
    """
    # TODO: implement this function
    return None
55
-
56
-
57
def is_stacking(residue_i: Residue3D, residue_j: Residue3D) -> bool:
    """Tell whether the two residues stack on each other.

    Placeholder: currently rejects every pair.
    """
    # TODO: implement this function
    return False
60
-
61
-
62
def find_candidates(
    structure: Structure3D, model: Optional[int] = None
) -> List[Tuple[Residue3D, Residue3D]]:
    """Collect residue pairs whose C1' atoms are within ``C1P_MAX_DISTANCE``.

    Args:
        structure: 3D structure whose ``residues`` are scanned.
        model: when given, only residues with this ``model`` are considered;
            when ``None``, every residue is considered.

    Returns:
        Pairs of residues found by a k-d tree pair query over the C1' atom
        coordinates. Residues without a C1' atom are ignored. The list is
        empty when no residue contributes a C1' atom.
    """
    # Parallel lists: residues[k] owns coordinates[k]. Looking residues up by
    # index (rather than by coordinate tuple) keeps residues apart even when
    # two of them report identical C1' coordinates.
    residues = []
    coordinates = []

    for residue in structure.residues:
        if model is not None and residue.model != model:
            continue

        atom = residue.find_atom("C1'")

        if atom is not None:
            residues.append(residue)
            coordinates.append((atom.x, atom.y, atom.z))

    # A k-d tree cannot be built from an empty point set, and there is
    # nothing to pair anyway.
    if not coordinates:
        return []

    kdtree = KDTree(coordinates)
    return [
        (residues[i], residues[j]) for i, j in kdtree.query_pairs(C1P_MAX_DISTANCE)
    ]
88
-
89
-
90
def find_pairs(structure: Structure3D, model: Optional[int] = None) -> List[BasePair]:
    """Build a ``BasePair`` for every candidate residue pair accepted by ``is_base_pair``.

    Each accepted pair is annotated with the results of ``classify_lw`` and
    ``classify_saenger``.
    """
    found = []

    for first, second in find_candidates(structure, model):
        # Guard clause: skip candidates that are not base pairs.
        if not is_base_pair(first, second):
            continue

        leontis_westhof = classify_lw(first, second)
        saenger_class = classify_saenger(first, second)
        nt1 = Residue(first.label, first.auth)
        nt2 = Residue(second.label, second.auth)
        found.append(BasePair(nt1, nt2, leontis_westhof, saenger_class))

    return found
107
-
108
-
109
def find_stackings(
    structure: Structure3D, model: Optional[int] = None
) -> List[Stacking]:
    """Build a ``Stacking`` for every candidate residue pair accepted by ``is_stacking``.

    The topology of each stacking is left unset (``None``).
    """
    return [
        Stacking(
            Residue(first.label, first.auth),
            Residue(second.label, second.auth),
            None,
        )
        for first, second in find_candidates(structure, model)
        if is_stacking(first, second)
    ]
125
-
126
-
127
def extract_base_interactions(
    tertiary_structure: Structure3D, model: Optional[int] = None
) -> BaseInteractions:
    """Gather base pairs and stackings of a 3D structure into ``BaseInteractions``.

    The three remaining interaction lists are passed as empty lists.
    """
    return BaseInteractions(
        find_pairs(tertiary_structure, model),
        find_stackings(tertiary_structure, model),
        [],
        [],
        [],
    )
133
-
134
-
135
def extract_secondary_structure(
    tertiary_structure: Structure3D,
    model: Optional[int] = None,
    find_gaps: bool = False,
) -> Structure2D:
    """Derive the secondary structure description of a 3D structure.

    Args:
        tertiary_structure: the 3D structure to analyse.
        model: forwarded to ``extract_base_interactions`` to restrict the
            analysis to one model; ``None`` means no restriction.
        find_gaps: forwarded to ``Mapping2D3D``.

    Returns:
        A ``Structure2D`` combining the base interactions, the BPSEQ text,
        both dot-bracket notations and the structural elements (stems,
        single strands, hairpins, loops) taken from the mapping.
    """
    base_interactions = extract_base_interactions(tertiary_structure, model)
    mapping = Mapping2D3D(
        tertiary_structure,
        base_interactions.basePairs,
        base_interactions.stackings,
        find_gaps,
    )
    stems, single_strands, hairpins, loops = mapping.bpseq.elements
    return Structure2D(
        base_interactions,
        str(mapping.bpseq),
        mapping.dot_bracket,
        mapping.extended_dot_bracket,
        stems,
        single_strands,
        hairpins,
        loops,
    )
158
-
159
-
160
def write_json(path: str, structure2d: Structure2D):
    """Serialize ``structure2d`` with ``orjson`` and write the bytes to ``path``."""
    # orjson.dumps returns bytes, hence the binary mode.
    with open(path, "wb") as f:
        f.write(orjson.dumps(structure2d))
163
-
164
-
165
def write_csv(path: str, structure2d: Structure2D):
    """Write all interactions of ``structure2d`` to a CSV file at ``path``.

    The file starts with a header row, followed by one row per interaction in
    this order: base pairs, stackings, base-phosphate, base-ribose and other
    interactions. Each row holds both nucleotide names, the interaction type
    and up to two classification values; a missing classification is written
    as an empty field.
    """

    def value_or_blank(classification):
        # Unwrap a classification's value; a missing one becomes "".
        return classification.value if classification is not None else ""

    interactions = structure2d.baseInteractions

    # newline="" lets the csv module control line endings itself, as its
    # documentation requires for files handed to csv.writer.
    with open(path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["nt1", "nt2", "type", "classification-1", "classification-2"])
        for base_pair in interactions.basePairs:
            writer.writerow(
                [
                    base_pair.nt1.full_name,
                    base_pair.nt2.full_name,
                    "base pair",
                    # lw may be missing when the pair could not be classified.
                    value_or_blank(base_pair.lw),
                    value_or_blank(base_pair.saenger) or "",
                ]
            )
        for stacking in interactions.stackings:
            writer.writerow(
                [
                    stacking.nt1.full_name,
                    stacking.nt2.full_name,
                    "stacking",
                    value_or_blank(stacking.topology),
                    "",
                ]
            )
        for base_phosphate in interactions.basePhosphateInteractions:
            writer.writerow(
                [
                    base_phosphate.nt1.full_name,
                    base_phosphate.nt2.full_name,
                    "base-phosphate interaction",
                    value_or_blank(base_phosphate.bph),
                    "",
                ]
            )
        # Base-ribose rows must come from the base-ribose list, not from the
        # base-phosphate one (which would duplicate the rows written above).
        # NOTE(review): assumes BaseInteractions exposes `baseRiboseInteractions`
        # and its items expose `br` - confirm against rnapolis.common.
        for base_ribose in interactions.baseRiboseInteractions:
            writer.writerow(
                [
                    base_ribose.nt1.full_name,
                    base_ribose.nt2.full_name,
                    "base-ribose interaction",
                    value_or_blank(base_ribose.br),
                    "",
                ]
            )
        for other in interactions.otherInteractions:
            writer.writerow(
                [
                    other.nt1.full_name,
                    other.nt2.full_name,
                    "other interaction",
                    "",
                    "",
                ]
            )
223
-
224
-
225
def write_bpseq(path: str, bpseq: BpSeq):
    """Write the string form of ``bpseq`` to a text file at ``path``."""
    content = str(bpseq)
    with open(path, "w") as handle:
        handle.write(content)
228
-
229
-
230
def main():
    """Command-line entry point.

    Reads the PDB or mmCIF file given as ``input``, extracts its secondary
    structure and writes the outputs selected by the optional flags. The
    dot-bracket notation (extended when ``--extended`` is set) is always
    printed to the standard output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="Path to PDB or mmCIF file")
    parser.add_argument("--bpseq", help="(optional) path to output BPSEQ file")
    parser.add_argument("--csv", help="(optional) path to output CSV file")
    parser.add_argument(
        "--json",
        help="(optional) path to output JSON file",
    )
    parser.add_argument(
        "--extended",
        action="store_true",
        help="(optional) if set, the program will print extended secondary structure to the standard output",
    )
    parser.add_argument(
        "--find-gaps",
        action="store_true",
        help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
        f"the gap is defined as O3'-P distance greater than {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
    )
    parser.add_argument("--dot", help="(optional) path to output DOT file")
    args = parser.parse_args()

    file = handle_input_file(args.input)
    structure3d = read_3d_structure(file, None)
    structure2d = extract_secondary_structure(structure3d, None, args.find_gaps)

    if args.csv:
        write_csv(args.csv, structure2d)

    if args.json:
        write_json(args.json, structure2d)

    if args.bpseq:
        write_bpseq(args.bpseq, structure2d.bpseq)

    if args.extended:
        print(structure2d.extendedDotBracket)
    else:
        print(structure2d.dotBracket)

    # NOTE(review): the --dot help text promises an output file, but the
    # graph is printed to the standard output and the given path is unused.
    # Behavior kept as-is; confirm which one is intended.
    if args.dot:
        print(BpSeq.from_string(structure2d.bpseq).graphviz)


if __name__ == "__main__":
    main()