RNApolis 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.3.10
3
+ Version: 0.3.12
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -1,18 +1,17 @@
1
- rnapolis/annotator.py,sha256=8AwrCKy_5CKU3HsRgqj5U2aQrAXiysoAnqjsDdvCAEA,21481
2
- rnapolis/annotator_ml.py,sha256=eZYDIaUcAVWvUHKvA0lx6lWEaATU1THNYbfVKvVa9mQ,8226
1
+ rnapolis/annotator.py,sha256=bcyqmUSSRyl0ejA3548K4czSElBMX3EpCKAfJ2tYjsw,21476
3
2
  rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
4
- rnapolis/common.py,sha256=DPmRpNkMaxuIai3vfLzSlP6IN0zpj6kmT3LoRjnJUWE,26440
3
+ rnapolis/common.py,sha256=QdmAGF8DvG1EHpefumU27LceGm7l9obgWLhL4FELGT4,27381
5
4
  rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
6
5
  rnapolis/molecule_filter.py,sha256=NhjuqdCRnXgPefWZPeTq77tifmnAzamQtA0ODqPPG9k,6918
7
6
  rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
8
- rnapolis/parser.py,sha256=0uNKPnKiv5uaFVFGIzP8xbGLokimBkjs1XdlV0JmKIw,12217
7
+ rnapolis/parser.py,sha256=Cmjt7p8UkiSNhSQDjc6I7BRqtuIIHs23Fp-Glb8Zikw,12216
9
8
  rnapolis/rfam_folder.py,sha256=3rgXEJR16uPFy_BOo8qkdClOAOQDVOkidnLE-yoRbeI,11112
10
9
  rnapolis/tertiary.py,sha256=iWMPD9c21rjMPpEdBd7mPCQgds65IbOr4_Fy06s0NoU,18957
11
10
  rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
12
11
  rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
13
- RNApolis-0.3.10.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
14
- RNApolis-0.3.10.dist-info/METADATA,sha256=qfgkAg8MMlyGyOkWsl8y5JOffR_9AbsQgYOXE3L5XrA,54301
15
- RNApolis-0.3.10.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
16
- RNApolis-0.3.10.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
17
- RNApolis-0.3.10.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
18
- RNApolis-0.3.10.dist-info/RECORD,,
12
+ RNApolis-0.3.12.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
+ RNApolis-0.3.12.dist-info/METADATA,sha256=PGjy4PTp1C5K1afBs6ZjeWz4pLJ5iFaiZfUSdgwusDk,54301
14
+ RNApolis-0.3.12.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15
+ RNApolis-0.3.12.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
+ RNApolis-0.3.12.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
+ RNApolis-0.3.12.dist-info/RECORD,,
rnapolis/annotator.py CHANGED
@@ -486,7 +486,7 @@ def extract_secondary_structure(
486
486
  tertiary_structure: Structure3D,
487
487
  model: Optional[int] = None,
488
488
  find_gaps: bool = False,
489
- ) -> BaseInteractions:
489
+ ) -> Structure2D:
490
490
  base_interactions = extract_base_interactions(tertiary_structure, model)
491
491
  mapping = Mapping2D3D(
492
492
  tertiary_structure,
rnapolis/common.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import itertools
2
2
  import logging
3
3
  import os
4
+ import re
4
5
  import string
5
6
  from collections import defaultdict
6
7
  from collections.abc import Sequence
@@ -896,6 +897,38 @@ class DotBracket:
896
897
  return f"{self.sequence}\n{self.structure}"
897
898
 
898
899
 
900
+ @dataclass
901
+ class MultiStrandDotBracket(DotBracket):
902
+ strands: List[Strand]
903
+
904
+ @staticmethod
905
+ def from_string(input: str):
906
+ strands = []
907
+ first = 1
908
+
909
+ for match in re.finditer(
910
+ r"((>.*?\n)?([ACGTURYSWKMBDHVNacgturyswkmbdhvn.-]+)\n([.()\[\]{}<>A-Za-z]+))",
911
+ input,
912
+ ):
913
+ sequence = match.group(3)
914
+ structure = match.group(4)
915
+ assert len(sequence) == len(structure)
916
+ last = first + len(sequence) - 1
917
+ strands.append(Strand(first, last, sequence, structure))
918
+ first = last + 1
919
+
920
+ return MultiStrandDotBracket(
921
+ "".join(strand.sequence for strand in strands),
922
+ "".join(strand.structure for strand in strands),
923
+ strands,
924
+ )
925
+
926
+ @staticmethod
927
+ def from_file(path: str):
928
+ with open(path) as f:
929
+ return MultiStrandDotBracket.from_string(f.read())
930
+
931
+
899
932
  @dataclass(frozen=True, order=True)
900
933
  class BaseInteractions:
901
934
  basePairs: List[BasePair]
rnapolis/parser.py CHANGED
@@ -300,7 +300,6 @@ def get_one_letter_name(
300
300
  key = (label.chain, label.number)
301
301
  if key in sequence:
302
302
  return sequence[key]
303
-
304
303
  # RNA
305
304
  if len(name) == 1:
306
305
  return name
rnapolis/annotator_ml.py DELETED
@@ -1,278 +0,0 @@
1
- #! /usr/bin/env python
2
- import argparse
3
- import csv
4
- import logging
5
- import math
6
- import os
7
- from collections import defaultdict
8
- from typing import Dict, List, Optional, Tuple
9
-
10
- import numpy
11
- import numpy.typing
12
- import orjson
13
- from ordered_set import OrderedSet
14
- from scipy.spatial import KDTree
15
-
16
- from rnapolis.common import (
17
- BaseInteractions,
18
- BasePair,
19
- BpSeq,
20
- LeontisWesthof,
21
- Residue,
22
- Saenger,
23
- Stacking,
24
- Structure2D,
25
- )
26
- from rnapolis.parser import read_3d_structure
27
- from rnapolis.tertiary import (
28
- AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT,
29
- Atom,
30
- Mapping2D3D,
31
- Residue3D,
32
- Structure3D,
33
- torsion_angle,
34
- )
35
- from rnapolis.util import handle_input_file
36
-
37
- C1P_MAX_DISTANCE = 10.0
38
-
39
- logging.basicConfig(level=os.getenv("LOGLEVEL", "INFO").upper())
40
-
41
-
42
- # TODO: implement this function
43
- def is_base_pair(residue_i: Residue3D, residue_j: Residue3D) -> bool:
44
- return False
45
-
46
-
47
- # TODO: implement this function
48
- def classify_lw(residue_i: Residue3D, residue_j: Residue3D) -> Optional[LeontisWesthof]:
49
- return None
50
-
51
-
52
- # TODO: implement this function
53
- def classify_saenger(residue_i: Residue3D, residue_j: Residue3D) -> Optional[Saenger]:
54
- return None
55
-
56
-
57
- # TODO: implement this function
58
- def is_stacking(residue_i: Residue3D, residue_j: Residue3D) -> bool:
59
- return False
60
-
61
-
62
- def find_candidates(
63
- structure: Structure3D, model: Optional[int] = None
64
- ) -> List[Tuple[Residue3D, Residue3D]]:
65
- residue_map = {}
66
- coordinates = []
67
-
68
- for residue in structure.residues:
69
- if model is not None and residue.model != model:
70
- continue
71
-
72
- atom = residue.find_atom("C1'")
73
-
74
- if atom is not None:
75
- atom_xyz = (atom.x, atom.y, atom.z)
76
- residue_map[atom_xyz] = residue
77
- coordinates.append(atom_xyz)
78
-
79
- kdtree = KDTree(coordinates)
80
- candidates = []
81
-
82
- for i, j in kdtree.query_pairs(C1P_MAX_DISTANCE):
83
- residue_i = residue_map[coordinates[i]]
84
- residue_j = residue_map[coordinates[j]]
85
- candidates.append((residue_i, residue_j))
86
-
87
- return candidates
88
-
89
-
90
- def find_pairs(structure: Structure3D, model: Optional[int] = None) -> List[BasePair]:
91
- base_pairs = []
92
-
93
- for residue_i, residue_j in find_candidates(structure, model):
94
- if is_base_pair(residue_i, residue_j):
95
- lw = classify_lw(residue_i, residue_j)
96
- saenger = classify_saenger(residue_i, residue_j)
97
- base_pairs.append(
98
- BasePair(
99
- Residue(residue_i.label, residue_i.auth),
100
- Residue(residue_j.label, residue_j.auth),
101
- lw,
102
- saenger,
103
- )
104
- )
105
-
106
- return base_pairs
107
-
108
-
109
- def find_stackings(
110
- structure: Structure3D, model: Optional[int] = None
111
- ) -> List[Stacking]:
112
- stackings = []
113
-
114
- for residue_i, residue_j in find_candidates(structure, model):
115
- if is_stacking(residue_i, residue_j):
116
- stackings.append(
117
- Stacking(
118
- Residue(residue_i.label, residue_i.auth),
119
- Residue(residue_j.label, residue_j.auth),
120
- None,
121
- )
122
- )
123
-
124
- return stackings
125
-
126
-
127
- def extract_base_interactions(
128
- tertiary_structure: Structure3D, model: Optional[int] = None
129
- ) -> BaseInteractions:
130
- base_pairs = find_pairs(tertiary_structure, model)
131
- stackings = find_stackings(tertiary_structure, model)
132
- return BaseInteractions(base_pairs, stackings, [], [], [])
133
-
134
-
135
- def extract_secondary_structure(
136
- tertiary_structure: Structure3D,
137
- model: Optional[int] = None,
138
- find_gaps: bool = False,
139
- ) -> BaseInteractions:
140
- base_interactions = extract_base_interactions(tertiary_structure, model)
141
- mapping = Mapping2D3D(
142
- tertiary_structure,
143
- base_interactions.basePairs,
144
- base_interactions.stackings,
145
- find_gaps,
146
- )
147
- stems, single_strands, hairpins, loops = mapping.bpseq.elements
148
- return Structure2D(
149
- base_interactions,
150
- str(mapping.bpseq),
151
- mapping.dot_bracket,
152
- mapping.extended_dot_bracket,
153
- stems,
154
- single_strands,
155
- hairpins,
156
- loops,
157
- )
158
-
159
-
160
- def write_json(path: str, structure2d: BaseInteractions):
161
- with open(path, "wb") as f:
162
- f.write(orjson.dumps(structure2d))
163
-
164
-
165
- def write_csv(path: str, structure2d: Structure2D):
166
- with open(path, "w") as f:
167
- writer = csv.writer(f)
168
- writer.writerow(["nt1", "nt2", "type", "classification-1", "classification-2"])
169
- for base_pair in structure2d.baseInteractions.basePairs:
170
- writer.writerow(
171
- [
172
- base_pair.nt1.full_name,
173
- base_pair.nt2.full_name,
174
- "base pair",
175
- base_pair.lw.value,
176
- (
177
- base_pair.saenger.value or ""
178
- if base_pair.saenger is not None
179
- else ""
180
- ),
181
- ]
182
- )
183
- for stacking in structure2d.baseInteractions.stackings:
184
- writer.writerow(
185
- [
186
- stacking.nt1.full_name,
187
- stacking.nt2.full_name,
188
- "stacking",
189
- stacking.topology.value if stacking.topology is not None else "",
190
- "",
191
- ]
192
- )
193
- for base_phosphate in structure2d.baseInteractions.basePhosphateInteractions:
194
- writer.writerow(
195
- [
196
- base_phosphate.nt1.full_name,
197
- base_phosphate.nt2.full_name,
198
- "base-phosphate interaction",
199
- base_phosphate.bph.value if base_phosphate.bph is not None else "",
200
- "",
201
- ]
202
- )
203
- for base_ribose in structure2d.baseInteractions.basePhosphateInteractions:
204
- writer.writerow(
205
- [
206
- base_ribose.nt1.full_name,
207
- base_ribose.nt2.full_name,
208
- "base-ribose interaction",
209
- base_ribose.bph.value if base_ribose.bph is not None else "",
210
- "",
211
- ]
212
- )
213
- for other in structure2d.baseInteractions.otherInteractions:
214
- writer.writerow(
215
- [
216
- other.nt1.full_name,
217
- other.nt2.full_name,
218
- "other interaction",
219
- "",
220
- "",
221
- ]
222
- )
223
-
224
-
225
- def write_bpseq(path: str, bpseq: BpSeq):
226
- with open(path, "w") as f:
227
- f.write(str(bpseq))
228
-
229
-
230
- def main():
231
- parser = argparse.ArgumentParser()
232
- parser.add_argument("input", help="Path to PDB or mmCIF file")
233
- parser.add_argument("--bpseq", help="(optional) path to output BPSEQ file")
234
- parser.add_argument("--csv", help="(optional) path to output CSV file")
235
- parser.add_argument(
236
- "--json",
237
- help="(optional) path to output JSON file",
238
- )
239
- parser.add_argument(
240
- "--extended",
241
- action="store_true",
242
- help="(optional) if set, the program will print extended secondary structure to the standard output",
243
- )
244
- parser.add_argument(
245
- "--find-gaps",
246
- action="store_true",
247
- help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
248
- f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
249
- )
250
- parser.add_argument("--dot", help="(optional) path to output DOT file")
251
- args = parser.parse_args()
252
-
253
- breakpoint()
254
-
255
- file = handle_input_file(args.input)
256
- structure3d = read_3d_structure(file, None)
257
- structure2d = extract_secondary_structure(structure3d, None, args.find_gaps)
258
-
259
- if args.csv:
260
- write_csv(args.csv, structure2d)
261
-
262
- if args.json:
263
- write_json(args.json, structure2d)
264
-
265
- if args.bpseq:
266
- write_bpseq(args.bpseq, structure2d.bpseq)
267
-
268
- if args.extended:
269
- print(structure2d.extendedDotBracket)
270
- else:
271
- print(structure2d.dotBracket)
272
-
273
- if args.dot:
274
- print(BpSeq.from_string(structure2d.bpseq).graphviz)
275
-
276
-
277
- if __name__ == "__main__":
278
- main()