RNApolis 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {RNApolis-0.3.11.dist-info → RNApolis-0.3.13.dist-info}/METADATA +1 -1
- RNApolis-0.3.13.dist-info/RECORD +17 -0
- rnapolis/annotator.py +25 -7
- rnapolis/common.py +81 -1
- rnapolis/parser.py +0 -1
- rnapolis/tertiary.py +25 -4
- RNApolis-0.3.11.dist-info/RECORD +0 -17
- {RNApolis-0.3.11.dist-info → RNApolis-0.3.13.dist-info}/LICENSE +0 -0
- {RNApolis-0.3.11.dist-info → RNApolis-0.3.13.dist-info}/WHEEL +0 -0
- {RNApolis-0.3.11.dist-info → RNApolis-0.3.13.dist-info}/entry_points.txt +0 -0
- {RNApolis-0.3.11.dist-info → RNApolis-0.3.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,17 @@
|
|
1
|
+
rnapolis/annotator.py,sha256=XnjFBeu3P_2UMdkD4Ao7m7K6JfeqYa-13xRzghrLvt8,22086
|
2
|
+
rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
|
3
|
+
rnapolis/common.py,sha256=T13Lqhr8V9nziThUrzh5FFuGazQFF5H_p8avOOdlqpc,29996
|
4
|
+
rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
|
5
|
+
rnapolis/molecule_filter.py,sha256=NhjuqdCRnXgPefWZPeTq77tifmnAzamQtA0ODqPPG9k,6918
|
6
|
+
rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
|
7
|
+
rnapolis/parser.py,sha256=Cmjt7p8UkiSNhSQDjc6I7BRqtuIIHs23Fp-Glb8Zikw,12216
|
8
|
+
rnapolis/rfam_folder.py,sha256=3rgXEJR16uPFy_BOo8qkdClOAOQDVOkidnLE-yoRbeI,11112
|
9
|
+
rnapolis/tertiary.py,sha256=VuATTN2SD7lBL9iUgT-doDwuEYsLodgV2u-SwQsyQcU,19658
|
10
|
+
rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
|
11
|
+
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
12
|
+
RNApolis-0.3.13.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
13
|
+
RNApolis-0.3.13.dist-info/METADATA,sha256=DD4VAvMES_WYFSYUi6wzcKlgA1-jIsBHwAnBfy0z7mE,54301
|
14
|
+
RNApolis-0.3.13.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
15
|
+
RNApolis-0.3.13.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
|
16
|
+
RNApolis-0.3.13.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
17
|
+
RNApolis-0.3.13.dist-info/RECORD,,
|
rnapolis/annotator.py
CHANGED
@@ -9,7 +9,6 @@ from typing import Dict, List, Optional, Set, Tuple
|
|
9
9
|
|
10
10
|
import numpy
|
11
11
|
import numpy.typing
|
12
|
-
import orjson
|
13
12
|
from ordered_set import OrderedSet
|
14
13
|
from scipy.spatial import KDTree
|
15
14
|
|
@@ -486,7 +485,8 @@ def extract_secondary_structure(
|
|
486
485
|
tertiary_structure: Structure3D,
|
487
486
|
model: Optional[int] = None,
|
488
487
|
find_gaps: bool = False,
|
489
|
-
|
488
|
+
all_dot_brackets: bool = False,
|
489
|
+
) -> Tuple[Structure2D, List[str]]:
|
490
490
|
base_interactions = extract_base_interactions(tertiary_structure, model)
|
491
491
|
mapping = Mapping2D3D(
|
492
492
|
tertiary_structure,
|
@@ -495,7 +495,7 @@ def extract_secondary_structure(
|
|
495
495
|
find_gaps,
|
496
496
|
)
|
497
497
|
stems, single_strands, hairpins, loops = mapping.bpseq.elements
|
498
|
-
|
498
|
+
structure2d = Structure2D(
|
499
499
|
base_interactions,
|
500
500
|
str(mapping.bpseq),
|
501
501
|
mapping.dot_bracket,
|
@@ -505,6 +505,10 @@ def extract_secondary_structure(
|
|
505
505
|
hairpins,
|
506
506
|
loops,
|
507
507
|
)
|
508
|
+
if all_dot_brackets:
|
509
|
+
return structure2d, mapping.all_dot_brackets
|
510
|
+
else:
|
511
|
+
return structure2d, [structure2d.dotBracket]
|
508
512
|
|
509
513
|
|
510
514
|
def write_json(path: str, structure2d: BaseInteractions):
|
@@ -580,29 +584,40 @@ def write_bpseq(path: str, bpseq: BpSeq):
|
|
580
584
|
def main():
|
581
585
|
parser = argparse.ArgumentParser()
|
582
586
|
parser.add_argument("input", help="Path to PDB or mmCIF file")
|
583
|
-
parser.add_argument("--bpseq", help="(optional) path to output BPSEQ file")
|
584
|
-
parser.add_argument("--csv", help="(optional) path to output CSV file")
|
585
587
|
parser.add_argument(
|
588
|
+
"-a",
|
589
|
+
"--all-dot-brackets",
|
590
|
+
action="store_true",
|
591
|
+
help=")optional) print all dot-brackets, not only optimal one (exclusive with -e/--extended)",
|
592
|
+
)
|
593
|
+
parser.add_argument("-b", "--bpseq", help="(optional) path to output BPSEQ file")
|
594
|
+
parser.add_argument("-c", "--csv", help="(optional) path to output CSV file")
|
595
|
+
parser.add_argument(
|
596
|
+
"-j",
|
586
597
|
"--json",
|
587
598
|
help="(optional) path to output JSON file",
|
588
599
|
)
|
589
600
|
parser.add_argument(
|
601
|
+
"-e",
|
590
602
|
"--extended",
|
591
603
|
action="store_true",
|
592
604
|
help="(optional) if set, the program will print extended secondary structure to the standard output",
|
593
605
|
)
|
594
606
|
parser.add_argument(
|
607
|
+
"-f",
|
595
608
|
"--find-gaps",
|
596
609
|
action="store_true",
|
597
610
|
help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
|
598
611
|
f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
|
599
612
|
)
|
600
|
-
parser.add_argument("--dot", help="(optional) path to output DOT file")
|
613
|
+
parser.add_argument("-d", "--dot", help="(optional) path to output DOT file")
|
601
614
|
args = parser.parse_args()
|
602
615
|
|
603
616
|
file = handle_input_file(args.input)
|
604
617
|
structure3d = read_3d_structure(file, None)
|
605
|
-
structure2d = extract_secondary_structure(
|
618
|
+
structure2d, dot_brackets = extract_secondary_structure(
|
619
|
+
structure3d, None, args.find_gaps, args.all_dot_brackets
|
620
|
+
)
|
606
621
|
|
607
622
|
if args.csv:
|
608
623
|
write_csv(args.csv, structure2d)
|
@@ -615,6 +630,9 @@ def main():
|
|
615
630
|
|
616
631
|
if args.extended:
|
617
632
|
print(structure2d.extendedDotBracket)
|
633
|
+
elif args.all_dot_brackets:
|
634
|
+
for dot_bracket in dot_brackets:
|
635
|
+
print(dot_bracket)
|
618
636
|
else:
|
619
637
|
print(structure2d.dotBracket)
|
620
638
|
|
rnapolis/common.py
CHANGED
@@ -8,6 +8,7 @@ from collections.abc import Sequence
|
|
8
8
|
from dataclasses import dataclass
|
9
9
|
from enum import Enum
|
10
10
|
from functools import cache, cached_property, total_ordering
|
11
|
+
from itertools import permutations
|
11
12
|
from typing import Dict, List, Optional, Tuple
|
12
13
|
|
13
14
|
import graphviz
|
@@ -852,6 +853,78 @@ class BpSeq:
|
|
852
853
|
|
853
854
|
return self.__make_dot_bracket(regions, orders)
|
854
855
|
|
856
|
+
@cached_property
|
857
|
+
def all_dot_brackets(self):
|
858
|
+
# build conflict graph
|
859
|
+
regions = self.__regions
|
860
|
+
graph = defaultdict(set)
|
861
|
+
|
862
|
+
for i, j in itertools.combinations(range(len(regions)), 2):
|
863
|
+
ri, rj = regions[i], regions[j]
|
864
|
+
k, l, _ = ri
|
865
|
+
m, n, _ = rj
|
866
|
+
|
867
|
+
# is pseudoknot?
|
868
|
+
if (k < m < l < n) or (m < k < n < l):
|
869
|
+
graph[i].add(j)
|
870
|
+
graph[j].add(i)
|
871
|
+
|
872
|
+
# find all connected components
|
873
|
+
vertices = list(graph.keys())
|
874
|
+
visited = {vertex: False for vertex in vertices}
|
875
|
+
components = []
|
876
|
+
|
877
|
+
for vertex in vertices:
|
878
|
+
if not visited[vertex]:
|
879
|
+
visited[vertex] = True
|
880
|
+
stack = [vertex]
|
881
|
+
components.append([vertex])
|
882
|
+
|
883
|
+
while stack:
|
884
|
+
current = stack[-1]
|
885
|
+
next_vertex = None
|
886
|
+
|
887
|
+
for neighbor in graph[current]:
|
888
|
+
if not visited[neighbor]:
|
889
|
+
next_vertex = neighbor
|
890
|
+
break
|
891
|
+
|
892
|
+
if next_vertex is not None:
|
893
|
+
visited[next_vertex] = True
|
894
|
+
stack.append(next_vertex)
|
895
|
+
components[-1].append(next_vertex)
|
896
|
+
else:
|
897
|
+
stack.pop()
|
898
|
+
|
899
|
+
# permute order of every component
|
900
|
+
permutations = [
|
901
|
+
list(itertools.permutations(component)) for component in components
|
902
|
+
]
|
903
|
+
solutions = set()
|
904
|
+
|
905
|
+
for permutation in itertools.product(*permutations):
|
906
|
+
orders = [0 for _ in range(len(regions))]
|
907
|
+
|
908
|
+
for component in permutation:
|
909
|
+
for i in range(1, len(component)):
|
910
|
+
region_i = component[i]
|
911
|
+
available = [True for _ in range(len(graph))]
|
912
|
+
|
913
|
+
for j in range(i):
|
914
|
+
region_j = component[j]
|
915
|
+
|
916
|
+
if region_j in graph[region_i]:
|
917
|
+
available[orders[region_j]] = False
|
918
|
+
|
919
|
+
order = next(
|
920
|
+
filter(lambda i: available[i] is True, range(len(available)))
|
921
|
+
)
|
922
|
+
orders[region_i] = order
|
923
|
+
|
924
|
+
solutions.add(self.__make_dot_bracket(regions, orders))
|
925
|
+
|
926
|
+
return list(solutions)
|
927
|
+
|
855
928
|
|
856
929
|
@dataclass
|
857
930
|
class DotBracket:
|
@@ -896,6 +969,12 @@ class DotBracket:
|
|
896
969
|
def __str__(self):
|
897
970
|
return f"{self.sequence}\n{self.structure}"
|
898
971
|
|
972
|
+
def __eq__(self, other):
|
973
|
+
return self.sequence == other.sequence and self.structure == other.structure
|
974
|
+
|
975
|
+
def __hash__(self) -> int:
|
976
|
+
return hash((self.sequence, self.structure))
|
977
|
+
|
899
978
|
|
900
979
|
@dataclass
|
901
980
|
class MultiStrandDotBracket(DotBracket):
|
@@ -907,7 +986,8 @@ class MultiStrandDotBracket(DotBracket):
|
|
907
986
|
first = 1
|
908
987
|
|
909
988
|
for match in re.finditer(
|
910
|
-
r"((>.*?\n)?([
|
989
|
+
r"((>.*?\n)?([ACGTURYSWKMBDHVNacgturyswkmbdhvn.-]+)\n([.()\[\]{}<>A-Za-z]+))",
|
990
|
+
input,
|
911
991
|
):
|
912
992
|
sequence = match.group(3)
|
913
993
|
structure = match.group(4)
|
rnapolis/parser.py
CHANGED
rnapolis/tertiary.py
CHANGED
@@ -548,7 +548,7 @@ class Mapping2D3D:
|
|
548
548
|
|
549
549
|
@cached_property
|
550
550
|
def dot_bracket(self) -> str:
|
551
|
-
dbns = self.__generate_dot_bracket_per_strand(self.bpseq)
|
551
|
+
dbns = self.__generate_dot_bracket_per_strand(self.bpseq.dot_bracket.structure)
|
552
552
|
i = 0
|
553
553
|
result = []
|
554
554
|
|
@@ -560,8 +560,8 @@ class Mapping2D3D:
|
|
560
560
|
i += len(sequence)
|
561
561
|
return "\n".join(result)
|
562
562
|
|
563
|
-
def __generate_dot_bracket_per_strand(self, bpseq: BpSeq) -> List[str]:
|
564
|
-
dbn = bpseq.dot_bracket.structure
|
563
|
+
def __generate_dot_bracket_per_strand(self, dbn_structure: str) -> List[str]:
|
564
|
+
dbn = dbn_structure
|
565
565
|
i = 0
|
566
566
|
result = []
|
567
567
|
|
@@ -570,6 +570,25 @@ class Mapping2D3D:
|
|
570
570
|
i += len(sequence)
|
571
571
|
return result
|
572
572
|
|
573
|
+
@cached_property
|
574
|
+
def all_dot_brackets(self) -> List[str]:
|
575
|
+
dot_brackets = []
|
576
|
+
|
577
|
+
for dot_bracket in self.bpseq.all_dot_brackets:
|
578
|
+
dbns = self.__generate_dot_bracket_per_strand(dot_bracket.structure)
|
579
|
+
i = 0
|
580
|
+
result = []
|
581
|
+
|
582
|
+
for i, pair in enumerate(self.strands_sequences):
|
583
|
+
chain, sequence = pair
|
584
|
+
result.append(f">strand_{chain}")
|
585
|
+
result.append(sequence)
|
586
|
+
result.append(dbns[i])
|
587
|
+
i += len(sequence)
|
588
|
+
dot_brackets.append("\n".join(result))
|
589
|
+
|
590
|
+
return dot_brackets
|
591
|
+
|
573
592
|
@cached_property
|
574
593
|
def extended_dot_bracket(self) -> str:
|
575
594
|
result = [
|
@@ -593,7 +612,9 @@ class Mapping2D3D:
|
|
593
612
|
for row in [row1, row2]:
|
594
613
|
if row:
|
595
614
|
bpseq = self.__generate_bpseq(row)
|
596
|
-
dbns = self.__generate_dot_bracket_per_strand(bpseq)
|
615
|
+
dbns = self.__generate_dot_bracket_per_strand(
|
616
|
+
bpseq.dot_bracket.structure
|
617
|
+
)
|
597
618
|
|
598
619
|
for i in range(len(self.strands_sequences)):
|
599
620
|
result[i].append(f"{lw.value} {dbns[i]}")
|
RNApolis-0.3.11.dist-info/RECORD
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
rnapolis/annotator.py,sha256=bcyqmUSSRyl0ejA3548K4czSElBMX3EpCKAfJ2tYjsw,21476
|
2
|
-
rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
|
3
|
-
rnapolis/common.py,sha256=owupPG9oylz4Ed4DqVYJqWIKpovLJ3EIIApgca6tuhg,27344
|
4
|
-
rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
|
5
|
-
rnapolis/molecule_filter.py,sha256=NhjuqdCRnXgPefWZPeTq77tifmnAzamQtA0ODqPPG9k,6918
|
6
|
-
rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
|
7
|
-
rnapolis/parser.py,sha256=0uNKPnKiv5uaFVFGIzP8xbGLokimBkjs1XdlV0JmKIw,12217
|
8
|
-
rnapolis/rfam_folder.py,sha256=3rgXEJR16uPFy_BOo8qkdClOAOQDVOkidnLE-yoRbeI,11112
|
9
|
-
rnapolis/tertiary.py,sha256=iWMPD9c21rjMPpEdBd7mPCQgds65IbOr4_Fy06s0NoU,18957
|
10
|
-
rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
|
11
|
-
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
12
|
-
RNApolis-0.3.11.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
13
|
-
RNApolis-0.3.11.dist-info/METADATA,sha256=J0a3wmvQoWPVFgSgvIxMkMFSBCz3KFHB8BHKFNtIdKw,54301
|
14
|
-
RNApolis-0.3.11.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
15
|
-
RNApolis-0.3.11.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
|
16
|
-
RNApolis-0.3.11.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
17
|
-
RNApolis-0.3.11.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|