RNApolis 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- {RNApolis-0.3.11.dist-info → RNApolis-0.3.13.dist-info}/METADATA +1 -1
- RNApolis-0.3.13.dist-info/RECORD +17 -0
- rnapolis/annotator.py +25 -7
- rnapolis/common.py +81 -1
- rnapolis/parser.py +0 -1
- rnapolis/tertiary.py +25 -4
- RNApolis-0.3.11.dist-info/RECORD +0 -17
- {RNApolis-0.3.11.dist-info → RNApolis-0.3.13.dist-info}/LICENSE +0 -0
- {RNApolis-0.3.11.dist-info → RNApolis-0.3.13.dist-info}/WHEEL +0 -0
- {RNApolis-0.3.11.dist-info → RNApolis-0.3.13.dist-info}/entry_points.txt +0 -0
- {RNApolis-0.3.11.dist-info → RNApolis-0.3.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,17 @@
|
|
1
|
+
rnapolis/annotator.py,sha256=XnjFBeu3P_2UMdkD4Ao7m7K6JfeqYa-13xRzghrLvt8,22086
|
2
|
+
rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
|
3
|
+
rnapolis/common.py,sha256=T13Lqhr8V9nziThUrzh5FFuGazQFF5H_p8avOOdlqpc,29996
|
4
|
+
rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
|
5
|
+
rnapolis/molecule_filter.py,sha256=NhjuqdCRnXgPefWZPeTq77tifmnAzamQtA0ODqPPG9k,6918
|
6
|
+
rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
|
7
|
+
rnapolis/parser.py,sha256=Cmjt7p8UkiSNhSQDjc6I7BRqtuIIHs23Fp-Glb8Zikw,12216
|
8
|
+
rnapolis/rfam_folder.py,sha256=3rgXEJR16uPFy_BOo8qkdClOAOQDVOkidnLE-yoRbeI,11112
|
9
|
+
rnapolis/tertiary.py,sha256=VuATTN2SD7lBL9iUgT-doDwuEYsLodgV2u-SwQsyQcU,19658
|
10
|
+
rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
|
11
|
+
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
12
|
+
RNApolis-0.3.13.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
13
|
+
RNApolis-0.3.13.dist-info/METADATA,sha256=DD4VAvMES_WYFSYUi6wzcKlgA1-jIsBHwAnBfy0z7mE,54301
|
14
|
+
RNApolis-0.3.13.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
15
|
+
RNApolis-0.3.13.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
|
16
|
+
RNApolis-0.3.13.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
17
|
+
RNApolis-0.3.13.dist-info/RECORD,,
|
rnapolis/annotator.py
CHANGED
@@ -9,7 +9,6 @@ from typing import Dict, List, Optional, Set, Tuple
|
|
9
9
|
|
10
10
|
import numpy
|
11
11
|
import numpy.typing
|
12
|
-
import orjson
|
13
12
|
from ordered_set import OrderedSet
|
14
13
|
from scipy.spatial import KDTree
|
15
14
|
|
@@ -486,7 +485,8 @@ def extract_secondary_structure(
|
|
486
485
|
tertiary_structure: Structure3D,
|
487
486
|
model: Optional[int] = None,
|
488
487
|
find_gaps: bool = False,
|
489
|
-
|
488
|
+
all_dot_brackets: bool = False,
|
489
|
+
) -> Tuple[Structure2D, List[str]]:
|
490
490
|
base_interactions = extract_base_interactions(tertiary_structure, model)
|
491
491
|
mapping = Mapping2D3D(
|
492
492
|
tertiary_structure,
|
@@ -495,7 +495,7 @@ def extract_secondary_structure(
|
|
495
495
|
find_gaps,
|
496
496
|
)
|
497
497
|
stems, single_strands, hairpins, loops = mapping.bpseq.elements
|
498
|
-
|
498
|
+
structure2d = Structure2D(
|
499
499
|
base_interactions,
|
500
500
|
str(mapping.bpseq),
|
501
501
|
mapping.dot_bracket,
|
@@ -505,6 +505,10 @@ def extract_secondary_structure(
|
|
505
505
|
hairpins,
|
506
506
|
loops,
|
507
507
|
)
|
508
|
+
if all_dot_brackets:
|
509
|
+
return structure2d, mapping.all_dot_brackets
|
510
|
+
else:
|
511
|
+
return structure2d, [structure2d.dotBracket]
|
508
512
|
|
509
513
|
|
510
514
|
def write_json(path: str, structure2d: BaseInteractions):
|
@@ -580,29 +584,40 @@ def write_bpseq(path: str, bpseq: BpSeq):
|
|
580
584
|
def main():
|
581
585
|
parser = argparse.ArgumentParser()
|
582
586
|
parser.add_argument("input", help="Path to PDB or mmCIF file")
|
583
|
-
parser.add_argument("--bpseq", help="(optional) path to output BPSEQ file")
|
584
|
-
parser.add_argument("--csv", help="(optional) path to output CSV file")
|
585
587
|
parser.add_argument(
|
588
|
+
"-a",
|
589
|
+
"--all-dot-brackets",
|
590
|
+
action="store_true",
|
591
|
+
help=")optional) print all dot-brackets, not only optimal one (exclusive with -e/--extended)",
|
592
|
+
)
|
593
|
+
parser.add_argument("-b", "--bpseq", help="(optional) path to output BPSEQ file")
|
594
|
+
parser.add_argument("-c", "--csv", help="(optional) path to output CSV file")
|
595
|
+
parser.add_argument(
|
596
|
+
"-j",
|
586
597
|
"--json",
|
587
598
|
help="(optional) path to output JSON file",
|
588
599
|
)
|
589
600
|
parser.add_argument(
|
601
|
+
"-e",
|
590
602
|
"--extended",
|
591
603
|
action="store_true",
|
592
604
|
help="(optional) if set, the program will print extended secondary structure to the standard output",
|
593
605
|
)
|
594
606
|
parser.add_argument(
|
607
|
+
"-f",
|
595
608
|
"--find-gaps",
|
596
609
|
action="store_true",
|
597
610
|
help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
|
598
611
|
f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
|
599
612
|
)
|
600
|
-
parser.add_argument("--dot", help="(optional) path to output DOT file")
|
613
|
+
parser.add_argument("-d", "--dot", help="(optional) path to output DOT file")
|
601
614
|
args = parser.parse_args()
|
602
615
|
|
603
616
|
file = handle_input_file(args.input)
|
604
617
|
structure3d = read_3d_structure(file, None)
|
605
|
-
structure2d = extract_secondary_structure(
|
618
|
+
structure2d, dot_brackets = extract_secondary_structure(
|
619
|
+
structure3d, None, args.find_gaps, args.all_dot_brackets
|
620
|
+
)
|
606
621
|
|
607
622
|
if args.csv:
|
608
623
|
write_csv(args.csv, structure2d)
|
@@ -615,6 +630,9 @@ def main():
|
|
615
630
|
|
616
631
|
if args.extended:
|
617
632
|
print(structure2d.extendedDotBracket)
|
633
|
+
elif args.all_dot_brackets:
|
634
|
+
for dot_bracket in dot_brackets:
|
635
|
+
print(dot_bracket)
|
618
636
|
else:
|
619
637
|
print(structure2d.dotBracket)
|
620
638
|
|
rnapolis/common.py
CHANGED
@@ -8,6 +8,7 @@ from collections.abc import Sequence
|
|
8
8
|
from dataclasses import dataclass
|
9
9
|
from enum import Enum
|
10
10
|
from functools import cache, cached_property, total_ordering
|
11
|
+
from itertools import permutations
|
11
12
|
from typing import Dict, List, Optional, Tuple
|
12
13
|
|
13
14
|
import graphviz
|
@@ -852,6 +853,78 @@ class BpSeq:
|
|
852
853
|
|
853
854
|
return self.__make_dot_bracket(regions, orders)
|
854
855
|
|
856
|
+
@cached_property
|
857
|
+
def all_dot_brackets(self):
|
858
|
+
# build conflict graph
|
859
|
+
regions = self.__regions
|
860
|
+
graph = defaultdict(set)
|
861
|
+
|
862
|
+
for i, j in itertools.combinations(range(len(regions)), 2):
|
863
|
+
ri, rj = regions[i], regions[j]
|
864
|
+
k, l, _ = ri
|
865
|
+
m, n, _ = rj
|
866
|
+
|
867
|
+
# is pseudoknot?
|
868
|
+
if (k < m < l < n) or (m < k < n < l):
|
869
|
+
graph[i].add(j)
|
870
|
+
graph[j].add(i)
|
871
|
+
|
872
|
+
# find all connected components
|
873
|
+
vertices = list(graph.keys())
|
874
|
+
visited = {vertex: False for vertex in vertices}
|
875
|
+
components = []
|
876
|
+
|
877
|
+
for vertex in vertices:
|
878
|
+
if not visited[vertex]:
|
879
|
+
visited[vertex] = True
|
880
|
+
stack = [vertex]
|
881
|
+
components.append([vertex])
|
882
|
+
|
883
|
+
while stack:
|
884
|
+
current = stack[-1]
|
885
|
+
next_vertex = None
|
886
|
+
|
887
|
+
for neighbor in graph[current]:
|
888
|
+
if not visited[neighbor]:
|
889
|
+
next_vertex = neighbor
|
890
|
+
break
|
891
|
+
|
892
|
+
if next_vertex is not None:
|
893
|
+
visited[next_vertex] = True
|
894
|
+
stack.append(next_vertex)
|
895
|
+
components[-1].append(next_vertex)
|
896
|
+
else:
|
897
|
+
stack.pop()
|
898
|
+
|
899
|
+
# permute order of every component
|
900
|
+
permutations = [
|
901
|
+
list(itertools.permutations(component)) for component in components
|
902
|
+
]
|
903
|
+
solutions = set()
|
904
|
+
|
905
|
+
for permutation in itertools.product(*permutations):
|
906
|
+
orders = [0 for _ in range(len(regions))]
|
907
|
+
|
908
|
+
for component in permutation:
|
909
|
+
for i in range(1, len(component)):
|
910
|
+
region_i = component[i]
|
911
|
+
available = [True for _ in range(len(graph))]
|
912
|
+
|
913
|
+
for j in range(i):
|
914
|
+
region_j = component[j]
|
915
|
+
|
916
|
+
if region_j in graph[region_i]:
|
917
|
+
available[orders[region_j]] = False
|
918
|
+
|
919
|
+
order = next(
|
920
|
+
filter(lambda i: available[i] is True, range(len(available)))
|
921
|
+
)
|
922
|
+
orders[region_i] = order
|
923
|
+
|
924
|
+
solutions.add(self.__make_dot_bracket(regions, orders))
|
925
|
+
|
926
|
+
return list(solutions)
|
927
|
+
|
855
928
|
|
856
929
|
@dataclass
|
857
930
|
class DotBracket:
|
@@ -896,6 +969,12 @@ class DotBracket:
|
|
896
969
|
def __str__(self):
|
897
970
|
return f"{self.sequence}\n{self.structure}"
|
898
971
|
|
972
|
+
def __eq__(self, other):
|
973
|
+
return self.sequence == other.sequence and self.structure == other.structure
|
974
|
+
|
975
|
+
def __hash__(self) -> int:
|
976
|
+
return hash((self.sequence, self.structure))
|
977
|
+
|
899
978
|
|
900
979
|
@dataclass
|
901
980
|
class MultiStrandDotBracket(DotBracket):
|
@@ -907,7 +986,8 @@ class MultiStrandDotBracket(DotBracket):
|
|
907
986
|
first = 1
|
908
987
|
|
909
988
|
for match in re.finditer(
|
910
|
-
r"((>.*?\n)?([
|
989
|
+
r"((>.*?\n)?([ACGTURYSWKMBDHVNacgturyswkmbdhvn.-]+)\n([.()\[\]{}<>A-Za-z]+))",
|
990
|
+
input,
|
911
991
|
):
|
912
992
|
sequence = match.group(3)
|
913
993
|
structure = match.group(4)
|
rnapolis/parser.py
CHANGED
rnapolis/tertiary.py
CHANGED
@@ -548,7 +548,7 @@ class Mapping2D3D:
|
|
548
548
|
|
549
549
|
@cached_property
|
550
550
|
def dot_bracket(self) -> str:
|
551
|
-
dbns = self.__generate_dot_bracket_per_strand(self.bpseq)
|
551
|
+
dbns = self.__generate_dot_bracket_per_strand(self.bpseq.dot_bracket.structure)
|
552
552
|
i = 0
|
553
553
|
result = []
|
554
554
|
|
@@ -560,8 +560,8 @@ class Mapping2D3D:
|
|
560
560
|
i += len(sequence)
|
561
561
|
return "\n".join(result)
|
562
562
|
|
563
|
-
def __generate_dot_bracket_per_strand(self, bpseq: BpSeq) -> List[str]:
|
564
|
-
dbn = bpseq.dot_bracket.structure
|
563
|
+
def __generate_dot_bracket_per_strand(self, dbn_structure: str) -> List[str]:
|
564
|
+
dbn = dbn_structure
|
565
565
|
i = 0
|
566
566
|
result = []
|
567
567
|
|
@@ -570,6 +570,25 @@ class Mapping2D3D:
|
|
570
570
|
i += len(sequence)
|
571
571
|
return result
|
572
572
|
|
573
|
+
@cached_property
|
574
|
+
def all_dot_brackets(self) -> List[str]:
|
575
|
+
dot_brackets = []
|
576
|
+
|
577
|
+
for dot_bracket in self.bpseq.all_dot_brackets:
|
578
|
+
dbns = self.__generate_dot_bracket_per_strand(dot_bracket.structure)
|
579
|
+
i = 0
|
580
|
+
result = []
|
581
|
+
|
582
|
+
for i, pair in enumerate(self.strands_sequences):
|
583
|
+
chain, sequence = pair
|
584
|
+
result.append(f">strand_{chain}")
|
585
|
+
result.append(sequence)
|
586
|
+
result.append(dbns[i])
|
587
|
+
i += len(sequence)
|
588
|
+
dot_brackets.append("\n".join(result))
|
589
|
+
|
590
|
+
return dot_brackets
|
591
|
+
|
573
592
|
@cached_property
|
574
593
|
def extended_dot_bracket(self) -> str:
|
575
594
|
result = [
|
@@ -593,7 +612,9 @@ class Mapping2D3D:
|
|
593
612
|
for row in [row1, row2]:
|
594
613
|
if row:
|
595
614
|
bpseq = self.__generate_bpseq(row)
|
596
|
-
dbns = self.__generate_dot_bracket_per_strand(bpseq)
|
615
|
+
dbns = self.__generate_dot_bracket_per_strand(
|
616
|
+
bpseq.dot_bracket.structure
|
617
|
+
)
|
597
618
|
|
598
619
|
for i in range(len(self.strands_sequences)):
|
599
620
|
result[i].append(f"{lw.value} {dbns[i]}")
|
RNApolis-0.3.11.dist-info/RECORD
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
rnapolis/annotator.py,sha256=bcyqmUSSRyl0ejA3548K4czSElBMX3EpCKAfJ2tYjsw,21476
|
2
|
-
rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
|
3
|
-
rnapolis/common.py,sha256=owupPG9oylz4Ed4DqVYJqWIKpovLJ3EIIApgca6tuhg,27344
|
4
|
-
rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
|
5
|
-
rnapolis/molecule_filter.py,sha256=NhjuqdCRnXgPefWZPeTq77tifmnAzamQtA0ODqPPG9k,6918
|
6
|
-
rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
|
7
|
-
rnapolis/parser.py,sha256=0uNKPnKiv5uaFVFGIzP8xbGLokimBkjs1XdlV0JmKIw,12217
|
8
|
-
rnapolis/rfam_folder.py,sha256=3rgXEJR16uPFy_BOo8qkdClOAOQDVOkidnLE-yoRbeI,11112
|
9
|
-
rnapolis/tertiary.py,sha256=iWMPD9c21rjMPpEdBd7mPCQgds65IbOr4_Fy06s0NoU,18957
|
10
|
-
rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
|
11
|
-
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
12
|
-
RNApolis-0.3.11.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
13
|
-
RNApolis-0.3.11.dist-info/METADATA,sha256=J0a3wmvQoWPVFgSgvIxMkMFSBCz3KFHB8BHKFNtIdKw,54301
|
14
|
-
RNApolis-0.3.11.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
15
|
-
RNApolis-0.3.11.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
|
16
|
-
RNApolis-0.3.11.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
17
|
-
RNApolis-0.3.11.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|