RNApolis 0.3.12__tar.gz → 0.3.13__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {rnapolis-0.3.12/src/RNApolis.egg-info → rnapolis-0.3.13}/PKG-INFO +1 -1
- {rnapolis-0.3.12 → rnapolis-0.3.13}/setup.py +1 -1
- {rnapolis-0.3.12 → rnapolis-0.3.13/src/RNApolis.egg-info}/PKG-INFO +1 -1
- {rnapolis-0.3.12 → rnapolis-0.3.13}/src/rnapolis/annotator.py +25 -7
- {rnapolis-0.3.12 → rnapolis-0.3.13}/src/rnapolis/common.py +79 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/src/rnapolis/tertiary.py +25 -4
- {rnapolis-0.3.12 → rnapolis-0.3.13}/LICENSE +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/README.md +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/pyproject.toml +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/setup.cfg +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/src/RNApolis.egg-info/SOURCES.txt +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/src/RNApolis.egg-info/dependency_links.txt +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/src/RNApolis.egg-info/entry_points.txt +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/src/RNApolis.egg-info/requires.txt +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/src/RNApolis.egg-info/top_level.txt +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/src/rnapolis/clashfinder.py +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/src/rnapolis/metareader.py +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/src/rnapolis/molecule_filter.py +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/src/rnapolis/motif_extractor.py +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/src/rnapolis/parser.py +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/src/rnapolis/rfam_folder.py +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/src/rnapolis/transformer.py +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/src/rnapolis/util.py +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/tests/test_annotator.py +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/tests/test_bugfixes.py +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/tests/test_common.py +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/tests/test_metareader.py +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/tests/test_parser.py +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/tests/test_quadruplexes.py +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/tests/test_rfam_folder.py +0 -0
- {rnapolis-0.3.12 → rnapolis-0.3.13}/tests/test_tertiary.py +0 -0
@@ -9,7 +9,6 @@ from typing import Dict, List, Optional, Set, Tuple
|
|
9
9
|
|
10
10
|
import numpy
|
11
11
|
import numpy.typing
|
12
|
-
import orjson
|
13
12
|
from ordered_set import OrderedSet
|
14
13
|
from scipy.spatial import KDTree
|
15
14
|
|
@@ -486,7 +485,8 @@ def extract_secondary_structure(
|
|
486
485
|
tertiary_structure: Structure3D,
|
487
486
|
model: Optional[int] = None,
|
488
487
|
find_gaps: bool = False,
|
489
|
-
) -> Structure2D:
|
488
|
+
all_dot_brackets: bool = False,
|
489
|
+
) -> Tuple[Structure2D, List[str]]:
|
490
490
|
base_interactions = extract_base_interactions(tertiary_structure, model)
|
491
491
|
mapping = Mapping2D3D(
|
492
492
|
tertiary_structure,
|
@@ -495,7 +495,7 @@ def extract_secondary_structure(
|
|
495
495
|
find_gaps,
|
496
496
|
)
|
497
497
|
stems, single_strands, hairpins, loops = mapping.bpseq.elements
|
498
|
-
return Structure2D(
|
498
|
+
structure2d = Structure2D(
|
499
499
|
base_interactions,
|
500
500
|
str(mapping.bpseq),
|
501
501
|
mapping.dot_bracket,
|
@@ -505,6 +505,10 @@ def extract_secondary_structure(
|
|
505
505
|
hairpins,
|
506
506
|
loops,
|
507
507
|
)
|
508
|
+
if all_dot_brackets:
|
509
|
+
return structure2d, mapping.all_dot_brackets
|
510
|
+
else:
|
511
|
+
return structure2d, [structure2d.dotBracket]
|
508
512
|
|
509
513
|
|
510
514
|
def write_json(path: str, structure2d: BaseInteractions):
|
@@ -580,29 +584,40 @@ def write_bpseq(path: str, bpseq: BpSeq):
|
|
580
584
|
def main():
|
581
585
|
parser = argparse.ArgumentParser()
|
582
586
|
parser.add_argument("input", help="Path to PDB or mmCIF file")
|
583
|
-
parser.add_argument("--bpseq", help="(optional) path to output BPSEQ file")
|
584
|
-
parser.add_argument("--csv", help="(optional) path to output CSV file")
|
585
587
|
parser.add_argument(
|
588
|
+
"-a",
|
589
|
+
"--all-dot-brackets",
|
590
|
+
action="store_true",
|
591
|
+
help=")optional) print all dot-brackets, not only optimal one (exclusive with -e/--extended)",
|
592
|
+
)
|
593
|
+
parser.add_argument("-b", "--bpseq", help="(optional) path to output BPSEQ file")
|
594
|
+
parser.add_argument("-c", "--csv", help="(optional) path to output CSV file")
|
595
|
+
parser.add_argument(
|
596
|
+
"-j",
|
586
597
|
"--json",
|
587
598
|
help="(optional) path to output JSON file",
|
588
599
|
)
|
589
600
|
parser.add_argument(
|
601
|
+
"-e",
|
590
602
|
"--extended",
|
591
603
|
action="store_true",
|
592
604
|
help="(optional) if set, the program will print extended secondary structure to the standard output",
|
593
605
|
)
|
594
606
|
parser.add_argument(
|
607
|
+
"-f",
|
595
608
|
"--find-gaps",
|
596
609
|
action="store_true",
|
597
610
|
help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
|
598
611
|
f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
|
599
612
|
)
|
600
|
-
parser.add_argument("--dot", help="(optional) path to output DOT file")
|
613
|
+
parser.add_argument("-d", "--dot", help="(optional) path to output DOT file")
|
601
614
|
args = parser.parse_args()
|
602
615
|
|
603
616
|
file = handle_input_file(args.input)
|
604
617
|
structure3d = read_3d_structure(file, None)
|
605
|
-
structure2d = extract_secondary_structure(
|
618
|
+
structure2d, dot_brackets = extract_secondary_structure(
|
619
|
+
structure3d, None, args.find_gaps, args.all_dot_brackets
|
620
|
+
)
|
606
621
|
|
607
622
|
if args.csv:
|
608
623
|
write_csv(args.csv, structure2d)
|
@@ -615,6 +630,9 @@ def main():
|
|
615
630
|
|
616
631
|
if args.extended:
|
617
632
|
print(structure2d.extendedDotBracket)
|
633
|
+
elif args.all_dot_brackets:
|
634
|
+
for dot_bracket in dot_brackets:
|
635
|
+
print(dot_bracket)
|
618
636
|
else:
|
619
637
|
print(structure2d.dotBracket)
|
620
638
|
|
@@ -8,6 +8,7 @@ from collections.abc import Sequence
|
|
8
8
|
from dataclasses import dataclass
|
9
9
|
from enum import Enum
|
10
10
|
from functools import cache, cached_property, total_ordering
|
11
|
+
from itertools import permutations
|
11
12
|
from typing import Dict, List, Optional, Tuple
|
12
13
|
|
13
14
|
import graphviz
|
@@ -852,6 +853,78 @@ class BpSeq:
|
|
852
853
|
|
853
854
|
return self.__make_dot_bracket(regions, orders)
|
854
855
|
|
856
|
+
@cached_property
|
857
|
+
def all_dot_brackets(self):
|
858
|
+
# build conflict graph
|
859
|
+
regions = self.__regions
|
860
|
+
graph = defaultdict(set)
|
861
|
+
|
862
|
+
for i, j in itertools.combinations(range(len(regions)), 2):
|
863
|
+
ri, rj = regions[i], regions[j]
|
864
|
+
k, l, _ = ri
|
865
|
+
m, n, _ = rj
|
866
|
+
|
867
|
+
# is pseudoknot?
|
868
|
+
if (k < m < l < n) or (m < k < n < l):
|
869
|
+
graph[i].add(j)
|
870
|
+
graph[j].add(i)
|
871
|
+
|
872
|
+
# find all connected components
|
873
|
+
vertices = list(graph.keys())
|
874
|
+
visited = {vertex: False for vertex in vertices}
|
875
|
+
components = []
|
876
|
+
|
877
|
+
for vertex in vertices:
|
878
|
+
if not visited[vertex]:
|
879
|
+
visited[vertex] = True
|
880
|
+
stack = [vertex]
|
881
|
+
components.append([vertex])
|
882
|
+
|
883
|
+
while stack:
|
884
|
+
current = stack[-1]
|
885
|
+
next_vertex = None
|
886
|
+
|
887
|
+
for neighbor in graph[current]:
|
888
|
+
if not visited[neighbor]:
|
889
|
+
next_vertex = neighbor
|
890
|
+
break
|
891
|
+
|
892
|
+
if next_vertex is not None:
|
893
|
+
visited[next_vertex] = True
|
894
|
+
stack.append(next_vertex)
|
895
|
+
components[-1].append(next_vertex)
|
896
|
+
else:
|
897
|
+
stack.pop()
|
898
|
+
|
899
|
+
# permute order of every component
|
900
|
+
permutations = [
|
901
|
+
list(itertools.permutations(component)) for component in components
|
902
|
+
]
|
903
|
+
solutions = set()
|
904
|
+
|
905
|
+
for permutation in itertools.product(*permutations):
|
906
|
+
orders = [0 for _ in range(len(regions))]
|
907
|
+
|
908
|
+
for component in permutation:
|
909
|
+
for i in range(1, len(component)):
|
910
|
+
region_i = component[i]
|
911
|
+
available = [True for _ in range(len(graph))]
|
912
|
+
|
913
|
+
for j in range(i):
|
914
|
+
region_j = component[j]
|
915
|
+
|
916
|
+
if region_j in graph[region_i]:
|
917
|
+
available[orders[region_j]] = False
|
918
|
+
|
919
|
+
order = next(
|
920
|
+
filter(lambda i: available[i] is True, range(len(available)))
|
921
|
+
)
|
922
|
+
orders[region_i] = order
|
923
|
+
|
924
|
+
solutions.add(self.__make_dot_bracket(regions, orders))
|
925
|
+
|
926
|
+
return list(solutions)
|
927
|
+
|
855
928
|
|
856
929
|
@dataclass
|
857
930
|
class DotBracket:
|
@@ -896,6 +969,12 @@ class DotBracket:
|
|
896
969
|
def __str__(self):
|
897
970
|
return f"{self.sequence}\n{self.structure}"
|
898
971
|
|
972
|
+
def __eq__(self, other):
|
973
|
+
return self.sequence == other.sequence and self.structure == other.structure
|
974
|
+
|
975
|
+
def __hash__(self) -> int:
|
976
|
+
return hash((self.sequence, self.structure))
|
977
|
+
|
899
978
|
|
900
979
|
@dataclass
|
901
980
|
class MultiStrandDotBracket(DotBracket):
|
@@ -548,7 +548,7 @@ class Mapping2D3D:
|
|
548
548
|
|
549
549
|
@cached_property
|
550
550
|
def dot_bracket(self) -> str:
|
551
|
-
dbns = self.__generate_dot_bracket_per_strand(self.bpseq)
|
551
|
+
dbns = self.__generate_dot_bracket_per_strand(self.bpseq.dot_bracket.structure)
|
552
552
|
i = 0
|
553
553
|
result = []
|
554
554
|
|
@@ -560,8 +560,8 @@ class Mapping2D3D:
|
|
560
560
|
i += len(sequence)
|
561
561
|
return "\n".join(result)
|
562
562
|
|
563
|
-
def __generate_dot_bracket_per_strand(self, bpseq: BpSeq) -> List[str]:
|
564
|
-
dbn = bpseq.dot_bracket.structure
|
563
|
+
def __generate_dot_bracket_per_strand(self, dbn_structure: str) -> List[str]:
|
564
|
+
dbn = dbn_structure
|
565
565
|
i = 0
|
566
566
|
result = []
|
567
567
|
|
@@ -570,6 +570,25 @@ class Mapping2D3D:
|
|
570
570
|
i += len(sequence)
|
571
571
|
return result
|
572
572
|
|
573
|
+
@cached_property
|
574
|
+
def all_dot_brackets(self) -> List[str]:
|
575
|
+
dot_brackets = []
|
576
|
+
|
577
|
+
for dot_bracket in self.bpseq.all_dot_brackets:
|
578
|
+
dbns = self.__generate_dot_bracket_per_strand(dot_bracket.structure)
|
579
|
+
i = 0
|
580
|
+
result = []
|
581
|
+
|
582
|
+
for i, pair in enumerate(self.strands_sequences):
|
583
|
+
chain, sequence = pair
|
584
|
+
result.append(f">strand_{chain}")
|
585
|
+
result.append(sequence)
|
586
|
+
result.append(dbns[i])
|
587
|
+
i += len(sequence)
|
588
|
+
dot_brackets.append("\n".join(result))
|
589
|
+
|
590
|
+
return dot_brackets
|
591
|
+
|
573
592
|
@cached_property
|
574
593
|
def extended_dot_bracket(self) -> str:
|
575
594
|
result = [
|
@@ -593,7 +612,9 @@ class Mapping2D3D:
|
|
593
612
|
for row in [row1, row2]:
|
594
613
|
if row:
|
595
614
|
bpseq = self.__generate_bpseq(row)
|
596
|
-
dbns = self.__generate_dot_bracket_per_strand(bpseq)
|
615
|
+
dbns = self.__generate_dot_bracket_per_strand(
|
616
|
+
bpseq.dot_bracket.structure
|
617
|
+
)
|
597
618
|
|
598
619
|
for i in range(len(self.strands_sequences)):
|
599
620
|
result[i].append(f"{lw.value} {dbns[i]}")
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|