RNApolis 0.3.12__py3-none-any.whl → 0.3.14__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.3.12
3
+ Version: 0.3.14
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -1,17 +1,17 @@
1
- rnapolis/annotator.py,sha256=bcyqmUSSRyl0ejA3548K4czSElBMX3EpCKAfJ2tYjsw,21476
1
+ rnapolis/annotator.py,sha256=XnjFBeu3P_2UMdkD4Ao7m7K6JfeqYa-13xRzghrLvt8,22086
2
2
  rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
3
- rnapolis/common.py,sha256=QdmAGF8DvG1EHpefumU27LceGm7l9obgWLhL4FELGT4,27381
3
+ rnapolis/common.py,sha256=mgn9psuBcgJVGPRxsxilVedPygqfwFz9wn5Z0vTU6SY,30107
4
4
  rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
5
5
  rnapolis/molecule_filter.py,sha256=NhjuqdCRnXgPefWZPeTq77tifmnAzamQtA0ODqPPG9k,6918
6
6
  rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
7
7
  rnapolis/parser.py,sha256=Cmjt7p8UkiSNhSQDjc6I7BRqtuIIHs23Fp-Glb8Zikw,12216
8
8
  rnapolis/rfam_folder.py,sha256=3rgXEJR16uPFy_BOo8qkdClOAOQDVOkidnLE-yoRbeI,11112
9
- rnapolis/tertiary.py,sha256=iWMPD9c21rjMPpEdBd7mPCQgds65IbOr4_Fy06s0NoU,18957
9
+ rnapolis/tertiary.py,sha256=VuATTN2SD7lBL9iUgT-doDwuEYsLodgV2u-SwQsyQcU,19658
10
10
  rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
11
11
  rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
12
- RNApolis-0.3.12.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
- RNApolis-0.3.12.dist-info/METADATA,sha256=PGjy4PTp1C5K1afBs6ZjeWz4pLJ5iFaiZfUSdgwusDk,54301
14
- RNApolis-0.3.12.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15
- RNApolis-0.3.12.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
- RNApolis-0.3.12.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
- RNApolis-0.3.12.dist-info/RECORD,,
12
+ RNApolis-0.3.14.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
+ RNApolis-0.3.14.dist-info/METADATA,sha256=Yrp600ac6KI9zfglV3wf4bY9-dOHO32YgWwwKzYzo94,54301
14
+ RNApolis-0.3.14.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15
+ RNApolis-0.3.14.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
+ RNApolis-0.3.14.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
+ RNApolis-0.3.14.dist-info/RECORD,,
rnapolis/annotator.py CHANGED
@@ -9,7 +9,6 @@ from typing import Dict, List, Optional, Set, Tuple
9
9
 
10
10
  import numpy
11
11
  import numpy.typing
12
- import orjson
13
12
  from ordered_set import OrderedSet
14
13
  from scipy.spatial import KDTree
15
14
 
@@ -486,7 +485,8 @@ def extract_secondary_structure(
486
485
  tertiary_structure: Structure3D,
487
486
  model: Optional[int] = None,
488
487
  find_gaps: bool = False,
489
- ) -> Structure2D:
488
+ all_dot_brackets: bool = False,
489
+ ) -> Tuple[Structure2D, List[str]]:
490
490
  base_interactions = extract_base_interactions(tertiary_structure, model)
491
491
  mapping = Mapping2D3D(
492
492
  tertiary_structure,
@@ -495,7 +495,7 @@ def extract_secondary_structure(
495
495
  find_gaps,
496
496
  )
497
497
  stems, single_strands, hairpins, loops = mapping.bpseq.elements
498
- return Structure2D(
498
+ structure2d = Structure2D(
499
499
  base_interactions,
500
500
  str(mapping.bpseq),
501
501
  mapping.dot_bracket,
@@ -505,6 +505,10 @@ def extract_secondary_structure(
505
505
  hairpins,
506
506
  loops,
507
507
  )
508
+ if all_dot_brackets:
509
+ return structure2d, mapping.all_dot_brackets
510
+ else:
511
+ return structure2d, [structure2d.dotBracket]
508
512
 
509
513
 
510
514
  def write_json(path: str, structure2d: BaseInteractions):
@@ -580,29 +584,40 @@ def write_bpseq(path: str, bpseq: BpSeq):
580
584
  def main():
581
585
  parser = argparse.ArgumentParser()
582
586
  parser.add_argument("input", help="Path to PDB or mmCIF file")
583
- parser.add_argument("--bpseq", help="(optional) path to output BPSEQ file")
584
- parser.add_argument("--csv", help="(optional) path to output CSV file")
585
587
  parser.add_argument(
588
+ "-a",
589
+ "--all-dot-brackets",
590
+ action="store_true",
591
+ help=")optional) print all dot-brackets, not only optimal one (exclusive with -e/--extended)",
592
+ )
593
+ parser.add_argument("-b", "--bpseq", help="(optional) path to output BPSEQ file")
594
+ parser.add_argument("-c", "--csv", help="(optional) path to output CSV file")
595
+ parser.add_argument(
596
+ "-j",
586
597
  "--json",
587
598
  help="(optional) path to output JSON file",
588
599
  )
589
600
  parser.add_argument(
601
+ "-e",
590
602
  "--extended",
591
603
  action="store_true",
592
604
  help="(optional) if set, the program will print extended secondary structure to the standard output",
593
605
  )
594
606
  parser.add_argument(
607
+ "-f",
595
608
  "--find-gaps",
596
609
  action="store_true",
597
610
  help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
598
611
  f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
599
612
  )
600
- parser.add_argument("--dot", help="(optional) path to output DOT file")
613
+ parser.add_argument("-d", "--dot", help="(optional) path to output DOT file")
601
614
  args = parser.parse_args()
602
615
 
603
616
  file = handle_input_file(args.input)
604
617
  structure3d = read_3d_structure(file, None)
605
- structure2d = extract_secondary_structure(structure3d, None, args.find_gaps)
618
+ structure2d, dot_brackets = extract_secondary_structure(
619
+ structure3d, None, args.find_gaps, args.all_dot_brackets
620
+ )
606
621
 
607
622
  if args.csv:
608
623
  write_csv(args.csv, structure2d)
@@ -615,6 +630,9 @@ def main():
615
630
 
616
631
  if args.extended:
617
632
  print(structure2d.extendedDotBracket)
633
+ elif args.all_dot_brackets:
634
+ for dot_bracket in dot_brackets:
635
+ print(dot_bracket)
618
636
  else:
619
637
  print(structure2d.dotBracket)
620
638
 
rnapolis/common.py CHANGED
@@ -8,6 +8,7 @@ from collections.abc import Sequence
8
8
  from dataclasses import dataclass
9
9
  from enum import Enum
10
10
  from functools import cache, cached_property, total_ordering
11
+ from itertools import permutations
11
12
  from typing import Dict, List, Optional, Tuple
12
13
 
13
14
  import graphviz
@@ -852,6 +853,82 @@ class BpSeq:
852
853
 
853
854
  return self.__make_dot_bracket(regions, orders)
854
855
 
856
@cached_property
def all_dot_brackets(self):
    """
    Enumerate every distinct dot-bracket encoding of this structure.

    Regions whose spans interleave (pseudoknots) conflict and therefore
    need different bracket levels. Which region ends up on which level
    depends on the order in which conflicting regions are processed, so
    every ordering of every connected component of the conflict graph is
    tried and each resulting level assignment is rendered with
    ``__make_dot_bracket``.

    Returns:
        A list of the unique results in first-seen order, so the order
        does not depend on string-hash randomisation. When no two regions
        conflict, the list contains only ``self.fcfs``.

    Note:
        All permutations of each component are generated, so the running
        time grows factorially with the size of the largest component.
    """
    regions = self.__regions

    # Conflict graph: vertices are region indices; an edge joins two
    # regions whose spans interleave, i.e. which form a pseudoknot.
    graph = defaultdict(set)
    for i, j in itertools.combinations(range(len(regions)), 2):
        begin_i, end_i, _ = regions[i]
        begin_j, end_j, _ = regions[j]

        if (begin_i < begin_j < end_i < end_j) or (
            begin_j < begin_i < end_j < end_i
        ):
            graph[i].add(j)
            graph[j].add(i)

    # Only regions involved in at least one conflict became vertices;
    # without any, there is nothing to permute.
    vertices = list(graph)
    if not vertices:
        return [self.fcfs]

    # Connected components via iterative depth-first search. Vertices are
    # recorded in the order they are first reached.
    visited = set()
    components = []
    for vertex in vertices:
        if vertex in visited:
            continue

        visited.add(vertex)
        component = [vertex]
        stack = [vertex]

        while stack:
            unvisited = next(
                (v for v in graph[stack[-1]] if v not in visited), None
            )
            if unvisited is None:
                stack.pop()
            else:
                visited.add(unvisited)
                stack.append(unvisited)
                component.append(unvisited)

        components.append(component)

    # Every ordering of every component; the cartesian product combines
    # one ordering per component into a full processing order.
    component_orderings = [
        list(itertools.permutations(component)) for component in components
    ]

    # A dict keeps insertion order, unlike a set, so duplicates are dropped
    # without making the output order depend on hash values.
    solutions = {}

    for ordering in itertools.product(*component_orderings):
        # Regions outside any conflict, and the first region of each
        # component, stay on level 0.
        orders = [0] * len(regions)

        for component in ordering:
            for position in range(1, len(component)):
                region = component[position]
                available = [True] * len(graph)

                # Levels already taken by conflicting regions that were
                # processed earlier in this ordering are unavailable.
                for earlier in component[:position]:
                    if earlier in graph[region]:
                        available[orders[earlier]] = False

                # Lowest free level. One always exists: fewer than
                # len(component) <= len(graph) levels can be blocked.
                orders[region] = available.index(True)

        solutions.setdefault(self.__make_dot_bracket(regions, orders), None)

    return list(solutions)
931
+
855
932
 
856
933
  @dataclass
857
934
  class DotBracket:
@@ -896,6 +973,12 @@ class DotBracket:
896
973
  def __str__(self):
897
974
  return f"{self.sequence}\n{self.structure}"
898
975
 
976
def __eq__(self, other):
    """
    Compare two dot-brackets by their sequence and structure.

    Returns ``NotImplemented`` for operands that are not ``DotBracket``
    instances, so ``==`` against unrelated objects (e.g. ``None``)
    evaluates to ``False`` instead of raising ``AttributeError``.
    """
    if not isinstance(other, DotBracket):
        return NotImplemented
    return self.sequence == other.sequence and self.structure == other.structure

def __hash__(self) -> int:
    """Hash the same fields ``__eq__`` compares, so equal objects hash alike."""
    return hash((self.sequence, self.structure))
981
+
899
982
 
900
983
  @dataclass
901
984
  class MultiStrandDotBracket(DotBracket):
rnapolis/tertiary.py CHANGED
@@ -548,7 +548,7 @@ class Mapping2D3D:
548
548
 
549
549
  @cached_property
550
550
  def dot_bracket(self) -> str:
551
- dbns = self.__generate_dot_bracket_per_strand(self.bpseq)
551
+ dbns = self.__generate_dot_bracket_per_strand(self.bpseq.dot_bracket.structure)
552
552
  i = 0
553
553
  result = []
554
554
 
@@ -560,8 +560,8 @@ class Mapping2D3D:
560
560
  i += len(sequence)
561
561
  return "\n".join(result)
562
562
 
563
- def __generate_dot_bracket_per_strand(self, bpseq: BpSeq) -> List[str]:
564
- dbn = bpseq.dot_bracket.structure
563
+ def __generate_dot_bracket_per_strand(self, dbn_structure: str) -> List[str]:
564
+ dbn = dbn_structure
565
565
  i = 0
566
566
  result = []
567
567
 
@@ -570,6 +570,25 @@ class Mapping2D3D:
570
570
  i += len(sequence)
571
571
  return result
572
572
 
573
@cached_property
def all_dot_brackets(self) -> List[str]:
    """
    Render every alternative dot-bracket of ``self.bpseq`` as per-strand text.

    For each entry of ``self.bpseq.all_dot_brackets`` the structure string
    is split per strand and emitted as three lines per strand: a
    ``>strand_<chain>`` header, the strand sequence, and that strand's
    dot-bracket.

    Returns:
        One newline-joined string per alternative, in the order given by
        ``self.bpseq.all_dot_brackets``.
    """
    dot_brackets = []

    for dot_bracket in self.bpseq.all_dot_brackets:
        dbns = self.__generate_dot_bracket_per_strand(dot_bracket.structure)
        result = []

        # dbns is indexed by strand position, not by a residue offset.
        for index, (chain, sequence) in enumerate(self.strands_sequences):
            result.extend((f">strand_{chain}", sequence, dbns[index]))

        dot_brackets.append("\n".join(result))

    return dot_brackets
591
+
573
592
  @cached_property
574
593
  def extended_dot_bracket(self) -> str:
575
594
  result = [
@@ -593,7 +612,9 @@ class Mapping2D3D:
593
612
  for row in [row1, row2]:
594
613
  if row:
595
614
  bpseq = self.__generate_bpseq(row)
596
- dbns = self.__generate_dot_bracket_per_strand(bpseq)
615
+ dbns = self.__generate_dot_bracket_per_strand(
616
+ bpseq.dot_bracket.structure
617
+ )
597
618
 
598
619
  for i in range(len(self.strands_sequences)):
599
620
  result[i].append(f"{lw.value} {dbns[i]}")