RNApolis 0.3.12__py3-none-any.whl → 0.3.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.3.12
3
+ Version: 0.3.14
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -1,17 +1,17 @@
1
- rnapolis/annotator.py,sha256=bcyqmUSSRyl0ejA3548K4czSElBMX3EpCKAfJ2tYjsw,21476
1
+ rnapolis/annotator.py,sha256=XnjFBeu3P_2UMdkD4Ao7m7K6JfeqYa-13xRzghrLvt8,22086
2
2
  rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
3
- rnapolis/common.py,sha256=QdmAGF8DvG1EHpefumU27LceGm7l9obgWLhL4FELGT4,27381
3
+ rnapolis/common.py,sha256=mgn9psuBcgJVGPRxsxilVedPygqfwFz9wn5Z0vTU6SY,30107
4
4
  rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
5
5
  rnapolis/molecule_filter.py,sha256=NhjuqdCRnXgPefWZPeTq77tifmnAzamQtA0ODqPPG9k,6918
6
6
  rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
7
7
  rnapolis/parser.py,sha256=Cmjt7p8UkiSNhSQDjc6I7BRqtuIIHs23Fp-Glb8Zikw,12216
8
8
  rnapolis/rfam_folder.py,sha256=3rgXEJR16uPFy_BOo8qkdClOAOQDVOkidnLE-yoRbeI,11112
9
- rnapolis/tertiary.py,sha256=iWMPD9c21rjMPpEdBd7mPCQgds65IbOr4_Fy06s0NoU,18957
9
+ rnapolis/tertiary.py,sha256=VuATTN2SD7lBL9iUgT-doDwuEYsLodgV2u-SwQsyQcU,19658
10
10
  rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
11
11
  rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
12
- RNApolis-0.3.12.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
- RNApolis-0.3.12.dist-info/METADATA,sha256=PGjy4PTp1C5K1afBs6ZjeWz4pLJ5iFaiZfUSdgwusDk,54301
14
- RNApolis-0.3.12.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15
- RNApolis-0.3.12.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
- RNApolis-0.3.12.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
- RNApolis-0.3.12.dist-info/RECORD,,
12
+ RNApolis-0.3.14.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
+ RNApolis-0.3.14.dist-info/METADATA,sha256=Yrp600ac6KI9zfglV3wf4bY9-dOHO32YgWwwKzYzo94,54301
14
+ RNApolis-0.3.14.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15
+ RNApolis-0.3.14.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
+ RNApolis-0.3.14.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
+ RNApolis-0.3.14.dist-info/RECORD,,
rnapolis/annotator.py CHANGED
@@ -9,7 +9,6 @@ from typing import Dict, List, Optional, Set, Tuple
9
9
 
10
10
  import numpy
11
11
  import numpy.typing
12
- import orjson
13
12
  from ordered_set import OrderedSet
14
13
  from scipy.spatial import KDTree
15
14
 
@@ -486,7 +485,8 @@ def extract_secondary_structure(
486
485
  tertiary_structure: Structure3D,
487
486
  model: Optional[int] = None,
488
487
  find_gaps: bool = False,
489
- ) -> Structure2D:
488
+ all_dot_brackets: bool = False,
489
+ ) -> Tuple[Structure2D, List[str]]:
490
490
  base_interactions = extract_base_interactions(tertiary_structure, model)
491
491
  mapping = Mapping2D3D(
492
492
  tertiary_structure,
@@ -495,7 +495,7 @@ def extract_secondary_structure(
495
495
  find_gaps,
496
496
  )
497
497
  stems, single_strands, hairpins, loops = mapping.bpseq.elements
498
- return Structure2D(
498
+ structure2d = Structure2D(
499
499
  base_interactions,
500
500
  str(mapping.bpseq),
501
501
  mapping.dot_bracket,
@@ -505,6 +505,10 @@ def extract_secondary_structure(
505
505
  hairpins,
506
506
  loops,
507
507
  )
508
+ if all_dot_brackets:
509
+ return structure2d, mapping.all_dot_brackets
510
+ else:
511
+ return structure2d, [structure2d.dotBracket]
508
512
 
509
513
 
510
514
  def write_json(path: str, structure2d: BaseInteractions):
@@ -580,29 +584,40 @@ def write_bpseq(path: str, bpseq: BpSeq):
580
584
  def main():
581
585
  parser = argparse.ArgumentParser()
582
586
  parser.add_argument("input", help="Path to PDB or mmCIF file")
583
- parser.add_argument("--bpseq", help="(optional) path to output BPSEQ file")
584
- parser.add_argument("--csv", help="(optional) path to output CSV file")
585
587
  parser.add_argument(
588
+ "-a",
589
+ "--all-dot-brackets",
590
+ action="store_true",
591
+ help=")optional) print all dot-brackets, not only optimal one (exclusive with -e/--extended)",
592
+ )
593
+ parser.add_argument("-b", "--bpseq", help="(optional) path to output BPSEQ file")
594
+ parser.add_argument("-c", "--csv", help="(optional) path to output CSV file")
595
+ parser.add_argument(
596
+ "-j",
586
597
  "--json",
587
598
  help="(optional) path to output JSON file",
588
599
  )
589
600
  parser.add_argument(
601
+ "-e",
590
602
  "--extended",
591
603
  action="store_true",
592
604
  help="(optional) if set, the program will print extended secondary structure to the standard output",
593
605
  )
594
606
  parser.add_argument(
607
+ "-f",
595
608
  "--find-gaps",
596
609
  action="store_true",
597
610
  help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
598
611
  f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
599
612
  )
600
- parser.add_argument("--dot", help="(optional) path to output DOT file")
613
+ parser.add_argument("-d", "--dot", help="(optional) path to output DOT file")
601
614
  args = parser.parse_args()
602
615
 
603
616
  file = handle_input_file(args.input)
604
617
  structure3d = read_3d_structure(file, None)
605
- structure2d = extract_secondary_structure(structure3d, None, args.find_gaps)
618
+ structure2d, dot_brackets = extract_secondary_structure(
619
+ structure3d, None, args.find_gaps, args.all_dot_brackets
620
+ )
606
621
 
607
622
  if args.csv:
608
623
  write_csv(args.csv, structure2d)
@@ -615,6 +630,9 @@ def main():
615
630
 
616
631
  if args.extended:
617
632
  print(structure2d.extendedDotBracket)
633
+ elif args.all_dot_brackets:
634
+ for dot_bracket in dot_brackets:
635
+ print(dot_bracket)
618
636
  else:
619
637
  print(structure2d.dotBracket)
620
638
 
rnapolis/common.py CHANGED
@@ -8,6 +8,7 @@ from collections.abc import Sequence
8
8
  from dataclasses import dataclass
9
9
  from enum import Enum
10
10
  from functools import cache, cached_property, total_ordering
11
+ from itertools import permutations
11
12
  from typing import Dict, List, Optional, Tuple
12
13
 
13
14
  import graphviz
@@ -852,6 +853,82 @@ class BpSeq:
852
853
 
853
854
  return self.__make_dot_bracket(regions, orders)
854
855
 
856
+ @cached_property
857
+ def all_dot_brackets(self):
858
+ # build conflict graph
859
+ regions = self.__regions
860
+ graph = defaultdict(set)
861
+
862
+ for i, j in itertools.combinations(range(len(regions)), 2):
863
+ ri, rj = regions[i], regions[j]
864
+ k, l, _ = ri
865
+ m, n, _ = rj
866
+
867
+ # is pseudoknot?
868
+ if (k < m < l < n) or (m < k < n < l):
869
+ graph[i].add(j)
870
+ graph[j].add(i)
871
+
872
+ # early exit for non-pseudoknotted structures
873
+ vertices = list(graph.keys())
874
+ if not vertices:
875
+ return [self.fcfs]
876
+
877
+ # find all connected components
878
+ visited = {vertex: False for vertex in vertices}
879
+ components = []
880
+
881
+ for vertex in vertices:
882
+ if not visited[vertex]:
883
+ visited[vertex] = True
884
+ stack = [vertex]
885
+ components.append([vertex])
886
+
887
+ while stack:
888
+ current = stack[-1]
889
+ next_vertex = None
890
+
891
+ for neighbor in graph[current]:
892
+ if not visited[neighbor]:
893
+ next_vertex = neighbor
894
+ break
895
+
896
+ if next_vertex is not None:
897
+ visited[next_vertex] = True
898
+ stack.append(next_vertex)
899
+ components[-1].append(next_vertex)
900
+ else:
901
+ stack.pop()
902
+
903
+ # permute order of every component
904
+ permutations = [
905
+ list(itertools.permutations(component)) for component in components
906
+ ]
907
+ solutions = set()
908
+
909
+ for permutation in itertools.product(*permutations):
910
+ orders = [0 for _ in range(len(regions))]
911
+
912
+ for component in permutation:
913
+ for i in range(1, len(component)):
914
+ region_i = component[i]
915
+ available = [True for _ in range(len(graph))]
916
+
917
+ for j in range(i):
918
+ region_j = component[j]
919
+
920
+ if region_j in graph[region_i]:
921
+ available[orders[region_j]] = False
922
+
923
+ order = next(
924
+ filter(lambda i: available[i] is True, range(len(available)))
925
+ )
926
+ orders[region_i] = order
927
+
928
+ solutions.add(self.__make_dot_bracket(regions, orders))
929
+
930
+ return list(solutions)
931
+
855
932
 
856
933
  @dataclass
857
934
  class DotBracket:
@@ -896,6 +973,12 @@ class DotBracket:
896
973
  def __str__(self):
897
974
  return f"{self.sequence}\n{self.structure}"
898
975
 
976
+ def __eq__(self, other):
977
+ return self.sequence == other.sequence and self.structure == other.structure
978
+
979
+ def __hash__(self) -> int:
980
+ return hash((self.sequence, self.structure))
981
+
899
982
 
900
983
  @dataclass
901
984
  class MultiStrandDotBracket(DotBracket):
rnapolis/tertiary.py CHANGED
@@ -548,7 +548,7 @@ class Mapping2D3D:
548
548
 
549
549
  @cached_property
550
550
  def dot_bracket(self) -> str:
551
- dbns = self.__generate_dot_bracket_per_strand(self.bpseq)
551
+ dbns = self.__generate_dot_bracket_per_strand(self.bpseq.dot_bracket.structure)
552
552
  i = 0
553
553
  result = []
554
554
 
@@ -560,8 +560,8 @@ class Mapping2D3D:
560
560
  i += len(sequence)
561
561
  return "\n".join(result)
562
562
 
563
- def __generate_dot_bracket_per_strand(self, bpseq: BpSeq) -> List[str]:
564
- dbn = bpseq.dot_bracket.structure
563
+ def __generate_dot_bracket_per_strand(self, dbn_structure: str) -> List[str]:
564
+ dbn = dbn_structure
565
565
  i = 0
566
566
  result = []
567
567
 
@@ -570,6 +570,25 @@ class Mapping2D3D:
570
570
  i += len(sequence)
571
571
  return result
572
572
 
573
+ @cached_property
574
+ def all_dot_brackets(self) -> List[str]:
575
+ dot_brackets = []
576
+
577
+ for dot_bracket in self.bpseq.all_dot_brackets:
578
+ dbns = self.__generate_dot_bracket_per_strand(dot_bracket.structure)
579
+ i = 0
580
+ result = []
581
+
582
+ for i, pair in enumerate(self.strands_sequences):
583
+ chain, sequence = pair
584
+ result.append(f">strand_{chain}")
585
+ result.append(sequence)
586
+ result.append(dbns[i])
587
+ i += len(sequence)
588
+ dot_brackets.append("\n".join(result))
589
+
590
+ return dot_brackets
591
+
573
592
  @cached_property
574
593
  def extended_dot_bracket(self) -> str:
575
594
  result = [
@@ -593,7 +612,9 @@ class Mapping2D3D:
593
612
  for row in [row1, row2]:
594
613
  if row:
595
614
  bpseq = self.__generate_bpseq(row)
596
- dbns = self.__generate_dot_bracket_per_strand(bpseq)
615
+ dbns = self.__generate_dot_bracket_per_strand(
616
+ bpseq.dot_bracket.structure
617
+ )
597
618
 
598
619
  for i in range(len(self.strands_sequences)):
599
620
  result[i].append(f"{lw.value} {dbns[i]}")