RNApolis 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.3.11
3
+ Version: 0.3.13
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -0,0 +1,17 @@
1
+ rnapolis/annotator.py,sha256=XnjFBeu3P_2UMdkD4Ao7m7K6JfeqYa-13xRzghrLvt8,22086
2
+ rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
3
+ rnapolis/common.py,sha256=T13Lqhr8V9nziThUrzh5FFuGazQFF5H_p8avOOdlqpc,29996
4
+ rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
5
+ rnapolis/molecule_filter.py,sha256=NhjuqdCRnXgPefWZPeTq77tifmnAzamQtA0ODqPPG9k,6918
6
+ rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
7
+ rnapolis/parser.py,sha256=Cmjt7p8UkiSNhSQDjc6I7BRqtuIIHs23Fp-Glb8Zikw,12216
8
+ rnapolis/rfam_folder.py,sha256=3rgXEJR16uPFy_BOo8qkdClOAOQDVOkidnLE-yoRbeI,11112
9
+ rnapolis/tertiary.py,sha256=VuATTN2SD7lBL9iUgT-doDwuEYsLodgV2u-SwQsyQcU,19658
10
+ rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
11
+ rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
12
+ RNApolis-0.3.13.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
+ RNApolis-0.3.13.dist-info/METADATA,sha256=DD4VAvMES_WYFSYUi6wzcKlgA1-jIsBHwAnBfy0z7mE,54301
14
+ RNApolis-0.3.13.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15
+ RNApolis-0.3.13.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
+ RNApolis-0.3.13.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
+ RNApolis-0.3.13.dist-info/RECORD,,
rnapolis/annotator.py CHANGED
@@ -9,7 +9,6 @@ from typing import Dict, List, Optional, Set, Tuple
9
9
 
10
10
  import numpy
11
11
  import numpy.typing
12
- import orjson
13
12
  from ordered_set import OrderedSet
14
13
  from scipy.spatial import KDTree
15
14
 
@@ -486,7 +485,8 @@ def extract_secondary_structure(
486
485
  tertiary_structure: Structure3D,
487
486
  model: Optional[int] = None,
488
487
  find_gaps: bool = False,
489
- ) -> Structure2D:
488
+ all_dot_brackets: bool = False,
489
+ ) -> Tuple[Structure2D, List[str]]:
490
490
  base_interactions = extract_base_interactions(tertiary_structure, model)
491
491
  mapping = Mapping2D3D(
492
492
  tertiary_structure,
@@ -495,7 +495,7 @@ def extract_secondary_structure(
495
495
  find_gaps,
496
496
  )
497
497
  stems, single_strands, hairpins, loops = mapping.bpseq.elements
498
- return Structure2D(
498
+ structure2d = Structure2D(
499
499
  base_interactions,
500
500
  str(mapping.bpseq),
501
501
  mapping.dot_bracket,
@@ -505,6 +505,10 @@ def extract_secondary_structure(
505
505
  hairpins,
506
506
  loops,
507
507
  )
508
+ if all_dot_brackets:
509
+ return structure2d, mapping.all_dot_brackets
510
+ else:
511
+ return structure2d, [structure2d.dotBracket]
508
512
 
509
513
 
510
514
  def write_json(path: str, structure2d: BaseInteractions):
@@ -580,29 +584,40 @@ def write_bpseq(path: str, bpseq: BpSeq):
580
584
  def main():
581
585
  parser = argparse.ArgumentParser()
582
586
  parser.add_argument("input", help="Path to PDB or mmCIF file")
583
- parser.add_argument("--bpseq", help="(optional) path to output BPSEQ file")
584
- parser.add_argument("--csv", help="(optional) path to output CSV file")
585
587
  parser.add_argument(
588
+ "-a",
589
+ "--all-dot-brackets",
590
+ action="store_true",
591
+ help="(optional) print all dot-brackets, not only optimal one (exclusive with -e/--extended)",
592
+ )
593
+ parser.add_argument("-b", "--bpseq", help="(optional) path to output BPSEQ file")
594
+ parser.add_argument("-c", "--csv", help="(optional) path to output CSV file")
595
+ parser.add_argument(
596
+ "-j",
586
597
  "--json",
587
598
  help="(optional) path to output JSON file",
588
599
  )
589
600
  parser.add_argument(
601
+ "-e",
590
602
  "--extended",
591
603
  action="store_true",
592
604
  help="(optional) if set, the program will print extended secondary structure to the standard output",
593
605
  )
594
606
  parser.add_argument(
607
+ "-f",
595
608
  "--find-gaps",
596
609
  action="store_true",
597
610
  help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
598
611
  f"the gap is defined as O3'-P distance greater than {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
599
612
  )
600
- parser.add_argument("--dot", help="(optional) path to output DOT file")
613
+ parser.add_argument("-d", "--dot", help="(optional) path to output DOT file")
601
614
  args = parser.parse_args()
602
615
 
603
616
  file = handle_input_file(args.input)
604
617
  structure3d = read_3d_structure(file, None)
605
- structure2d = extract_secondary_structure(structure3d, None, args.find_gaps)
618
+ structure2d, dot_brackets = extract_secondary_structure(
619
+ structure3d, None, args.find_gaps, args.all_dot_brackets
620
+ )
606
621
 
607
622
  if args.csv:
608
623
  write_csv(args.csv, structure2d)
@@ -615,6 +630,9 @@ def main():
615
630
 
616
631
  if args.extended:
617
632
  print(structure2d.extendedDotBracket)
633
+ elif args.all_dot_brackets:
634
+ for dot_bracket in dot_brackets:
635
+ print(dot_bracket)
618
636
  else:
619
637
  print(structure2d.dotBracket)
620
638
 
rnapolis/common.py CHANGED
@@ -8,6 +8,7 @@ from collections.abc import Sequence
8
8
  from dataclasses import dataclass
9
9
  from enum import Enum
10
10
  from functools import cache, cached_property, total_ordering
11
+ from itertools import permutations
11
12
  from typing import Dict, List, Optional, Tuple
12
13
 
13
14
  import graphviz
@@ -852,6 +853,78 @@ class BpSeq:
852
853
 
853
854
  return self.__make_dot_bracket(regions, orders)
854
855
 
856
+ @cached_property
857
+ def all_dot_brackets(self):
858
+ # build conflict graph
859
+ regions = self.__regions
860
+ graph = defaultdict(set)
861
+
862
+ for i, j in itertools.combinations(range(len(regions)), 2):
863
+ ri, rj = regions[i], regions[j]
864
+ k, l, _ = ri
865
+ m, n, _ = rj
866
+
867
+ # is pseudoknot?
868
+ if (k < m < l < n) or (m < k < n < l):
869
+ graph[i].add(j)
870
+ graph[j].add(i)
871
+
872
+ # find all connected components
873
+ vertices = list(graph.keys())
874
+ visited = {vertex: False for vertex in vertices}
875
+ components = []
876
+
877
+ for vertex in vertices:
878
+ if not visited[vertex]:
879
+ visited[vertex] = True
880
+ stack = [vertex]
881
+ components.append([vertex])
882
+
883
+ while stack:
884
+ current = stack[-1]
885
+ next_vertex = None
886
+
887
+ for neighbor in graph[current]:
888
+ if not visited[neighbor]:
889
+ next_vertex = neighbor
890
+ break
891
+
892
+ if next_vertex is not None:
893
+ visited[next_vertex] = True
894
+ stack.append(next_vertex)
895
+ components[-1].append(next_vertex)
896
+ else:
897
+ stack.pop()
898
+
899
+ # permute order of every component
900
+ permutations = [
901
+ list(itertools.permutations(component)) for component in components
902
+ ]
903
+ solutions = set()
904
+
905
+ for permutation in itertools.product(*permutations):
906
+ orders = [0 for _ in range(len(regions))]
907
+
908
+ for component in permutation:
909
+ for i in range(1, len(component)):
910
+ region_i = component[i]
911
+ available = [True for _ in range(len(graph))]
912
+
913
+ for j in range(i):
914
+ region_j = component[j]
915
+
916
+ if region_j in graph[region_i]:
917
+ available[orders[region_j]] = False
918
+
919
+ order = next(
920
+ filter(lambda i: available[i] is True, range(len(available)))
921
+ )
922
+ orders[region_i] = order
923
+
924
+ solutions.add(self.__make_dot_bracket(regions, orders))
925
+
926
+ return list(solutions)
927
+
855
928
 
856
929
  @dataclass
857
930
  class DotBracket:
@@ -896,6 +969,12 @@ class DotBracket:
896
969
  def __str__(self):
897
970
  return f"{self.sequence}\n{self.structure}"
898
971
 
972
+ def __eq__(self, other):
973
+ return self.sequence == other.sequence and self.structure == other.structure
974
+
975
+ def __hash__(self) -> int:
976
+ return hash((self.sequence, self.structure))
977
+
899
978
 
900
979
  @dataclass
901
980
  class MultiStrandDotBracket(DotBracket):
@@ -907,7 +986,8 @@ class MultiStrandDotBracket(DotBracket):
907
986
  first = 1
908
987
 
909
988
  for match in re.finditer(
910
- r"((>.*?\n)?([ACGUNacgun]+)\n([.()\[\]{}<>A-Za-z]+))", input
989
+ r"((>.*?\n)?([ACGTURYSWKMBDHVNacgturyswkmbdhvn.-]+)\n([.()\[\]{}<>A-Za-z]+))",
990
+ input,
911
991
  ):
912
992
  sequence = match.group(3)
913
993
  structure = match.group(4)
rnapolis/parser.py CHANGED
@@ -300,7 +300,6 @@ def get_one_letter_name(
300
300
  key = (label.chain, label.number)
301
301
  if key in sequence:
302
302
  return sequence[key]
303
-
304
303
  # RNA
305
304
  if len(name) == 1:
306
305
  return name
rnapolis/tertiary.py CHANGED
@@ -548,7 +548,7 @@ class Mapping2D3D:
548
548
 
549
549
  @cached_property
550
550
  def dot_bracket(self) -> str:
551
- dbns = self.__generate_dot_bracket_per_strand(self.bpseq)
551
+ dbns = self.__generate_dot_bracket_per_strand(self.bpseq.dot_bracket.structure)
552
552
  i = 0
553
553
  result = []
554
554
 
@@ -560,8 +560,8 @@ class Mapping2D3D:
560
560
  i += len(sequence)
561
561
  return "\n".join(result)
562
562
 
563
- def __generate_dot_bracket_per_strand(self, bpseq: BpSeq) -> List[str]:
564
- dbn = bpseq.dot_bracket.structure
563
+ def __generate_dot_bracket_per_strand(self, dbn_structure: str) -> List[str]:
564
+ dbn = dbn_structure
565
565
  i = 0
566
566
  result = []
567
567
 
@@ -570,6 +570,25 @@ class Mapping2D3D:
570
570
  i += len(sequence)
571
571
  return result
572
572
 
573
+ @cached_property
574
+ def all_dot_brackets(self) -> List[str]:
575
+ dot_brackets = []
576
+
577
+ for dot_bracket in self.bpseq.all_dot_brackets:
578
+ dbns = self.__generate_dot_bracket_per_strand(dot_bracket.structure)
579
+ i = 0
580
+ result = []
581
+
582
+ for i, pair in enumerate(self.strands_sequences):
583
+ chain, sequence = pair
584
+ result.append(f">strand_{chain}")
585
+ result.append(sequence)
586
+ result.append(dbns[i])
587
+ i += len(sequence)
588
+ dot_brackets.append("\n".join(result))
589
+
590
+ return dot_brackets
591
+
573
592
  @cached_property
574
593
  def extended_dot_bracket(self) -> str:
575
594
  result = [
@@ -593,7 +612,9 @@ class Mapping2D3D:
593
612
  for row in [row1, row2]:
594
613
  if row:
595
614
  bpseq = self.__generate_bpseq(row)
596
- dbns = self.__generate_dot_bracket_per_strand(bpseq)
615
+ dbns = self.__generate_dot_bracket_per_strand(
616
+ bpseq.dot_bracket.structure
617
+ )
597
618
 
598
619
  for i in range(len(self.strands_sequences)):
599
620
  result[i].append(f"{lw.value} {dbns[i]}")
@@ -1,17 +0,0 @@
1
- rnapolis/annotator.py,sha256=bcyqmUSSRyl0ejA3548K4czSElBMX3EpCKAfJ2tYjsw,21476
2
- rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
3
- rnapolis/common.py,sha256=owupPG9oylz4Ed4DqVYJqWIKpovLJ3EIIApgca6tuhg,27344
4
- rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
5
- rnapolis/molecule_filter.py,sha256=NhjuqdCRnXgPefWZPeTq77tifmnAzamQtA0ODqPPG9k,6918
6
- rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
7
- rnapolis/parser.py,sha256=0uNKPnKiv5uaFVFGIzP8xbGLokimBkjs1XdlV0JmKIw,12217
8
- rnapolis/rfam_folder.py,sha256=3rgXEJR16uPFy_BOo8qkdClOAOQDVOkidnLE-yoRbeI,11112
9
- rnapolis/tertiary.py,sha256=iWMPD9c21rjMPpEdBd7mPCQgds65IbOr4_Fy06s0NoU,18957
10
- rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
11
- rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
12
- RNApolis-0.3.11.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
- RNApolis-0.3.11.dist-info/METADATA,sha256=J0a3wmvQoWPVFgSgvIxMkMFSBCz3KFHB8BHKFNtIdKw,54301
14
- RNApolis-0.3.11.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15
- RNApolis-0.3.11.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
- RNApolis-0.3.11.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
- RNApolis-0.3.11.dist-info/RECORD,,