RNApolis 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.1.4
3
+ Version: 0.2.0
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -15,6 +15,7 @@ Classifier: Programming Language :: Python :: 3
15
15
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
+ Requires-Dist: graphviz
18
19
  Requires-Dist: mmcif
19
20
  Requires-Dist: numpy
20
21
  Requires-Dist: ordered-set
@@ -0,0 +1,15 @@
1
+ rnapolis/annotator.py,sha256=XtEJrog8c3zZ-pLlGBytJExGahPstGd-nbPLKQLa58A,21228
2
+ rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
3
+ rnapolis/common.py,sha256=DPmRpNkMaxuIai3vfLzSlP6IN0zpj6kmT3LoRjnJUWE,26440
4
+ rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
5
+ rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
6
+ rnapolis/parser.py,sha256=Z3Zd_IuRyOP45x5BStgu7UgoyHthhw55fT3udHUhAE4,11905
7
+ rnapolis/tertiary.py,sha256=iWMPD9c21rjMPpEdBd7mPCQgds65IbOr4_Fy06s0NoU,18957
8
+ rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
9
+ rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
10
+ RNApolis-0.2.0.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
11
+ RNApolis-0.2.0.dist-info/METADATA,sha256=-ApE3-kETFzu512tLPAQa7MJjY7tuT-qyHLkPMBhI7A,1148
12
+ RNApolis-0.2.0.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
13
+ RNApolis-0.2.0.dist-info/entry_points.txt,sha256=113HwzaWSIoCvcPxkg_j2TIbnezP4_7akpTNr5n3Zjg,220
14
+ RNApolis-0.2.0.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
15
+ RNApolis-0.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.1)
2
+ Generator: bdist_wheel (0.41.3)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
rnapolis/annotator.py CHANGED
@@ -5,7 +5,7 @@ import logging
5
5
  import math
6
6
  import os
7
7
  from collections import Counter, defaultdict
8
- from typing import IO, Dict, List, Optional, Set, Tuple
8
+ from typing import Dict, List, Optional, Set, Tuple
9
9
 
10
10
  import numpy
11
11
  import numpy.typing
@@ -15,6 +15,7 @@ from scipy.spatial import KDTree
15
15
 
16
16
  from rnapolis.common import (
17
17
  BR,
18
+ BaseInteractions,
18
19
  BasePair,
19
20
  BasePhosphate,
20
21
  BaseRibose,
@@ -471,19 +472,27 @@ def find_stackings(structure: Structure3D, model: int = 1) -> List[Stacking]:
471
472
  return stackings
472
473
 
473
474
 
474
- def extract_secondary_structure(
475
- tertiary_structure: Structure3D, model: int = 1, find_gaps: bool = False
476
- ) -> Structure2D:
475
+ def extract_base_interactions(
476
+ tertiary_structure: Structure3D, model: int = 1
477
+ ) -> BaseInteractions:
477
478
  base_pairs, base_phosphate, base_ribose = find_pairs(tertiary_structure, model)
478
479
  stackings = find_stackings(tertiary_structure, model)
479
- mapping = Mapping2D3D(tertiary_structure, base_pairs, stackings, find_gaps)
480
+ return BaseInteractions(base_pairs, stackings, base_ribose, base_phosphate, [])
481
+
482
+
483
+ def extract_secondary_structure(
484
+ tertiary_structure: Structure3D, model: int = 1, find_gaps: bool = False
485
+ ) -> BaseInteractions:
486
+ base_interactions = extract_base_interactions(tertiary_structure, model)
487
+ mapping = Mapping2D3D(
488
+ tertiary_structure,
489
+ base_interactions.basePairs,
490
+ base_interactions.stackings,
491
+ find_gaps,
492
+ )
480
493
  stems, single_strands, hairpins, loops = mapping.bpseq.elements
481
494
  return Structure2D(
482
- base_pairs,
483
- stackings,
484
- base_ribose,
485
- base_phosphate,
486
- [],
495
+ base_interactions,
487
496
  str(mapping.bpseq),
488
497
  mapping.dot_bracket,
489
498
  mapping.extended_dot_bracket,
@@ -494,12 +503,12 @@ def extract_secondary_structure(
494
503
  )
495
504
 
496
505
 
497
- def write_json(path: str, structure2d: Structure2D):
506
+ def write_json(path: str, structure2d: BaseInteractions):
498
507
  with open(path, "wb") as f:
499
508
  f.write(orjson.dumps(structure2d))
500
509
 
501
510
 
502
- def write_csv(path: str, structure2d: Structure2D):
511
+ def write_csv(path: str, structure2d: BaseInteractions):
503
512
  with open(path, "w") as f:
504
513
  writer = csv.writer(f)
505
514
  writer.writerow(["nt1", "nt2", "type", "classification-1", "classification-2"])
@@ -582,6 +591,7 @@ def main():
582
591
  help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
583
592
  f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
584
593
  )
594
+ parser.add_argument("--dot", help="(optional) path to output DOT file")
585
595
  args = parser.parse_args()
586
596
 
587
597
  file = handle_input_file(args.input)
@@ -602,6 +612,9 @@ def main():
602
612
  else:
603
613
  print(structure2d.dotBracket)
604
614
 
615
+ if args.dot:
616
+ print(BpSeq.from_string(structure2d.bpseq).graphviz)
617
+
605
618
 
606
619
  if __name__ == "__main__":
607
620
  main()
rnapolis/common.py CHANGED
@@ -9,6 +9,7 @@ from enum import Enum
9
9
  from functools import cache, cached_property, total_ordering
10
10
  from typing import Dict, List, Optional, Tuple
11
11
 
12
+ import graphviz
12
13
  import pulp
13
14
 
14
15
  LOGLEVEL = os.environ.get("LOGLEVEL", "INFO").upper()
@@ -609,13 +610,14 @@ class BpSeq:
609
610
  continue
610
611
 
611
612
  loop = [loop_candidates[i]]
612
- used.add(i)
613
613
 
614
614
  while True:
615
615
  for j in graph[i]:
616
- if j not in used:
616
+ if (
617
+ loop_candidates[j] not in used
618
+ and loop_candidates[j] not in loop
619
+ ):
617
620
  loop.append(loop_candidates[j])
618
- used.add(j)
619
621
  i = j
620
622
  break
621
623
  else:
@@ -624,13 +626,71 @@ class BpSeq:
624
626
  if self.entries[loop[0].first - 1].pair == loop[-1].last:
625
627
  if not all([strand.last - strand.first <= 1 for strand in loop]):
626
628
  loops.append(Loop(loop))
629
+ used.update(loop)
627
630
 
628
- for i in range(len(loop_candidates)):
629
- if i not in used:
630
- single_strands.append(SingleStrand(loop_candidates[i], False, False))
631
+ for loop_candidate in loop_candidates:
632
+ if loop_candidate not in used:
633
+ single_strands.append(SingleStrand(loop_candidate, False, False))
631
634
 
632
635
  return stems, single_strands, hairpins, loops
633
636
 
637
+ @cached_property
638
+ def graphviz(self):
639
+ stems, single_strands, hairpins, loops = self.elements
640
+ graph = defaultdict(set)
641
+ dot = graphviz.Graph()
642
+
643
+ for single_strand in single_strands:
644
+ graph[str(single_strand)].update(
645
+ [
646
+ single_strand.strand.first,
647
+ single_strand.strand.last,
648
+ ]
649
+ )
650
+
651
+ for stem in stems:
652
+ if stem.strand5p.first == stem.strand5p.last:
653
+ continue
654
+ graph[str(stem)].update(
655
+ [
656
+ stem.strand5p.first,
657
+ stem.strand5p.last,
658
+ stem.strand3p.first,
659
+ stem.strand3p.last,
660
+ ]
661
+ )
662
+
663
+ for hairpin in hairpins:
664
+ graph[str(hairpin)].update(
665
+ [
666
+ hairpin.strand.first,
667
+ hairpin.strand.last,
668
+ ]
669
+ )
670
+
671
+ for loop in loops:
672
+ stops = set()
673
+ for strand in loop.strands:
674
+ stops.update(
675
+ [
676
+ strand.first,
677
+ strand.last,
678
+ ]
679
+ )
680
+ graph[str(loop)].update(stops)
681
+
682
+ for i, element in enumerate(graph.keys()):
683
+ dot.node(f"E{i}", str(element))
684
+
685
+ keys = list(graph.keys())
686
+
687
+ for i in range(len(keys)):
688
+ for j in range(i + 1, len(keys)):
689
+ if graph[keys[i]].intersection(graph[keys[j]]):
690
+ dot.edge(f"E{i}", f"E{j}")
691
+
692
+ return dot.render()
693
+
634
694
  @cached_property
635
695
  def __regions(self) -> List[Tuple[int, int, int]]:
636
696
  return [
@@ -749,18 +809,8 @@ class BpSeq:
749
809
  # build dot-bracket
750
810
  sequence = self.sequence
751
811
  structure = ["." for _ in range(len(sequence))]
752
- brackets = [
753
- "()",
754
- "[]",
755
- "{}",
756
- "<>",
757
- "Aa",
758
- "Bb",
759
- "Cc",
760
- "Dd",
761
- "Ee",
762
- "Ff",
763
- "Gg",
812
+ brackets = ["()", "[]", "{}", "<>"] + [
813
+ "".join(p) for p in zip(string.ascii_uppercase, string.ascii_lowercase)
764
814
  ]
765
815
 
766
816
  for i, stem in enumerate(regions):
@@ -829,8 +879,8 @@ class DotBracket:
829
879
  def __post_init__(self):
830
880
  self.pairs = []
831
881
 
832
- opening = "([{<ABC"
833
- closing = ")]}>abc"
882
+ opening = "([{<" + string.ascii_uppercase
883
+ closing = ")]}>" + string.ascii_lowercase
834
884
  begins = {bracket: list() for bracket in opening}
835
885
  matches = {end: begin for begin, end in zip(opening, closing)}
836
886
 
@@ -847,12 +897,17 @@ class DotBracket:
847
897
 
848
898
 
849
899
  @dataclass(frozen=True, order=True)
850
- class Structure2D:
900
+ class BaseInteractions:
851
901
  basePairs: List[BasePair]
852
902
  stackings: List[Stacking]
853
903
  baseRiboseInteractions: List[BaseRibose]
854
904
  basePhosphateInteractions: List[BasePhosphate]
855
905
  otherInteractions: List[OtherInteraction]
906
+
907
+
908
+ @dataclass(frozen=True, order=True)
909
+ class Structure2D:
910
+ baseInteractions: BaseInteractions
856
911
  bpseq: str
857
912
  dotBracket: str
858
913
  extendedDotBracket: str
@@ -1,14 +1,8 @@
1
1
  #! /usr/bin/env python
2
2
  import argparse
3
3
  import itertools
4
- from typing import IO, Dict, List
5
-
6
- import orjson
7
- from mmcif.io.IoAdapterPy import IoAdapterPy
8
- from mmcif.io.PdbxReader import DataContainer
9
4
 
10
5
  from rnapolis.common import BpSeq, DotBracket
11
- from rnapolis.util import handle_input_file
12
6
 
13
7
 
14
8
  def main():
rnapolis/tertiary.py CHANGED
@@ -1,7 +1,5 @@
1
- import itertools
2
1
  import logging
3
2
  import math
4
- import string
5
3
  from collections import defaultdict
6
4
  from dataclasses import dataclass, field
7
5
  from functools import cached_property, total_ordering
@@ -20,7 +18,6 @@ from rnapolis.common import (
20
18
  ResidueAuth,
21
19
  ResidueLabel,
22
20
  Stacking,
23
- Structure2D,
24
21
  )
25
22
 
26
23
  BASE_ATOMS = {
@@ -1,15 +0,0 @@
1
- rnapolis/annotator.py,sha256=CLdwuq0Y42unPd3W-NKIqy4JMCSanzUj6GmP9VnfRb4,20766
2
- rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
3
- rnapolis/common.py,sha256=PRCIuHp7H6Gq38yoLZSaRcR8XvObSo_RYc9C5TsD7uA,24620
4
- rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
5
- rnapolis/motif_extractor.py,sha256=VJV1jUPschb_YZTwIatoqF3JQ_jc8uygQZGBao8k9wo,958
6
- rnapolis/parser.py,sha256=Z3Zd_IuRyOP45x5BStgu7UgoyHthhw55fT3udHUhAE4,11905
7
- rnapolis/tertiary.py,sha256=PuW2TZJuXzrDvYpR4fnFzsvyOvZlRv2b97N9sUPH1vQ,19005
8
- rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
9
- rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
10
- RNApolis-0.1.4.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
11
- RNApolis-0.1.4.dist-info/METADATA,sha256=mQbEA9rXCrbmgwParYgzB4HK9BAWnSJvHpzqieUf0Vs,1124
12
- RNApolis-0.1.4.dist-info/WHEEL,sha256=5sUXSg9e4bi7lTLOHcm6QEYwO5TIF1TNbTSVFVjcJcc,92
13
- RNApolis-0.1.4.dist-info/entry_points.txt,sha256=113HwzaWSIoCvcPxkg_j2TIbnezP4_7akpTNr5n3Zjg,220
14
- RNApolis-0.1.4.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
15
- RNApolis-0.1.4.dist-info/RECORD,,