RNApolis 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.1.4
3
+ Version: 0.2.0
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -15,6 +15,7 @@ Classifier: Programming Language :: Python :: 3
15
15
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
+ Requires-Dist: graphviz
18
19
  Requires-Dist: mmcif
19
20
  Requires-Dist: numpy
20
21
  Requires-Dist: ordered-set
@@ -0,0 +1,15 @@
1
+ rnapolis/annotator.py,sha256=XtEJrog8c3zZ-pLlGBytJExGahPstGd-nbPLKQLa58A,21228
2
+ rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
3
+ rnapolis/common.py,sha256=DPmRpNkMaxuIai3vfLzSlP6IN0zpj6kmT3LoRjnJUWE,26440
4
+ rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
5
+ rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
6
+ rnapolis/parser.py,sha256=Z3Zd_IuRyOP45x5BStgu7UgoyHthhw55fT3udHUhAE4,11905
7
+ rnapolis/tertiary.py,sha256=iWMPD9c21rjMPpEdBd7mPCQgds65IbOr4_Fy06s0NoU,18957
8
+ rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
9
+ rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
10
+ RNApolis-0.2.0.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
11
+ RNApolis-0.2.0.dist-info/METADATA,sha256=-ApE3-kETFzu512tLPAQa7MJjY7tuT-qyHLkPMBhI7A,1148
12
+ RNApolis-0.2.0.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
13
+ RNApolis-0.2.0.dist-info/entry_points.txt,sha256=113HwzaWSIoCvcPxkg_j2TIbnezP4_7akpTNr5n3Zjg,220
14
+ RNApolis-0.2.0.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
15
+ RNApolis-0.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.1)
2
+ Generator: bdist_wheel (0.41.3)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
rnapolis/annotator.py CHANGED
@@ -5,7 +5,7 @@ import logging
5
5
  import math
6
6
  import os
7
7
  from collections import Counter, defaultdict
8
- from typing import IO, Dict, List, Optional, Set, Tuple
8
+ from typing import Dict, List, Optional, Set, Tuple
9
9
 
10
10
  import numpy
11
11
  import numpy.typing
@@ -15,6 +15,7 @@ from scipy.spatial import KDTree
15
15
 
16
16
  from rnapolis.common import (
17
17
  BR,
18
+ BaseInteractions,
18
19
  BasePair,
19
20
  BasePhosphate,
20
21
  BaseRibose,
@@ -471,19 +472,27 @@ def find_stackings(structure: Structure3D, model: int = 1) -> List[Stacking]:
471
472
  return stackings
472
473
 
473
474
 
474
- def extract_secondary_structure(
475
- tertiary_structure: Structure3D, model: int = 1, find_gaps: bool = False
476
- ) -> Structure2D:
475
+ def extract_base_interactions(
476
+ tertiary_structure: Structure3D, model: int = 1
477
+ ) -> BaseInteractions:
477
478
  base_pairs, base_phosphate, base_ribose = find_pairs(tertiary_structure, model)
478
479
  stackings = find_stackings(tertiary_structure, model)
479
- mapping = Mapping2D3D(tertiary_structure, base_pairs, stackings, find_gaps)
480
+ return BaseInteractions(base_pairs, stackings, base_ribose, base_phosphate, [])
481
+
482
+
483
+ def extract_secondary_structure(
484
+ tertiary_structure: Structure3D, model: int = 1, find_gaps: bool = False
485
+ ) -> BaseInteractions:
486
+ base_interactions = extract_base_interactions(tertiary_structure, model)
487
+ mapping = Mapping2D3D(
488
+ tertiary_structure,
489
+ base_interactions.basePairs,
490
+ base_interactions.stackings,
491
+ find_gaps,
492
+ )
480
493
  stems, single_strands, hairpins, loops = mapping.bpseq.elements
481
494
  return Structure2D(
482
- base_pairs,
483
- stackings,
484
- base_ribose,
485
- base_phosphate,
486
- [],
495
+ base_interactions,
487
496
  str(mapping.bpseq),
488
497
  mapping.dot_bracket,
489
498
  mapping.extended_dot_bracket,
@@ -494,12 +503,12 @@ def extract_secondary_structure(
494
503
  )
495
504
 
496
505
 
497
- def write_json(path: str, structure2d: Structure2D):
506
+ def write_json(path: str, structure2d: BaseInteractions):
498
507
  with open(path, "wb") as f:
499
508
  f.write(orjson.dumps(structure2d))
500
509
 
501
510
 
502
- def write_csv(path: str, structure2d: Structure2D):
511
+ def write_csv(path: str, structure2d: BaseInteractions):
503
512
  with open(path, "w") as f:
504
513
  writer = csv.writer(f)
505
514
  writer.writerow(["nt1", "nt2", "type", "classification-1", "classification-2"])
@@ -582,6 +591,7 @@ def main():
582
591
  help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
583
592
  f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
584
593
  )
594
+ parser.add_argument("--dot", help="(optional) path to output DOT file")
585
595
  args = parser.parse_args()
586
596
 
587
597
  file = handle_input_file(args.input)
@@ -602,6 +612,9 @@ def main():
602
612
  else:
603
613
  print(structure2d.dotBracket)
604
614
 
615
+ if args.dot:
616
+ print(BpSeq.from_string(structure2d.bpseq).graphviz)
617
+
605
618
 
606
619
  if __name__ == "__main__":
607
620
  main()
rnapolis/common.py CHANGED
@@ -9,6 +9,7 @@ from enum import Enum
9
9
  from functools import cache, cached_property, total_ordering
10
10
  from typing import Dict, List, Optional, Tuple
11
11
 
12
+ import graphviz
12
13
  import pulp
13
14
 
14
15
  LOGLEVEL = os.environ.get("LOGLEVEL", "INFO").upper()
@@ -609,13 +610,14 @@ class BpSeq:
609
610
  continue
610
611
 
611
612
  loop = [loop_candidates[i]]
612
- used.add(i)
613
613
 
614
614
  while True:
615
615
  for j in graph[i]:
616
- if j not in used:
616
+ if (
617
+ loop_candidates[j] not in used
618
+ and loop_candidates[j] not in loop
619
+ ):
617
620
  loop.append(loop_candidates[j])
618
- used.add(j)
619
621
  i = j
620
622
  break
621
623
  else:
@@ -624,13 +626,71 @@ class BpSeq:
624
626
  if self.entries[loop[0].first - 1].pair == loop[-1].last:
625
627
  if not all([strand.last - strand.first <= 1 for strand in loop]):
626
628
  loops.append(Loop(loop))
629
+ used.update(loop)
627
630
 
628
- for i in range(len(loop_candidates)):
629
- if i not in used:
630
- single_strands.append(SingleStrand(loop_candidates[i], False, False))
631
+ for loop_candidate in loop_candidates:
632
+ if loop_candidate not in used:
633
+ single_strands.append(SingleStrand(loop_candidate, False, False))
631
634
 
632
635
  return stems, single_strands, hairpins, loops
633
636
 
637
+ @cached_property
638
+ def graphviz(self):
639
+ stems, single_strands, hairpins, loops = self.elements
640
+ graph = defaultdict(set)
641
+ dot = graphviz.Graph()
642
+
643
+ for single_strand in single_strands:
644
+ graph[str(single_strand)].update(
645
+ [
646
+ single_strand.strand.first,
647
+ single_strand.strand.last,
648
+ ]
649
+ )
650
+
651
+ for stem in stems:
652
+ if stem.strand5p.first == stem.strand5p.last:
653
+ continue
654
+ graph[str(stem)].update(
655
+ [
656
+ stem.strand5p.first,
657
+ stem.strand5p.last,
658
+ stem.strand3p.first,
659
+ stem.strand3p.last,
660
+ ]
661
+ )
662
+
663
+ for hairpin in hairpins:
664
+ graph[str(hairpin)].update(
665
+ [
666
+ hairpin.strand.first,
667
+ hairpin.strand.last,
668
+ ]
669
+ )
670
+
671
+ for loop in loops:
672
+ stops = set()
673
+ for strand in loop.strands:
674
+ stops.update(
675
+ [
676
+ strand.first,
677
+ strand.last,
678
+ ]
679
+ )
680
+ graph[str(loop)].update(stops)
681
+
682
+ for i, element in enumerate(graph.keys()):
683
+ dot.node(f"E{i}", str(element))
684
+
685
+ keys = list(graph.keys())
686
+
687
+ for i in range(len(keys)):
688
+ for j in range(i + 1, len(keys)):
689
+ if graph[keys[i]].intersection(graph[keys[j]]):
690
+ dot.edge(f"E{i}", f"E{j}")
691
+
692
+ return dot.render()
693
+
634
694
  @cached_property
635
695
  def __regions(self) -> List[Tuple[int, int, int]]:
636
696
  return [
@@ -749,18 +809,8 @@ class BpSeq:
749
809
  # build dot-bracket
750
810
  sequence = self.sequence
751
811
  structure = ["." for _ in range(len(sequence))]
752
- brackets = [
753
- "()",
754
- "[]",
755
- "{}",
756
- "<>",
757
- "Aa",
758
- "Bb",
759
- "Cc",
760
- "Dd",
761
- "Ee",
762
- "Ff",
763
- "Gg",
812
+ brackets = ["()", "[]", "{}", "<>"] + [
813
+ "".join(p) for p in zip(string.ascii_uppercase, string.ascii_lowercase)
764
814
  ]
765
815
 
766
816
  for i, stem in enumerate(regions):
@@ -829,8 +879,8 @@ class DotBracket:
829
879
  def __post_init__(self):
830
880
  self.pairs = []
831
881
 
832
- opening = "([{<ABC"
833
- closing = ")]}>abc"
882
+ opening = "([{<" + string.ascii_uppercase
883
+ closing = ")]}>" + string.ascii_lowercase
834
884
  begins = {bracket: list() for bracket in opening}
835
885
  matches = {end: begin for begin, end in zip(opening, closing)}
836
886
 
@@ -847,12 +897,17 @@ class DotBracket:
847
897
 
848
898
 
849
899
  @dataclass(frozen=True, order=True)
850
- class Structure2D:
900
+ class BaseInteractions:
851
901
  basePairs: List[BasePair]
852
902
  stackings: List[Stacking]
853
903
  baseRiboseInteractions: List[BaseRibose]
854
904
  basePhosphateInteractions: List[BasePhosphate]
855
905
  otherInteractions: List[OtherInteraction]
906
+
907
+
908
+ @dataclass(frozen=True, order=True)
909
+ class Structure2D:
910
+ baseInteractions: BaseInteractions
856
911
  bpseq: str
857
912
  dotBracket: str
858
913
  extendedDotBracket: str
@@ -1,14 +1,8 @@
1
1
  #! /usr/bin/env python
2
2
  import argparse
3
3
  import itertools
4
- from typing import IO, Dict, List
5
-
6
- import orjson
7
- from mmcif.io.IoAdapterPy import IoAdapterPy
8
- from mmcif.io.PdbxReader import DataContainer
9
4
 
10
5
  from rnapolis.common import BpSeq, DotBracket
11
- from rnapolis.util import handle_input_file
12
6
 
13
7
 
14
8
  def main():
rnapolis/tertiary.py CHANGED
@@ -1,7 +1,5 @@
1
- import itertools
2
1
  import logging
3
2
  import math
4
- import string
5
3
  from collections import defaultdict
6
4
  from dataclasses import dataclass, field
7
5
  from functools import cached_property, total_ordering
@@ -20,7 +18,6 @@ from rnapolis.common import (
20
18
  ResidueAuth,
21
19
  ResidueLabel,
22
20
  Stacking,
23
- Structure2D,
24
21
  )
25
22
 
26
23
  BASE_ATOMS = {
@@ -1,15 +0,0 @@
1
- rnapolis/annotator.py,sha256=CLdwuq0Y42unPd3W-NKIqy4JMCSanzUj6GmP9VnfRb4,20766
2
- rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
3
- rnapolis/common.py,sha256=PRCIuHp7H6Gq38yoLZSaRcR8XvObSo_RYc9C5TsD7uA,24620
4
- rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
5
- rnapolis/motif_extractor.py,sha256=VJV1jUPschb_YZTwIatoqF3JQ_jc8uygQZGBao8k9wo,958
6
- rnapolis/parser.py,sha256=Z3Zd_IuRyOP45x5BStgu7UgoyHthhw55fT3udHUhAE4,11905
7
- rnapolis/tertiary.py,sha256=PuW2TZJuXzrDvYpR4fnFzsvyOvZlRv2b97N9sUPH1vQ,19005
8
- rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
9
- rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
10
- RNApolis-0.1.4.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
11
- RNApolis-0.1.4.dist-info/METADATA,sha256=mQbEA9rXCrbmgwParYgzB4HK9BAWnSJvHpzqieUf0Vs,1124
12
- RNApolis-0.1.4.dist-info/WHEEL,sha256=5sUXSg9e4bi7lTLOHcm6QEYwO5TIF1TNbTSVFVjcJcc,92
13
- RNApolis-0.1.4.dist-info/entry_points.txt,sha256=113HwzaWSIoCvcPxkg_j2TIbnezP4_7akpTNr5n3Zjg,220
14
- RNApolis-0.1.4.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
15
- RNApolis-0.1.4.dist-info/RECORD,,