RNApolis 0.0.16__py3-none-any.whl → 0.1.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.0.16
3
+ Version: 0.1.1
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -0,0 +1,15 @@
1
+ rnapolis/annotator.py,sha256=CLdwuq0Y42unPd3W-NKIqy4JMCSanzUj6GmP9VnfRb4,20766
2
+ rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
3
+ rnapolis/common.py,sha256=o6I51SkBuHxiP9G91Uy42h8_JpCVc2Sd8lGLviCTKAc,24518
4
+ rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
5
+ rnapolis/motif_extractor.py,sha256=5gUSNGeT_jBVzpj3gCaZJaRA4XPUpxJ9NteWns6UMAY,961
6
+ rnapolis/parser.py,sha256=Z3Zd_IuRyOP45x5BStgu7UgoyHthhw55fT3udHUhAE4,11905
7
+ rnapolis/tertiary.py,sha256=fA7ml3zlcWz0xsq4z73m-HGQh3KAkwzv7vVM5ivfgkg,18293
8
+ rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
9
+ rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
10
+ RNApolis-0.1.1.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
11
+ RNApolis-0.1.1.dist-info/METADATA,sha256=0MspfjX3C2UlAe1c1eHVncrtXagp_DlMqA09ABKiS1E,1124
12
+ RNApolis-0.1.1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
13
+ RNApolis-0.1.1.dist-info/entry_points.txt,sha256=113HwzaWSIoCvcPxkg_j2TIbnezP4_7akpTNr5n3Zjg,220
14
+ RNApolis-0.1.1.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
15
+ RNApolis-0.1.1.dist-info/RECORD,,
rnapolis/annotator.py CHANGED
@@ -19,6 +19,7 @@ from rnapolis.common import (
19
19
  BasePhosphate,
20
20
  BaseRibose,
21
21
  BPh,
22
+ BpSeq,
22
23
  LeontisWesthof,
23
24
  Residue,
24
25
  Saenger,
@@ -471,11 +472,26 @@ def find_stackings(structure: Structure3D, model: int = 1) -> List[Stacking]:
471
472
 
472
473
 
473
474
  def extract_secondary_structure(
474
- tertiary_structure: Structure3D, model: int = 1
475
+ tertiary_structure: Structure3D, model: int = 1, find_gaps: bool = False
475
476
  ) -> Structure2D:
476
477
  base_pairs, base_phosphate, base_ribose = find_pairs(tertiary_structure, model)
477
478
  stackings = find_stackings(tertiary_structure, model)
478
- return Structure2D(base_pairs, stackings, base_ribose, base_phosphate, [])
479
+ mapping = Mapping2D3D(tertiary_structure, base_pairs, stackings, find_gaps)
480
+ stems, single_strands, hairpins, loops = mapping.bpseq.elements
481
+ return Structure2D(
482
+ base_pairs,
483
+ stackings,
484
+ base_ribose,
485
+ base_phosphate,
486
+ [],
487
+ str(mapping.bpseq),
488
+ mapping.dot_bracket,
489
+ mapping.extended_dot_bracket,
490
+ stems,
491
+ single_strands,
492
+ hairpins,
493
+ loops,
494
+ )
479
495
 
480
496
 
481
497
  def write_json(path: str, structure2d: Structure2D):
@@ -541,9 +557,9 @@ def write_csv(path: str, structure2d: Structure2D):
541
557
  )
542
558
 
543
559
 
544
- def write_bpseq(path: str, mapping: Mapping2D3D):
560
+ def write_bpseq(path: str, bpseq: BpSeq):
545
561
  with open(path, "w") as f:
546
- f.write(str(mapping.bpseq))
562
+ f.write(str(bpseq))
547
563
 
548
564
 
549
565
  def main():
@@ -563,14 +579,14 @@ def main():
563
579
  parser.add_argument(
564
580
  "--find-gaps",
565
581
  action="store_true",
566
- help=f"(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
567
- "the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
582
+ help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
583
+ f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
568
584
  )
569
585
  args = parser.parse_args()
570
586
 
571
587
  file = handle_input_file(args.input)
572
588
  structure3d = read_3d_structure(file, 1)
573
- structure2d = extract_secondary_structure(structure3d)
589
+ structure2d = extract_secondary_structure(structure3d, 1, args.find_gaps)
574
590
 
575
591
  if args.csv:
576
592
  write_csv(args.csv, structure2d)
@@ -578,15 +594,13 @@ def main():
578
594
  if args.json:
579
595
  write_json(args.json, structure2d)
580
596
 
581
- mapping = Mapping2D3D(structure3d, structure2d, args.find_gaps)
582
-
583
597
  if args.bpseq:
584
- write_bpseq(args.bpseq, mapping)
598
+ write_bpseq(args.bpseq, structure2d.bpseq)
585
599
 
586
600
  if args.extended:
587
- print(mapping.extended_dot_bracket)
601
+ print(structure2d.extendedDotBracket)
588
602
  else:
589
- print(mapping.dot_bracket)
603
+ print(structure2d.dotBracket)
590
604
 
591
605
 
592
606
  if __name__ == "__main__":
rnapolis/common.py CHANGED
@@ -321,15 +321,6 @@ class OtherInteraction(Interaction):
321
321
  pass
322
322
 
323
323
 
324
- @dataclass(frozen=True, order=True)
325
- class Structure2D:
326
- basePairs: List[BasePair]
327
- stackings: List[Stacking]
328
- baseRiboseInteractions: List[BaseRibose]
329
- basePhosphateInteractions: List[BasePhosphate]
330
- otherInteractions: List[OtherInteraction]
331
-
332
-
333
324
  @dataclass
334
325
  class Entry(Sequence):
335
326
  index_: int
@@ -380,12 +371,15 @@ class Strand:
380
371
  return f"{self.first}-{self.sequence}-{self.last}"
381
372
 
382
373
 
383
- @dataclass(frozen=True)
374
+ @dataclass
384
375
  class SingleStrand:
385
376
  strand: Strand
386
377
  is5p: bool
387
378
  is3p: bool
388
379
 
380
+ def __post_init__(self):
381
+ self.description = str(self)
382
+
389
383
  def __str__(self):
390
384
  if self.is5p:
391
385
  return f"SingleStrand5p {self.strand.first} {self.strand.last} {self.strand.sequence} {self.strand.structure}"
@@ -394,7 +388,7 @@ class SingleStrand:
394
388
  return f"SingleStrand {self.strand.first} {self.strand.last} {self.strand.sequence} {self.strand.structure}"
395
389
 
396
390
 
397
- @dataclass(frozen=True)
391
+ @dataclass
398
392
  class Stem:
399
393
  strand5p: Strand
400
394
  strand3p: Strand
@@ -410,22 +404,31 @@ class Stem:
410
404
  Strand.from_bpseq_entries(strand3p_entries, dotbracket),
411
405
  )
412
406
 
407
+ def __post_init__(self):
408
+ self.description = str(self)
409
+
413
410
  def __str__(self):
414
411
  return f"Stem {self.strand5p.first} {self.strand5p.last} {self.strand5p.sequence} {self.strand5p.structure} {self.strand3p.first} {self.strand3p.last} {self.strand3p.sequence} {self.strand3p.structure}"
415
412
 
416
413
 
417
- @dataclass(frozen=True)
414
+ @dataclass
418
415
  class Hairpin:
419
416
  strand: Strand
420
417
 
418
+ def __post_init__(self):
419
+ self.description = str(self)
420
+
421
421
  def __str__(self):
422
422
  return f"Hairpin {self.strand.first} {self.strand.last} {self.strand.sequence} {self.strand.structure}"
423
423
 
424
424
 
425
- @dataclass(frozen=True)
425
+ @dataclass
426
426
  class Loop:
427
427
  strands: List[Strand]
428
428
 
429
+ def __post_init__(self):
430
+ self.description = str(self)
431
+
429
432
  def __str__(self):
430
433
  desc = " ".join(
431
434
  [
@@ -516,30 +519,32 @@ class BpSeq:
516
519
  return stems
517
520
 
518
521
  @cached_property
519
- def elements(self):
522
+ def elements(
523
+ self,
524
+ ) -> Tuple[List[Stem], List[SingleStrand], List[Hairpin], List[Loop]]:
520
525
  if not self.__stems_entries:
521
- return []
526
+ return [], [], [], []
522
527
 
523
- elements = []
524
- stops = set()
528
+ stems, single_strands, hairpins, loops = [], [], [], []
529
+ stopset = set()
525
530
 
526
531
  # stems
527
532
  for stem_entries in self.__stems_entries:
528
533
  stem = Stem.from_bpseq_entries(
529
534
  stem_entries, self.entries, self.to_dot_bracket.structure
530
535
  )
531
- elements.append(stem)
532
- stops.add(stem.strand5p.first - 1)
533
- stops.add(stem.strand5p.last - 1)
534
- stops.add(stem.strand3p.first - 1)
535
- stops.add(stem.strand3p.last - 1)
536
+ stems.append(stem)
537
+ stopset.add(stem.strand5p.first - 1)
538
+ stopset.add(stem.strand5p.last - 1)
539
+ stopset.add(stem.strand3p.first - 1)
540
+ stopset.add(stem.strand3p.last - 1)
536
541
 
537
- stops = sorted(stops)
542
+ stops = sorted(stopset)
538
543
  loop_candidates = []
539
544
 
540
545
  # 5' single strand
541
546
  if stops[0] > 0:
542
- elements.append(
547
+ single_strands.append(
543
548
  SingleStrand(
544
549
  Strand.from_bpseq_entries(
545
550
  self.entries[: stops[0] + 1],
@@ -555,7 +560,7 @@ class BpSeq:
555
560
  candidate = self.entries[stops[i - 1] : stops[i] + 1]
556
561
  if all([entry.pair == 0 for entry in candidate[1:-1]]):
557
562
  if candidate[0].pair == candidate[-1].index_:
558
- elements.append(
563
+ hairpins.append(
559
564
  Hairpin(
560
565
  Strand.from_bpseq_entries(
561
566
  candidate, self.to_dot_bracket.structure
@@ -571,7 +576,7 @@ class BpSeq:
571
576
 
572
577
  # 3' single strand
573
578
  if stops[-1] < len(self.entries) - 1:
574
- elements.append(
579
+ single_strands.append(
575
580
  SingleStrand(
576
581
  Strand.from_bpseq_entries(
577
582
  self.entries[stops[-1] :], self.to_dot_bracket.structure
@@ -613,13 +618,13 @@ class BpSeq:
613
618
 
614
619
  if self.entries[loop[0].first - 1].pair == loop[-1].last:
615
620
  if not all([strand.last - strand.first <= 1 for strand in loop]):
616
- elements.append(Loop(loop))
621
+ loops.append(Loop(loop))
617
622
 
618
623
  for i in range(len(loop_candidates)):
619
624
  if i not in used:
620
- elements.append(SingleStrand(loop_candidates[i]))
625
+ single_strands.append(SingleStrand(loop_candidates[i], False, False))
621
626
 
622
- return elements
627
+ return stems, single_strands, hairpins, loops
623
628
 
624
629
  @cached_property
625
630
  def __regions(self) -> List[Tuple[int, int, int]]:
@@ -675,14 +680,14 @@ class BpSeq:
675
680
  vars_by_region[i].append(variable)
676
681
  vars_by_order[j].append(variable)
677
682
  var_by_region_order[(i, j)] = variable
678
- region_by_var[variable] = i
683
+ region_by_var[variable] = regions[i]
679
684
 
680
685
  # define objective function terms
681
686
  terms = []
682
687
 
683
688
  for order, vars in vars_by_order.items():
684
689
  for var in vars:
685
- length = len(regions[region_by_var[var]])
690
+ length = region_by_var[var][2]
686
691
  if order == 0:
687
692
  terms.append(var * length)
688
693
  else:
@@ -707,6 +712,7 @@ class BpSeq:
707
712
 
708
713
  # solve the problem
709
714
  try:
715
+ logging.debug(f"POA: problem formulation\n{problem}")
710
716
  problem.solve(solver)
711
717
  except pulp.PulpSolverError:
712
718
  logging.warning(
@@ -720,7 +726,7 @@ class BpSeq:
720
726
  return self.fcfs()
721
727
 
722
728
  # log solver time statistics
723
- logging.info(
729
+ logging.debug(
724
730
  f"POA: solver {solver.name} took {round(problem.solutionTime, 2)} seconds"
725
731
  )
726
732
 
@@ -833,3 +839,19 @@ class DotBracket:
833
839
 
834
840
  def __str__(self):
835
841
  return f"{self.sequence}\n{self.structure}"
842
+
843
+
844
+ @dataclass(frozen=True, order=True)
845
+ class Structure2D:
846
+ basePairs: List[BasePair]
847
+ stackings: List[Stacking]
848
+ baseRiboseInteractions: List[BaseRibose]
849
+ basePhosphateInteractions: List[BasePhosphate]
850
+ otherInteractions: List[OtherInteraction]
851
+ bpseq: str
852
+ dotBracket: str
853
+ extendedDotBracket: str
854
+ stems: List[Stem]
855
+ singleStrands: List[SingleStrand]
856
+ hairpins: List[Hairpin]
857
+ loops: List[Loop]
@@ -1,5 +1,6 @@
1
1
  #! /usr/bin/env python
2
2
  import argparse
3
+ import itertools
3
4
  from typing import IO, Dict, List
4
5
 
5
6
  import orjson
@@ -24,7 +25,10 @@ def main():
24
25
  parser.print_help()
25
26
  return
26
27
 
27
- for element in bpseq.elements:
28
+ print(f"Full dot-bracket:\n{bpseq.to_dot_bracket}")
29
+ stems, single_strands, hairpins, loops = bpseq.elements
30
+
31
+ for element in itertools.chain(stems, single_strands, hairpins, loops):
28
32
  print(element)
29
33
 
30
34
 
rnapolis/tertiary.py CHANGED
@@ -388,14 +388,15 @@ class Structure3D:
388
388
  @dataclass
389
389
  class Mapping2D3D:
390
390
  structure3d: Structure3D
391
- structure2d: Structure2D
391
+ base_pairs2d: List[BasePair]
392
+ stackings2d: List[Stacking]
392
393
  find_gaps: bool
393
394
 
394
395
  @cached_property
395
396
  def base_pairs(self) -> List[BasePair3D]:
396
397
  result = []
397
398
  used = set()
398
- for base_pair in self.structure2d.basePairs:
399
+ for base_pair in self.base_pairs2d:
399
400
  nt1 = self.structure3d.find_residue(base_pair.nt1.label, base_pair.nt1.auth)
400
401
  nt2 = self.structure3d.find_residue(base_pair.nt2.label, base_pair.nt2.auth)
401
402
  if nt1 is not None and nt2 is not None:
@@ -439,7 +440,7 @@ class Mapping2D3D:
439
440
  def stackings(self) -> List[Stacking3D]:
440
441
  result = []
441
442
  used = set()
442
- for stacking in self.structure2d.stackings:
443
+ for stacking in self.stackings2d:
443
444
  nt1 = self.structure3d.find_residue(stacking.nt1.label, stacking.nt1.auth)
444
445
  nt2 = self.structure3d.find_residue(stacking.nt2.label, stacking.nt2.auth)
445
446
  if nt1 is not None and nt2 is not None:
@@ -1,15 +0,0 @@
1
- rnapolis/annotator.py,sha256=aaPb1gMbzVv8Jx1iSJ12nW9R-aWE-lq95gZks9RZDfk,20419
2
- rnapolis/clashfinder.py,sha256=jD3s_UovygWi01NUbQNeAeRRFkARTSRraLXUV43UbAA,8514
3
- rnapolis/common.py,sha256=_ylHz3KdB75cugX_7wTzqbPQChAUKlnuThsvMLuxsfM,23859
4
- rnapolis/metareader.py,sha256=4qtMKRvww2sUStLeV8WVrLEt-ScydHUv4Gxx96tnf-M,1683
5
- rnapolis/motif_extractor.py,sha256=nz3aenyhPBZNuWzdHOCpFG95SLqf5yLhQQi1Bx-i2H8,786
6
- rnapolis/parser.py,sha256=Z3Zd_IuRyOP45x5BStgu7UgoyHthhw55fT3udHUhAE4,11905
7
- rnapolis/tertiary.py,sha256=DcY4OPRg9d05L9Fr9G_1KTbMwkNVTyEPnLkJOPODlAM,18276
8
- rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
9
- rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
10
- RNApolis-0.0.16.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
11
- RNApolis-0.0.16.dist-info/METADATA,sha256=ODBaX0GLTPv7WaCiFdJErR9dCdeOV50N7sjkf1U1sFs,1125
12
- RNApolis-0.0.16.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
13
- RNApolis-0.0.16.dist-info/entry_points.txt,sha256=113HwzaWSIoCvcPxkg_j2TIbnezP4_7akpTNr5n3Zjg,220
14
- RNApolis-0.0.16.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
15
- RNApolis-0.0.16.dist-info/RECORD,,