RNApolis 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rnapolis/annotator.py CHANGED
@@ -24,6 +24,7 @@ from rnapolis.common import (
24
24
  BPh,
25
25
  BpSeq,
26
26
  LeontisWesthof,
27
+ OtherInteraction,
27
28
  Residue,
28
29
  Saenger,
29
30
  Stacking,
@@ -660,12 +661,6 @@ def write_bpseq(path: str, bpseq: BpSeq):
660
661
 
661
662
  def add_common_output_arguments(parser: argparse.ArgumentParser):
662
663
  """Adds common output and processing arguments to the parser."""
663
- parser.add_argument(
664
- "-a",
665
- "--all-dot-brackets",
666
- action="store_true",
667
- help="(optional) print all dot-brackets, not only optimal one (exclusive with -e/--extended)",
668
- )
669
664
  parser.add_argument("-b", "--bpseq", help="(optional) path to output BPSEQ file")
670
665
  parser.add_argument("-c", "--csv", help="(optional) path to output CSV file")
671
666
  parser.add_argument(
@@ -693,42 +688,126 @@ def add_common_output_arguments(parser: argparse.ArgumentParser):
693
688
  )
694
689
 
695
690
 
691
def unify_structure_data(structure2d: Structure2D, mapping: Mapping2D3D) -> Structure2D:
    """
    Unify structure data before it is handed to the output writers.

    Two normalisations are applied to the interactions of ``structure2d``:

    1. Base pairs whose ``saenger`` is ``None`` get a classification from
       ``detect_saenger`` when both partner residues can be found in
       ``mapping.structure3d.residues``.
    2. Residues whose ``label`` is ``None`` receive the label of the first
       Structure3D residue with an equal ``auth``; residues without a match
       are kept unchanged.

    Args:
        structure2d: Source of base pairs, stackings, base-ribose,
            base-phosphate and other interactions.
        mapping: Provides ``structure3d`` (its ``residues`` and
            ``extract_secondary_structure``).

    Returns:
        The first element of the tuple returned by
        ``mapping.structure3d.extract_secondary_structure`` for the unified
        interactions.
    """
    # Build both lookup tables in one pass over the 3D residues:
    #  * (label, auth) -> residue3d, used for Saenger detection,
    #  * auth -> first residue3d with that auth, used for label filling.
    # The second table replaces a linear scan per residue; setdefault keeps
    # the *first* residue for a given auth, matching what a front-to-back
    # scan would return.
    residue_to_residue3d = {}
    first_residue3d_by_auth = {}
    for residue3d in mapping.structure3d.residues:
        residue_to_residue3d[Residue(residue3d.label, residue3d.auth)] = residue3d
        first_residue3d_by_auth.setdefault(residue3d.auth, residue3d)

    def fill_residue_label(residue: Residue) -> Residue:
        """Fill empty label from Structure3D if available."""
        if residue.label is not None:
            return residue

        match = first_residue3d_by_auth.get(residue.auth)
        if match is None:
            return residue
        return Residue(match.label, residue.auth)

    def fill_pair(interaction):
        """Return both endpoints of an interaction with labels filled in."""
        return (
            fill_residue_label(interaction.nt1),
            fill_residue_label(interaction.nt2),
        )

    # Base pairs: fill labels and detect a missing Saenger classification.
    unified_base_pairs = []
    for base_pair in structure2d.base_pairs:
        nt1, nt2 = fill_pair(base_pair)

        saenger = base_pair.saenger
        if saenger is None:
            # Look up with the (possibly just filled) labels.
            residue3d_1 = residue_to_residue3d.get(Residue(nt1.label, nt1.auth))
            residue3d_2 = residue_to_residue3d.get(Residue(nt2.label, nt2.auth))

            if residue3d_1 is not None and residue3d_2 is not None:
                saenger = detect_saenger(residue3d_1, residue3d_2, base_pair.lw)

        unified_base_pairs.append(BasePair(nt1, nt2, base_pair.lw, saenger))

    # Remaining interaction types only need their labels filled.
    unified_stackings = [
        Stacking(*fill_pair(stacking), stacking.topology)
        for stacking in structure2d.stackings
    ]
    unified_base_ribose = [
        BaseRibose(*fill_pair(base_ribose), base_ribose.br)
        for base_ribose in structure2d.base_ribose_interactions
    ]
    unified_base_phosphate = [
        BasePhosphate(*fill_pair(base_phosphate), base_phosphate.bph)
        for base_phosphate in structure2d.base_phosphate_interactions
    ]
    unified_other = [
        OtherInteraction(*fill_pair(other))
        for other in structure2d.other_interactions
    ]

    unified_base_interactions = BaseInteractions(
        unified_base_pairs,
        unified_stackings,
        unified_base_ribose,
        unified_base_phosphate,
        unified_other,
    )

    # Recreate Structure2D from the unified interactions.
    # NOTE(review): the literal False sits where main() passes args.find_gaps,
    # so gap detection is presumably always disabled here -- confirm that
    # this is intended.
    unified_structure2d, _ = mapping.structure3d.extract_secondary_structure(
        unified_base_interactions, False
    )

    return unified_structure2d
774
+
775
+
696
776
  def handle_output_arguments(
697
777
  args: argparse.Namespace,
698
778
  structure2d: Structure2D,
699
- dot_brackets: List[str],
700
779
  mapping: Mapping2D3D,
701
780
  input_filename: str,
702
781
  ):
703
782
  """Handles writing output based on provided arguments."""
783
+ # Unify the structure data before processing outputs
784
+ unified_structure2d = unify_structure_data(structure2d, mapping)
785
+
704
786
  input_basename = os.path.basename(input_filename)
705
787
  if args.csv:
706
- write_csv(args.csv, structure2d)
788
+ write_csv(args.csv, unified_structure2d)
707
789
 
708
790
  if args.json:
709
- write_json(args.json, structure2d)
791
+ write_json(args.json, unified_structure2d)
710
792
 
711
793
  if args.bpseq:
712
- write_bpseq(args.bpseq, structure2d.bpseq)
794
+ write_bpseq(args.bpseq, unified_structure2d.bpseq)
713
795
 
714
796
  if args.extended:
715
- print(structure2d.extended_dot_bracket)
716
- elif args.all_dot_brackets:
717
- for dot_bracket in dot_brackets:
718
- print(dot_bracket)
797
+ print(unified_structure2d.extended_dot_bracket)
719
798
  else:
720
- print(structure2d.dot_bracket)
799
+ print(unified_structure2d.dot_bracket)
721
800
 
722
801
  if args.dot:
723
- print(BpSeq.from_string(structure2d.bpseq).graphviz)
802
+ print(BpSeq.from_string(unified_structure2d.bpseq).graphviz)
724
803
 
725
804
  if args.pml:
726
- pml_script = generate_pymol_script(mapping, structure2d.stems)
805
+ pml_script = generate_pymol_script(mapping, unified_structure2d.stems)
727
806
  with open(args.pml, "w") as f:
728
807
  f.write(pml_script)
729
808
 
730
809
  if args.inter_stem_csv:
731
- if structure2d.inter_stem_parameters:
810
+ if unified_structure2d.inter_stem_parameters:
732
811
  # Convert list of dataclasses to list of dicts
733
812
  params_list = [
734
813
  {
@@ -741,7 +820,7 @@ def handle_output_arguments(
741
820
  "min_endpoint_distance_pdf": p.min_endpoint_distance_pdf,
742
821
  "coaxial_probability": p.coaxial_probability,
743
822
  }
744
- for p in structure2d.interStemParameters
823
+ for p in unified_structure2d.interStemParameters
745
824
  ]
746
825
  df = pd.DataFrame(params_list)
747
826
  df["input_basename"] = input_basename
@@ -759,9 +838,9 @@ def handle_output_arguments(
759
838
  # pd.DataFrame(columns=['input_basename', 'stem1_idx', ...]).to_csv(args.inter_stem_csv, index=False)
760
839
 
761
840
  if args.stems_csv:
762
- if structure2d.stems:
841
+ if unified_structure2d.stems:
763
842
  stems_data = []
764
- for i, stem in enumerate(structure2d.stems):
843
+ for i, stem in enumerate(unified_structure2d.stems):
765
844
  try:
766
845
  res5p_first = mapping.bpseq_index_to_residue_map.get(
767
846
  stem.strand5p.first
@@ -842,12 +921,7 @@ def main():
842
921
  base_interactions, args.find_gaps
843
922
  )
844
923
 
845
- if args.all_dot_brackets:
846
- dot_brackets = mapping.all_dot_brackets
847
- else:
848
- dot_brackets = [mapping.dot_bracket]
849
-
850
- handle_output_arguments(args, structure2d, dot_brackets, mapping, args.input)
924
+ handle_output_arguments(args, structure2d, mapping, args.input)
851
925
 
852
926
 
853
927
  if __name__ == "__main__":