PyPI - RNApolis - Versions diffs - 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl - Mend

RNApolis 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

rnapolis/adapter.py +791 -37
rnapolis/annotator.py +101 -27
rnapolis/distiller.py +1119 -0
rnapolis/parser.py +4 -0
rnapolis/tertiary_v2.py +482 -18
{rnapolis-0.9.1.dist-info → rnapolis-0.10.0.dist-info}/METADATA +4 -1
{rnapolis-0.9.1.dist-info → rnapolis-0.10.0.dist-info}/RECORD +11 -10
{rnapolis-0.9.1.dist-info → rnapolis-0.10.0.dist-info}/entry_points.txt +1 -0
{rnapolis-0.9.1.dist-info → rnapolis-0.10.0.dist-info}/WHEEL +0 -0
{rnapolis-0.9.1.dist-info → rnapolis-0.10.0.dist-info}/licenses/LICENSE +0 -0
{rnapolis-0.9.1.dist-info → rnapolis-0.10.0.dist-info}/top_level.txt +0 -0

rnapolis/annotator.py CHANGED Viewed

@@ -24,6 +24,7 @@ from rnapolis.common import (
     BPh,
     BpSeq,
     LeontisWesthof,
+    OtherInteraction,
     Residue,
     Saenger,
     Stacking,
@@ -660,12 +661,6 @@ def write_bpseq(path: str, bpseq: BpSeq):
 def add_common_output_arguments(parser: argparse.ArgumentParser):
     """Adds common output and processing arguments to the parser."""
-    parser.add_argument(
-        "-a",
-        "--all-dot-brackets",
-        action="store_true",
-        help="(optional) print all dot-brackets, not only optimal one (exclusive with -e/--extended)",
-    )
     parser.add_argument("-b", "--bpseq", help="(optional) path to output BPSEQ file")
     parser.add_argument("-c", "--csv", help="(optional) path to output CSV file")
     parser.add_argument(
@@ -693,42 +688,126 @@ def add_common_output_arguments(parser: argparse.ArgumentParser):
     )
+def unify_structure_data(structure2d: Structure2D, mapping: Mapping2D3D) -> Structure2D:
+    """
+    Unify structure data by:
+    1. Adding missing Saenger classifications to base pairs
+    2. Filling in empty residue labels from Structure3D
+    """
+    # Create a mapping from residue to residue3d for label filling
+    residue_to_residue3d = {}
+    for residue3d in mapping.structure3d.residues:
+        residue_key = Residue(residue3d.label, residue3d.auth)
+        residue_to_residue3d[residue_key] = residue3d
+    def fill_residue_label(residue: Residue) -> Residue:
+        """Fill empty label from Structure3D if available."""
+        if residue.label is not None:
+            return residue
+        # Try to find matching residue3d by auth
+        for residue3d in mapping.structure3d.residues:
+            if residue.auth == residue3d.auth:
+                return Residue(residue3d.label, residue.auth)
+        return residue
+    # Process base pairs
+    unified_base_pairs = []
+    for base_pair in structure2d.base_pairs:
+        # Fill in missing labels
+        nt1 = fill_residue_label(base_pair.nt1)
+        nt2 = fill_residue_label(base_pair.nt2)
+        # Detect missing Saenger classification
+        saenger = base_pair.saenger
+        if saenger is None:
+            # Find corresponding 3D residues for Saenger detection
+            residue3d_1 = residue_to_residue3d.get(Residue(nt1.label, nt1.auth))
+            residue3d_2 = residue_to_residue3d.get(Residue(nt2.label, nt2.auth))
+            if residue3d_1 is not None and residue3d_2 is not None:
+                saenger = detect_saenger(residue3d_1, residue3d_2, base_pair.lw)
+        unified_base_pairs.append(BasePair(nt1, nt2, base_pair.lw, saenger))
+    # Process other interaction types (fill labels only)
+    unified_stackings = []
+    for stacking in structure2d.stackings:
+        nt1 = fill_residue_label(stacking.nt1)
+        nt2 = fill_residue_label(stacking.nt2)
+        unified_stackings.append(Stacking(nt1, nt2, stacking.topology))
+    unified_base_ribose = []
+    for base_ribose in structure2d.base_ribose_interactions:
+        nt1 = fill_residue_label(base_ribose.nt1)
+        nt2 = fill_residue_label(base_ribose.nt2)
+        unified_base_ribose.append(BaseRibose(nt1, nt2, base_ribose.br))
+    unified_base_phosphate = []
+    for base_phosphate in structure2d.base_phosphate_interactions:
+        nt1 = fill_residue_label(base_phosphate.nt1)
+        nt2 = fill_residue_label(base_phosphate.nt2)
+        unified_base_phosphate.append(BasePhosphate(nt1, nt2, base_phosphate.bph))
+    unified_other = []
+    for other in structure2d.other_interactions:
+        nt1 = fill_residue_label(other.nt1)
+        nt2 = fill_residue_label(other.nt2)
+        unified_other.append(OtherInteraction(nt1, nt2))
+    # Create new Structure2D with unified data
+    unified_base_interactions = BaseInteractions(
+        unified_base_pairs,
+        unified_stackings,
+        unified_base_ribose,
+        unified_base_phosphate,
+        unified_other,
+    )
+    # Recreate Structure2D with unified interactions
+    unified_structure2d, _ = mapping.structure3d.extract_secondary_structure(
+        unified_base_interactions, False
+    )
+    return unified_structure2d
 def handle_output_arguments(
     args: argparse.Namespace,
     structure2d: Structure2D,
-    dot_brackets: List[str],
     mapping: Mapping2D3D,
     input_filename: str,
 ):
     """Handles writing output based on provided arguments."""
+    # Unify the structure data before processing outputs
+    unified_structure2d = unify_structure_data(structure2d, mapping)
     input_basename = os.path.basename(input_filename)
     if args.csv:
-        write_csv(args.csv, structure2d)
+        write_csv(args.csv, unified_structure2d)
     if args.json:
-        write_json(args.json, structure2d)
+        write_json(args.json, unified_structure2d)
     if args.bpseq:
-        write_bpseq(args.bpseq, structure2d.bpseq)
+        write_bpseq(args.bpseq, unified_structure2d.bpseq)
     if args.extended:
-        print(structure2d.extended_dot_bracket)
-    elif args.all_dot_brackets:
-        for dot_bracket in dot_brackets:
-            print(dot_bracket)
+        print(unified_structure2d.extended_dot_bracket)
     else:
-        print(structure2d.dot_bracket)
+        print(unified_structure2d.dot_bracket)
     if args.dot:
-        print(BpSeq.from_string(structure2d.bpseq).graphviz)
+        print(BpSeq.from_string(unified_structure2d.bpseq).graphviz)
     if args.pml:
-        pml_script = generate_pymol_script(mapping, structure2d.stems)
+        pml_script = generate_pymol_script(mapping, unified_structure2d.stems)
         with open(args.pml, "w") as f:
             f.write(pml_script)
     if args.inter_stem_csv:
-        if structure2d.inter_stem_parameters:
+        if unified_structure2d.inter_stem_parameters:
             # Convert list of dataclasses to list of dicts
             params_list = [
                 {
@@ -741,7 +820,7 @@ def handle_output_arguments(
                     "min_endpoint_distance_pdf": p.min_endpoint_distance_pdf,
                     "coaxial_probability": p.coaxial_probability,
                 }
-                for p in structure2d.interStemParameters
+                for p in unified_structure2d.interStemParameters
             ]
             df = pd.DataFrame(params_list)
             df["input_basename"] = input_basename
@@ -759,9 +838,9 @@ def handle_output_arguments(
             # pd.DataFrame(columns=['input_basename', 'stem1_idx', ...]).to_csv(args.inter_stem_csv, index=False)
     if args.stems_csv:
-        if structure2d.stems:
+        if unified_structure2d.stems:
             stems_data = []
-            for i, stem in enumerate(structure2d.stems):
+            for i, stem in enumerate(unified_structure2d.stems):
                 try:
                     res5p_first = mapping.bpseq_index_to_residue_map.get(
                         stem.strand5p.first
@@ -842,12 +921,7 @@ def main():
         base_interactions, args.find_gaps
     )
-    if args.all_dot_brackets:
-        dot_brackets = mapping.all_dot_brackets
-    else:
-        dot_brackets = [mapping.dot_bracket]
-    handle_output_arguments(args, structure2d, dot_brackets, mapping, args.input)
+    handle_output_arguments(args, structure2d, mapping, args.input)
 if __name__ == "__main__":

RNApolis 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

RNApolis 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl