PyPI - RNApolis - Versions diffs - 0.8.1__py3-none-any.whl → 0.8.2__py3-none-any.whl - Mend

RNApolis 0.8.1 (py3-none-any.whl) → 0.8.2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

rnapolis/adapter.py +27 -117
rnapolis/annotator.py +256 -20
rnapolis/common.py +13 -0
rnapolis/tertiary.py +383 -10
{rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/METADATA +1 -1
{rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/RECORD +10 -10
{rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/WHEEL +0 -0
{rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/entry_points.txt +0 -0
{rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/licenses/LICENSE +0 -0
{rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/top_level.txt +0 -0

rnapolis/adapter.py CHANGED Viewed

@@ -1,6 +1,5 @@
 #! /usr/bin/env python
 import argparse
-import csv
 import logging
 import os
 from enum import Enum
@@ -8,6 +7,10 @@ from typing import Dict, List, Optional, Tuple
 import orjson
+from rnapolis.annotator import (
+    add_common_output_arguments,
+    handle_output_arguments,
+)
 from rnapolis.common import (
     BR,
     BaseInteractions,
@@ -15,7 +18,6 @@ from rnapolis.common import (
     BasePhosphate,
     BaseRibose,
     BPh,
-    BpSeq,
     LeontisWesthof,
     OtherInteraction,
     Residue,
@@ -25,7 +27,11 @@ from rnapolis.common import (
     Structure2D,
 )
 from rnapolis.parser import read_3d_structure
-from rnapolis.tertiary import Mapping2D3D, Structure3D
+from rnapolis.tertiary import (
+    Mapping2D3D,
+    Structure3D,
+    calculate_all_inter_stem_parameters,  # Import the new helper function
+)
 from rnapolis.util import handle_input_file
@@ -317,7 +323,7 @@ def process_external_tool_output(
     model: Optional[int] = None,
     find_gaps: bool = False,
     all_dot_brackets: bool = False,
-) -> Tuple[Structure2D, List[str]]:
+) -> Tuple[Structure2D, List[str], Mapping2D3D]:  # Added Mapping2D3D to return tuple
     """
     Process external tool output and create a secondary structure representation.
@@ -333,7 +339,8 @@ def process_external_tool_output(
         all_dot_brackets: Whether to return all possible dot-bracket notations
     Returns:
-        A tuple containing the Structure2D object and a list of dot-bracket notations
+        A tuple containing the Structure2D object, a list of dot-bracket notations,
+        and the Mapping2D3D object.
     """
     # Parse external tool output
     base_interactions = parse_external_output(external_file_path, tool, structure3d)
@@ -350,7 +357,7 @@ def extract_secondary_structure_from_external(
     model: Optional[int] = None,
     find_gaps: bool = False,
     all_dot_brackets: bool = False,
-) -> Tuple[Structure2D, List[str]]:
+) -> Tuple[Structure2D, List[str], Mapping2D3D]:  # Added Mapping2D3D to return tuple
     """
     Create a secondary structure representation using interactions from an external tool.
@@ -362,7 +369,8 @@ def extract_secondary_structure_from_external(
         all_dot_brackets: Whether to return all possible dot-bracket notations
     Returns:
-        A tuple containing the Structure2D object and a list of dot-bracket notations
+        A tuple containing the Structure2D object, a list of dot-bracket notations,
+        and the Mapping2D3D object.
     """
     mapping = Mapping2D3D(
         tertiary_structure,
@@ -371,6 +379,10 @@ def extract_secondary_structure_from_external(
         find_gaps,
     )
     stems, single_strands, hairpins, loops = mapping.bpseq.elements
+    # Calculate inter-stem parameters using the helper function
+    inter_stem_params = calculate_all_inter_stem_parameters(mapping)
     structure2d = Structure2D(
         base_interactions,
         str(mapping.bpseq),
@@ -380,81 +392,15 @@ def extract_secondary_structure_from_external(
         single_strands,
         hairpins,
         loops,
+        inter_stem_params,  # Added inter-stem parameters
     )
     if all_dot_brackets:
-        return structure2d, mapping.all_dot_brackets
+        return structure2d, mapping.all_dot_brackets, mapping  # Return mapping
     else:
-        return structure2d, [structure2d.dotBracket]
-def write_json(path: str, structure2d: BaseInteractions):
-    with open(path, "wb") as f:
-        f.write(orjson.dumps(structure2d))
-def write_csv(path: str, structure2d: Structure2D):
-    with open(path, "w") as f:
-        writer = csv.writer(f)
-        writer.writerow(["nt1", "nt2", "type", "classification-1", "classification-2"])
-        for base_pair in structure2d.baseInteractions.basePairs:
-            writer.writerow(
-                [
-                    base_pair.nt1.full_name,
-                    base_pair.nt2.full_name,
-                    "base pair",
-                    base_pair.lw.value,
-                    (
-                        base_pair.saenger.value or ""
-                        if base_pair.saenger is not None
-                        else ""
-                    ),
-                ]
-            )
-        for stacking in structure2d.baseInteractions.stackings:
-            writer.writerow(
-                [
-                    stacking.nt1.full_name,
-                    stacking.nt2.full_name,
-                    "stacking",
-                    stacking.topology.value if stacking.topology is not None else "",
-                    "",
-                ]
-            )
-        for base_phosphate in structure2d.baseInteractions.basePhosphateInteractions:
-            writer.writerow(
-                [
-                    base_phosphate.nt1.full_name,
-                    base_phosphate.nt2.full_name,
-                    "base-phosphate interaction",
-                    base_phosphate.bph.value if base_phosphate.bph is not None else "",
-                    "",
-                ]
-            )
-        for base_ribose in structure2d.baseInteractions.baseRiboseInteractions:
-            writer.writerow(
-                [
-                    base_ribose.nt1.full_name,
-                    base_ribose.nt2.full_name,
-                    "base-ribose interaction",
-                    base_ribose.br.value if base_ribose.br is not None else "",
-                    "",
-                ]
-            )
-        for other in structure2d.baseInteractions.otherInteractions:
-            writer.writerow(
-                [
-                    other.nt1.full_name,
-                    other.nt2.full_name,
-                    "other interaction",
-                    "",
-                    "",
-                ]
-            )
+        return structure2d, [structure2d.dotBracket], mapping  # Return mapping
-def write_bpseq(path: str, bpseq: BpSeq):
-    with open(path, "w") as f:
-        f.write(str(bpseq))
+# Removed duplicate functions - now imported from annotator
 def main():
@@ -471,39 +417,21 @@ def main():
         required=True,
         help="External tool that generated the output file",
     )
-    parser.add_argument(
-        "-a",
-        "--all-dot-brackets",
-        action="store_true",
-        help="(optional) print all dot-brackets, not only optimal one (exclusive with -e/--extended)",
-    )
-    parser.add_argument("-b", "--bpseq", help="(optional) path to output BPSEQ file")
-    parser.add_argument("-c", "--csv", help="(optional) path to output CSV file")
-    parser.add_argument(
-        "-j",
-        "--json",
-        help="(optional) path to output JSON file",
-    )
-    parser.add_argument(
-        "-e",
-        "--extended",
-        action="store_true",
-        help="(optional) if set, the program will print extended secondary structure to the standard output",
-    )
     parser.add_argument(
         "-f",
         "--find-gaps",
         action="store_true",
         help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands",
     )
-    parser.add_argument("-d", "--dot", help="(optional) path to output DOT file")
+    add_common_output_arguments(parser)
+    # The --inter-stem-csv and --stems-csv arguments are now added by add_common_output_arguments
     args = parser.parse_args()
     file = handle_input_file(args.input)
     structure3d = read_3d_structure(file, None)
     # Process external tool output and get secondary structure
-    structure2d, dot_brackets = process_external_tool_output(
+    structure2d, dot_brackets, mapping = process_external_tool_output(
         structure3d,
         args.external,
         ExternalTool(args.tool),
@@ -512,25 +440,7 @@ def main():
         args.all_dot_brackets,
     )
-    if args.csv:
-        write_csv(args.csv, structure2d)
-    if args.json:
-        write_json(args.json, structure2d)
-    if args.bpseq:
-        write_bpseq(args.bpseq, structure2d.bpseq)
-    if args.extended:
-        print(structure2d.extendedDotBracket)
-    elif args.all_dot_brackets:
-        for dot_bracket in dot_brackets:
-            print(dot_bracket)
-    else:
-        print(structure2d.dotBracket)
-    if args.dot:
-        print(BpSeq.from_string(structure2d.bpseq).graphviz)
+    handle_output_arguments(args, structure2d, dot_brackets, mapping, args.input)
 if __name__ == "__main__":

rnapolis/annotator.py CHANGED Viewed

@@ -10,6 +10,7 @@ from typing import Dict, List, Optional, Set, Tuple
 import numpy
 import numpy.typing
 import orjson
+import pandas as pd
 from ordered_set import OrderedSet
 from scipy.spatial import KDTree
@@ -26,6 +27,7 @@ from rnapolis.common import (
     Saenger,
     Stacking,
     StackingTopology,
+    Stem,
     Structure2D,
 )
 from rnapolis.parser import read_3d_structure
@@ -38,9 +40,10 @@ from rnapolis.tertiary import (
     PHOSPHATE_ACCEPTORS,
     RIBOSE_ACCEPTORS,
     Atom,
-    Mapping2D3D,
+    Mapping2D3D,  # Added import
     Residue3D,
     Structure3D,
+    calculate_all_inter_stem_parameters,  # Import the new helper function
     torsion_angle,
 )
 from rnapolis.util import handle_input_file
@@ -496,6 +499,10 @@ def extract_secondary_structure(
         find_gaps,
     )
     stems, single_strands, hairpins, loops = mapping.bpseq.elements
+    # Calculate inter-stem parameters using the helper function
+    inter_stem_params = calculate_all_inter_stem_parameters(mapping)
     structure2d = Structure2D(
         base_interactions,
         str(mapping.bpseq),
@@ -505,6 +512,7 @@ def extract_secondary_structure(
         single_strands,
         hairpins,
         loops,
+        inter_stem_params,  # Added inter-stem parameters
     )
     if all_dot_brackets:
         return structure2d, mapping.all_dot_brackets
@@ -512,9 +520,102 @@ def extract_secondary_structure(
         return structure2d, [structure2d.dotBracket]
-def write_json(path: str, structure2d: BaseInteractions):
+def generate_pymol_script(mapping: Mapping2D3D, stems: List[Stem]) -> str:
+    """Generates a PyMOL script to draw stems as cylinders."""
+    pymol_commands = []
+    radius = 0.5
+    r, g, b = 1.0, 0.0, 0.0  # Red color
+    for stem_idx, stem in enumerate(stems):
+        # Get residues for selection string
+        try:
+            res5p_first = mapping.bpseq_index_to_residue_map[stem.strand5p.first]
+            res5p_last = mapping.bpseq_index_to_residue_map[stem.strand5p.last]
+            res3p_first = mapping.bpseq_index_to_residue_map[stem.strand3p.first]
+            res3p_last = mapping.bpseq_index_to_residue_map[stem.strand3p.last]
+            # Prefer auth chain/number if available
+            chain5p = (
+                res5p_first.auth.chain if res5p_first.auth else res5p_first.label.chain
+            )
+            num5p_first = (
+                res5p_first.auth.number
+                if res5p_first.auth
+                else res5p_first.label.number
+            )
+            num5p_last = (
+                res5p_last.auth.number if res5p_last.auth else res5p_last.label.number
+            )
+            chain3p = (
+                res3p_first.auth.chain if res3p_first.auth else res3p_first.label.chain
+            )
+            num3p_first = (
+                res3p_first.auth.number
+                if res3p_first.auth
+                else res3p_first.label.number
+            )
+            num3p_last = (
+                res3p_last.auth.number if res3p_last.auth else res3p_last.label.number
+            )
+            # Format selection string: select stem0, A/1-5/ or A/10-15/
+            selection_str = f"{chain5p}/{num5p_first}-{num5p_last}/ or {chain3p}/{num3p_first}-{num3p_last}/"
+            pymol_commands.append(f"select stem{stem_idx}, {selection_str}")
+        except (KeyError, AttributeError) as e:
+            logging.warning(
+                f"Could not generate selection string for stem {stem_idx}: Missing residue data ({e})"
+            )
+        centroids = mapping.get_stem_coordinates(stem)
+        # Need at least 2 centroids to draw a segment
+        if len(centroids) < 2:
+            # Removed warning log for stems with < 2 base pairs
+            continue
+        # Create pseudoatoms for each centroid
+        for centroid_idx, centroid in enumerate(centroids):
+            x, y, z = centroid
+            pseudoatom_name = f"stem{stem_idx}_centroid{centroid_idx}"
+            pymol_commands.append(
+                f"pseudoatom {pseudoatom_name}, pos=[{x:.3f}, {y:.3f}, {z:.3f}]"
+            )
+        # Draw cylinders between consecutive centroids
+        for seg_idx in range(len(centroids) - 1):
+            p1 = centroids[seg_idx]
+            p2 = centroids[seg_idx + 1]
+            x1, y1, z1 = p1
+            x2, y2, z2 = p2
+            # Format: [CYLINDER, x1, y1, z1, x2, y2, z2, radius, r1, g1, b1, r2, g2, b2]
+            # Use 9.0 for CYLINDER code
+            # Use same color for both ends
+            cgo_object = f"[ 9.0, {x1:.3f}, {y1:.3f}, {z1:.3f}, {x2:.3f}, {y2:.3f}, {z2:.3f}, {radius}, {r}, {g}, {b}, {r}, {g}, {b} ]"
+            pymol_commands.append(
+                f'cmd.load_cgo({cgo_object}, "stem_{stem_idx}_seg_{seg_idx}")'
+            )
+        # Calculate and display dihedral angles between consecutive centroids
+        if len(centroids) >= 4:
+            for i in range(len(centroids) - 3):
+                pa1 = f"stem{stem_idx}_centroid{i}"
+                pa2 = f"stem{stem_idx}_centroid{i + 1}"
+                pa3 = f"stem{stem_idx}_centroid{i + 2}"
+                pa4 = f"stem{stem_idx}_centroid{i + 3}"
+                dihedral_name = f"stem{stem_idx}_dihedral{i}"
+                pymol_commands.append(
+                    f"dihedral {dihedral_name}, {pa1}, {pa2}, {pa3}, {pa4}"
+                )
+    return "\n".join(pymol_commands)
+def write_json(path: str, structure2d: Structure2D):
     with open(path, "wb") as f:
-        f.write(orjson.dumps(structure2d))
+        # Add OPT_SERIALIZE_NUMPY to handle numpy types like float64
+        f.write(orjson.dumps(structure2d, option=orjson.OPT_SERIALIZE_NUMPY))
 def write_csv(path: str, structure2d: Structure2D):
@@ -555,13 +656,13 @@ def write_csv(path: str, structure2d: Structure2D):
                     "",
                 ]
             )
-        for base_ribose in structure2d.baseInteractions.basePhosphateInteractions:
+        for base_ribose in structure2d.baseInteractions.baseRiboseInteractions:
             writer.writerow(
                 [
                     base_ribose.nt1.full_name,
                     base_ribose.nt2.full_name,
                     "base-ribose interaction",
-                    base_ribose.bph.value if base_ribose.bph is not None else "",
+                    base_ribose.br.value if base_ribose.br is not None else "",
                     "",
                 ]
             )
@@ -582,9 +683,8 @@ def write_bpseq(path: str, bpseq: BpSeq):
         f.write(str(bpseq))
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("input", help="Path to PDB or mmCIF file")
+def add_common_output_arguments(parser: argparse.ArgumentParser):
+    """Adds common output and processing arguments to the parser."""
     parser.add_argument(
         "-a",
         "--all-dot-brackets",
@@ -604,22 +704,29 @@ def main():
         action="store_true",
         help="(optional) if set, the program will print extended secondary structure to the standard output",
     )
+    parser.add_argument("-d", "--dot", help="(optional) path to output DOT file")
     parser.add_argument(
-        "-f",
-        "--find-gaps",
-        action="store_true",
-        help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
-        f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
+        "-p", "--pml", help="(optional) path to output PyMOL PML script for stems"
     )
-    parser.add_argument("-d", "--dot", help="(optional) path to output DOT file")
-    args = parser.parse_args()
-    file = handle_input_file(args.input)
-    structure3d = read_3d_structure(file, None)
-    structure2d, dot_brackets = extract_secondary_structure(
-        structure3d, None, args.find_gaps, args.all_dot_brackets
+    parser.add_argument(
+        "--inter-stem-csv",
+        help="(optional) path to output CSV file for inter-stem parameters",
+    )
+    parser.add_argument(
+        "--stems-csv",
+        help="(optional) path to output CSV file for stem details",
     )
+def handle_output_arguments(
+    args: argparse.Namespace,
+    structure2d: Structure2D,
+    dot_brackets: List[str],
+    mapping: Mapping2D3D,
+    input_filename: str,
+):
+    """Handles writing output based on provided arguments."""
+    input_basename = os.path.basename(input_filename)
     if args.csv:
         write_csv(args.csv, structure2d)
@@ -640,6 +747,135 @@ def main():
     if args.dot:
         print(BpSeq.from_string(structure2d.bpseq).graphviz)
+    if args.pml:
+        pml_script = generate_pymol_script(mapping, structure2d.stems)
+        with open(args.pml, "w") as f:
+            f.write(pml_script)
+    if args.inter_stem_csv:
+        if structure2d.interStemParameters:
+            # Convert list of dataclasses to list of dicts
+            params_list = [
+                {
+                    "stem1_idx": p.stem1_idx,
+                    "stem2_idx": p.stem2_idx,
+                    "type": p.type,
+                    "torsion": p.torsion,
+                    "min_endpoint_distance": p.min_endpoint_distance,
+                    "torsion_angle_pdf": p.torsion_angle_pdf,
+                    "min_endpoint_distance_pdf": p.min_endpoint_distance_pdf,
+                    "coaxial_probability": p.coaxial_probability,
+                }
+                for p in structure2d.interStemParameters
+            ]
+            df = pd.DataFrame(params_list)
+            df["input_basename"] = input_basename
+            # Reorder columns to put input_basename first
+            cols = ["input_basename"] + [
+                col for col in df.columns if col != "input_basename"
+            ]
+            df = df[cols]
+            df.to_csv(args.inter_stem_csv, index=False)
+        else:
+            logging.warning(
+                f"No inter-stem parameters calculated for {input_basename}, CSV file '{args.inter_stem_csv}' will be empty or not created."
+            )
+            # Optionally create an empty file with headers
+            # pd.DataFrame(columns=['input_basename', 'stem1_idx', ...]).to_csv(args.inter_stem_csv, index=False)
+    if args.stems_csv:
+        if structure2d.stems:
+            stems_data = []
+            for i, stem in enumerate(structure2d.stems):
+                try:
+                    res5p_first = mapping.bpseq_index_to_residue_map.get(
+                        stem.strand5p.first
+                    )
+                    res5p_last = mapping.bpseq_index_to_residue_map.get(
+                        stem.strand5p.last
+                    )
+                    res3p_first = mapping.bpseq_index_to_residue_map.get(
+                        stem.strand3p.first
+                    )
+                    res3p_last = mapping.bpseq_index_to_residue_map.get(
+                        stem.strand3p.last
+                    )
+                    stems_data.append(
+                        {
+                            "stem_idx": i,
+                            "strand5p_first_nt_id": res5p_first.full_name
+                            if res5p_first
+                            else None,
+                            "strand5p_last_nt_id": res5p_last.full_name
+                            if res5p_last
+                            else None,
+                            "strand3p_first_nt_id": res3p_first.full_name
+                            if res3p_first
+                            else None,
+                            "strand3p_last_nt_id": res3p_last.full_name
+                            if res3p_last
+                            else None,
+                            "strand5p_sequence": stem.strand5p.sequence,
+                            "strand3p_sequence": stem.strand3p.sequence,
+                        }
+                    )
+                except KeyError as e:
+                    logging.warning(
+                        f"Could not find residue for stem {i} (index {e}), skipping stem details."
+                    )
+                    continue
+            if stems_data:
+                df_stems = pd.DataFrame(stems_data)
+                df_stems["input_basename"] = input_basename
+                # Reorder columns
+                stem_cols = ["input_basename", "stem_idx"] + [
+                    col
+                    for col in df_stems.columns
+                    if col not in ["input_basename", "stem_idx"]
+                ]
+                df_stems = df_stems[stem_cols]
+                df_stems.to_csv(args.stems_csv, index=False)
+            else:
+                logging.warning(
+                    f"No valid stem data generated for {input_basename}, CSV file '{args.stems_csv}' will be empty or not created."
+                )
+        else:
+            logging.warning(
+                f"No stems found for {input_basename}, CSV file '{args.stems_csv}' will be empty or not created."
+            )
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("input", help="Path to PDB or mmCIF file")
+    parser.add_argument(
+        "-f",
+        "--find-gaps",
+        action="store_true",
+        help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
+        f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
+    )
+    add_common_output_arguments(parser)
+    args = parser.parse_args()
+    file = handle_input_file(args.input)
+    structure3d = read_3d_structure(file, None)
+    structure2d, dot_brackets = extract_secondary_structure(
+        structure3d, None, args.find_gaps, args.all_dot_brackets
+    )
+    # Need the mapping object for PML generation
+    mapping = Mapping2D3D(
+        structure3d,
+        structure2d.baseInteractions.basePairs,
+        structure2d.baseInteractions.stackings,
+        args.find_gaps,
+    )
+    handle_output_arguments(args, structure2d, dot_brackets, mapping, args.input)
 if __name__ == "__main__":
     main()

rnapolis/common.py CHANGED Viewed

@@ -1057,6 +1057,18 @@ class BaseInteractions:
     otherInteractions: List[OtherInteraction]
+@dataclass(frozen=True, order=True)
+class InterStemParameters:
+    stem1_idx: int
+    stem2_idx: int
+    type: Optional[str]  # Type of closest endpoint pair ('cs55', 'cs53', etc.)
+    torsion: Optional[float]  # Torsion angle between stem segments (degrees)
+    min_endpoint_distance: Optional[float]  # Minimum distance between stem endpoints
+    torsion_angle_pdf: Optional[float]  # PDF value of the torsion angle
+    min_endpoint_distance_pdf: Optional[float]  # PDF value of the min endpoint distance
+    coaxial_probability: Optional[float]  # Probability of stems being coaxial (0-1)
 @dataclass(frozen=True, order=True)
 class Structure2D:
     baseInteractions: BaseInteractions
@@ -1067,3 +1079,4 @@ class Structure2D:
     singleStrands: List[SingleStrand]
     hairpins: List[Hairpin]
     loops: List[Loop]
+    interStemParameters: List[InterStemParameters]

rnapolis/tertiary.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import itertools
 import logging
 import math
 from collections import defaultdict
@@ -7,18 +8,22 @@ from typing import Dict, List, Optional, Set, Tuple, Union
 import numpy
 import numpy.typing
+from scipy.stats import vonmises
 from rnapolis.common import (
     BasePair,
     BpSeq,
     Entry,
     GlycosidicBond,
+    InterStemParameters,
     LeontisWesthof,
     Residue,
     ResidueAuth,
     ResidueLabel,
     Saenger,
     Stacking,
+    Stem,
+    Strand,
 )
 BASE_ATOMS = {
@@ -579,6 +584,57 @@ class Mapping2D3D:
             if base_pair.is_canonical and base_pair.nt1 < base_pair.nt2
         ]
+        while True:
+            matches = defaultdict(set)
+            for base_pair in canonical:
+                matches[base_pair.nt1_3d].add(base_pair)
+                matches[base_pair.nt2_3d].add(base_pair)
+            for pairs in matches.values():
+                if len(pairs) > 1:
+                    pairs = sorted(pairs, key=pair_scoring_function)
+                    canonical.remove(pairs[-1])
+                    break
+            else:
+                break
+        return self._generated_bpseq_data[0]
+    @cached_property
+    def bpseq_index_to_residue_map(self) -> Dict[int, Residue3D]:
+        """Mapping from BpSeq entry index to the corresponding Residue3D object."""
+        return self._generated_bpseq_data[1]
+    @cached_property
+    def _generated_bpseq_data(self) -> Tuple[BpSeq, Dict[int, Residue3D]]:
+        """Helper property to compute BpSeq and index map simultaneously."""
+        def pair_scoring_function(pair: BasePair3D) -> int:
+            if pair.saenger is not None:
+                if pair.saenger in (Saenger.XIX, Saenger.XX):
+                    return 0, pair.nt1, pair.nt2
+                else:
+                    return 1, pair.nt1, pair.nt2
+            sequence = "".join(
+                sorted(
+                    [
+                        pair.nt1_3d.one_letter_name.upper(),
+                        pair.nt2_3d.one_letter_name.upper(),
+                    ]
+                )
+            )
+            if sequence in ("AU", "AT", "CG"):
+                return 0, pair.nt1, pair.nt2
+            return 1, pair.nt1, pair.nt2
+        canonical = [
+            base_pair
+            for base_pair in self.base_pairs
+            if base_pair.is_canonical and base_pair.nt1 < base_pair.nt2
+        ]
         while True:
             matches = defaultdict(set)
@@ -596,10 +652,12 @@ class Mapping2D3D:
         return self.__generate_bpseq(canonical)
-    def __generate_bpseq(self, base_pairs):
+    def __generate_bpseq(self, base_pairs) -> Tuple[BpSeq, Dict[int, Residue3D]]:
+        """Generates BpSeq entries and a map from index to Residue3D."""
         nucleotides = list(filter(lambda r: r.is_nucleotide, self.structure3d.residues))
         result: Dict[int, List] = {}
         residue_map: Dict[Residue3D, int] = {}
+        index_to_residue_map: Dict[int, Residue3D] = {}
         i = 1
         for j, residue in enumerate(nucleotides):
@@ -616,6 +674,7 @@ class Mapping2D3D:
             result[i] = [i, residue.one_letter_name, 0]
             residue_map[residue] = i
+            index_to_residue_map[i] = residue
             i += 1
         for base_pair in base_pairs:
@@ -631,7 +690,21 @@ class Mapping2D3D:
                 Entry(index_, sequence, pair)
                 for index_, sequence, pair in result.values()
             ]
-        )
+        ), index_to_residue_map
+    def find_residue_for_entry(self, entry: Entry) -> Optional[Residue3D]:
+        """Finds the Residue3D object corresponding to a BpSeq Entry."""
+        return self.bpseq_index_to_residue_map.get(entry.index_)
+    def get_residues_for_strand(self, strand: Strand) -> List[Residue3D]:
+        """Retrieves the list of Residue3D objects corresponding to a Strand."""
+        residues = []
+        # Strand indices are 1-based and inclusive
+        for index_ in range(strand.first, strand.last + 1):
+            residue = self.bpseq_index_to_residue_map.get(index_)
+            if residue:
+                residues.append(residue)
+        return residues
     @cached_property
     def dot_bracket(self) -> str:
@@ -647,6 +720,196 @@ class Mapping2D3D:
             i += len(sequence)
         return "\n".join(result)
+    def _calculate_pair_centroid(
+        self, residue1: Residue3D, residue2: Residue3D
+    ) -> Optional[numpy.typing.NDArray[numpy.floating]]:
+        """
+        Computes the centroid (arithmetic mean position) of the base atoms of two residues.
+        Atoms are selected by name through ``Residue3D.nucleobase_heavy_atoms``,
+        keyed by the upper-cased one-letter residue name. A residue without an
+        entry there is reported with a warning and contributes no atoms, so the
+        result may be based on a single residue.
+        Args:
+            residue1: First residue of the pair.
+            residue2: Second residue of the pair.
+        Returns:
+            The mean of the selected atoms' coordinates, or None (after a
+            warning) when no atom was selected from either residue.
+        """
+        selected_atoms = []
+        for member in (residue1, residue2):
+            allowed_names = Residue3D.nucleobase_heavy_atoms.get(
+                member.one_letter_name.upper(), set()
+            )
+            if allowed_names:
+                selected_atoms.extend(
+                    atom for atom in member.atoms if atom.name in allowed_names
+                )
+            else:
+                logging.warning(
+                    f"Could not find base atom definition for residue {member.full_name}"
+                )
+        if selected_atoms:
+            positions = [atom.coordinates for atom in selected_atoms]
+            return numpy.mean(positions, axis=0)
+        logging.warning(
+            f"No base atoms found for pair {residue1.full_name} - {residue2.full_name}"
+        )
+        return None
+    def get_stem_coordinates(
+        self, stem: Stem
+    ) -> List[numpy.typing.NDArray[numpy.floating]]:
+        """
+        Calculates the geometric centroid for each base pair in the stem.
+        The i-th pair joins BpSeq index ``stem.strand5p.first + i`` with
+        ``stem.strand3p.last - i``. A pair is skipped with a warning when
+        either index has no residue in ``bpseq_index_to_residue_map``, and
+        skipped silently when its centroid cannot be computed, so the result
+        may hold fewer entries than the stem has pairs.
+        Args:
+            stem: The Stem object.
+        Returns:
+            A list of numpy arrays, where each array is the centroid of a
+            base pair in the stem. Returns an empty list if no centroids
+            can be calculated.
+        """
+        index_to_residue = self.bpseq_index_to_residue_map
+        stem_len = stem.strand5p.last - stem.strand5p.first + 1
+        all_pair_centroids = []
+        for i in range(stem_len):
+            idx5p = stem.strand5p.first + i
+            idx3p = stem.strand3p.last - i
+            # Only the two lookups are guarded: a KeyError raised while the
+            # centroid is being computed must not be misreported as a
+            # missing residue.
+            try:
+                res5p = index_to_residue[idx5p]
+                res3p = index_to_residue[idx3p]
+            except KeyError:
+                logging.warning(
+                    f"Could not find residues for pair {idx5p}-{idx3p} in stem {stem}"
+                )
+                continue  # Continue calculating other centroids
+            centroid = self._calculate_pair_centroid(res5p, res3p)
+            if centroid is not None:
+                all_pair_centroids.append(centroid)
+        return all_pair_centroids
+    def calculate_inter_stem_parameters(
+        self, stem1: Stem, stem2: Stem, kappa: float = 10.0
+    ) -> Optional[Dict[str, Union[str, float]]]:
+        """
+        Calculates geometric parameters between two stems based on closest endpoints
+        and the probability of the observed torsion angle based on an expected
+        A-RNA twist using a von Mises distribution.
+        Each stem is reduced to its list of base-pair centroids (see
+        get_stem_coordinates). The closest of the four endpoint pairings selects
+        which four centroids define the torsion angle and which expected angle
+        (mu) the von Mises distribution is centred on.
+        Args:
+            stem1: The first Stem object.
+            stem2: The second Stem object.
+            kappa: Concentration parameter for the von Mises distribution (default: 10.0).
+        Returns:
+            A dictionary containing:
+            - 'type': The type of closest endpoint pair ('cs55', 'cs53', 'cs35', 'cs33').
+            - 'torsion_angle': The calculated torsion angle in degrees.
+            - 'min_endpoint_distance': The minimum distance between the endpoints.
+            - 'torsion_angle_pdf': The probability density function (PDF) value of the
+              torsion angle under the von Mises distribution.
+            - 'min_endpoint_distance_pdf': The value returned by distance_pdf for the
+              minimum endpoint distance.
+            - 'coaxial_probability': The product of the two values above divided by
+              the von Mises PDF at its mode, clamped to the range 0-1.
+            Returns None if fewer than 2 centroids could be calculated for
+            either stem.
+        """
+        stem1_centroids = self.get_stem_coordinates(stem1)
+        stem2_centroids = self.get_stem_coordinates(stem2)
+        # Need at least 2 centroids (base pairs) per stem
+        if len(stem1_centroids) < 2 or len(stem2_centroids) < 2:
+            logging.warning(
+                f"Cannot calculate inter-stem parameters for stems {stem1} and {stem2}: "
+                f"Insufficient base pairs ({len(stem1_centroids)} and {len(stem2_centroids)} respectively)."
+            )
+            return None
+        a_rna_twist = 32.7
+        # For every endpoint pairing:
+        #   - (outer, junction) indices into stem1_centroids,
+        #   - (junction, outer) indices into stem2_centroids,
+        #   - the expected torsion angle mu in degrees.
+        # The two "junction" centroids are the endpoints whose distance is
+        # measured; together with the neighbouring "outer" centroids they are
+        # the four points of the torsion angle.
+        endpoint_geometry = {
+            "cs55": ((1, 0), (0, 1), 180.0 - a_rna_twist),
+            "cs53": ((1, 0), (-1, -2), 0.0 - a_rna_twist),
+            "cs35": ((-2, -1), (0, 1), 0.0 + a_rna_twist),
+            "cs33": ((-2, -1), (-1, -2), 180.0 + a_rna_twist),
+        }
+        # Calculate distances between the four endpoint pairs
+        endpoint_distances = {
+            key: numpy.linalg.norm(
+                stem1_centroids[s1_indices[1]] - stem2_centroids[s2_indices[0]]
+            )
+            for key, (s1_indices, s2_indices, _) in endpoint_geometry.items()
+        }
+        # Find the closest pairing once; its distance is read from the same dict
+        closest_pair_key = min(endpoint_distances, key=endpoint_distances.get)
+        min_endpoint_distance = endpoint_distances[closest_pair_key]
+        (s1_outer, s1_junction), (s2_junction, s2_outer), mu_degrees = (
+            endpoint_geometry[closest_pair_key]
+        )
+        # Calculate torsion angle (in radians)
+        torsion_radians = calculate_torsion_angle_coords(
+            stem1_centroids[s1_outer],
+            stem1_centroids[s1_junction],
+            stem2_centroids[s2_junction],
+            stem2_centroids[s2_outer],
+        )
+        # Create von Mises distribution instance
+        mu_radians = math.radians(mu_degrees)
+        vm_dist = vonmises(kappa=kappa, loc=mu_radians)
+        # Calculate the probability density function (PDF) value for the torsion angle
+        torsion_probability = vm_dist.pdf(torsion_radians)
+        # Score the minimum endpoint distance with the plateau function
+        distance_probability = distance_pdf(min_endpoint_distance)
+        # Calculate the coaxial probability
+        # Max torsion probability occurs at mu (location of the distribution)
+        max_torsion_probability = vm_dist.pdf(mu_radians)
+        # distance_pdf clamps its result to at most 1.0
+        max_distance_probability = 1.0
+        # Normalization factor is the product of maximum possible probabilities
+        normalization_factor = max_torsion_probability * max_distance_probability
+        coaxial_probability = 0.0
+        if normalization_factor > 1e-9:  # Avoid division by zero
+            probability_product = torsion_probability * distance_probability
+            coaxial_probability = probability_product / normalization_factor
+            # Clamp between 0 and 1
+            coaxial_probability = max(0.0, min(1.0, coaxial_probability))
+        return {
+            "type": closest_pair_key,
+            "torsion_angle": math.degrees(torsion_radians),
+            "min_endpoint_distance": min_endpoint_distance,
+            "torsion_angle_pdf": torsion_probability,
+            "min_endpoint_distance_pdf": distance_probability,
+            "coaxial_probability": coaxial_probability,
+        }
     def __generate_dot_bracket_per_strand(self, dbn_structure: str) -> List[str]:
         dbn = dbn_structure
         i = 0
@@ -698,7 +961,7 @@ class Mapping2D3D:
             for row in [row1, row2]:
                 if row:
-                    bpseq = self.__generate_bpseq(row)
+                    bpseq, _ = self.__generate_bpseq(row)  # Unpack the tuple
                     dbns = self.__generate_dot_bracket_per_strand(
                         bpseq.dot_bracket.structure
                     )
@@ -709,11 +972,121 @@ class Mapping2D3D:
         return "\n".join(["\n".join(r) for r in result])
+def distance_pdf(
+    x: float, lower_bound: float = 3.0, upper_bound: float = 7.0, steepness: float = 5.0
+) -> float:
+    """
+    Calculates a probability density based on distance using a plateau function.
+    The function uses the product of two sigmoid functions to create a distribution
+    that is close to 1.0 between lower_bound and upper_bound, and drops off
+    rapidly outside this range. The value is a score in [0.0, 1.0]; it is not
+    normalised to integrate to 1.
+    Args:
+        x: The distance value.
+        lower_bound: The start of the high-probability plateau (default: 3.0).
+        upper_bound: The end of the high-probability plateau (default: 7.0).
+        steepness: Controls how quickly the probability drops outside the plateau
+                   (default: 5.0). Higher values mean steeper drops.
+    Returns:
+        The calculated probability density (between 0.0 and 1.0). 0.0 is
+        returned when the result is undefined (NaN), e.g. for a NaN distance.
+    """
+    # math.exp overflows a double slightly above 709, so exponents are capped
+    max_exponent = 700.0
+    # Rising sigmoid: close to 0 well below lower_bound, close to 1 above it
+    exponent1 = min(-steepness * (x - lower_bound), max_exponent)
+    sigmoid1 = 1.0 / (1.0 + math.exp(exponent1))
+    # Falling sigmoid: close to 1 below upper_bound, close to 0 well above it
+    exponent2 = min(steepness * (x - upper_bound), max_exponent)
+    sigmoid2 = 1.0 / (1.0 + math.exp(exponent2))
+    # The product creates the plateau effect
+    probability = sigmoid1 * sigmoid2
+    # NaN compares false against everything, so the clamp below would turn it
+    # into 1.0 (the best possible score); report it as 0.0 instead.
+    if math.isnan(probability):
+        return 0.0
+    # Clamp to handle potential floating point inaccuracies near 0 and 1
+    return max(0.0, min(1.0, probability))
+def calculate_all_inter_stem_parameters(
+    mapping: Mapping2D3D,
+) -> List[InterStemParameters]:
+    """
+    Builds InterStemParameters for every unordered pair of stems in the mapping.
+    Stems are read from ``mapping.bpseq.elements[0]``. A pair is evaluated only
+    when both stems span more than one position on their 5' strand, and it is
+    kept only when ``mapping.calculate_inter_stem_parameters`` returns a result.
+    Args:
+        mapping: The Mapping2D3D object containing structure, 2D info, and mapping.
+    Returns:
+        One InterStemParameters per successfully evaluated pair; ``stem1_idx``
+        and ``stem2_idx`` are positions in the stem list with stem1_idx < stem2_idx.
+    """
+    stems = mapping.bpseq.elements[0]
+    collected = []
+    for (i, stem1), (j, stem2) in itertools.combinations(enumerate(stems), 2):
+        # Both stems need at least 2 base pairs for the parameter calculation
+        if stem1.strand5p.last - stem1.strand5p.first + 1 <= 1:
+            continue
+        if stem2.strand5p.last - stem2.strand5p.first + 1 <= 1:
+            continue
+        params = mapping.calculate_inter_stem_parameters(stem1, stem2)
+        # Pairs for which the calculation yielded nothing are dropped
+        if params is None:
+            continue
+        record = InterStemParameters(
+            stem1_idx=i,
+            stem2_idx=j,
+            type=params["type"],
+            torsion=params["torsion_angle"],
+            min_endpoint_distance=params["min_endpoint_distance"],
+            torsion_angle_pdf=params["torsion_angle_pdf"],
+            min_endpoint_distance_pdf=params["min_endpoint_distance_pdf"],
+            coaxial_probability=params["coaxial_probability"],
+        )
+        collected.append(record)
+    return collected
 def torsion_angle(a1: Atom, a2: Atom, a3: Atom, a4: Atom) -> float:
-    v1 = a2.coordinates - a1.coordinates
-    v2 = a3.coordinates - a2.coordinates
-    v3 = a4.coordinates - a3.coordinates
-    t1: numpy.typing.NDArray[numpy.floating] = numpy.cross(v1, v2)
-    t2: numpy.typing.NDArray[numpy.floating] = numpy.cross(v2, v3)
-    t3: numpy.typing.NDArray[numpy.floating] = v1 * numpy.linalg.norm(v2)
-    return math.atan2(numpy.dot(t2, t3), numpy.dot(t1, t2))
+    """
+    Calculates the torsion angle between four atoms.
+    Delegates to calculate_torsion_angle_coords, passing the ``coordinates``
+    of a1, a2, a3 and a4 in that order.
+    Returns:
+        The value returned by calculate_torsion_angle_coords (an angle in radians).
+    """
+    return calculate_torsion_angle_coords(
+        a1.coordinates, a2.coordinates, a3.coordinates, a4.coordinates
+    )
+def calculate_torsion_angle_coords(
+    p1: numpy.typing.NDArray[numpy.floating],
+    p2: numpy.typing.NDArray[numpy.floating],
+    p3: numpy.typing.NDArray[numpy.floating],
+    p4: numpy.typing.NDArray[numpy.floating],
+) -> float:
+    """
+    Calculates the torsion angle between four points defined by their coordinates.
+    The angle is measured between the plane through p1, p2, p3 and the plane
+    through p2, p3, p4.
+    Args:
+        p1: Coordinates of the first point.
+        p2: Coordinates of the second point.
+        p3: Coordinates of the third point.
+        p4: Coordinates of the fourth point.
+    Returns:
+        The torsion angle in radians, in the range [-pi, pi]. 0.0 is returned
+        when the angle is undefined (e.g. coincident or collinear points) or
+        evaluates to NaN, so a 0.0 result does not by itself prove a cis
+        arrangement.
+    """
+    def _unit(vector):
+        # Vectors shorter than 1e-6 are left unscaled to avoid dividing by
+        # (almost) zero; they lead to tiny cross products that the degeneracy
+        # check below is meant to reject.
+        length = numpy.linalg.norm(vector)
+        return vector / length if length > 1e-6 else vector
+    v1_norm = _unit(p2 - p1)
+    v2_norm = _unit(p3 - p2)
+    v3_norm = _unit(p4 - p3)
+    t1 = numpy.cross(v1_norm, v2_norm)
+    t2 = numpy.cross(v2_norm, v3_norm)
+    t3 = v1_norm * numpy.linalg.norm(v2_norm)
+    # A vanishing plane normal means three consecutive points are (nearly)
+    # collinear or coincident, so the torsion angle is undefined
+    if numpy.linalg.norm(t1) < 1e-6 or numpy.linalg.norm(t2) < 1e-6:
+        return 0.0
+    # atan2 has no domain restriction, so both dot products are passed as
+    # they are; clipping only one of them would skew the angle
+    angle = math.atan2(numpy.dot(t2, t3), numpy.dot(t1, t2))
+    return angle if not math.isnan(angle) else 0.0

{rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: RNApolis
-Version: 0.8.1
+Version: 0.8.2
 Summary: A Python library containing RNA-related bioinformatics functions and classes
 Home-page: https://github.com/tzok/rnapolis-py
 Author: Tomasz Zok

{rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
-rnapolis/adapter.py,sha256=n7f5e8dbP-grJI7L9GycYAbMjpMvTuUM5aXiiCqG91k,18239
+rnapolis/adapter.py,sha256=hgOPzbvLhdPxuqpV2fLqizHQSpAtglIXrySf_SzsxCc,15379
 rnapolis/aligner.py,sha256=o7rQyjAZ3n4VXcnSPY3HVB8nLNRkVbl552O3NVh0mfg,3429
-rnapolis/annotator.py,sha256=hRRzRmneYxbg2tvwVHMWLfzmJb4szV0JL_6EOC09Gwg,22101
+rnapolis/annotator.py,sha256=zzjyZ13JYd32E_SUcTCyfV4XYpWHtgzuBsacNDHCMI8,31835
 rnapolis/clashfinder.py,sha256=AC9_tIx7QIk57sELq_aKfU1u3UMOXbgcccQeGHhMR6c,8517
-rnapolis/common.py,sha256=LY6Uz96Br8ki_gA8LpfatgtvVbt9jOTkwgagayqTgf8,31251
+rnapolis/common.py,sha256=p70gydcG8bcA8_NXPef40efvz9Jrt4TAKPfhNpg4iKg,31896
 rnapolis/component_A.csv,sha256=koirS-AwUZwoYGItT8yn3wS6Idvmh2FANfTQcOS_xh8,2897
 rnapolis/component_C.csv,sha256=NtvsAu_YrUgTjzZm3j4poW4IZ99x3dPARB09XVIiMCc,2803
 rnapolis/component_G.csv,sha256=Z5wl8OnHRyx4XhTyBiWgRZiEvmZXhoxtVRH8bn6Vxf0,2898
@@ -15,14 +15,14 @@ rnapolis/parser.py,sha256=3g4mtFvpiEENFcSBBtx_E_x1vJPF9BujWnts0kb9XjE,16340
 rnapolis/parser_v2.py,sha256=qG6CO3or7zmuJu368g9Nzokiqdeip4yjD14F163uH6w,40618
 rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
 rnapolis/splitter.py,sha256=x-Zn21mkiMgvYPptUFD9BbdNIvoaM6b8GzGf6uYXEwE,4052
-rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
+rnapolis/tertiary.py,sha256=mTVpz8rz5Q9s5_QVSMdEMXSooCce0rAD5RQMh00bHm4,39200
 rnapolis/tertiary_v2.py,sha256=I1uyHWIUePNGO5m-suoL4ibtz02qAJUMvYm0BUKUygY,22480
 rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
 rnapolis/unifier.py,sha256=2ge7IB9FdRgzSAiVD39U_ciwtdDJ2fGzf8mUIudbrqY,5820
 rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
-rnapolis-0.8.1.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
-rnapolis-0.8.1.dist-info/METADATA,sha256=NOg9-s2n313HElku8z06JiBvEhPf6oV9RR7ur20hwys,54537
-rnapolis-0.8.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-rnapolis-0.8.1.dist-info/entry_points.txt,sha256=H00KoN54wU3dFOofAu3H_3PADmZOBTB1hXf5TUU2uzo,438
-rnapolis-0.8.1.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
-rnapolis-0.8.1.dist-info/RECORD,,
+rnapolis-0.8.2.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
+rnapolis-0.8.2.dist-info/METADATA,sha256=1_ITZKV6JF324k9MawJIQCYH1VI8fQffIp9IRquKwnE,54537
+rnapolis-0.8.2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+rnapolis-0.8.2.dist-info/entry_points.txt,sha256=H00KoN54wU3dFOofAu3H_3PADmZOBTB1hXf5TUU2uzo,438
+rnapolis-0.8.2.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
+rnapolis-0.8.2.dist-info/RECORD,,

{rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

RNApolis 0.8.1__py3-none-any.whl → 0.8.2__py3-none-any.whl

RNApolis 0.8.1py3-none-any.whl → 0.8.2py3-none-any.whl