PyPI - RNApolis - Versions diffs - 0.10.4__py3-none-any.whl → 0.10.6__py3-none-any.whl - Mend

RNApolis 0.10.4__py3-none-any.whl → 0.10.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

rnapolis/adapter.py CHANGED Viewed

@@ -8,7 +8,7 @@ from collections import defaultdict
 from dataclasses import dataclass
 from enum import Enum
 from tempfile import NamedTemporaryFile
-from typing import DefaultDict, Dict, List, Optional, Set, Tuple
+from typing import Any, DefaultDict, Dict, List, Optional, Set, Tuple, Union
 import orjson
@@ -49,6 +49,7 @@ class ExternalTool(Enum):
     BPNET = "bpnet"
     MAXIT = "maxit"
     BARNABA = "barnaba"
+    MCANNOTATE = "mc-annotate"
 logging.basicConfig(level=os.getenv("LOGLEVEL", "INFO").upper())
@@ -68,25 +69,30 @@ def auto_detect_tool(external_files: List[str]) -> ExternalTool:
         return ExternalTool.MAXIT
     for file_path in external_files:
+        basename = os.path.basename(file_path)
         # Check for FR3D pattern
-        if file_path.endswith("basepair_detail.txt"):
+        if basename.endswith("basepair_detail.txt"):
             return ExternalTool.FR3D
         # Check for RNAView pattern
-        if file_path.endswith(".out"):
+        if basename.endswith(".out"):
             return ExternalTool.RNAVIEW
         # Check for BPNet pattern
-        if file_path.endswith("basepair.json"):
+        if basename.endswith("basepair.json"):
             return ExternalTool.BPNET
+        # Check for MC-Annotate pattern
+        if basename.endswith("stdout.txt"):
+            return ExternalTool.MCANNOTATE
         # Check for Barnaba pattern
-        basename = os.path.basename(file_path)
         if "pairing" in basename or "stacking" in basename:
             return ExternalTool.BARNABA
         # Check for JSON files (DSSR)
-        if file_path.endswith(".json"):
+        if basename.endswith(".json"):
             return ExternalTool.DSSR
     # Default to MAXIT if no patterns match
@@ -317,10 +323,14 @@ def parse_dssr_output(
             if nt1 is not None and nt2 is not None:
                 stackings.append(Stacking(nt1, nt2, None))
-    return BaseInteractions(base_pairs, stackings, [], [], [])
+    return BaseInteractions.from_structure3d(
+        structure3d, base_pairs, stackings, [], [], []
+    )
-def parse_maxit_output(file_paths: List[str]) -> BaseInteractions:
+def parse_maxit_output(
+    file_paths: List[str], structure3d: Structure3D
+) -> BaseInteractions:
     """
     Parse MAXIT output files and convert to BaseInteractions.
@@ -448,10 +458,14 @@ def parse_maxit_output(file_paths: List[str]) -> BaseInteractions:
     except Exception as e:
         logging.warning(f"Error processing MAXIT file {cif_file}: {e}", exc_info=True)
-    return BaseInteractions(all_base_pairs, [], [], [], all_other_interactions)
+    return BaseInteractions.from_structure3d(
+        structure3d, all_base_pairs, [], [], [], all_other_interactions
+    )
-def parse_bpnet_output(file_paths: List[str]) -> BaseInteractions:
+def parse_bpnet_output(
+    file_paths: List[str], structure3d: Structure3D
+) -> BaseInteractions:
     """
     Parse BPNet output files and convert to BaseInteractions.
@@ -649,7 +663,8 @@ def parse_bpnet_output(file_paths: List[str]) -> BaseInteractions:
                 f"Error processing BPNet rob file {rob_file}: {e}", exc_info=True
             )
-    return BaseInteractions(
+    return BaseInteractions.from_structure3d(
+        structure3d,
         base_pairs,
         stackings,
         base_ribose_interactions,
@@ -986,7 +1001,8 @@ def parse_rnaview_output(
     except Exception as e:
         logging.warning(f"Error processing RNAView file {out_file}: {e}", exc_info=True)
-    return BaseInteractions(
+    return BaseInteractions.from_structure3d(
+        structure3d,
         base_pairs,
         stackings,
         base_ribose_interactions,
@@ -1142,7 +1158,349 @@ def parse_barnaba_output(
                         f"Unknown barnaba stacking topology: {interaction_str}"
                     )
-    return BaseInteractions(base_pairs, stackings, [], [], other_interactions)
+    return BaseInteractions.from_structure3d(
+        structure3d, base_pairs, stackings, [], [], other_interactions
+    )
+class MCAnnotateAdapter:
+    """Parser that turns MC-Annotate text output into lists of interactions.
+    Residue names are collected from the PDB content first (``append_names``)
+    because the stacking sections identify residues by chain, number and
+    icode only. ``analyze_by_mc_annotate`` then walks the MC-Annotate result
+    section by section and fills the five interaction lists kept on the
+    instance.
+    """
+    # Represents state of parsing MC-Annotate result
+    # Luckily every important part of file
+    # begins with a unique sentence
+    class ParseState(str, Enum):
+        RESIDUES_INFORMATION = "Residue conformations"
+        ADJACENT_STACKINGS = "Adjacent stackings"
+        NON_ADJACENT_STACKINGS = "Non-Adjacent stackings"
+        BASE_PAIRS_SECTION = "Base-pairs"
+        SUMMARY_SECTION = "Number of"
+    # This dictionary maps our model edges
+    # to edge representation used by MC-Annotate
+    EDGES: Dict[str, Tuple[str, ...]] = {
+        "H": ("Hh", "Hw", "Bh", "C8"),
+        "W": ("Wh", "Ww", "Ws"),
+        "S": ("Ss", "Sw", "Bs"),
+    }
+    # Contains flattened EDGES values (in one tuple)
+    ALL_EDGES = sum(EDGES.values(), ())
+    # Based on these tokens
+    # BaseRiboseInteractions and BasePhosphateInteractions are created
+    RIBOSE_ATOM = "O2'"
+    PHOSPHATE_ATOM = "O2P"
+    # Single hydrogen bond - for us it's OtherInteraction
+    ONE_HBOND = "one_hbond"
+    # Cis/trans tokens used by MC-Annotate
+    CIS = "cis"
+    TRANS = "trans"
+    # Tokens used in PDB files
+    ATOM = "ATOM"
+    HETATM = "HETATM"
+    # This regex is used to capture 6 groups of residues information:
+    # (1) (2) (3) (4) (5) (6)
+    # 1, 4 - chain IDs
+    # 2, 5 - numbers
+    # 3, 6 - icodes (or empty string if no icode)
+    # Example - match and groups:
+    # A-100.X-B200
+    # ('A'), ('-100'), ('X'), ('B'), ('200'), ('')
+    RESIDUE_REGEX = re.compile(
+        r"'?(.)'?(-?[0-9]+)\.?([a-zA-Z]?)-'?(.)'?(-?[0-9]+)\.?([a-zA-Z]?)"
+    )
+    # Roman numerals used by Saenger
+    # both in our model and MC-Annotate
+    ROMAN_NUMERALS = ("I", "V", "X")
+    # Positions of residues info in PDB files
+    CHAIN_INDEX = 21
+    NUMBER_INDEX = slice(22, 26)
+    ICODE_INDEX = 26
+    NAME_INDEX = slice(17, 20)
+    def __init__(self) -> None:
+        # Since names are not present in adjacent and non-adjacent stackings
+        # we need to save these values earlier
+        self.names: Dict[str, str] = {}
+        self.base_pairs: List[BasePair] = []
+        self.stackings: List[Stacking] = []
+        self.base_ribose_interactions: List[BaseRibose] = []
+        self.base_phosphate_interactions: List[BasePhosphate] = []
+        self.other_interactions: List[OtherInteraction] = []
+    def classify_edge(self, edge_type: str) -> Optional[str]:
+        """Map an MC-Annotate edge token to "H", "W" or "S"; None if unknown."""
+        for edge, edges in self.EDGES.items():
+            if edge_type in edges:
+                return edge
+        # The message must be an f-string naming edge_type; previously the
+        # literal text "{type}" was logged and the offending token was lost
+        logging.warning(f'Edge type "{edge_type}" unknown')
+        return None
+    def get_residue(self, residue_info_list: Tuple[Union[str, Any], ...]) -> Residue:
+        """Build a Residue from a (chain, number, icode) triple of regex groups.
+        The residue name is looked up in ``self.names`` under the key
+        "<chain><number>" or "<chain><number>.<icode>"; a missing key raises
+        KeyError.
+        """
+        chain = residue_info_list[0]
+        number = int(residue_info_list[1])
+        if residue_info_list[2] == "":
+            icode = None
+            residue_info = f"{chain}{number}"
+        else:
+            icode = residue_info_list[2]
+            residue_info = f"{chain}{number}.{icode}"
+        return Residue(
+            None, ResidueAuth(chain, number, icode, self.names[residue_info])
+        )
+    def get_residues(
+        self, residues_info: str
+    ) -> Tuple[Optional[Residue], Optional[Residue]]:
+        """Parse a "<residue>-<residue>" token into a pair of Residue objects.
+        Returns (None, None) and logs an error when the token does not match
+        RESIDUE_REGEX.
+        """
+        regex_result = re.search(self.RESIDUE_REGEX, residues_info)
+        if regex_result is None:
+            # f-prefix added: the token was previously logged as the literal
+            # text "{residues_info}"
+            logging.error(f"MC-Annotate regex failed: {residues_info}")
+            return None, None
+        residues_info_list = regex_result.groups()
+        # Expects (chain1, number1, icode1, chain2, number2, icode2)
+        if len(residues_info_list) != 6:
+            logging.error(f"MC-Annotate regex failed for {residues_info}")
+            return None, None
+        residue_left = self.get_residue(residues_info_list[:3])
+        residue_right = self.get_residue(residues_info_list[3:])
+        return residue_left, residue_right
+    def append_stacking(self, line: str, topology_position: int) -> None:
+        """Parse one stacking line and append the result to ``self.stackings``.
+        ``topology_position`` is the index of the whitespace-separated token
+        that names the StackingTopology member.
+        """
+        splitted_line = line.split()
+        topology_info = splitted_line[topology_position]
+        residue_left, residue_right = self.get_residues(splitted_line[0])
+        if residue_left is None or residue_right is None:
+            logging.warning(f"Could not parse residues in line: {line}")
+            return
+        stacking = Stacking(
+            residue_left, residue_right, StackingTopology[topology_info]
+        )
+        self.stackings.append(stacking)
+    def get_ribose_interaction(
+        self, residues: Tuple[Residue, Residue], token: str
+    ) -> BaseRibose:
+        """Create a BaseRibose for the pair, swapping it if the token starts with O2'."""
+        # The base side is preferred first, so swap when the ribose atom
+        # is on the left side of the token
+        if token.split("/", 1)[0] == self.RIBOSE_ATOM:
+            residue_left, residue_right = residues[1], residues[0]
+        else:
+            residue_left, residue_right = residues[0], residues[1]
+        return BaseRibose(residue_left, residue_right, None)
+    def get_phosphate_interaction(
+        self, residues: Tuple[Residue, Residue], token: str
+    ) -> BasePhosphate:
+        """Create a BasePhosphate for the pair, swapping it if the token starts with O2P."""
+        # The base side is preferred first, so swap when the phosphate atom
+        # is on the left side of the token
+        if token.split("/", 1)[0] == self.PHOSPHATE_ATOM:
+            residue_left, residue_right = residues[1], residues[0]
+        else:
+            residue_left, residue_right = residues[0], residues[1]
+        return BasePhosphate(residue_left, residue_right, None)
+    def get_base_interaction(
+        self,
+        residues: Tuple[Residue, Residue],
+        token: str,
+        tokens: List[str],
+    ) -> Optional[BasePair]:
+        """Create a BasePair from an edge token such as "Ww/Ws".
+        ``tokens`` is the full token list of the line; it supplies the
+        cis/trans keyword and, optionally, the Saenger numeral. Returns None
+        when cis/trans is absent or either edge is unknown.
+        """
+        if self.CIS in tokens:
+            cis_trans = "c"
+        elif self.TRANS in tokens:
+            cis_trans = "t"
+        else:
+            logging.warning(f"Cis/trans expected, but not present in {tokens}")
+            return None
+        # example saenger: XIX or XII,XIII (?)
+        # The first token built only from roman-numeral characters is used;
+        # for a comma-separated token only the part before the comma counts
+        for potential_saenger_token in tokens:
+            potential_saenger_without_comma = potential_saenger_token.split(",")[0]
+            if all(
+                char in self.ROMAN_NUMERALS for char in potential_saenger_without_comma
+            ):
+                saenger = Saenger[potential_saenger_without_comma]
+                break
+        else:
+            saenger = None
+        left_edge, right_edge = token.split("/", 1)
+        leontis_westhof_left = self.classify_edge(left_edge)
+        leontis_westhof_right = self.classify_edge(right_edge)
+        if leontis_westhof_left is None or leontis_westhof_right is None:
+            return None
+        leontis_westhof = LeontisWesthof[
+            f"{cis_trans}{leontis_westhof_left}{leontis_westhof_right}"
+        ]
+        residue_left, residue_right = residues
+        return BasePair(residue_left, residue_right, leontis_westhof, saenger)
+    def get_other_interaction(
+        self, residues: Tuple[Residue, Residue]
+    ) -> OtherInteraction:
+        """Wrap the residue pair in an OtherInteraction."""
+        return OtherInteraction(residues[0], residues[1])
+    def append_interactions(self, line: str) -> None:
+        """Parse one line of the base-pairs section into the interaction lists.
+        A line may add at most one base pair, one base-ribose and one
+        base-phosphate interaction; a line marked with a single hydrogen
+        bond and no ribose/phosphate token becomes an OtherInteraction.
+        """
+        splitted_line = line.split()
+        residues = self.get_residues(splitted_line[0])
+        if residues[0] is None or residues[1] is None:
+            logging.warning(f"Could not parse residues in line: {line}")
+            return
+        # Assumes that one pair can belong to every interaction type
+        # no more than once!
+        base_added, ribose_added, phosphate_added = False, False, False
+        # example tokens: Ww/Ww pairing antiparallel cis XX
+        tokens: List[str] = splitted_line[3:]
+        # Special case
+        # IF single hydrogen bond and base pairs only THEN
+        # append to OtherInteraction list
+        if self.ONE_HBOND in tokens:
+            for token in tokens:
+                if self.RIBOSE_ATOM in token or self.PHOSPHATE_ATOM in token:
+                    break
+            else:
+                other_interaction = self.get_other_interaction(residues)
+                self.other_interactions.append(other_interaction)
+                return
+        for token in tokens:
+            if self.RIBOSE_ATOM in token and not ribose_added:
+                # example token: Ss/O2'
+                ribose_interaction = self.get_ribose_interaction(residues, token)
+                self.base_ribose_interactions.append(ribose_interaction)
+                ribose_added = True
+            elif self.PHOSPHATE_ATOM in token and not phosphate_added:
+                # example token: O2P/Bh
+                phosphate_interaction = self.get_phosphate_interaction(residues, token)
+                self.base_phosphate_interactions.append(phosphate_interaction)
+                phosphate_added = True
+            elif len(token.split("/", 1)) > 1:
+                token_left, token_right = token.split("/", 1)
+                tokens_in_edges = (
+                    token_left in self.ALL_EDGES and token_right in self.ALL_EDGES
+                )
+                if tokens_in_edges and not base_added:
+                    # example token_left: Ww | example token_right: Ws
+                    base_pair_interaction = self.get_base_interaction(
+                        residues, token, tokens
+                    )
+                    if base_pair_interaction is not None:
+                        self.base_pairs.append(base_pair_interaction)
+                    # Marked as added even when parsing failed, so only the
+                    # first edge/edge token of a line is ever considered
+                    base_added = True
+    def append_names(self, file_content: str) -> None:
+        """Record residue names from ATOM/HETATM lines of PDB text in ``self.names``."""
+        for line in file_content.splitlines():
+            if line.startswith(self.ATOM) or line.startswith(self.HETATM):
+                chain = line[self.CHAIN_INDEX].strip()
+                number = line[self.NUMBER_INDEX].strip()
+                icode = line[self.ICODE_INDEX].strip()
+                name = line[self.NAME_INDEX].strip()
+                # Same key layout as the one built in get_residue
+                residue_info = (
+                    f"{chain}{number}" if icode == "" else f"{chain}{number}.{icode}"
+                )
+                self.names[residue_info] = name
+    def analyze_by_mc_annotate(
+        self, pdb_content: str, mc_result: str, **_: Dict[str, Any]
+    ) -> Tuple[
+        List[BasePair],
+        List[Stacking],
+        List[BaseRibose],
+        List[BasePhosphate],
+        List[OtherInteraction],
+    ]:
+        """Parse MC-Annotate output and return the five interaction lists.
+        Returns a tuple (base_pairs, stackings, base_ribose_interactions,
+        base_phosphate_interactions, other_interactions); the return
+        annotation now states this instead of BaseInteractions.
+        """
+        self.append_names(pdb_content)
+        current_state = None
+        for line in mc_result.splitlines():
+            # Blank lines carry no data; inside the stacking and base-pair
+            # sections they would otherwise raise IndexError on line.split()
+            if not line.strip():
+                continue
+            for state in self.ParseState:
+                if line.startswith(state.value):
+                    current_state = state
+                    break
+            # Loop ended without break - parse file
+            else:
+                if current_state == self.ParseState.RESIDUES_INFORMATION:
+                    # example line: X7.H : G C3p_endo anti
+                    # Skip residues information - meaningless information
+                    pass
+                elif current_state == self.ParseState.ADJACENT_STACKINGS:
+                    # example line: X4.E-X5.F : adjacent_5p upward
+                    self.append_stacking(line, 3)
+                elif current_state == self.ParseState.NON_ADJACENT_STACKINGS:
+                    # example line: Y40.M-Y67.N : inward pairing
+                    self.append_stacking(line, 2)
+                elif current_state == self.ParseState.BASE_PAIRS_SECTION:
+                    # example line: Y38.K-Y51.X : A-U Ww/Ww pairing antiparallel cis XX
+                    self.append_interactions(line)
+                elif current_state == self.ParseState.SUMMARY_SECTION:
+                    # example line: Number of non adjacent stackings = 26
+                    # Skip summary section - meaningless information
+                    pass
+        return (
+            self.base_pairs,
+            self.stackings,
+            self.base_ribose_interactions,
+            self.base_phosphate_interactions,
+            self.other_interactions,
+        )
+def parse_mcannotate_output(
+    file_paths: List[str], structure3d: Structure3D
+) -> BaseInteractions:
+    """
+    Convert MC-Annotate results into BaseInteractions.
+    Two inputs are picked from file_paths: a file whose basename ends with
+    "stdout.txt" (the MC-Annotate output) and a file ending with ".pdb" (the
+    source of residue names). When either is missing or cannot be read, a
+    warning is logged and empty BaseInteractions are returned.
+    """
+    mc_output_path = None
+    pdb_path = None
+    # When several files match, the last one in file_paths wins
+    for candidate in file_paths:
+        if os.path.basename(candidate).endswith("stdout.txt"):
+            mc_output_path = candidate
+        elif candidate.endswith(".pdb"):
+            pdb_path = candidate
+    # The stdout file is checked before the PDB file
+    required_inputs = (
+        (mc_output_path, "No stdout.txt file found for mc-annotate."),
+        (pdb_path, "No PDB file found for mc-annotate."),
+    )
+    for found_path, missing_message in required_inputs:
+        if not found_path:
+            logging.warning(missing_message)
+            return BaseInteractions([], [], [], [], [])
+    logging.info(f"Processing mc-annotate stdout file: {mc_output_path}")
+    logging.info(f"Using structure file for residue names: {pdb_path}")
+    try:
+        with open(mc_output_path, "r") as mc_handle:
+            mc_result = mc_handle.read()
+        with open(pdb_path, "r") as pdb_handle:
+            pdb_content = pdb_handle.read()
+    except Exception as e:
+        logging.warning(f"Could not read input files for mc-annotate: {e}")
+        return BaseInteractions([], [], [], [], [])
+    # The adapter returns the five interaction lists in the positional order
+    # from_structure3d takes them after structure3d
+    parsed_lists = MCAnnotateAdapter().analyze_by_mc_annotate(pdb_content, mc_result)
+    return BaseInteractions.from_structure3d(structure3d, *parsed_lists)
 def parse_external_output(
@@ -1160,22 +1518,26 @@ def parse_external_output(
         BaseInteractions object containing the interactions found by the external tool
     """
     if tool == ExternalTool.FR3D:
-        return parse_fr3d_output(file_paths)
+        return parse_fr3d_output(file_paths, structure3d)
     elif tool == ExternalTool.DSSR:
         return parse_dssr_output(file_paths, structure3d)
     elif tool == ExternalTool.MAXIT:
-        return parse_maxit_output(file_paths)
+        return parse_maxit_output(file_paths, structure3d)
     elif tool == ExternalTool.BPNET:
-        return parse_bpnet_output(file_paths)
+        return parse_bpnet_output(file_paths, structure3d)
     elif tool == ExternalTool.RNAVIEW:
         return parse_rnaview_output(file_paths, structure3d)
     elif tool == ExternalTool.BARNABA:
         return parse_barnaba_output(file_paths, structure3d)
+    elif tool == ExternalTool.MCANNOTATE:
+        return parse_mcannotate_output(file_paths, structure3d)
     else:
         raise ValueError(f"Unsupported external tool: {tool}")
-def parse_fr3d_output(file_paths: List[str]) -> BaseInteractions:
+def parse_fr3d_output(
+    file_paths: List[str], structure3d: Structure3D
+) -> BaseInteractions:
     """
     Parse FR3D output files and convert to BaseInteractions.
@@ -1208,7 +1570,8 @@ def parse_fr3d_output(file_paths: List[str]) -> BaseInteractions:
                 _process_interaction_line(line, interactions_data)
     # Return a BaseInteractions object with all the processed interactions
-    return BaseInteractions(
+    return BaseInteractions.from_structure3d(
+        structure3d,
         interactions_data["base_pairs"],
         interactions_data["stackings"],
         interactions_data["base_ribose_interactions"],
@@ -1244,6 +1607,9 @@ def process_external_tool_output(
     if not external_file_paths:
         # For MAXIT or when no external files are provided, use the input file
         file_paths_to_process = [input_file_path]
+    elif tool == ExternalTool.MCANNOTATE:
+        # MC-Annotate requires both the stdout and the PDB file
+        file_paths_to_process = external_file_paths + [input_file_path]
     else:
         # Process all external files
         file_paths_to_process = external_file_paths

rnapolis/annotator.py CHANGED Viewed

@@ -85,15 +85,6 @@ def detect_cis_trans(residue_i: Residue3D, residue_j: Residue3D) -> Optional[str
     return "c" if -90.0 < torsion < 90.0 else "t"
-def detect_saenger(
-    residue_i: Residue3D, residue_j: Residue3D, lw: LeontisWesthof
-) -> Optional[Saenger]:
-    key = (f"{residue_i.one_letter_name}{residue_j.one_letter_name}", lw.value)
-    if key in Saenger.table():
-        return Saenger[Saenger.table()[key]]
-    return None
 def detect_bph_br_classification(
     donor_residue: Residue3D, donor: Atom, acceptor: Atom
 ) -> Optional[int]:
@@ -367,7 +358,9 @@ def find_pairs(
                 Residue(residue_i.label, residue_i.auth),
                 Residue(residue_j.label, residue_j.auth),
                 lw,
-                detect_saenger(residue_i, residue_j, lw),
+                Saenger.from_leontis_westhof(
+                    residue_i.one_letter_name, residue_j.one_letter_name, lw
+                ),
             )
         )
@@ -483,7 +476,9 @@ def extract_base_interactions(
 ) -> BaseInteractions:
     base_pairs, base_phosphate, base_ribose = find_pairs(tertiary_structure, model)
     stackings = find_stackings(tertiary_structure, model)
-    return BaseInteractions(base_pairs, stackings, base_ribose, base_phosphate, [])
+    return BaseInteractions.from_structure3d(
+        tertiary_structure, base_pairs, stackings, base_ribose, base_phosphate, []
+    )
 def generate_pymol_script(mapping: Mapping2D3D, stems: List[Stem]) -> str:
@@ -688,91 +683,6 @@ def add_common_output_arguments(parser: argparse.ArgumentParser):
     )
-def unify_structure_data(structure2d: Structure2D, mapping: Mapping2D3D) -> Structure2D:
-    """
-    Unify structure data by:
-    1. Adding missing Saenger classifications to base pairs
-    2. Filling in empty residue labels from Structure3D
-    """
-    # Create a mapping from residue to residue3d for label filling
-    residue_to_residue3d = {}
-    for residue3d in mapping.structure3d.residues:
-        residue_key = Residue(residue3d.label, residue3d.auth)
-        residue_to_residue3d[residue_key] = residue3d
-    def fill_residue_label(residue: Residue) -> Residue:
-        """Fill empty label from Structure3D if available."""
-        if residue.label is not None:
-            return residue
-        # Try to find matching residue3d by auth
-        for residue3d in mapping.structure3d.residues:
-            if residue.auth == residue3d.auth:
-                return Residue(residue3d.label, residue.auth)
-        return residue
-    # Process base pairs
-    unified_base_pairs = []
-    for base_pair in structure2d.base_pairs:
-        # Fill in missing labels
-        nt1 = fill_residue_label(base_pair.nt1)
-        nt2 = fill_residue_label(base_pair.nt2)
-        # Detect missing Saenger classification
-        saenger = base_pair.saenger
-        if saenger is None:
-            # Find corresponding 3D residues for Saenger detection
-            residue3d_1 = residue_to_residue3d.get(Residue(nt1.label, nt1.auth))
-            residue3d_2 = residue_to_residue3d.get(Residue(nt2.label, nt2.auth))
-            if residue3d_1 is not None and residue3d_2 is not None:
-                saenger = detect_saenger(residue3d_1, residue3d_2, base_pair.lw)
-        unified_base_pairs.append(BasePair(nt1, nt2, base_pair.lw, saenger))
-    # Process other interaction types (fill labels only)
-    unified_stackings = []
-    for stacking in structure2d.stackings:
-        nt1 = fill_residue_label(stacking.nt1)
-        nt2 = fill_residue_label(stacking.nt2)
-        unified_stackings.append(Stacking(nt1, nt2, stacking.topology))
-    unified_base_ribose = []
-    for base_ribose in structure2d.base_ribose_interactions:
-        nt1 = fill_residue_label(base_ribose.nt1)
-        nt2 = fill_residue_label(base_ribose.nt2)
-        unified_base_ribose.append(BaseRibose(nt1, nt2, base_ribose.br))
-    unified_base_phosphate = []
-    for base_phosphate in structure2d.base_phosphate_interactions:
-        nt1 = fill_residue_label(base_phosphate.nt1)
-        nt2 = fill_residue_label(base_phosphate.nt2)
-        unified_base_phosphate.append(BasePhosphate(nt1, nt2, base_phosphate.bph))
-    unified_other = []
-    for other in structure2d.other_interactions:
-        nt1 = fill_residue_label(other.nt1)
-        nt2 = fill_residue_label(other.nt2)
-        unified_other.append(OtherInteraction(nt1, nt2))
-    # Create new Structure2D with unified data
-    unified_base_interactions = BaseInteractions(
-        unified_base_pairs,
-        unified_stackings,
-        unified_base_ribose,
-        unified_base_phosphate,
-        unified_other,
-    )
-    # Recreate Structure2D with unified interactions
-    unified_structure2d, _ = mapping.structure3d.extract_secondary_structure(
-        unified_base_interactions, False
-    )
-    return unified_structure2d
 def handle_output_arguments(
     args: argparse.Namespace,
     structure2d: Structure2D,
@@ -780,34 +690,31 @@ def handle_output_arguments(
     input_filename: str,
 ):
     """Handles writing output based on provided arguments."""
-    # Unify the structure data before processing outputs
-    unified_structure2d = unify_structure_data(structure2d, mapping)
     input_basename = os.path.basename(input_filename)
     if args.csv:
-        write_csv(args.csv, unified_structure2d)
+        write_csv(args.csv, structure2d)
     if args.json:
-        write_json(args.json, unified_structure2d)
+        write_json(args.json, structure2d)
     if args.bpseq:
-        write_bpseq(args.bpseq, unified_structure2d.bpseq)
+        write_bpseq(args.bpseq, structure2d.bpseq)
     if args.extended:
-        print(unified_structure2d.extended_dot_bracket)
+        print(structure2d.extended_dot_bracket)
     else:
-        print(unified_structure2d.dot_bracket)
+        print(structure2d.dot_bracket)
     if args.dot:
-        print(BpSeq.from_string(unified_structure2d.bpseq).graphviz)
+        print(BpSeq.from_string(structure2d.bpseq).graphviz)
     if args.pml:
-        pml_script = generate_pymol_script(mapping, unified_structure2d.stems)
+        pml_script = generate_pymol_script(mapping, structure2d.stems)
         with open(args.pml, "w") as f:
             f.write(pml_script)
     if args.inter_stem_csv:
-        if unified_structure2d.inter_stem_parameters:
+        if structure2d.inter_stem_parameters:
             # Convert list of dataclasses to list of dicts
             params_list = [
                 {
@@ -820,7 +727,7 @@ def handle_output_arguments(
                     "min_endpoint_distance_pdf": p.min_endpoint_distance_pdf,
                     "coaxial_probability": p.coaxial_probability,
                 }
-                for p in unified_structure2d.interStemParameters
+                for p in structure2d.interStemParameters
             ]
             df = pd.DataFrame(params_list)
             df["input_basename"] = input_basename
@@ -838,9 +745,9 @@ def handle_output_arguments(
             # pd.DataFrame(columns=['input_basename', 'stem1_idx', ...]).to_csv(args.inter_stem_csv, index=False)
     if args.stems_csv:
-        if unified_structure2d.stems:
+        if structure2d.stems:
             stems_data = []
-            for i, stem in enumerate(unified_structure2d.stems):
+            for i, stem in enumerate(structure2d.stems):
                 try:
                     res5p_first = mapping.bpseq_index_to_residue_map.get(
                         stem.strand5p.first

rnapolis/common.py CHANGED Viewed

@@ -5,7 +5,7 @@ import re
 import string
 from collections import defaultdict
 from collections.abc import Sequence
-from dataclasses import dataclass
+from dataclasses import InitVar, dataclass
 from enum import Enum
 from functools import cache, cached_property, total_ordering
 from typing import Dict, List, Optional, Tuple
@@ -152,6 +152,18 @@ class Saenger(Enum):
             ("TG", "cWW"): "XXVIII",
         }
+    @classmethod
+    def from_leontis_westhof(
+        cls,
+        residue_i_one_letter_name: str,
+        residue_j_one_letter_name: str,
+        lw: LeontisWesthof,
+    ) -> Optional["Saenger"]:
+        """Look up the Saenger member for a residue pair and Leontis-Westhof class.
+        The lookup key is the two one-letter names joined together, paired
+        with ``lw.value``. Returns None when ``Saenger.table()`` has no such key.
+        """
+        lookup = Saenger.table()
+        pair_key = (
+            f"{residue_i_one_letter_name}{residue_j_one_letter_name}",
+            lw.value,
+        )
+        if pair_key not in lookup:
+            return None
+        # The table stores member names, so resolve by name
+        return Saenger[lookup[pair_key]]
     @property
     def is_canonical(self) -> bool:
         return self == Saenger.XIX or self == Saenger.XX or self == Saenger.XXVIII
@@ -1062,6 +1074,91 @@ class BaseInteractions:
     base_phosphate_interactions: List[BasePhosphate]
     other_interactions: List[OtherInteraction]
+    @classmethod
+    def from_structure3d(
+        cls,
+        structure3d: "Structure3D",
+        base_pairs: List[BasePair],
+        stackings: List[Stacking],
+        base_ribose_interactions: List[BaseRibose],
+        base_phosphate_interactions: List[BasePhosphate],
+        other_interactions: List[OtherInteraction],
+    ) -> "BaseInteractions":
+        """Build BaseInteractions with residue data completed from structure3d.
+        For every interaction, a residue that has only ``auth`` or only
+        ``label`` gets the other identifier from ``structure3d.residues``
+        when a match exists. Base pairs without a Saenger class get one from
+        ``Saenger.from_leontis_westhof`` when both residues are found in
+        structure3d; otherwise Saenger stays None. Interactions that need no
+        change are passed through as the same objects.
+        """
+        auth2residue3d = {}
+        auth2label = {}
+        label2auth = {}
+        for residue3d in structure3d.residues:
+            auth2residue3d[residue3d.auth] = residue3d
+            auth2label[residue3d.auth] = residue3d.label
+            label2auth[residue3d.label] = residue3d.auth
+        def unify_nt(nt: Residue) -> Residue:
+            # Fill whichever of label/auth is missing; leave complete or
+            # completely empty residues untouched
+            if nt.auth is not None and nt.label is not None:
+                return nt
+            if nt.auth is not None:
+                return Residue(label=auth2label.get(nt.auth, None), auth=nt.auth)
+            if nt.label is not None:
+                return Residue(label=nt.label, auth=label2auth.get(nt.label, None))
+            return nt
+        def detect_saenger(nt1: Residue, nt2: Residue, lw: LeontisWesthof):
+            # A residue unknown to structure3d must not abort the conversion
+            # with KeyError; the Saenger class is then left undetermined
+            residue3d_1 = auth2residue3d.get(nt1.auth)
+            residue3d_2 = auth2residue3d.get(nt2.auth)
+            if residue3d_1 is None or residue3d_2 is None:
+                return None
+            return Saenger.from_leontis_westhof(
+                residue3d_1.one_letter_name, residue3d_2.one_letter_name, lw
+            )
+        base_pairs_new = []
+        for base_pair in base_pairs:
+            nt1 = unify_nt(base_pair.nt1)
+            nt2 = unify_nt(base_pair.nt2)
+            # An already assigned Saenger class is kept as is
+            saenger = base_pair.saenger or detect_saenger(nt1, nt2, base_pair.lw)
+            if (
+                nt1 != base_pair.nt1
+                or nt2 != base_pair.nt2
+                or saenger != base_pair.saenger
+            ):
+                base_pair = BasePair(nt1=nt1, nt2=nt2, lw=base_pair.lw, saenger=saenger)
+            base_pairs_new.append(base_pair)
+        stackings_new = []
+        for stacking in stackings:
+            nt1 = unify_nt(stacking.nt1)
+            nt2 = unify_nt(stacking.nt2)
+            if nt1 != stacking.nt1 or nt2 != stacking.nt2:
+                stacking = Stacking(nt1=nt1, nt2=nt2, topology=stacking.topology)
+            stackings_new.append(stacking)
+        base_ribose_interactions_new = []
+        for base_ribose in base_ribose_interactions:
+            nt1 = unify_nt(base_ribose.nt1)
+            nt2 = unify_nt(base_ribose.nt2)
+            if nt1 != base_ribose.nt1 or nt2 != base_ribose.nt2:
+                base_ribose = BaseRibose(nt1=nt1, nt2=nt2, br=base_ribose.br)
+            base_ribose_interactions_new.append(base_ribose)
+        base_phosphate_interactions_new = []
+        for base_phosphate in base_phosphate_interactions:
+            nt1 = unify_nt(base_phosphate.nt1)
+            nt2 = unify_nt(base_phosphate.nt2)
+            if nt1 != base_phosphate.nt1 or nt2 != base_phosphate.nt2:
+                base_phosphate = BasePhosphate(nt1=nt1, nt2=nt2, bph=base_phosphate.bph)
+            base_phosphate_interactions_new.append(base_phosphate)
+        other_interactions_new = []
+        for other_interaction in other_interactions:
+            nt1 = unify_nt(other_interaction.nt1)
+            nt2 = unify_nt(other_interaction.nt2)
+            if nt1 != other_interaction.nt1 or nt2 != other_interaction.nt2:
+                other_interaction = OtherInteraction(nt1=nt1, nt2=nt2)
+            other_interactions_new.append(other_interaction)
+        return cls(
+            base_pairs=base_pairs_new,
+            stackings=stackings_new,
+            base_ribose_interactions=base_ribose_interactions_new,
+            base_phosphate_interactions=base_phosphate_interactions_new,
+            other_interactions=other_interactions_new,
+        )
 @dataclass(frozen=True, order=True)
 class InterStemParameters:

{rnapolis-0.10.4.dist-info → rnapolis-0.10.6.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: RNApolis
-Version: 0.10.4
+Version: 0.10.6
 Summary: A Python library containing RNA-related bioinformatics functions and classes
 Home-page: https://github.com/tzok/rnapolis-py
 Author: Tomasz Zok

{rnapolis-0.10.4.dist-info → rnapolis-0.10.6.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
-rnapolis/adapter.py,sha256=6b4m1EzD1BA3D7GgXaV4iYKOnmzgz0AKwXHc2svQj3w,48132
+rnapolis/adapter.py,sha256=6hJTweIqUXH8CEGvi8oupFzk5etkIt8Q2bqRvgsqako,62169
 rnapolis/aligner.py,sha256=o7rQyjAZ3n4VXcnSPY3HVB8nLNRkVbl552O3NVh0mfg,3429
-rnapolis/annotator.py,sha256=OkqFVuxOtb-mySmw3bc5NF9ETu4BWq4ImtBecWJikrY,33899
+rnapolis/annotator.py,sha256=HA2hfEUXdmBElObqRlASAB1FgkysjiHgwMTjEhsDiDE,30277
 rnapolis/clashfinder.py,sha256=AC9_tIx7QIk57sELq_aKfU1u3UMOXbgcccQeGHhMR6c,8517
-rnapolis/common.py,sha256=HTe-RSZa_9hEIi-j4-1afxdqt7zAD-BpZ7JxRZGX170,32390
+rnapolis/common.py,sha256=hamlW892ZF5A0dSWsl7cOCZqOpbVQMgXjVPYDFzk3pE,36347
 rnapolis/component_A.csv,sha256=koirS-AwUZwoYGItT8yn3wS6Idvmh2FANfTQcOS_xh8,2897
 rnapolis/component_C.csv,sha256=NtvsAu_YrUgTjzZm3j4poW4IZ99x3dPARB09XVIiMCc,2803
 rnapolis/component_G.csv,sha256=Z5wl8OnHRyx4XhTyBiWgRZiEvmZXhoxtVRH8bn6Vxf0,2898
@@ -22,9 +22,9 @@ rnapolis/tertiary_v2.py,sha256=SgijTv0bPqMJwsMqyQk0O8QAnS2Ozk45vk8igxt9hRs,38001
 rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
 rnapolis/unifier.py,sha256=2ge7IB9FdRgzSAiVD39U_ciwtdDJ2fGzf8mUIudbrqY,5820
 rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
-rnapolis-0.10.4.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
-rnapolis-0.10.4.dist-info/METADATA,sha256=VKy39unD-Kyqzg7J7ADgFlseV3FftWCyBtjn-vnYbEU,54611
-rnapolis-0.10.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-rnapolis-0.10.4.dist-info/entry_points.txt,sha256=MZMWnYBUYnis-zWDmFfuA5yXtU3W5YdQrm5HA5LrkeM,474
-rnapolis-0.10.4.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
-rnapolis-0.10.4.dist-info/RECORD,,
+rnapolis-0.10.6.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
+rnapolis-0.10.6.dist-info/METADATA,sha256=Q2OY_Y3PZgVNaob7Xk8vruYNZ13HyFfdiRD7giJqJ_I,54611
+rnapolis-0.10.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+rnapolis-0.10.6.dist-info/entry_points.txt,sha256=MZMWnYBUYnis-zWDmFfuA5yXtU3W5YdQrm5HA5LrkeM,474
+rnapolis-0.10.6.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
+rnapolis-0.10.6.dist-info/RECORD,,

{rnapolis-0.10.4.dist-info → rnapolis-0.10.6.dist-info}/WHEEL RENAMED Viewed

File without changes

{rnapolis-0.10.4.dist-info → rnapolis-0.10.6.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{rnapolis-0.10.4.dist-info → rnapolis-0.10.6.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{rnapolis-0.10.4.dist-info → rnapolis-0.10.6.dist-info}/top_level.txt RENAMED Viewed

File without changes

RNApolis 0.10.4__py3-none-any.whl → 0.10.6__py3-none-any.whl

RNApolis 0.10.4__py3-none-any.whl → 0.10.6__py3-none-any.whl