RNApolis 0.9.0-py3-none-any.whl → 0.9.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rnapolis/adapter.py +791 -37
- rnapolis/annotator.py +103 -24
- {rnapolis-0.9.0.dist-info → rnapolis-0.9.2.dist-info}/METADATA +1 -1
- {rnapolis-0.9.0.dist-info → rnapolis-0.9.2.dist-info}/RECORD +8 -8
- {rnapolis-0.9.0.dist-info → rnapolis-0.9.2.dist-info}/WHEEL +0 -0
- {rnapolis-0.9.0.dist-info → rnapolis-0.9.2.dist-info}/entry_points.txt +0 -0
- {rnapolis-0.9.0.dist-info → rnapolis-0.9.2.dist-info}/licenses/LICENSE +0 -0
- {rnapolis-0.9.0.dist-info → rnapolis-0.9.2.dist-info}/top_level.txt +0 -0
rnapolis/adapter.py
CHANGED
@@ -1,8 +1,12 @@
 #! /usr/bin/env python
 import argparse
 import logging
+import math
 import os
+import re
+from dataclasses import dataclass
 from enum import Enum
+from tempfile import NamedTemporaryFile
 from typing import Dict, List, Optional, Tuple

 import orjson
@@ -22,10 +26,13 @@ from rnapolis.common import (
     OtherInteraction,
     Residue,
     ResidueAuth,
+    ResidueLabel,
+    Saenger,
     Stacking,
     StackingTopology,
     Structure2D,
 )
+from rnapolis.metareader import read_metadata
 from rnapolis.parser import read_3d_structure
 from rnapolis.tertiary import (
     Mapping2D3D,
@@ -37,11 +44,48 @@ from rnapolis.util import handle_input_file
 class ExternalTool(Enum):
     FR3D = "fr3d"
     DSSR = "dssr"
+    RNAVIEW = "rnaview"
+    BPNET = "bpnet"
+    MAXIT = "maxit"


 logging.basicConfig(level=os.getenv("LOGLEVEL", "INFO").upper())


+def auto_detect_tool(external_files: List[str]) -> ExternalTool:
+    """
+    Auto-detect the external tool based on file patterns.
+
+    Args:
+        external_files: List of external tool output file paths
+
+    Returns:
+        ExternalTool enum value based on detected patterns
+    """
+    if not external_files:
+        return ExternalTool.MAXIT
+
+    for file_path in external_files:
+        # Check for FR3D pattern
+        if file_path.endswith("basepair_detail.txt"):
+            return ExternalTool.FR3D
+
+        # Check for RNAView pattern
+        if file_path.endswith(".out"):
+            return ExternalTool.RNAVIEW
+
+        # Check for BPNet pattern
+        if file_path.endswith("basepair.json"):
+            return ExternalTool.BPNET
+
+        # Check for JSON files (DSSR)
+        if file_path.endswith(".json"):
+            return ExternalTool.DSSR
+
+    # Default to MAXIT if no patterns match
+    return ExternalTool.MAXIT
+
+
 def parse_unit_id(nt: str) -> Residue:
     """Parse FR3D unit ID format into a Residue object."""
     fields = nt.split("|")
@@ -200,13 +244,13 @@ def match_dssr_lw(lw: Optional[str]) -> Optional[LeontisWesthof]:


 def parse_dssr_output(
-
+    file_paths: List[str], structure3d: Structure3D, model: Optional[int] = None
 ) -> BaseInteractions:
     """
     Parse DSSR JSON output and convert to BaseInteractions.

     Args:
-
+        file_paths: List of paths to DSSR output files
         structure3d: The 3D structure parsed from PDB/mmCIF
         model: Model number to use (if None, use first model)

@@ -216,7 +260,23 @@ def parse_dssr_output(
     base_pairs: List[BasePair] = []
     stackings: List[Stacking] = []

-
+    # Find the first .json file in the list
+    json_file = None
+    for file_path in file_paths:
+        if file_path.endswith(".json"):
+            json_file = file_path
+            break
+
+    if json_file is None:
+        logging.warning("No .json file found in DSSR file list")
+        return BaseInteractions([], [], [], [], [])
+
+    # Log unused files
+    unused_files = [f for f in file_paths if f != json_file]
+    if unused_files:
+        logging.info(f"DSSR: Using {json_file}, ignoring unused files: {unused_files}")
+
+    with open(json_file) as f:
         dssr = orjson.loads(f.read())

     # Handle multi-model files
@@ -253,14 +313,689 @@ def parse_dssr_output(
     return BaseInteractions(base_pairs, stackings, [], [], [])


+def parse_maxit_output(file_paths: List[str]) -> BaseInteractions:
+    """
+    Parse MAXIT output files and convert to BaseInteractions.
+
+    MAXIT analysis is embedded in mmCIF files as ndb_struct_na_base_pair category.
+
+    Args:
+        file_paths: List of paths to mmCIF files containing MAXIT analysis
+
+    Returns:
+        BaseInteractions object containing the interactions found by MAXIT
+    """
+
+    def convert_saenger(hbond_type_28: str) -> Optional[Saenger]:
+        if hbond_type_28 == "?":
+            return None
+        try:
+            index = int(hbond_type_28)
+            if 1 <= index <= 28:
+                return list(Saenger)[index - 1]
+        except ValueError:
+            pass
+        return None
+
+    def convert_lw(hbond_type_12) -> Optional[LeontisWesthof]:
+        if hbond_type_12 == "?":
+            return None
+        try:
+            index = int(hbond_type_12)
+            if index == 1:
+                return LeontisWesthof.cWW
+            if index == 2:
+                return LeontisWesthof.tWW
+            if index == 3:
+                return LeontisWesthof.cWH
+            if index == 4:
+                return LeontisWesthof.tWH
+            if index == 5:
+                return LeontisWesthof.cWS
+            if index == 6:
+                return LeontisWesthof.tWS
+            if index == 7:
+                return LeontisWesthof.cHH
+            if index == 8:
+                return LeontisWesthof.tHH
+            if index == 9:
+                return LeontisWesthof.cHS
+            if index == 10:
+                return LeontisWesthof.tHS
+            if index == 11:
+                return LeontisWesthof.cSS
+            if index == 12:
+                return LeontisWesthof.tSS
+        except ValueError:
+            pass
+        return None
+
+    all_base_pairs = []
+    all_other_interactions = []
+
+    # Find the first .cif file in the list
+    cif_file = None
+    for file_path in file_paths:
+        if file_path.endswith(".cif"):
+            cif_file = file_path
+            break
+
+    if cif_file is None:
+        logging.warning("No .cif file found in MAXIT file list")
+        return BaseInteractions([], [], [], [], [])
+
+    # Log unused files
+    unused_files = [f for f in file_paths if f != cif_file]
+    if unused_files:
+        logging.info(f"MAXIT: Using {cif_file}, ignoring unused files: {unused_files}")
+
+    # Process only the first .cif file
+    logging.info(f"Processing MAXIT file: {cif_file}")
+
+    try:
+        with open(cif_file, "r") as f:
+            file_content = f.read()
+
+        with NamedTemporaryFile("w+", suffix=".cif") as mmcif:
+            mmcif.write(file_content)
+            mmcif.seek(0)
+            metadata = read_metadata(mmcif, ["ndb_struct_na_base_pair"])
+
+        # Parse base pairs from this file
+        for entry in metadata.get("ndb_struct_na_base_pair", []):
+            auth_chain_i = entry["i_auth_asym_id"]
+            auth_number_i = int(entry["i_auth_seq_id"])
+            auth_icode_i = (
+                entry["i_PDB_ins_code"] if entry["i_PDB_ins_code"] != "?" else None
+            )
+            name_i = entry["i_label_comp_id"]
+            auth_i = ResidueAuth(auth_chain_i, auth_number_i, auth_icode_i, name_i)
+
+            auth_chain_j = entry["j_auth_asym_id"]
+            auth_number_j = int(entry["j_auth_seq_id"])
+            auth_icode_j = (
+                entry["j_PDB_ins_code"] if entry["j_PDB_ins_code"] != "?" else None
+            )
+            name_j = entry["j_label_comp_id"]
+            auth_j = ResidueAuth(auth_chain_j, auth_number_j, auth_icode_j, name_j)
+
+            label_chain_i = entry["i_label_asym_id"]
+            label_number_i = int(entry["i_label_seq_id"])
+            label_i = ResidueLabel(label_chain_i, label_number_i, name_i)
+
+            label_chain_j = entry["j_label_asym_id"]
+            label_number_j = int(entry["j_label_seq_id"])
+            label_j = ResidueLabel(label_chain_j, label_number_j, name_j)
+
+            residue_i = Residue(label_i, auth_i)
+            residue_j = Residue(label_j, auth_j)
+
+            saenger = convert_saenger(entry["hbond_type_28"])
+            lw = convert_lw(entry["hbond_type_12"])
+
+            if lw is not None:
+                all_base_pairs.append(BasePair(residue_i, residue_j, lw, saenger))
+            else:
+                all_other_interactions.append(OtherInteraction(residue_i, residue_j))
+
+    except Exception as e:
+        logging.warning(f"Error processing MAXIT file {cif_file}: {e}", exc_info=True)
+
+    return BaseInteractions(all_base_pairs, [], [], [], all_other_interactions)
+
+
+def parse_bpnet_output(file_paths: List[str]) -> BaseInteractions:
+    """
+    Parse BPNet output files and convert to BaseInteractions.
+
+    Args:
+        file_paths: List of paths to BPNet output files (basepair.json and .rob files)
+
+    Returns:
+        BaseInteractions object containing the interactions found by BPNet
+    """
+
+    def convert_lw(bpnet_lw) -> LeontisWesthof:
+        """Convert BPNet LW notation to LeontisWesthof enum."""
+        if len(bpnet_lw) != 4:
+            raise ValueError(f"bpnet lw invalid length: {bpnet_lw}")
+        bpnet_lw = bpnet_lw.replace("+", "W").replace("z", "S").replace("g", "H")
+        edge5 = bpnet_lw[0].upper()
+        edge3 = bpnet_lw[2].upper()
+        stericity = bpnet_lw[3].lower()
+        return LeontisWesthof[f"{stericity}{edge5}{edge3}"]
+
+    def residues_from_overlap_info(fields):
+        """Parse residue information from overlap line fields."""
+        chains = fields[6].split("^")
+        numbers = list(map(int, fields[3].split(":")))
+        icode1, icode2 = fields[2], fields[4]
+        names = fields[5].split(":")
+
+        if icode1 in " ?":
+            icode1 = None
+        if icode2 in " ?":
+            icode2 = None
+
+        nt1 = Residue(None, ResidueAuth(chains[0], numbers[0], icode1, names[0]))
+        nt2 = Residue(None, ResidueAuth(chains[1], numbers[1], icode2, names[1]))
+        return nt1, nt2
+
+    # Find required files
+    basepair_json = None
+    rob_file = None
+
+    for file_path in file_paths:
+        if file_path.endswith("basepair.json"):
+            basepair_json = file_path
+        elif file_path.endswith(".rob"):
+            rob_file = file_path
+
+    # Log unused files
+    used_files = [f for f in [basepair_json, rob_file] if f is not None]
+    unused_files = [f for f in file_paths if f not in used_files]
+    if unused_files:
+        logging.info(
+            f"BPNet: Using {used_files}, ignoring unused files: {unused_files}"
+        )
+
+    base_pairs = []
+    stackings = []
+    base_ribose_interactions = []
+    base_phosphate_interactions = []
+    other_interactions = []
+
+    # Parse base pairs from JSON file
+    if basepair_json:
+        logging.info(f"Processing BPNet basepair file: {basepair_json}")
+        try:
+            with open(basepair_json, encoding="utf-8") as f:
+                data = orjson.loads(f.read())
+
+            for entry in data["basepairs"]:
+                nt1 = Residue(
+                    None,
+                    ResidueAuth(
+                        entry["chain1"],
+                        entry["resnum1"],
+                        entry["ins1"],
+                        entry["resname1"],
+                    ),
+                )
+                nt2 = Residue(
+                    None,
+                    ResidueAuth(
+                        entry["chain2"],
+                        entry["resnum2"],
+                        entry["ins2"],
+                        entry["resname2"],
+                    ),
+                )
+                lw = convert_lw(entry["basepair"])
+                base_pairs.append(BasePair(nt1, nt2, lw, None))
+        except Exception as e:
+            logging.warning(
+                f"Error processing BPNet basepair file {basepair_json}: {e}",
+                exc_info=True,
+            )
+
+    # Parse overlaps from ROB file
+    if rob_file:
+        logging.info(f"Processing BPNet rob file: {rob_file}")
+        try:
+            with open(rob_file, encoding="utf-8") as f:
+                rob_content = f.read()
+
+            for line in rob_content.splitlines():
+                if line.startswith("OVLP"):
+                    fields = line.strip().split()
+                    if len(fields) == 13:
+                        # ASTK means Adjacent Stacking, OSTK means Non-Adjacent Stacking
+                        # ADJA means Adjacent contact but not proper stacking
+                        if fields[7] in ["ASTK", "OSTK", "ADJA"]:
+                            nt1, nt2 = residues_from_overlap_info(fields)
+                            stackings.append(Stacking(nt1, nt2, None))
+                    else:
+                        logging.warning(f"Failed to parse OVLP line: {line}")
+                elif line.startswith("PROX"):
+                    fields = line.strip().split()
+                    if len(fields) == 11:
+                        nt1, nt2 = residues_from_overlap_info(fields)
+                        atom1, atom2 = fields[7].split(":")
+
+                        # Determine element types based on atom names
+                        phosphate_atoms = frozenset(
+                            (
+                                "P",
+                                "OP1",
+                                "OP2",
+                                "O5'",
+                                "C5'",
+                                "C4'",
+                                "C3'",
+                                "O3'",
+                                "O5*",
+                                "C5*",
+                                "C4*",
+                                "C3*",
+                                "O3*",
+                            )
+                        )
+                        ribose_atoms = frozenset(
+                            ("C1'", "C2'", "O2'", "O4'", "C1*", "C2*", "O2*", "O4*")
+                        )
+                        base_atoms = frozenset(
+                            (
+                                "C2",
+                                "C4",
+                                "C5",
+                                "C6",
+                                "C8",
+                                "N1",
+                                "N2",
+                                "N3",
+                                "N4",
+                                "N6",
+                                "N7",
+                                "N9",
+                                "O2",
+                                "O4",
+                                "O6",
+                            )
+                        )
+
+                        def assign_element(atom_name):
+                            if atom_name in phosphate_atoms:
+                                return "PHOSPHATE"
+                            elif atom_name in ribose_atoms:
+                                return "RIBOSE"
+                            elif atom_name in base_atoms:
+                                return "BASE"
+                            else:
+                                return "UNKNOWN"
+
+                        element1 = assign_element(atom1)
+                        element2 = assign_element(atom2)
+
+                        # Base-ribose interactions
+                        if element1 == "BASE" and element2 == "RIBOSE":
+                            base_ribose_interactions.append(BaseRibose(nt1, nt2, None))
+                        elif element1 == "RIBOSE" and element2 == "BASE":
+                            base_ribose_interactions.append(BaseRibose(nt2, nt1, None))
+
+                        # Base-phosphate interactions
+                        elif element1 == "BASE" and element2 == "PHOSPHATE":
+                            base_phosphate_interactions.append(
+                                BasePhosphate(nt1, nt2, None)
+                            )
+                        elif element1 == "PHOSPHATE" and element2 == "BASE":
+                            base_phosphate_interactions.append(
+                                BasePhosphate(nt2, nt1, None)
+                            )
+
+                        # Other interactions
+                        other_interactions.append(OtherInteraction(nt1, nt2))
+                    else:
+                        logging.warning(f"Failed to parse PROX line: {line}")
+        except Exception as e:
+            logging.warning(
+                f"Error processing BPNet rob file {rob_file}: {e}", exc_info=True
+            )
+
+    return BaseInteractions(
+        base_pairs,
+        stackings,
+        base_ribose_interactions,
+        base_phosphate_interactions,
+        other_interactions,
+    )
+
+
+def parse_rnaview_output(
+    file_paths: List[str], structure3d: Structure3D
+) -> BaseInteractions:
+    """
+    Parse RNAView output files and convert to BaseInteractions.
+
+    Args:
+        file_paths: List of paths to RNAView output files (.out files)
+        structure3d: The 3D structure parsed from PDB/mmCIF
+
+    Returns:
+        BaseInteractions object containing the interactions found by RNAView
+    """
+
+    @dataclass
+    class PotentialResidue:
+        residue: Residue
+        position_c2: Optional[Tuple[float, float, float]]
+        position_c6: Optional[Tuple[float, float, float]]
+        position_n1: Optional[Tuple[float, float, float]]
+
+        def is_correct_according_to_rnaview(self) -> bool:
+            """
+            This is a reimplementation of residue_ident() function from fpair_sub.c from RNAView source code.
+            """
+            if any(
+                (
+                    self.position_c2 is None,
+                    self.position_c6 is None,
+                    self.position_n1 is None,
+                )
+            ):
+                return False
+
+            distance_n1_c2 = math.dist(self.position_n1, self.position_c2)  # type: ignore
+            distance_n1_c6 = math.dist(self.position_n1, self.position_c6)  # type: ignore
+            distance_c2_c6 = math.dist(self.position_c2, self.position_c6)  # type: ignore
+            return all(
+                (distance_n1_c2 <= 2.0, distance_n1_c6 <= 2.0, distance_c2_c6 <= 3.0)
+            )
+
+    # RNAView regex pattern from the reference implementation
+    RNAVIEW_REGEX = re.compile(
+        r"\s*(\d+)_(\d+),\s+(\w):\s+(-?\d+)\s+(\w+)-(\w+)\s+(-?\d+)\s+(\w):\s+(syn|\s+)*((./.)\s+(cis|tran)(syn|\s+)*([IVX,]+|n/a|![^.]+)|stacked)\.?"
+    )
+
+    # Positions of residues info in PDB files
+    ATOM_NAME_INDEX = slice(12, 16)
+    CHAIN_INDEX = 21
+    NUMBER_INDEX = slice(22, 26)
+    ICODE_INDEX = 26
+    NAME_INDEX = slice(17, 20)
+    X_INDEX, Y_INDEX, Z_INDEX = slice(30, 38), slice(38, 46), slice(46, 54)
+
+    # Tokens used in PDB files
+    ATOM = "ATOM"
+    HETATM = "HETATM"
+    ATOM_C6 = "C6"
+    ATOM_C2 = "C2"
+    ATOM_N1 = "N1"
+
+    # RNAView tokens
+    BEGIN_BASE_PAIR = "BEGIN_base-pair"
+    END_BASE_PAIR = "END_base-pair"
+    STACKING = "stacked"
+    BASE_RIBOSE = "!(b_s)"
+    BASE_PHOSPHATE = "!b_(O1P,O2P)"
+    OTHER_INTERACTION = "!(s_s)"
+    SAENGER_UNKNOWN = "n/a"
+    PLUS_INTERACTION = "+/+"  # For us - cWW
+    MINUS_INTERACTION = "-/-"  # For us - cWW
+    X_INTERACTION = "X/X"  # For us - cWW
+    ONE_HBOND = "!1H(b_b)"  # For us - OtherInteraction
+    DOUBLE_SAENGER = ("XIV,XV", "XII,XIII")
+    UNKNOWN_LW_CHARS = (".", "?")
+    ROMAN_NUMERALS = ("I", "V", "X")
+
+    def get_leontis_westhof(
+        lw_info: str, trans_cis_info: str
+    ) -> Optional[LeontisWesthof]:
+        """Convert RNAView LW notation to LeontisWesthof enum."""
+        trans_cis = trans_cis_info[0]
+        if any(char in lw_info for char in UNKNOWN_LW_CHARS):
+            return None
+        if lw_info in (PLUS_INTERACTION, MINUS_INTERACTION, X_INTERACTION):
+            return LeontisWesthof[f"{trans_cis}WW"]
+        return LeontisWesthof[f"{trans_cis}{lw_info[0].upper()}{lw_info[2].upper()}"]
+
+    def append_residues_from_pdb_using_rnaview_indexing(
+        pdb_content: str,
+    ) -> Dict[int, Residue]:
+        """Parse PDB content and create RNAView-style residue mapping."""
+        potential_residues: Dict[str, PotentialResidue] = {}
+
+        for line in pdb_content.splitlines():
+            if line.startswith(ATOM) or line.startswith(HETATM):
+                atom_name = line[ATOM_NAME_INDEX].strip()
+
+                number = int(line[NUMBER_INDEX].strip())
+                icode = None if line[ICODE_INDEX].strip() == "" else line[ICODE_INDEX]
+                chain = line[CHAIN_INDEX].strip()
+                name = line[NAME_INDEX].strip()
+
+                residue = Residue(None, ResidueAuth(chain, number, icode, name))
+
+                if str(residue) not in potential_residues:
+                    potential_residues[str(residue)] = PotentialResidue(
+                        residue, None, None, None
+                    )
+                potential_residue = potential_residues[str(residue)]
+
+                atom_position = (
+                    float(line[X_INDEX].strip()),
+                    float(line[Y_INDEX].strip()),
+                    float(line[Z_INDEX].strip()),
+                )
+
+                if atom_name == ATOM_C6:
+                    potential_residue.position_c6 = atom_position
+                elif atom_name == ATOM_C2:
+                    potential_residue.position_c2 = atom_position
+                elif atom_name == ATOM_N1:
+                    potential_residue.position_n1 = atom_position
+
+        residues_from_pdb: Dict[int, Residue] = {}
+        counter = 1
+        for potential_residue in potential_residues.values():
+            if potential_residue.is_correct_according_to_rnaview():
+                residues_from_pdb[counter] = potential_residue.residue
+                counter += 1
+
+        logging.debug("RNAView residues mapping:")
+        for idx, residue in sorted(residues_from_pdb.items()):
+            logging.debug(f"  {idx}: {residue}")
+
+        return residues_from_pdb
+
+    def check_indexing_correctness(
+        regex_result: Tuple[str, ...], line: str, residues_from_pdb: Dict[int, Residue]
+    ) -> None:
+        """Check if RNAView internal indexing matches PDB residue information."""
+        residue_left = residues_from_pdb[int(regex_result[0])]
+
+        if residue_left.auth.chain.lower() != regex_result[
+            2
+        ].lower() or residue_left.auth.number != int(regex_result[3]):
+            raise ValueError(
+                f"Wrong internal index for {residue_left}. Fix RNAView internal index mapping. Line: {line}"
+            )
+
+        residue_right = residues_from_pdb[int(regex_result[1])]
+
+        if residue_right.auth.chain.lower() != regex_result[
+            7
+        ].lower() or residue_right.auth.number != int(regex_result[6]):
+            raise ValueError(
+                f"Wrong internal index for {residue_right}. Fix RNAView internal index mapping. Line: {line}"
+            )
+
+    # Find the first .out file in the list
+    out_file = None
+    pdb_file = None
+    for file_path in file_paths:
+        if file_path.endswith(".out"):
+            out_file = file_path
+        elif file_path.endswith(".pdb"):
+            pdb_file = file_path
+
+    if out_file is None:
+        logging.warning("No .out file found in RNAView file list")
+        return BaseInteractions([], [], [], [], [])
+
+    # Log unused files
+    used_files = [f for f in [out_file, pdb_file] if f is not None]
+    unused_files = [f for f in file_paths if f not in used_files]
+    if unused_files:
+        logging.info(
+            f"RNAView: Using {used_files}, ignoring unused files: {unused_files}"
+        )
+
+    base_pairs = []
+    stackings = []
+    base_ribose_interactions = []
+    base_phosphate_interactions = []
+    other_interactions = []
+
+    # Parse PDB content to build residue mapping if PDB file is available
+    residues_from_pdb: Dict[int, Residue] = {}
+    if pdb_file:
+        logging.info(f"Processing RNAView PDB file: {pdb_file}")
+        try:
+            with open(pdb_file, "r", encoding="utf-8") as f:
+                pdb_content = f.read()
+            residues_from_pdb = append_residues_from_pdb_using_rnaview_indexing(
+                pdb_content
+            )
+        except Exception as e:
+            logging.warning(
+                f"Error processing RNAView PDB file {pdb_file}: {e}", exc_info=True
+            )
+
+    # Process the RNAView output file
+    logging.info(f"Processing RNAView file: {out_file}")
+
+    try:
+        with open(out_file, "r", encoding="utf-8") as f:
+            rnaview_result = f.read()
+
+        base_pair_section = False
+        for line in rnaview_result.splitlines():
+            if line.startswith(BEGIN_BASE_PAIR):
+                base_pair_section = True
+            elif line.startswith(END_BASE_PAIR):
+                base_pair_section = False
+            elif base_pair_section:
+                rnaview_regex_result = re.search(RNAVIEW_REGEX, line)
+                if rnaview_regex_result is None:
+                    logging.warning(f"RNAView regex failed for line: {line}")
+                    continue
+
+                rnaview_regex_groups = rnaview_regex_result.groups()
+
+                # Log parsed groups with their meanings
+                logging.debug("RNAView regex parsed:")
+                logging.debug(
+                    f"  First residue: idx={rnaview_regex_groups[0]}, chain={rnaview_regex_groups[2]}, num={rnaview_regex_groups[3]}, name={rnaview_regex_groups[4]}"
+                )
+                logging.debug(
+                    f"  Second residue: idx={rnaview_regex_groups[1]}, chain={rnaview_regex_groups[7]}, num={rnaview_regex_groups[6]}, name={rnaview_regex_groups[5]}"
+                )
+                if rnaview_regex_groups[9] == "stacked":
+                    logging.debug("  Interaction: stacking")
+                else:
+                    logging.debug(f"  LW edges: {rnaview_regex_groups[10]}")
+                    logging.debug(f"  LW orientation: {rnaview_regex_groups[11]}")
+                    logging.debug(f"  Classification: {rnaview_regex_groups[13]}")
+
+                # Use residue mapping if available, otherwise create residues from regex
+                if residues_from_pdb:
+                    try:
+                        check_indexing_correctness(
+                            rnaview_regex_groups, line, residues_from_pdb
+                        )
+                        residue_left = residues_from_pdb[int(rnaview_regex_groups[0])]
+                        residue_right = residues_from_pdb[int(rnaview_regex_groups[1])]
+                    except (KeyError, ValueError) as e:
+                        logging.warning(f"RNAView indexing error: {e}")
+                        continue
+                else:
+                    # Fallback: create residues from regex groups
+                    chain_left = rnaview_regex_groups[2]
+                    number_left = int(rnaview_regex_groups[3])
+                    name_left = rnaview_regex_groups[4]
+
+                    chain_right = rnaview_regex_groups[7]
+                    number_right = int(rnaview_regex_groups[6])
+                    name_right = rnaview_regex_groups[5]
+
+                    residue_left = Residue(
+                        None, ResidueAuth(chain_left, number_left, None, name_left)
+                    )
+                    residue_right = Residue(
+                        None, ResidueAuth(chain_right, number_right, None, name_right)
+                    )
+
+                # Interaction OR Saenger OR n/a OR empty string
+                token = rnaview_regex_groups[13]
+
+                if rnaview_regex_groups[9] == STACKING:
+                    stackings.append(Stacking(residue_left, residue_right, None))
+
+                elif token == BASE_RIBOSE:
+                    base_ribose_interactions.append(
+                        BaseRibose(residue_left, residue_right, None)
+                    )
+
+                elif token == BASE_PHOSPHATE:
+                    base_phosphate_interactions.append(
+                        BasePhosphate(residue_left, residue_right, None)
+                    )
+
+                elif token in (OTHER_INTERACTION, ONE_HBOND):
+                    other_interactions.append(
+                        OtherInteraction(residue_left, residue_right)
+                    )
+
+                elif token == SAENGER_UNKNOWN:
+                    leontis_westhof = get_leontis_westhof(
+                        rnaview_regex_groups[10], rnaview_regex_groups[11]
+                    )
+                    if leontis_westhof is None:
+                        other_interactions.append(
+                            OtherInteraction(residue_left, residue_right)
+                        )
+                    else:
+                        base_pairs.append(
+                            BasePair(residue_left, residue_right, leontis_westhof, None)
+                        )
+
+                elif (
+                    all(char in ROMAN_NUMERALS for char in token)
+                    or token in DOUBLE_SAENGER
+                ):
+                    leontis_westhof = get_leontis_westhof(
+                        rnaview_regex_groups[10], rnaview_regex_groups[11]
+                    )
+                    if leontis_westhof is None:
+                        other_interactions.append(
+                            OtherInteraction(residue_left, residue_right)
+                        )
+                    else:
+                        saenger = (
+                            Saenger[token.split(",", 1)[0]]
+                            if token in DOUBLE_SAENGER
+                            else Saenger[token]
+                        )
+                        base_pairs.append(
+                            BasePair(
+                                residue_left, residue_right, leontis_westhof, saenger
+                            )
+                        )
+
+                else:
+                    logging.warning(f"Unknown RNAView interaction: {token}")
+
+    except Exception as e:
+        logging.warning(f"Error processing RNAView file {out_file}: {e}", exc_info=True)
+
+    return BaseInteractions(
+        base_pairs,
+        stackings,
+        base_ribose_interactions,
+        base_phosphate_interactions,
+        other_interactions,
+    )
+
+
 def parse_external_output(
-
+    file_paths: List[str], tool: ExternalTool, structure3d: Structure3D
 ) -> BaseInteractions:
     """
     Parse the output from an external tool (FR3D, DSSR, etc.) and convert it to BaseInteractions.

     Args:
-
+        file_paths: List of paths to external tool output files
         tool: The external tool that generated the output
         structure3d: The 3D structure parsed from PDB/mmCIF

@@ -268,20 +1003,26 @@ def parse_external_output(
         BaseInteractions object containing the interactions found by the external tool
     """
     if tool == ExternalTool.FR3D:
-        return parse_fr3d_output(
+        return parse_fr3d_output(file_paths)
     elif tool == ExternalTool.DSSR:
-        return parse_dssr_output(
+        return parse_dssr_output(file_paths, structure3d)
+    elif tool == ExternalTool.MAXIT:
+        return parse_maxit_output(file_paths)
+    elif tool == ExternalTool.BPNET:
+        return parse_bpnet_output(file_paths)
+    elif tool == ExternalTool.RNAVIEW:
+        return parse_rnaview_output(file_paths, structure3d)
     else:
         raise ValueError(f"Unsupported external tool: {tool}")


-def parse_fr3d_output(
+def parse_fr3d_output(file_paths: List[str]) -> BaseInteractions:
     """
-    Parse FR3D output
+    Parse FR3D output files and convert to BaseInteractions.

     Args:
-
-
+        file_paths: List of paths to FR3D output files containing basepair, stacking,
+            and backbone interactions

     Returns:
         BaseInteractions object containing the interactions found by FR3D
@@ -295,15 +1036,17 @@ def parse_fr3d_output(file_path: str) -> BaseInteractions:
         "other_interactions": [],
     }

-    # Process
-
-
-
-
-
+    # Process each input file
+    for file_path in file_paths:
+        logging.info(f"Processing FR3D file: {file_path}")
+        with open(file_path, "r") as f:
+            for line in f:
+                line = line.strip()
+                if not line or line.startswith("#"):
+                    continue

-
-
+                # Process every non-empty, non-comment line
+                _process_interaction_line(line, interactions_data)

     # Return a BaseInteractions object with all the processed interactions
     return BaseInteractions(
@@ -317,8 +1060,9 @@ def parse_fr3d_output(file_path: str) -> BaseInteractions:

 def process_external_tool_output(
     structure3d: Structure3D,
-
+    external_file_paths: List[str],
     tool: ExternalTool,
+    input_file_path: str,
     find_gaps: bool = False,
 ) -> Tuple[Structure2D, Mapping2D3D]:  # Added Mapping2D3D to return tuple
     """
@@ -329,16 +1073,23 @@ def process_external_tool_output(

     Args:
         structure3d: The 3D structure parsed from PDB/mmCIF
-
+        external_file_paths: List of paths to external tool output files (empty for MAXIT)
         tool: The external tool that generated the output (FR3D, DSSR, etc.)
-
+        input_file_path: Path to the input file (used when external_file_paths is empty)
        find_gaps: Whether to detect gaps in the structure

     Returns:
         A tuple containing the Structure2D object and the Mapping2D3D object.
     """
     # Parse external tool output
-
+    if not external_file_paths:
+        # For MAXIT or when no external files are provided, use the input file
+        file_paths_to_process = [input_file_path]
+    else:
+        # Process all external files
+        file_paths_to_process = external_file_paths
+
+    base_interactions = parse_external_output(file_paths_to_process, tool, structure3d)

     # Extract secondary structure using the external tool's interactions
     return structure3d.extract_secondary_structure(base_interactions, find_gaps)
@@ -348,15 +1099,14 @@ def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("input", help="Path to PDB or mmCIF file")
     parser.add_argument(
-        "
-
-        help="Path to external tool output file (FR3D, DSSR, etc.)",
+        "external_files",
+        nargs="*",
+        help="Path(s) to external tool output file(s) (FR3D, DSSR, etc.)",
     )
     parser.add_argument(
         "--tool",
         choices=[t.value for t in ExternalTool],
-
-        help="External tool that generated the output file",
+        help="External tool that generated the output file (auto-detected if not specified)",
     )
     parser.add_argument(
         "-f",
@@ -371,20 +1121,24 @@ def main():
     file = handle_input_file(args.input)
     structure3d = read_3d_structure(file, None)

-    #
+    # Auto-detect tool if not specified
+    if args.tool is not None:
+        tool = ExternalTool(args.tool)
+    else:
+        tool = auto_detect_tool(args.external_files)
+        logging.info(f"Auto-detected tool: {tool.value}")
+
+    # Process external tool output files and get secondary structure
+    # Always call process_external_tool_output, even for MAXIT (empty external files)
     structure2d, mapping = process_external_tool_output(
         structure3d,
-        args.
-
+        args.external_files,
+        tool,
+        args.input,
         args.find_gaps,
     )

-
-        dot_brackets = mapping.all_dot_brackets
-    else:
-        dot_brackets = [mapping.dot_bracket]
-
-    handle_output_arguments(args, structure2d, dot_brackets, mapping, args.input)
+    handle_output_arguments(args, structure2d, mapping, args.input)


 if __name__ == "__main__":
rnapolis/annotator.py
CHANGED
@@ -24,6 +24,7 @@ from rnapolis.common import (
     BPh,
     BpSeq,
     LeontisWesthof,
+    OtherInteraction,
     Residue,
     Saenger,
     Stacking,
@@ -660,12 +661,6 @@ def write_bpseq(path: str, bpseq: BpSeq):

 def add_common_output_arguments(parser: argparse.ArgumentParser):
     """Adds common output and processing arguments to the parser."""
-    parser.add_argument(
-        "-a",
-        "--all-dot-brackets",
-        action="store_true",
-        help="(optional) print all dot-brackets, not only optimal one (exclusive with -e/--extended)",
-    )
     parser.add_argument("-b", "--bpseq", help="(optional) path to output BPSEQ file")
     parser.add_argument("-c", "--csv", help="(optional) path to output CSV file")
     parser.add_argument(
@@ -693,42 +688,126 @@ def add_common_output_arguments(parser: argparse.ArgumentParser):
 )


+def unify_structure_data(structure2d: Structure2D, mapping: Mapping2D3D) -> Structure2D:
+    """
+    Unify structure data by:
+    1. Adding missing Saenger classifications to base pairs
+    2. Filling in empty residue labels from Structure3D
+    """
+    # Create a mapping from residue to residue3d for label filling
+    residue_to_residue3d = {}
+    for residue3d in mapping.structure3d.residues:
+        residue_key = Residue(residue3d.label, residue3d.auth)
+        residue_to_residue3d[residue_key] = residue3d
+
+    def fill_residue_label(residue: Residue) -> Residue:
+        """Fill empty label from Structure3D if available."""
+        if residue.label is not None:
+            return residue
+
+        # Try to find matching residue3d by auth
+        for residue3d in mapping.structure3d.residues:
+            if residue.auth == residue3d.auth:
+                return Residue(residue3d.label, residue.auth)
+
+        return residue
+
+    # Process base pairs
+    unified_base_pairs = []
+    for base_pair in structure2d.base_pairs:
+        # Fill in missing labels
+        nt1 = fill_residue_label(base_pair.nt1)
+        nt2 = fill_residue_label(base_pair.nt2)
+
+        # Detect missing Saenger classification
+        saenger = base_pair.saenger
+        if saenger is None:
+            # Find corresponding 3D residues for Saenger detection
+            residue3d_1 = residue_to_residue3d.get(Residue(nt1.label, nt1.auth))
+            residue3d_2 = residue_to_residue3d.get(Residue(nt2.label, nt2.auth))
+
+            if residue3d_1 is not None and residue3d_2 is not None:
+                saenger = detect_saenger(residue3d_1, residue3d_2, base_pair.lw)
+
+        unified_base_pairs.append(BasePair(nt1, nt2, base_pair.lw, saenger))
+
+    # Process other interaction types (fill labels only)
+    unified_stackings = []
+    for stacking in structure2d.stackings:
+        nt1 = fill_residue_label(stacking.nt1)
+        nt2 = fill_residue_label(stacking.nt2)
+        unified_stackings.append(Stacking(nt1, nt2, stacking.topology))
+
+    unified_base_ribose = []
+    for base_ribose in structure2d.base_ribose_interactions:
+        nt1 = fill_residue_label(base_ribose.nt1)
+        nt2 = fill_residue_label(base_ribose.nt2)
+        unified_base_ribose.append(BaseRibose(nt1, nt2, base_ribose.br))
+
+    unified_base_phosphate = []
+    for base_phosphate in structure2d.base_phosphate_interactions:
+        nt1 = fill_residue_label(base_phosphate.nt1)
+        nt2 = fill_residue_label(base_phosphate.nt2)
+        unified_base_phosphate.append(BasePhosphate(nt1, nt2, base_phosphate.bph))
+
+    unified_other = []
+    for other in structure2d.other_interactions:
+        nt1 = fill_residue_label(other.nt1)
+        nt2 = fill_residue_label(other.nt2)
+        unified_other.append(OtherInteraction(nt1, nt2))
+
+    # Create new Structure2D with unified data
+    unified_base_interactions = BaseInteractions(
+        unified_base_pairs,
+        unified_stackings,
+        unified_base_ribose,
+        unified_base_phosphate,
+        unified_other,
+    )
+
+    # Recreate Structure2D with unified interactions
+    unified_structure2d, _ = mapping.structure3d.extract_secondary_structure(
+        unified_base_interactions, False
+    )
+
+    return unified_structure2d
+
+
 def handle_output_arguments(
     args: argparse.Namespace,
     structure2d: Structure2D,
-    dot_brackets: List[str],
     mapping: Mapping2D3D,
     input_filename: str,
 ):
     """Handles writing output based on provided arguments."""
+    # Unify the structure data before processing outputs
+    unified_structure2d = unify_structure_data(structure2d, mapping)
+
     input_basename = os.path.basename(input_filename)
     if args.csv:
-        write_csv(args.csv,
+        write_csv(args.csv, unified_structure2d)

     if args.json:
-        write_json(args.json,
+        write_json(args.json, unified_structure2d)

     if args.bpseq:
-        write_bpseq(args.bpseq,
+        write_bpseq(args.bpseq, unified_structure2d.bpseq)

     if args.extended:
-        print(
-    elif args.all_dot_brackets:
-        for dot_bracket in dot_brackets:
-            print(dot_bracket)
+        print(unified_structure2d.extended_dot_bracket)
     else:
-        print(
+        print(unified_structure2d.dot_bracket)

     if args.dot:
-        print(BpSeq.from_string(
+        print(BpSeq.from_string(unified_structure2d.bpseq).graphviz)

     if args.pml:
-        pml_script = generate_pymol_script(mapping,
+        pml_script = generate_pymol_script(mapping, unified_structure2d.stems)
         with open(args.pml, "w") as f:
             f.write(pml_script)

     if args.inter_stem_csv:
-        if
+        if unified_structure2d.inter_stem_parameters:
             # Convert list of dataclasses to list of dicts
             params_list = [
                 {
@@ -741,7 +820,7 @@ def handle_output_arguments(
                     "min_endpoint_distance_pdf": p.min_endpoint_distance_pdf,
                     "coaxial_probability": p.coaxial_probability,
                 }
-                for p in
+                for p in unified_structure2d.interStemParameters
             ]
             df = pd.DataFrame(params_list)
             df["input_basename"] = input_basename
@@ -759,9 +838,9 @@ def handle_output_arguments(
         # pd.DataFrame(columns=['input_basename', 'stem1_idx', ...]).to_csv(args.inter_stem_csv, index=False)

     if args.stems_csv:
-        if
+        if unified_structure2d.stems:
             stems_data = []
-            for i, stem in enumerate(
+            for i, stem in enumerate(unified_structure2d.stems):
                 try:
                     res5p_first = mapping.bpseq_index_to_residue_map.get(
                         stem.strand5p.first
@@ -838,11 +917,11 @@ def main():
     file = handle_input_file(args.input)
     structure3d = read_3d_structure(file, None)
     base_interactions = extract_base_interactions(structure3d)
-    structure2d,
-        base_interactions, args.find_gaps
+    structure2d, mapping = structure3d.extract_secondary_structure(
+        base_interactions, args.find_gaps
     )

-    handle_output_arguments(args, structure2d,
+    handle_output_arguments(args, structure2d, mapping, args.input)


 if __name__ == "__main__":
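
A hedged sketch of how the new `unify_structure_data` step fits into the annotator flow, assuming the helpers keep the import paths used in these two files; `example.cif` is a placeholder input path:

```python
from rnapolis.annotator import extract_base_interactions, unify_structure_data
from rnapolis.parser import read_3d_structure
from rnapolis.util import handle_input_file

file = handle_input_file("example.cif")  # placeholder input
structure3d = read_3d_structure(file, None)
base_interactions = extract_base_interactions(structure3d)
structure2d, mapping = structure3d.extract_secondary_structure(base_interactions, False)

# handle_output_arguments() now runs this unification before writing any output:
unified = unify_structure_data(structure2d, mapping)  # fills labels, adds missing Saenger classes
print(unified.dot_bracket)
```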
{rnapolis-0.9.0.dist-info → rnapolis-0.9.2.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
-rnapolis/adapter.py,sha256=
+rnapolis/adapter.py,sha256=apDxyftg9NnlsN9ieVk07dFzsxmJTTilJ2gyHV0_HX8,42239
 rnapolis/aligner.py,sha256=o7rQyjAZ3n4VXcnSPY3HVB8nLNRkVbl552O3NVh0mfg,3429
-rnapolis/annotator.py,sha256=
+rnapolis/annotator.py,sha256=OkqFVuxOtb-mySmw3bc5NF9ETu4BWq4ImtBecWJikrY,33899
 rnapolis/clashfinder.py,sha256=AC9_tIx7QIk57sELq_aKfU1u3UMOXbgcccQeGHhMR6c,8517
 rnapolis/common.py,sha256=HTe-RSZa_9hEIi-j4-1afxdqt7zAD-BpZ7JxRZGX170,32390
 rnapolis/component_A.csv,sha256=koirS-AwUZwoYGItT8yn3wS6Idvmh2FANfTQcOS_xh8,2897
@@ -21,9 +21,9 @@ rnapolis/tertiary_v2.py,sha256=y7Rh43Jzt9QU6wCa1wAHIcO3BcNQY83PbbWNTmqI8zM,23424
 rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
 rnapolis/unifier.py,sha256=2ge7IB9FdRgzSAiVD39U_ciwtdDJ2fGzf8mUIudbrqY,5820
 rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
-rnapolis-0.9.
-rnapolis-0.9.
-rnapolis-0.9.
-rnapolis-0.9.
-rnapolis-0.9.
-rnapolis-0.9.
+rnapolis-0.9.2.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
+rnapolis-0.9.2.dist-info/METADATA,sha256=im-tdbK04EmFGO4O7ZGUCMWp5rimzW6_NZ5YQlrKJ0U,54537
+rnapolis-0.9.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+rnapolis-0.9.2.dist-info/entry_points.txt,sha256=H00KoN54wU3dFOofAu3H_3PADmZOBTB1hXf5TUU2uzo,438
+rnapolis-0.9.2.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
+rnapolis-0.9.2.dist-info/RECORD,,