PyPI - RNApolis - Versions diffs - 0.6.2__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

RNApolis 0.6.2__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

rnapolis/adapter.py ADDED Viewed

@@ -0,0 +1,537 @@
+#! /usr/bin/env python
+import argparse
+import csv
+import logging
+import os
+from enum import Enum
+from typing import Dict, List, Optional, Tuple
+import orjson
+from rnapolis.common import (
+    BR,
+    BaseInteractions,
+    BasePair,
+    BasePhosphate,
+    BaseRibose,
+    BPh,
+    BpSeq,
+    LeontisWesthof,
+    OtherInteraction,
+    Residue,
+    ResidueAuth,
+    Stacking,
+    StackingTopology,
+    Structure2D,
+)
+from rnapolis.parser import read_3d_structure
+from rnapolis.tertiary import Mapping2D3D, Structure3D
+from rnapolis.util import handle_input_file
+class ExternalTool(Enum):
+    """External annotation tools whose output this adapter can parse."""
+    # The values double as the accepted choices of the --tool command-line option
+    FR3D = "fr3d"
+    DSSR = "dssr"
+# Configure the root logger at import time; the level is read from the LOGLEVEL
+# environment variable and defaults to INFO
+logging.basicConfig(level=os.getenv("LOGLEVEL", "INFO").upper())
+def parse_unit_id(nt: str) -> Residue:
+    """
+    Build a Residue from a pipe-separated FR3D unit ID.
+    Zero-based fields 2, 4 and 3 are passed to ResidueAuth as its first, second
+    and fourth arguments (presumably chain, residue number and residue name);
+    field 7, when present and non-empty, is passed as the insertion code.
+    Args:
+        nt: FR3D unit ID string
+    Returns:
+        A Residue holding only the author-provided identification
+    Raises:
+        IndexError: If the unit ID has fewer than five fields
+        ValueError: If field 4 is not an integer
+    """
+    parts = nt.split("|")
+    first, last, number = parts[2], parts[3], int(parts[4])
+    insertion_code = None
+    if len(parts) >= 8 and parts[7] != "":
+        insertion_code = parts[7]
+    return Residue(None, ResidueAuth(first, number, insertion_code, last))
+def unify_classification(fr3d_name: str) -> tuple:
+    """
+    Translate an FR3D interaction label into a (category, classification) pair.
+    Args:
+        fr3d_name: Interaction label as it appears in FR3D output
+    Returns:
+        A tuple whose first item is "base-pair", "stacking", "base-ribose",
+        "base-phosphate" or "other", and whose second item is the matching
+        enum member (None for "other")
+    """
+    original_name = fr3d_name  # Untouched copy used in log messages
+    name = fr3d_name
+    # Drop a leading 'n' (e.g., ncWW -> cWW, ns55 -> s55)
+    if name.startswith("n"):
+        name = name[1:]
+        logging.debug(
+            f"Detected 'n' prefix: removed from {original_name} -> {name}"
+        )
+    # Drop the trailing 'a' of alternative base pairs (e.g., cWWa -> cWW)
+    if len(name) >= 3 and name.endswith("a"):
+        name = name[:-1]
+        logging.debug(
+            f"Detected alternative base pair: removed 'a' suffix from {original_name} -> {name}"
+        )
+    starts_with_digit = name[:1].isdigit()
+    # Base-ribose interactions: 0BR, 1BR, ... 9BR
+    if len(name) == 3 and starts_with_digit and name[1:] == "BR":
+        try:
+            return ("base-ribose", BR[f"_{name[0]}"])
+        except (ValueError, KeyError):
+            logging.debug(f"Unknown base-ribose interaction: {original_name}")
+            return ("other", None)
+    # Base-phosphate interactions: 0BPh, 1BPh, ... 9BPh
+    if len(name) == 4 and starts_with_digit and name[1:] == "BPh":
+        try:
+            return ("base-phosphate", BPh[f"_{name[0]}"])
+        except (ValueError, KeyError):
+            logging.debug(f"Unknown base-phosphate interaction: {original_name}")
+            return ("other", None)
+    # Stacking labels from the direct FR3D service
+    if name in ("s33", "s55", "s35", "s53"):
+        stacking_by_label = {
+            "s33": StackingTopology.downward,
+            "s55": StackingTopology.upward,
+            "s35": StackingTopology.outward,
+            "s53": StackingTopology.inward,
+        }
+        return ("stacking", stacking_by_label[name])
+    # cWW-style base pair labels; edge letters are accepted in either case
+    # (e.g., cWW, cww, tHS, ths, tSs)
+    if len(name) == 3 and name[0].lower() in ("c", "t"):
+        lw_name = f"{name[0].lower()}{name[1].upper()}{name[2].upper()}"
+        try:
+            return ("base-pair", LeontisWesthof[lw_name])
+        except KeyError:
+            logging.debug(
+                f"Fr3d unknown interaction from service: {original_name} -> {name}"
+            )
+            return ("other", None)
+    # Anything else is reported as an unclassified interaction
+    logging.debug(f"Fr3d unknown interaction: {name}")
+    return ("other", None)
+def _process_interaction_line(
+    line: str,
+    interactions_data: Dict[str, list],
+):
+    """
+    Parse one FR3D interaction line and store the result in interactions_data.
+    Args:
+        line: Tab-separated line whose first three fields are the first unit ID,
+              the interaction label and the second unit ID
+        interactions_data: Dictionary of interaction lists, updated in place
+    Returns:
+        True if the line was parsed, False if it was malformed
+    """
+    fields = line.split("\t")
+    if len(fields) < 3:
+        logging.warning(f"Invalid interaction line format: {line}")
+        return False
+    first_id, label, second_id = fields[:3]
+    try:
+        first = parse_unit_id(first_id)
+        second = parse_unit_id(second_id)
+        # Convert the FR3D label to the internal category and classification
+        category, classification = unify_classification(label)
+        # Each category maps to its target list and a factory for the entry
+        targets = {
+            "base-pair": (
+                "base_pairs",
+                lambda: BasePair(first, second, classification, None),
+            ),
+            "stacking": (
+                "stackings",
+                lambda: Stacking(first, second, classification),
+            ),
+            "base-ribose": (
+                "base_ribose_interactions",
+                lambda: BaseRibose(first, second, classification),
+            ),
+            "base-phosphate": (
+                "base_phosphate_interactions",
+                lambda: BasePhosphate(first, second, classification),
+            ),
+            "other": (
+                "other_interactions",
+                lambda: OtherInteraction(first, second),
+            ),
+        }
+        if category in targets:
+            key, build = targets[category]
+            interactions_data[key].append(build())
+    except (ValueError, IndexError) as e:
+        logging.warning(f"Error parsing interaction: {e}")
+        return False
+    return True
+def match_dssr_name_to_residue(
+    structure3d: Structure3D, nt_id: Optional[str]
+) -> Optional[Residue]:
+    """
+    Find the residue of structure3d that a DSSR nucleotide ID refers to.
+    Only the part of nt_id after the last ":" is compared, and it must equal
+    the residue's full_name exactly.
+    Args:
+        structure3d: The 3D structure whose residues are searched
+        nt_id: DSSR nucleotide ID, or None
+    Returns:
+        The first matching residue, or None if nt_id is None or nothing matches
+        (the latter case is logged as a warning)
+    """
+    if nt_id is None:
+        return None
+    name = nt_id.split(":")[-1]
+    found = next(
+        (residue for residue in structure3d.residues if residue.full_name == name),
+        None,
+    )
+    if found is None:
+        logging.warning(f"Failed to find residue {name}")
+    return found
+def match_dssr_lw(lw: Optional[str]) -> Optional[LeontisWesthof]:
+    """
+    Look up a LeontisWesthof member by its name.
+    Args:
+        lw: Member name as reported by DSSR, or None
+    Returns:
+        The matching LeontisWesthof member, or None if lw is not a member name
+    """
+    if not isinstance(lw, str):
+        return None
+    # __members__ contains member names only; dir() also lists attributes such
+    # as "__doc__", for which the subscript lookup would raise KeyError
+    return LeontisWesthof.__members__.get(lw)
+def parse_dssr_output(
+    file_path: str, structure3d: Structure3D, model: Optional[int] = None
+) -> BaseInteractions:
+    """
+    Parse DSSR JSON output and convert to BaseInteractions.
+    Args:
+        file_path: Path to DSSR JSON output file
+        structure3d: The 3D structure parsed from PDB/mmCIF
+        model: Model number to use (if None, use first model)
+    Returns:
+        BaseInteractions object containing the base pairs and stackings found
+        by DSSR; the remaining interaction lists are empty
+    """
+    base_pairs: List[BasePair] = []
+    stackings: List[Stacking] = []
+    # orjson parses bytes directly, so read in binary mode instead of decoding
+    # the file with a locale-dependent text encoding first
+    with open(file_path, "rb") as f:
+        dssr = orjson.loads(f.read())
+    # Handle multi-model files
+    if "models" in dssr:
+        models = dssr.get("models") or []
+        if model is None and models:
+            # If model is None, use the first model
+            dssr = models[0].get("parameters", {})
+        elif model is not None:
+            # Otherwise find the specified model
+            for result in models:
+                if result.get("model", None) == model:
+                    dssr = result.get("parameters", {})
+                    break
+            else:
+                # No entry matched: the top-level document is used as before,
+                # but the caller is told that the request was not honoured
+                logging.warning(f"Model {model} not found in DSSR output")
+    for pair in dssr.get("pairs", []):
+        nt1 = match_dssr_name_to_residue(structure3d, pair.get("nt1", None))
+        nt2 = match_dssr_name_to_residue(structure3d, pair.get("nt2", None))
+        lw = match_dssr_lw(pair.get("LW", None))
+        if nt1 is not None and nt2 is not None and lw is not None:
+            base_pairs.append(BasePair(nt1, nt2, lw, None))
+    for stack in dssr.get("stacks", []):
+        nts_long = stack.get("nts_long", "")
+        # "".split(",") yields [""], which would only produce a spurious
+        # "Failed to find residue" warning
+        names = nts_long.split(",") if nts_long else []
+        nts = [match_dssr_name_to_residue(structure3d, nt) for nt in names]
+        # Consecutive residues in a stack are reported as pairwise stackings
+        for nt1, nt2 in zip(nts, nts[1:]):
+            if nt1 is not None and nt2 is not None:
+                stackings.append(Stacking(nt1, nt2, None))
+    return BaseInteractions(base_pairs, stackings, [], [], [])
+def parse_external_output(
+    file_path: str, tool: ExternalTool, structure3d: Structure3D
+) -> BaseInteractions:
+    """
+    Dispatch parsing of an external tool's output file to the matching parser.
+    Args:
+        file_path: Path to the external tool output file
+        tool: The external tool that generated the output
+        structure3d: The 3D structure parsed from PDB/mmCIF (used by the DSSR
+                     parser only)
+    Returns:
+        BaseInteractions object containing the interactions found by the tool
+    Raises:
+        ValueError: If tool is neither FR3D nor DSSR
+    """
+    if tool == ExternalTool.DSSR:
+        return parse_dssr_output(file_path, structure3d)
+    if tool == ExternalTool.FR3D:
+        return parse_fr3d_output(file_path)
+    raise ValueError(f"Unsupported external tool: {tool}")
+def parse_fr3d_output(file_path: str) -> BaseInteractions:
+    """
+    Read a concatenated FR3D output file and collect its interactions.
+    Blank lines and lines starting with "#" are skipped; every other line is
+    handed to _process_interaction_line().
+    Args:
+        file_path: Path to a concatenated FR3D output file containing basepair,
+                  stacking, and backbone interactions
+    Returns:
+        BaseInteractions object containing the interactions found by FR3D
+    """
+    # Keys are listed in the positional order expected by BaseInteractions
+    keys = (
+        "base_pairs",
+        "stackings",
+        "base_ribose_interactions",
+        "base_phosphate_interactions",
+        "other_interactions",
+    )
+    interactions_data = {key: [] for key in keys}
+    with open(file_path, "r") as f:
+        for raw_line in f:
+            line = raw_line.strip()
+            if line and not line.startswith("#"):
+                _process_interaction_line(line, interactions_data)
+    return BaseInteractions(*(interactions_data[key] for key in keys))
+def process_external_tool_output(
+    structure3d: Structure3D,
+    external_file_path: str,
+    tool: ExternalTool,
+    model: Optional[int] = None,
+    find_gaps: bool = False,
+    all_dot_brackets: bool = False,
+) -> Tuple[Structure2D, List[str]]:
+    """
+    Process external tool output and create a secondary structure representation.
+    This function can be used from other code to process external tool outputs
+    and get a Structure2D object with the secondary structure information.
+    Args:
+        structure3d: The 3D structure parsed from PDB/mmCIF
+        external_file_path: Path to the external tool output file
+        tool: The external tool that generated the output (FR3D, DSSR, etc.)
+        model: Model number to use (if None, use first model); it selects the
+               model within multi-model DSSR output
+        find_gaps: Whether to detect gaps in the structure
+        all_dot_brackets: Whether to return all possible dot-bracket notations
+    Returns:
+        A tuple containing the Structure2D object and a list of dot-bracket notations
+    """
+    if tool == ExternalTool.DSSR:
+        # parse_external_output() takes no model argument, so call the DSSR
+        # parser directly; otherwise the requested model is silently ignored
+        # and the first model is always used
+        base_interactions = parse_dssr_output(external_file_path, structure3d, model)
+    else:
+        base_interactions = parse_external_output(
+            external_file_path, tool, structure3d
+        )
+    # Extract secondary structure using the external tool's interactions
+    return extract_secondary_structure_from_external(
+        structure3d, base_interactions, model, find_gaps, all_dot_brackets
+    )
+def extract_secondary_structure_from_external(
+    tertiary_structure: Structure3D,
+    base_interactions: BaseInteractions,
+    model: Optional[int] = None,
+    find_gaps: bool = False,
+    all_dot_brackets: bool = False,
+) -> Tuple[Structure2D, List[str]]:
+    """
+    Build a Structure2D from a 3D structure and externally computed interactions.
+    Args:
+        tertiary_structure: The 3D structure parsed from PDB/mmCIF
+        base_interactions: Interactions parsed from external tool output
+        model: Accepted for interface compatibility; not used by this function
+        find_gaps: Whether to detect gaps in the structure
+        all_dot_brackets: Whether to return all possible dot-bracket notations
+    Returns:
+        A tuple of the Structure2D object and a list of dot-bracket notations
+        (a single-element list unless all_dot_brackets is set)
+    """
+    mapping = Mapping2D3D(
+        tertiary_structure,
+        base_interactions.basePairs,
+        base_interactions.stackings,
+        find_gaps,
+    )
+    elements = mapping.bpseq.elements
+    stems, single_strands, hairpins, loops = elements
+    structure2d = Structure2D(
+        base_interactions,
+        str(mapping.bpseq),
+        mapping.dot_bracket,
+        mapping.extended_dot_bracket,
+        stems,
+        single_strands,
+        hairpins,
+        loops,
+    )
+    dot_brackets = (
+        mapping.all_dot_brackets if all_dot_brackets else [structure2d.dotBracket]
+    )
+    return structure2d, dot_brackets
+def write_json(path: str, structure2d: BaseInteractions):
+    """
+    Serialize structure2d with orjson and write it to path.
+    Args:
+        path: Path to the output JSON file
+        structure2d: Object to serialize (main() in this file passes the
+                     Structure2D it computed, despite the annotation)
+    """
+    # Serialize before opening the file, so a serialization error does not
+    # leave an empty or truncated output file behind
+    payload = orjson.dumps(structure2d)
+    with open(path, "wb") as f:
+        f.write(payload)
+def write_csv(path: str, structure2d: Structure2D):
+    """
+    Write every interaction of structure2d to a CSV file.
+    Rows are written in the order: base pairs, stackings, base-phosphate
+    interactions, base-ribose interactions, other interactions.
+    Args:
+        path: Path to the output CSV file
+        structure2d: Secondary structure whose baseInteractions are exported
+    """
+    def value_or_empty(member):
+        # A missing classification is written as an empty cell
+        return member.value if member is not None else ""
+    interactions = structure2d.baseInteractions
+    # The csv module requires newline=""; without it the writer's "\r\n" line
+    # endings are translated again, which produces blank rows on Windows
+    with open(path, "w", newline="") as f:
+        writer = csv.writer(f)
+        writer.writerow(["nt1", "nt2", "type", "classification-1", "classification-2"])
+        for base_pair in interactions.basePairs:
+            writer.writerow(
+                [
+                    base_pair.nt1.full_name,
+                    base_pair.nt2.full_name,
+                    "base pair",
+                    base_pair.lw.value,
+                    value_or_empty(base_pair.saenger) or "",
+                ]
+            )
+        for stacking in interactions.stackings:
+            writer.writerow(
+                [
+                    stacking.nt1.full_name,
+                    stacking.nt2.full_name,
+                    "stacking",
+                    value_or_empty(stacking.topology),
+                    "",
+                ]
+            )
+        for base_phosphate in interactions.basePhosphateInteractions:
+            writer.writerow(
+                [
+                    base_phosphate.nt1.full_name,
+                    base_phosphate.nt2.full_name,
+                    "base-phosphate interaction",
+                    value_or_empty(base_phosphate.bph),
+                    "",
+                ]
+            )
+        for base_ribose in interactions.baseRiboseInteractions:
+            writer.writerow(
+                [
+                    base_ribose.nt1.full_name,
+                    base_ribose.nt2.full_name,
+                    "base-ribose interaction",
+                    value_or_empty(base_ribose.br),
+                    "",
+                ]
+            )
+        for other in interactions.otherInteractions:
+            writer.writerow(
+                [other.nt1.full_name, other.nt2.full_name, "other interaction", "", ""]
+            )
+def write_bpseq(path: str, bpseq: BpSeq):
+    """Write the string form of bpseq to the text file at path."""
+    content = str(bpseq)
+    with open(path, "w") as output:
+        output.write(content)
+def main():
+    """
+    Command-line entry point of the adapter.
+    Reads a PDB/mmCIF file and the output of an external tool, derives the
+    secondary structure, writes the optional CSV/JSON/BPSEQ/DOT files and
+    prints a dot-bracket representation to the standard output.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("input", help="Path to PDB or mmCIF file")
+    parser.add_argument(
+        "--external",
+        required=True,
+        help="Path to external tool output file (FR3D, DSSR, etc.)",
+    )
+    parser.add_argument(
+        "--tool",
+        choices=[t.value for t in ExternalTool],
+        required=True,
+        help="External tool that generated the output file",
+    )
+    parser.add_argument(
+        "-a",
+        "--all-dot-brackets",
+        action="store_true",
+        help="(optional) print all dot-brackets, not only optimal one (exclusive with -e/--extended)",
+    )
+    parser.add_argument("-b", "--bpseq", help="(optional) path to output BPSEQ file")
+    parser.add_argument("-c", "--csv", help="(optional) path to output CSV file")
+    parser.add_argument(
+        "-j",
+        "--json",
+        help="(optional) path to output JSON file",
+    )
+    parser.add_argument(
+        "-e",
+        "--extended",
+        action="store_true",
+        help="(optional) if set, the program will print extended secondary structure to the standard output",
+    )
+    parser.add_argument(
+        "-f",
+        "--find-gaps",
+        action="store_true",
+        help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands",
+    )
+    parser.add_argument("-d", "--dot", help="(optional) path to output DOT file")
+    args = parser.parse_args()
+    file = handle_input_file(args.input)
+    structure3d = read_3d_structure(file, None)
+    # Process external tool output and get secondary structure
+    structure2d, dot_brackets = process_external_tool_output(
+        structure3d,
+        args.external,
+        ExternalTool(args.tool),
+        None,
+        args.find_gaps,
+        args.all_dot_brackets,
+    )
+    if args.csv:
+        write_csv(args.csv, structure2d)
+    if args.json:
+        write_json(args.json, structure2d)
+    if args.bpseq:
+        write_bpseq(args.bpseq, structure2d.bpseq)
+    # -e takes precedence over -a when both are given
+    if args.extended:
+        print(structure2d.extendedDotBracket)
+    elif args.all_dot_brackets:
+        for dot_bracket in dot_brackets:
+            print(dot_bracket)
+    else:
+        print(structure2d.dotBracket)
+    if args.dot:
+        # -d/--dot is documented as an output path, so write the graph there
+        # instead of printing it to the standard output and ignoring the path
+        with open(args.dot, "w") as f:
+            print(BpSeq.from_string(structure2d.bpseq).graphviz, file=f)
+if __name__ == "__main__":
+    main()

rnapolis/parser_v2.py CHANGED Viewed

@@ -34,9 +34,19 @@ def parse_pdb_atoms(content: Union[str, IO[str]]) -> pd.DataFrame:
         if isinstance(lines[0], bytes):
             lines = [line.decode("utf-8") for line in lines]
+    current_model = 1
     for line in lines:
         record_type = line[:6].strip()
+        # Check for MODEL record
+        if record_type == "MODEL":
+            try:
+                current_model = int(line[10:14].strip())
+            except ValueError:
+                # Handle cases where MODEL record might be malformed
+                pass  # Keep the previous model number
+            continue
         # Only process ATOM and HETATM records
         if record_type not in ["ATOM", "HETATM"]:
             continue
@@ -59,6 +69,7 @@ def parse_pdb_atoms(content: Union[str, IO[str]]) -> pd.DataFrame:
             "tempFactor": line[60:66].strip(),
             "element": line[76:78].strip(),
             "charge": line[78:80].strip(),
+            "model": current_model,  # Add the current model number
         }
         records.append(record)
@@ -83,13 +94,23 @@ def parse_pdb_atoms(content: Union[str, IO[str]]) -> pd.DataFrame:
                 "tempFactor",
                 "element",
                 "charge",
+                "model",
             ]
         )
     df = pd.DataFrame(records)
     # Convert numeric columns to appropriate types
-    numeric_columns = ["serial", "resSeq", "x", "y", "z", "occupancy", "tempFactor"]
+    numeric_columns = [
+        "serial",
+        "resSeq",
+        "x",
+        "y",
+        "z",
+        "occupancy",
+        "tempFactor",
+        "model",
+    ]
     for col in numeric_columns:
         df[col] = pd.to_numeric(df[col], errors="coerce")
@@ -229,8 +250,43 @@ def write_pdb(
     # Get the format of the DataFrame
     format_type = df.attrs.get("format", "PDB")
+    # Variables to track chain changes for TER records
+    last_chain_id = None
+    last_res_seq = None
+    last_res_name = None
+    last_serial = None
+    last_icode = None
     # Process each row in the DataFrame
-    for _, row in df.iterrows():
+    for index, row in df.iterrows():
+        # Get current chain ID
+        if format_type == "PDB":
+            current_chain_id = row["chainID"]
+        else:  # mmCIF
+            current_chain_id = row.get("auth_asym_id", row.get("label_asym_id", ""))
+        # Write TER record if chain changes
+        if last_chain_id is not None and current_chain_id != last_chain_id:
+            # Format TER record according to PDB specification
+            # Columns:
+            # 1-6: "TER   "
+            # 7-11: Serial number (right-justified)
+            # 18-20: Residue name (right-justified)
+            # 22: Chain ID
+            # 23-26: Residue sequence number (right-justified)
+            # 27: Insertion code
+            ter_serial = str(last_serial + 1).rjust(5)
+            ter_res_name = last_res_name.strip().ljust(3)  # Strip and left-justify
+            ter_chain_id = last_chain_id
+            ter_res_seq = last_res_seq.rjust(4)
+            ter_icode = last_icode if last_icode else ""  # Use last recorded iCode
+            # Construct the TER line ensuring correct spacing for all fields
+            # TER (1-6), serial (7-11), space (12-17), resName (18-20), space (21),
+            # chainID (22), resSeq (23-26), iCode (27)
+            ter_line = f"TER   {ter_serial}      {ter_res_name} {ter_chain_id}{ter_res_seq}{ter_icode}"
+            buffer.write(ter_line.ljust(80) + "\n")
         # Initialize the line with spaces
         line = " " * 80
@@ -361,6 +417,37 @@ def write_pdb(
         # Write the line to the buffer
         buffer.write(line.rstrip() + "\n")
+        # Update last atom info for potential TER record
+        if format_type == "PDB":
+            last_serial = int(row["serial"])
+            last_res_name = row["resName"]
+            last_chain_id = row["chainID"]
+            last_res_seq = str(int(row["resSeq"]))
+            last_icode = row["iCode"] if pd.notna(row["iCode"]) else ""
+        else:  # mmCIF
+            last_serial = int(row["id"])
+            last_res_name = row.get("auth_comp_id", row.get("label_comp_id", ""))
+            last_chain_id = row.get("auth_asym_id", row.get("label_asym_id", ""))
+            last_res_seq = str(int(row.get("auth_seq_id", row.get("label_seq_id", 0))))
+            last_icode = (
+                row.get("pdbx_PDB_ins_code", "")
+                if pd.notna(row.get("pdbx_PDB_ins_code", ""))
+                else ""
+            )
+    # Add TER record for the last chain
+    if last_chain_id is not None:
+        # Format TER record according to PDB specification
+        ter_serial = str(last_serial + 1).rjust(5)
+        ter_res_name = last_res_name.strip().ljust(3)  # Strip and left-justify
+        ter_chain_id = last_chain_id
+        ter_res_seq = last_res_seq.rjust(4)
+        ter_icode = last_icode if last_icode else ""  # Use last recorded iCode
+        # Construct the TER line ensuring correct spacing for all fields
+        ter_line = f"TER   {ter_serial}      {ter_res_name} {ter_chain_id}{ter_res_seq}{ter_icode}"
+        buffer.write(ter_line.ljust(80) + "\n")
     # Add END record
     buffer.write("END\n")

rnapolis/splitter.py ADDED Viewed

@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+import argparse
+import os
+import sys
+import pandas as pd
+from rnapolis.parser import is_cif
+from rnapolis.parser_v2 import parse_cif_atoms, parse_pdb_atoms, write_cif, write_pdb
+def main():
+    """
+    Split a multi-model PDB or mmCIF file into one output file per model.
+    Exits with status 1 when the input file is missing, cannot be parsed, lacks
+    the model column, the requested format is invalid, or at least one model
+    could not be written; exits with status 0 when the input has no atoms.
+    """
+    parser = argparse.ArgumentParser(
+        description="Split a multi-model PDB or mmCIF file into separate files per model."
+    )
+    parser.add_argument("--output", "-o", help="Output directory", required=True)
+    parser.add_argument(
+        "--format",
+        "-f",
+        help="Output format (possible values: PDB, mmCIF, keep. Default: keep)",
+        default="keep",
+    )
+    parser.add_argument("file", help="Input PDB or mmCIF file to split")
+    args = parser.parse_args()
+    # Check if input file exists
+    if not os.path.exists(args.file):
+        print(f"Error: Input file not found: {args.file}", file=sys.stderr)
+        sys.exit(1)
+    # Read and parse the input file
+    input_format = "mmCIF"
+    try:
+        with open(args.file) as f:
+            if is_cif(f):
+                atoms_df = parse_cif_atoms(f)
+                model_column = "pdbx_PDB_model_num"
+            else:
+                atoms_df = parse_pdb_atoms(f)
+                input_format = "PDB"
+                model_column = "model"
+    except Exception as e:
+        print(f"Error parsing file {args.file}: {e}", file=sys.stderr)
+        sys.exit(1)
+    if atoms_df.empty:
+        print(f"Warning: No atoms found in {args.file}", file=sys.stderr)
+        sys.exit(0)
+    # Check if model column exists
+    if model_column not in atoms_df.columns:
+        print(
+            f"Error: Model column '{model_column}' not found in the parsed data from {args.file}.",
+            file=sys.stderr,
+        )
+        print(
+            "This might indicate an issue with the input file or the parser.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+    # Determine output format (the option is matched case-insensitively)
+    output_format = args.format.upper()
+    if output_format == "KEEP":
+        output_format = input_format
+    elif output_format not in ["PDB", "MMCIF"]:
+        print(
+            f"Error: Invalid output format '{args.format}'. Choose PDB, mmCIF, or keep.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+    # Ensure output directory exists
+    os.makedirs(args.output, exist_ok=True)
+    # Get base name and extension for output files
+    base_name = os.path.splitext(os.path.basename(args.file))[0]
+    ext = ".pdb" if output_format == "PDB" else ".cif"
+    # Models that could not be written; reported once all models were tried
+    failed_models = []
+    # Write each model to a separate file
+    for model_num, model_df in atoms_df.groupby(model_column):
+        # Ensure model_df is a DataFrame copy to avoid SettingWithCopyWarning
+        model_df = model_df.copy()
+        # Set the correct format attribute for the writer function
+        model_df.attrs["format"] = input_format
+        output_path = os.path.join(args.output, f"{base_name}_model_{model_num}{ext}")
+        print(f"Writing model {model_num} to {output_path}...")
+        try:
+            if output_format == "PDB":
+                write_pdb(model_df, output_path)
+            else:  # mmCIF
+                write_cif(model_df, output_path)
+        except Exception as e:
+            print(
+                f"Error writing file {output_path}: {e}",
+                file=sys.stderr,
+            )
+            # Keep going so the remaining models are still written
+            failed_models.append(model_num)
+    if failed_models:
+        # Do not report success (or exit with status 0) after failed writes
+        print(
+            f"Error: Failed to write {len(failed_models)} model(s): "
+            + ", ".join(str(model_num) for model_num in failed_models),
+            file=sys.stderr,
+        )
+        sys.exit(1)
+    print("Splitting complete.")
+if __name__ == "__main__":
+    main()

{rnapolis-0.6.2.dist-info → rnapolis-0.8.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: RNApolis
-Version: 0.6.2
+Version: 0.8.0
 Summary: A Python library containing RNA-related bioinformatics functions and classes
 Home-page: https://github.com/tzok/rnapolis-py
 Author: Tomasz Zok

{rnapolis-0.6.2.dist-info → rnapolis-0.8.0.dist-info}/RECORD RENAMED Viewed

@@ -1,3 +1,4 @@
+rnapolis/adapter.py,sha256=n7f5e8dbP-grJI7L9GycYAbMjpMvTuUM5aXiiCqG91k,18239
 rnapolis/aligner.py,sha256=o7rQyjAZ3n4VXcnSPY3HVB8nLNRkVbl552O3NVh0mfg,3429
 rnapolis/annotator.py,sha256=hRRzRmneYxbg2tvwVHMWLfzmJb4szV0JL_6EOC09Gwg,22101
 rnapolis/clashfinder.py,sha256=AC9_tIx7QIk57sELq_aKfU1u3UMOXbgcccQeGHhMR6c,8517
@@ -11,16 +12,17 @@ rnapolis/mmcif_pdbx_v50.dic,sha256=5QFx1ssDaehR4_DQ-tS9VQux262SiLXaqcwmwwejF5c,5
 rnapolis/molecule_filter.py,sha256=jgcpJxx_oXEBX0d30v4k_FdwRouRUPUsEtCYWgLGpD4,7310
 rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
 rnapolis/parser.py,sha256=3g4mtFvpiEENFcSBBtx_E_x1vJPF9BujWnts0kb9XjE,16340
-rnapolis/parser_v2.py,sha256=ltesVKBiIKk9JlM02ttTJzLm1g5MHdPzDgQTcl40GP8,16257
+rnapolis/parser_v2.py,sha256=eUccbTXCD5I7q0GVbaGWmjj0CT5d2VK8x9tr0gtrRuA,19801
 rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
+rnapolis/splitter.py,sha256=8mMZ2ZmhqptPUjmkDOFbLvC-dvWpuvJ0beSoeaD5pzk,3642
 rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
 rnapolis/tertiary_v2.py,sha256=I1uyHWIUePNGO5m-suoL4ibtz02qAJUMvYm0BUKUygY,22480
 rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
 rnapolis/unifier.py,sha256=DR1_IllgaAYT9_FUE6XC9B-2wgqbBHs2D1MjyZT2j2g,5438
 rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
-rnapolis-0.6.2.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
-rnapolis-0.6.2.dist-info/METADATA,sha256=2epFKLVBOoNmJHGZSSSF4bNEdOq2eB_KpWKmannB7rY,54537
-rnapolis-0.6.2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-rnapolis-0.6.2.dist-info/entry_points.txt,sha256=kS_Ji3_6UaomxkOaYpGHh4aZKaIh9CAfzoexbaS3y50,372
-rnapolis-0.6.2.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
-rnapolis-0.6.2.dist-info/RECORD,,
+rnapolis-0.8.0.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
+rnapolis-0.8.0.dist-info/METADATA,sha256=zD_byFTP6xNdYCQdu5bslqSE_noBjSagzhn2EOSlcYE,54537
+rnapolis-0.8.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+rnapolis-0.8.0.dist-info/entry_points.txt,sha256=H00KoN54wU3dFOofAu3H_3PADmZOBTB1hXf5TUU2uzo,438
+rnapolis-0.8.0.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
+rnapolis-0.8.0.dist-info/RECORD,,

{rnapolis-0.6.2.dist-info → rnapolis-0.8.0.dist-info}/entry_points.txt RENAMED Viewed

@@ -1,4 +1,5 @@
 [console_scripts]
+adapter = rnapolis.adapter:main
 aligner = rnapolis.aligner:main
 annotator = rnapolis.annotator:main
 clashfinder = rnapolis.clashfinder:main
@@ -6,5 +7,6 @@ metareader = rnapolis.metareader:main
 molecule-filter = rnapolis.molecule_filter:main
 motif-extractor = rnapolis.motif_extractor:main
 rfam-folder = rnapolis.rfam_folder:main
+splitter = rnapolis.splitter:main
 transformer = rnapolis.transformer:main
 unifier = rnapolis.unifier:main

{rnapolis-0.6.2.dist-info → rnapolis-0.8.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{rnapolis-0.6.2.dist-info → rnapolis-0.8.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{rnapolis-0.6.2.dist-info → rnapolis-0.8.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

RNApolis 0.6.2__py3-none-any.whl → 0.8.0__py3-none-any.whl

RNApolis 0.6.2__py3-none-any.whl → 0.8.0__py3-none-any.whl