PyPI - RNApolis - Versions diffs - 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

RNApolis 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

rnapolis/parser_v2.py CHANGED Viewed

@@ -34,9 +34,19 @@ def parse_pdb_atoms(content: Union[str, IO[str]]) -> pd.DataFrame:
         if isinstance(lines[0], bytes):
             lines = [line.decode("utf-8") for line in lines]
+    current_model = 1
     for line in lines:
         record_type = line[:6].strip()
+        # Check for MODEL record
+        if record_type == "MODEL":
+            try:
+                current_model = int(line[10:14].strip())
+            except ValueError:
+                # Handle cases where MODEL record might be malformed
+                pass  # Keep the previous model number
+            continue
         # Only process ATOM and HETATM records
         if record_type not in ["ATOM", "HETATM"]:
             continue
@@ -59,6 +69,7 @@ def parse_pdb_atoms(content: Union[str, IO[str]]) -> pd.DataFrame:
             "tempFactor": line[60:66].strip(),
             "element": line[76:78].strip(),
             "charge": line[78:80].strip(),
+            "model": current_model,  # Add the current model number
         }
         records.append(record)
@@ -83,13 +94,23 @@ def parse_pdb_atoms(content: Union[str, IO[str]]) -> pd.DataFrame:
                 "tempFactor",
                 "element",
                 "charge",
+                "model",
             ]
         )
     df = pd.DataFrame(records)
     # Convert numeric columns to appropriate types
-    numeric_columns = ["serial", "resSeq", "x", "y", "z", "occupancy", "tempFactor"]
+    numeric_columns = [
+        "serial",
+        "resSeq",
+        "x",
+        "y",
+        "z",
+        "occupancy",
+        "tempFactor",
+        "model",
+    ]
     for col in numeric_columns:
         df[col] = pd.to_numeric(df[col], errors="coerce")
@@ -229,8 +250,43 @@ def write_pdb(
     # Get the format of the DataFrame
     format_type = df.attrs.get("format", "PDB")
+    # Variables to track chain changes for TER records
+    last_chain_id = None
+    last_res_seq = None
+    last_res_name = None
+    last_serial = None
+    last_icode = None
     # Process each row in the DataFrame
-    for _, row in df.iterrows():
+    for index, row in df.iterrows():
+        # Get current chain ID
+        if format_type == "PDB":
+            current_chain_id = row["chainID"]
+        else:  # mmCIF
+            current_chain_id = row.get("auth_asym_id", row.get("label_asym_id", ""))
+        # Write TER record if chain changes
+        if last_chain_id is not None and current_chain_id != last_chain_id:
+            # Format TER record according to PDB specification
+            # Columns:
+            # 1-6: "TER   "
+            # 7-11: Serial number (right-justified)
+            # 18-20: Residue name (right-justified)
+            # 22: Chain ID
+            # 23-26: Residue sequence number (right-justified)
+            # 27: Insertion code
+            ter_serial = str(last_serial + 1).rjust(5)
+            ter_res_name = last_res_name.strip().ljust(3)  # Strip and left-justify
+            ter_chain_id = last_chain_id
+            ter_res_seq = last_res_seq.rjust(4)
+            ter_icode = last_icode if last_icode else ""  # Use last recorded iCode
+            # Construct the TER line ensuring correct spacing for all fields
+            # TER (1-6), serial (7-11), space (12-17), resName (18-20), space (21),
+            # chainID (22), resSeq (23-26), iCode (27)
+            ter_line = f"TER   {ter_serial}      {ter_res_name} {ter_chain_id}{ter_res_seq}{ter_icode}"
+            buffer.write(ter_line.ljust(80) + "\n")
         # Initialize the line with spaces
         line = " " * 80
@@ -361,6 +417,37 @@ def write_pdb(
         # Write the line to the buffer
         buffer.write(line.rstrip() + "\n")
+        # Update last atom info for potential TER record
+        if format_type == "PDB":
+            last_serial = int(row["serial"])
+            last_res_name = row["resName"]
+            last_chain_id = row["chainID"]
+            last_res_seq = str(int(row["resSeq"]))
+            last_icode = row["iCode"] if pd.notna(row["iCode"]) else ""
+        else:  # mmCIF
+            last_serial = int(row["id"])
+            last_res_name = row.get("auth_comp_id", row.get("label_comp_id", ""))
+            last_chain_id = row.get("auth_asym_id", row.get("label_asym_id", ""))
+            last_res_seq = str(int(row.get("auth_seq_id", row.get("label_seq_id", 0))))
+            last_icode = (
+                row.get("pdbx_PDB_ins_code", "")
+                if pd.notna(row.get("pdbx_PDB_ins_code", ""))
+                else ""
+            )
+    # Add TER record for the last chain
+    if last_chain_id is not None:
+        # Format TER record according to PDB specification
+        ter_serial = str(last_serial + 1).rjust(5)
+        ter_res_name = last_res_name.strip().ljust(3)  # Strip and left-justify
+        ter_chain_id = last_chain_id
+        ter_res_seq = last_res_seq.rjust(4)
+        ter_icode = last_icode if last_icode else ""  # Use last recorded iCode
+        # Construct the TER line ensuring correct spacing for all fields
+        ter_line = f"TER   {ter_serial}      {ter_res_name} {ter_chain_id}{ter_res_seq}{ter_icode}"
+        buffer.write(ter_line.ljust(80) + "\n")
     # Add END record
     buffer.write("END\n")

rnapolis/splitter.py ADDED Viewed

@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+import argparse
+import os
+import sys
+import pandas as pd
+from rnapolis.parser import is_cif
+from rnapolis.parser_v2 import parse_cif_atoms, parse_pdb_atoms, write_cif, write_pdb
+def main():
+    """Main function to run the splitter tool."""
+    parser = argparse.ArgumentParser(
+        description="Split a multi-model PDB or mmCIF file into separate files per model."
+    )
+    parser.add_argument("--output", "-o", help="Output directory", required=True)
+    parser.add_argument(
+        "--format",
+        "-f",
+        help="Output format (possible values: PDB, mmCIF, keep. Default: keep)",
+        default="keep",
+    )
+    parser.add_argument("file", help="Input PDB or mmCIF file to split")
+    args = parser.parse_args()
+    # Check if input file exists
+    if not os.path.exists(args.file):
+        print(f"Error: Input file not found: {args.file}", file=sys.stderr)
+        sys.exit(1)
+    # Read and parse the input file
+    input_format = "mmCIF"
+    try:
+        with open(args.file) as f:
+            if is_cif(f):
+                atoms_df = parse_cif_atoms(f)
+                model_column = "pdbx_PDB_model_num"
+            else:
+                atoms_df = parse_pdb_atoms(f)
+                input_format = "PDB"
+                model_column = "model"
+    except Exception as e:
+        print(f"Error parsing file {args.file}: {e}", file=sys.stderr)
+        sys.exit(1)
+    if atoms_df.empty:
+        print(f"Warning: No atoms found in {args.file}", file=sys.stderr)
+        sys.exit(0)
+    # Check if model column exists
+    if model_column not in atoms_df.columns:
+        print(
+            f"Error: Model column '{model_column}' not found in the parsed data from {args.file}.",
+            file=sys.stderr,
+        )
+        print(
+            "This might indicate an issue with the input file or the parser.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+    # Determine output format
+    output_format = args.format.upper()
+    if output_format == "KEEP":
+        output_format = input_format
+    elif output_format not in ["PDB", "MMCIF"]:
+        print(
+            f"Error: Invalid output format '{args.format}'. Choose PDB, mmCIF, or keep.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+    # Ensure output directory exists
+    os.makedirs(args.output, exist_ok=True)
+    # Group by model number
+    grouped_by_model = atoms_df.groupby(model_column)
+    # Get base name for output files
+    base_name = os.path.splitext(os.path.basename(args.file))[0]
+    # Write each model to a separate file
+    for model_num, model_df in grouped_by_model:
+        # Ensure model_df is a DataFrame copy to avoid SettingWithCopyWarning
+        model_df = model_df.copy()
+        # Set the correct format attribute for the writer function
+        model_df.attrs["format"] = input_format
+        # Construct output filename
+        ext = ".pdb" if output_format == "PDB" else ".cif"
+        output_filename = f"{base_name}_model_{model_num}{ext}"
+        output_path = os.path.join(args.output, output_filename)
+        print(f"Writing model {model_num} to {output_path}...")
+        try:
+            if output_format == "PDB":
+                write_pdb(model_df, output_path)
+            else:  # mmCIF
+                write_cif(model_df, output_path)
+        except Exception as e:
+            print(
+                f"Error writing file {output_path}: {e}",
+                file=sys.stderr,
+            )
+            # Optionally continue to next model or exit
+            # sys.exit(1)
+    print("Splitting complete.")
+if __name__ == "__main__":
+    main()

{rnapolis-0.7.0.dist-info → rnapolis-0.8.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: RNApolis
-Version: 0.7.0
+Version: 0.8.0
 Summary: A Python library containing RNA-related bioinformatics functions and classes
 Home-page: https://github.com/tzok/rnapolis-py
 Author: Tomasz Zok

{rnapolis-0.7.0.dist-info → rnapolis-0.8.0.dist-info}/RECORD RENAMED Viewed

@@ -12,16 +12,17 @@ rnapolis/mmcif_pdbx_v50.dic,sha256=5QFx1ssDaehR4_DQ-tS9VQux262SiLXaqcwmwwejF5c,5
 rnapolis/molecule_filter.py,sha256=jgcpJxx_oXEBX0d30v4k_FdwRouRUPUsEtCYWgLGpD4,7310
 rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
 rnapolis/parser.py,sha256=3g4mtFvpiEENFcSBBtx_E_x1vJPF9BujWnts0kb9XjE,16340
-rnapolis/parser_v2.py,sha256=ltesVKBiIKk9JlM02ttTJzLm1g5MHdPzDgQTcl40GP8,16257
+rnapolis/parser_v2.py,sha256=eUccbTXCD5I7q0GVbaGWmjj0CT5d2VK8x9tr0gtrRuA,19801
 rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
+rnapolis/splitter.py,sha256=8mMZ2ZmhqptPUjmkDOFbLvC-dvWpuvJ0beSoeaD5pzk,3642
 rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
 rnapolis/tertiary_v2.py,sha256=I1uyHWIUePNGO5m-suoL4ibtz02qAJUMvYm0BUKUygY,22480
 rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
 rnapolis/unifier.py,sha256=DR1_IllgaAYT9_FUE6XC9B-2wgqbBHs2D1MjyZT2j2g,5438
 rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
-rnapolis-0.7.0.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
-rnapolis-0.7.0.dist-info/METADATA,sha256=Rrnbq7pKHvcPKcHPp9nWjAZZ6x8PflF2WaJCTxaRgbo,54537
-rnapolis-0.7.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-rnapolis-0.7.0.dist-info/entry_points.txt,sha256=D630mec6slaw_QMmzDNeBIy7p0pTtuOGnu8xTjmx8VA,404
-rnapolis-0.7.0.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
-rnapolis-0.7.0.dist-info/RECORD,,
+rnapolis-0.8.0.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
+rnapolis-0.8.0.dist-info/METADATA,sha256=zD_byFTP6xNdYCQdu5bslqSE_noBjSagzhn2EOSlcYE,54537
+rnapolis-0.8.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+rnapolis-0.8.0.dist-info/entry_points.txt,sha256=H00KoN54wU3dFOofAu3H_3PADmZOBTB1hXf5TUU2uzo,438
+rnapolis-0.8.0.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
+rnapolis-0.8.0.dist-info/RECORD,,

{rnapolis-0.7.0.dist-info → rnapolis-0.8.0.dist-info}/entry_points.txt RENAMED Viewed

@@ -7,5 +7,6 @@ metareader = rnapolis.metareader:main
 molecule-filter = rnapolis.molecule_filter:main
 motif-extractor = rnapolis.motif_extractor:main
 rfam-folder = rnapolis.rfam_folder:main
+splitter = rnapolis.splitter:main
 transformer = rnapolis.transformer:main
 unifier = rnapolis.unifier:main

{rnapolis-0.7.0.dist-info → rnapolis-0.8.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{rnapolis-0.7.0.dist-info → rnapolis-0.8.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{rnapolis-0.7.0.dist-info → rnapolis-0.8.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

RNApolis 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

RNApolis 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl