RNApolis 0.8.0-py3-none-any.whl → 0.8.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rnapolis/parser_v2.py +774 -240
- rnapolis/splitter.py +18 -5
- rnapolis/unifier.py +20 -5
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.1.dist-info}/METADATA +1 -1
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.1.dist-info}/RECORD +9 -9
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.1.dist-info}/WHEEL +0 -0
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.1.dist-info}/entry_points.txt +0 -0
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.1.dist-info}/licenses/LICENSE +0 -0
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.1.dist-info}/top_level.txt +0 -0
rnapolis/parser_v2.py
CHANGED
@@ -1,4 +1,6 @@
 import io
+import os
+import string
 import tempfile
 from typing import IO, TextIO, Union
 
@@ -52,23 +54,27 @@ def parse_pdb_atoms(content: Union[str, IO[str]]) -> pd.DataFrame:
             continue
 
         # Parse fields according to PDB format specification
+        alt_loc = line[16:17].strip()
         icode = line[26:27].strip()
+        element = line[76:78].strip()
+        charge = line[78:80].strip()
+
         record = {
             "record_type": record_type,
             "serial": line[6:11].strip(),
             "name": line[12:16].strip(),
-            "altLoc":
+            "altLoc": None if not alt_loc else alt_loc,  # Store None if empty
             "resName": line[17:20].strip(),
             "chainID": line[21:22].strip(),
             "resSeq": line[22:26].strip(),
-            "iCode": None if not icode else icode,  #
+            "iCode": None if not icode else icode,  # Store None if empty
             "x": line[30:38].strip(),
             "y": line[38:46].strip(),
             "z": line[46:54].strip(),
             "occupancy": line[54:60].strip(),
             "tempFactor": line[60:66].strip(),
-            "element":
-            "charge":
+            "element": None if not element else element,  # Store None if empty
+            "charge": None if not charge else charge,  # Store None if empty
             "model": current_model,  # Add the current model number
         }
 
@@ -149,18 +155,37 @@ def parse_cif_atoms(content: Union[str, IO[str]]) -> pd.DataFrame:
     """
     adapter = IoAdapterPy()
 
-    # Handle
+    # Handle string, StringIO, and file-like objects
     if isinstance(content, str):
-        # Create a temporary file
-
-
-
+        # Create a temporary file for string input
+        with tempfile.NamedTemporaryFile(
+            mode="w+", suffix=".cif", delete=False
+        ) as temp_file:
             temp_file.write(content)
-            temp_file.
-
-
-
+            temp_file_path = temp_file.name
+        try:
+            data = adapter.readFile(temp_file_path)
+        finally:
+            os.remove(temp_file_path)  # Clean up the temporary file
+    elif isinstance(content, io.StringIO):
+        # Create a temporary file for StringIO input
+        with tempfile.NamedTemporaryFile(
+            mode="w+", suffix=".cif", delete=False
+        ) as temp_file:
+            content.seek(0)  # Ensure reading from the start
+            temp_file.write(content.read())
+            temp_file_path = temp_file.name
+        try:
+            data = adapter.readFile(temp_file_path)
+        finally:
+            os.remove(temp_file_path)  # Clean up the temporary file
+    elif hasattr(content, "name"):
+        # Assume it's a file-like object with a name attribute (like an open file)
         data = adapter.readFile(content.name)
+    else:
+        raise TypeError(
+            "Unsupported input type for parse_cif_atoms. Expected str, file-like object with name, or StringIO."
+        )
 
     # Get the atom_site category
     category = data[0].getObj("atom_site")
@@ -176,47 +201,133 @@ def parse_cif_atoms(content: Union[str, IO[str]]) -> pd.DataFrame:
     # Create a list of dictionaries for each atom
     records = []
     for row in rows:
-        record =
-
-
-
-
-
-
+        record = {}
+        for attr, value in zip(attributes, row):
+            # Store None if value indicates missing data ('?' or '.')
+            if value in ["?", "."]:
+                record[attr] = None
+            else:
+                record[attr] = value
         records.append(record)
 
     # Create DataFrame from records
     df = pd.DataFrame(records)
 
-    #
-
-    "
-    "
+    # Define columns based on mmCIF specification for atom_site
+    float_cols = [
+        "aniso_B[1][1]",
+        "aniso_B[1][1]_esd",
+        "aniso_B[1][2]",
+        "aniso_B[1][2]_esd",
+        "aniso_B[1][3]",
+        "aniso_B[1][3]_esd",
+        "aniso_B[2][2]",
+        "aniso_B[2][2]_esd",
+        "aniso_B[2][3]",
+        "aniso_B[2][3]_esd",
+        "aniso_B[3][3]",
+        "aniso_B[3][3]_esd",
+        "aniso_ratio",
+        "aniso_U[1][1]",
+        "aniso_U[1][1]_esd",
+        "aniso_U[1][2]",
+        "aniso_U[1][2]_esd",
+        "aniso_U[1][3]",
+        "aniso_U[1][3]_esd",
+        "aniso_U[2][2]",
+        "aniso_U[2][2]_esd",
+        "aniso_U[2][3]",
+        "aniso_U[2][3]_esd",
+        "aniso_U[3][3]",
+        "aniso_U[3][3]_esd",
+        "B_equiv_geom_mean",
+        "B_equiv_geom_mean_esd",
+        "B_iso_or_equiv",
+        "B_iso_or_equiv_esd",
         "Cartn_x",
+        "Cartn_x_esd",
         "Cartn_y",
+        "Cartn_y_esd",
         "Cartn_z",
+        "Cartn_z_esd",
+        "fract_x",
+        "fract_x_esd",
+        "fract_y",
+        "fract_y_esd",
+        "fract_z",
+        "fract_z_esd",
         "occupancy",
-        "
+        "occupancy_esd",
+        "U_equiv_geom_mean",
+        "U_equiv_geom_mean_esd",
+        "U_iso_or_equiv",
+        "U_iso_or_equiv_esd",
+    ]
+    int_cols = [
+        "attached_hydrogens",
+        "label_seq_id",
+        "symmetry_multiplicity",
+        "pdbx_PDB_model_num",
         "pdbx_formal_charge",
+        "pdbx_label_index",
     ]
-
-
-
-
-
-
-
+    category_cols = [
+        "auth_asym_id",
+        "auth_atom_id",
+        "auth_comp_id",
+        "auth_seq_id",
+        "calc_attached_atom",
+        "calc_flag",
+        "disorder_assembly",
+        "disorder_group",
         "group_PDB",
-        "
+        "id",
+        "label_alt_id",
+        "label_asym_id",
         "label_atom_id",
         "label_comp_id",
-        "
-        "
-        "
-        "
+        "label_entity_id",
+        "thermal_displace_type",
+        "type_symbol",
+        "pdbx_atom_ambiguity",
+        "adp_type",
+        "refinement_flags",
+        "refinement_flags_adp",
+        "refinement_flags_occupancy",
+        "refinement_flags_posn",
+        "pdbx_auth_alt_id",
+        "pdbx_PDB_ins_code",
+        "pdbx_PDB_residue_no",
+        "pdbx_PDB_residue_name",
+        "pdbx_PDB_strand_id",
+        "pdbx_PDB_atom_name",
+        "pdbx_auth_atom_name",
+        "pdbx_auth_comp_id",
+        "pdbx_auth_asym_id",
+        "pdbx_auth_seq_id",
+        "pdbx_tls_group_id",
+        "pdbx_ncs_dom_id",
+        "pdbx_group_NDB",
+        "pdbx_atom_group",
+        "pdbx_label_seq_num",
+        "pdbx_not_in_asym",
+        "pdbx_sifts_xref_db_name",
+        "pdbx_sifts_xref_db_acc",
+        "pdbx_sifts_xref_db_num",
+        "pdbx_sifts_xref_db_res",
     ]
 
-
+    # Convert columns to appropriate types
+    for col in float_cols:
+        if col in df.columns:
+            df[col] = pd.to_numeric(df[col], errors="coerce")
+
+    for col in int_cols:
+        if col in df.columns:
+            # Use Int64 (nullable integer) to handle potential NaNs from coercion
+            df[col] = pd.to_numeric(df[col], errors="coerce").astype("Int64")
+
+    for col in category_cols:
         if col in df.columns:
             df[col] = df[col].astype("category")
 
@@ -226,6 +337,473 @@ def parse_cif_atoms(content: Union[str, IO[str]]) -> pd.DataFrame:
     return df
 
 
+def can_write_pdb(df: pd.DataFrame) -> bool:
+    """
+    Check if the DataFrame can be losslessly represented in PDB format.
+
+    PDB format has limitations on field widths:
+    - Atom serial number (id): max 99999
+    - Chain identifier (auth_asym_id): max 1 character
+    - Residue sequence number (auth_seq_id): max 9999
+
+    Parameters:
+    -----------
+    df : pd.DataFrame
+        DataFrame containing atom records, as created by parse_pdb_atoms or parse_cif_atoms.
+
+    Returns:
+    --------
+    bool
+        True if the DataFrame can be written to PDB format without data loss/truncation, False otherwise.
+    """
+    format_type = df.attrs.get("format")
+
+    if format_type == "PDB":
+        # Assume data originally from PDB already fits PDB constraints
+        return True
+
+    if df.empty:
+        # An empty DataFrame can be represented as an empty PDB file
+        return True
+
+    if format_type == "mmCIF":
+        # Check serial number (id)
+        # Convert to numeric first to handle potential categorical type and NaNs
+        if "id" not in df.columns or (
+            pd.to_numeric(df["id"], errors="coerce").max() > 99999
+        ):
+            return False
+
+        # Check chain ID (auth_asym_id) length
+        if "auth_asym_id" not in df.columns or (
+            df["auth_asym_id"].dropna().astype(str).str.len().max() > 1
+        ):
+            return False
+
+        # Check residue sequence number (auth_seq_id)
+        if "auth_seq_id" not in df.columns or (
+            pd.to_numeric(df["auth_seq_id"], errors="coerce").max() > 9999
+        ):
+            return False
+
+        # All checks passed for mmCIF
+        return True
+
+    # If format is unknown or not PDB/mmCIF, assume it cannot be safely written
+    return False
+
+
+def fit_to_pdb(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Attempts to fit the atom data in a DataFrame to comply with PDB format limitations.
+
+    If the data already fits (checked by can_write_pdb), returns the original DataFrame.
+    Otherwise, checks if fitting is possible based on total atoms, unique chains,
+    and residues per chain. If fitting is possible, it renumbers atoms, renames chains,
+    and renumbers residues within each chain sequentially starting from 1.
+
+    Parameters:
+    -----------
+    df : pd.DataFrame
+        DataFrame containing atom records, as created by parse_pdb_atoms or parse_cif_atoms.
+
+    Returns:
+    --------
+    pd.DataFrame
+        A new DataFrame with data potentially modified to fit PDB constraints.
+        The 'format' attribute of the returned DataFrame will be set to 'PDB'.
+
+    Raises:
+    -------
+    ValueError
+        If the data cannot be fitted into PDB format constraints (too many atoms,
+        chains, or residues per chain).
+    """
+    format_type = df.attrs.get("format")
+
+    if not format_type:
+        raise ValueError("DataFrame format attribute is not set.")
+
+    if can_write_pdb(df):
+        return df
+
+    # Determine column names based on format
+    if format_type == "PDB":
+        serial_col = "serial"
+        chain_col = "chainID"
+        resseq_col = "resSeq"
+        icode_col = "iCode"
+    elif format_type == "mmCIF":
+        serial_col = "id"
+        chain_col = "auth_asym_id"
+        resseq_col = "auth_seq_id"
+        icode_col = "pdbx_PDB_ins_code"
+    else:
+        raise ValueError(f"Unsupported DataFrame format: {format_type}")
+
+    # --- Feasibility Checks ---
+    if chain_col not in df.columns:
+        raise ValueError(f"Missing required chain column: {chain_col}")
+    if resseq_col not in df.columns:
+        raise ValueError(f"Missing required residue sequence column: {resseq_col}")
+
+    unique_chains = df[chain_col].unique()
+    num_chains = len(unique_chains)
+    total_atoms = len(df)
+    max_pdb_serial = 99999
+    max_pdb_residue = 9999
+    available_chain_ids = list(
+        string.ascii_uppercase + string.ascii_lowercase + string.digits
+    )
+    max_pdb_chains = len(available_chain_ids)
+
+    # Check 1: Total atoms + TER lines <= 99999
+    if total_atoms + num_chains > max_pdb_serial:
+        raise ValueError(
+            f"Cannot fit to PDB: Total atoms ({total_atoms}) + TER lines ({num_chains}) exceeds PDB limit ({max_pdb_serial})."
+        )
+
+    # Check 2: Number of chains <= 62
+    if num_chains > max_pdb_chains:
+        raise ValueError(
+            f"Cannot fit to PDB: Number of unique chains ({num_chains}) exceeds PDB limit ({max_pdb_chains})."
+        )
+
+    # Check 3: Max residues per chain <= 9999
+    # More accurate check: group by chain, then count unique (resSeq, iCode) tuples
+    # Use a temporary structure to avoid modifying the original df
+    check_df = pd.DataFrame(
+        {
+            "chain": df[chain_col],
+            "resSeq": df[resseq_col],
+            "iCode": df[icode_col].fillna("") if icode_col in df.columns else "",
+        }
+    )
+    residue_counts = check_df.groupby("chain").apply(
+        lambda x: x[["resSeq", "iCode"]].drop_duplicates().shape[0]
+    )
+    max_residues_per_chain = residue_counts.max() if not residue_counts.empty else 0
+
+    if max_residues_per_chain > max_pdb_residue:
+        raise ValueError(
+            f"Cannot fit to PDB: Maximum residues in a single chain ({max_residues_per_chain}) exceeds PDB limit ({max_pdb_residue})."
+        )
+
+    # --- Perform Fitting ---
+    df_fitted = df.copy()
+
+    # 1. Rename Chains
+    chain_mapping = {
+        orig_chain: available_chain_ids[i] for i, orig_chain in enumerate(unique_chains)
+    }
+    df_fitted[chain_col] = df_fitted[chain_col].map(chain_mapping)
+    # Ensure the chain column is treated as string/object after mapping
+    df_fitted[chain_col] = df_fitted[chain_col].astype(object)
+
+    # 2. Renumber Residues within each new chain
+    new_resseq_col = "new_resSeq"  # Temporary column for new numbering
+    df_fitted[new_resseq_col] = -1  # Initialize
+
+    all_new_res_maps = {}
+    for new_chain_id, group in df_fitted.groupby(chain_col):
+        # Identify unique original residues (seq + icode) in order of appearance
+        original_residues = group[[resseq_col, icode_col]].drop_duplicates()
+        # Create mapping: (orig_resSeq, orig_iCode) -> new_resSeq (1-based)
+        residue_mapping = {
+            tuple(res): i + 1
+            for i, res in enumerate(original_residues.itertuples(index=False))
+        }
+        all_new_res_maps[new_chain_id] = residue_mapping
+
+        # Apply mapping to the group
+        res_indices = group.set_index([resseq_col, icode_col]).index
+        df_fitted.loc[group.index, new_resseq_col] = res_indices.map(residue_mapping)
+
+    # Replace original residue number and clear insertion code
+    df_fitted[resseq_col] = df_fitted[new_resseq_col]
+    df_fitted[icode_col] = None  # Insertion codes are now redundant
+    df_fitted.drop(columns=[new_resseq_col], inplace=True)
+    # Convert resseq_col back to Int64 if it was before, handling potential NaNs if any step failed
+    df_fitted[resseq_col] = df_fitted[resseq_col].astype("Int64")
+
+    # 3. Renumber Atom Serials
+    new_serial_col = "new_serial"
+    df_fitted[new_serial_col] = -1  # Initialize
+    current_serial = 0
+    last_chain_id_for_serial = None
+
+    # Iterate in the potentially re-sorted order after grouping/mapping
+    # Ensure stable sort order for consistent serial numbering
+    df_fitted.sort_index(
+        inplace=True
+    )  # Sort by original index to maintain original atom order as much as possible
+
+    for index, row in df_fitted.iterrows():
+        current_chain_id = row[chain_col]
+        if (
+            last_chain_id_for_serial is not None
+            and current_chain_id != last_chain_id_for_serial
+        ):
+            current_serial += 1  # Increment for TER line
+
+        current_serial += 1
+        if current_serial > max_pdb_serial:
+            # This should have been caught by the initial check, but is a safeguard
+            raise ValueError("Serial number exceeded PDB limit during renumbering.")
+
+        df_fitted.loc[index, new_serial_col] = current_serial
+        last_chain_id_for_serial = current_chain_id
+
+    # Replace original serial number
+    df_fitted[serial_col] = df_fitted[new_serial_col]
+    df_fitted.drop(columns=[new_serial_col], inplace=True)
+    # Convert serial_col back to Int64
+    df_fitted[serial_col] = df_fitted[serial_col].astype("Int64")
+
+    # Update attributes and column types for PDB compatibility
+    df_fitted.attrs["format"] = "PDB"
+
+    # Ensure final column types match expected PDB output (especially categories)
+    # Reapply categorical conversion as some operations might change dtypes
+    pdb_categorical_cols = [
+        "record_type",
+        "name",
+        "altLoc",
+        "resName",
+        chain_col,
+        "element",
+        "charge",
+        icode_col,
+    ]
+    if "record_type" not in df_fitted.columns and "group_PDB" in df_fitted.columns:
+        df_fitted.rename(
+            columns={"group_PDB": "record_type"}, inplace=True
+        )  # Ensure correct name
+
+    for col in pdb_categorical_cols:
+        if col in df_fitted.columns:
+            # Handle None explicitly before converting to category if needed
+            if df_fitted[col].isnull().any():
+                df_fitted[col] = (
+                    df_fitted[col].astype(object).fillna("")
+                )  # Fill None with empty string for category
+            df_fitted[col] = df_fitted[col].astype("category")
+
+    # Rename columns if necessary from mmCIF to PDB standard names
+    rename_map = {
+        "id": "serial",
+        "auth_asym_id": "chainID",
+        "auth_seq_id": "resSeq",
+        "pdbx_PDB_ins_code": "iCode",
+        "label_atom_id": "name",  # Prefer label_atom_id if auth_atom_id not present? PDB uses 'name'
+        "label_comp_id": "resName",  # Prefer label_comp_id if auth_comp_id not present? PDB uses 'resName'
+        "type_symbol": "element",
+        "pdbx_formal_charge": "charge",
+        "Cartn_x": "x",
+        "Cartn_y": "y",
+        "Cartn_z": "z",
+        "B_iso_or_equiv": "tempFactor",
+        "group_PDB": "record_type",
+        "pdbx_PDB_model_num": "model",
+        # Add mappings for auth_atom_id -> name, auth_comp_id -> resName if needed,
+        # deciding on precedence if both label_* and auth_* exist.
+        # Current write_pdb prioritizes auth_* when reading mmCIF, so map those.
+        "auth_atom_id": "name",
+        "auth_comp_id": "resName",
+    }
+
+    # Only rename columns that actually exist in the DataFrame
+    actual_rename_map = {k: v for k, v in rename_map.items() if k in df_fitted.columns}
+    df_fitted.rename(columns=actual_rename_map, inplace=True)
+
+    # Ensure essential PDB columns exist, even if empty, if they were created during fitting
+    pdb_essential_cols = [
+        "record_type",
+        "serial",
+        "name",
+        "altLoc",
+        "resName",
+        "chainID",
+        "resSeq",
+        "iCode",
+        "x",
+        "y",
+        "z",
+        "occupancy",
+        "tempFactor",
+        "element",
+        "charge",
+        "model",
+    ]
+    for col in pdb_essential_cols:
+        if col not in df_fitted.columns:
+            # This case might occur if input mmCIF was missing fundamental columns mapped to PDB essentials
+            # Decide on default value or raise error. Adding empty series for now.
+            df_fitted[col] = pd.Series(
+                dtype="object"
+            )  # Add as object to handle potential None/mixed types initially
+
+    # Re-order columns to standard PDB order for clarity
+    final_pdb_order = [col for col in pdb_essential_cols if col in df_fitted.columns]
+    other_cols = [col for col in df_fitted.columns if col not in final_pdb_order]
+    df_fitted = df_fitted[final_pdb_order + other_cols]
+
+    # --- Final Type Conversions for PDB format ---
+    # Convert numeric columns (similar to parse_pdb_atoms)
+    pdb_numeric_columns = [
+        "serial",
+        "resSeq",
+        "x",
+        "y",
+        "z",
+        "occupancy",
+        "tempFactor",
+        "model",
+    ]
+    for col in pdb_numeric_columns:
+        if col in df_fitted.columns:
+            # Use Int64 for integer-like columns that might have been NaN during processing
+            if col in ["serial", "resSeq", "model"]:
+                df_fitted[col] = pd.to_numeric(df_fitted[col], errors="coerce").astype(
+                    "Int64"
+                )
+            else:  # Floats
+                df_fitted[col] = pd.to_numeric(df_fitted[col], errors="coerce")
+
+    # Convert categorical columns (similar to parse_pdb_atoms)
+    # Note: chainID and iCode were already handled during fitting/renaming
+    pdb_categorical_columns_final = [
+        "record_type",
+        "name",
+        "altLoc",
+        "resName",
+        "chainID",  # Already category, but ensure consistency
+        "iCode",  # Already category, but ensure consistency
+        "element",
+        "charge",
+    ]
+    for col in pdb_categorical_columns_final:
+        if col in df_fitted.columns:
+            # Ensure the column is categorical first
+            if not pd.api.types.is_categorical_dtype(df_fitted[col]):
+                # Convert non-categorical columns, handling potential NaNs
+                if df_fitted[col].isnull().any():
+                    df_fitted[col] = (
+                        df_fitted[col].astype(object).fillna("").astype("category")
+                    )
+                else:
+                    df_fitted[col] = df_fitted[col].astype("category")
+            else:
+                # If already categorical, check if '' needs to be added before fillna
+                has_nans = df_fitted[col].isnull().any()
+                if has_nans and "" not in df_fitted[col].cat.categories:
+                    # Add '' category explicitly
+                    df_fitted[col] = df_fitted[col].cat.add_categories([""])
+
+                # Fill None/NaN with empty string (now safe)
+                if has_nans:
+                    df_fitted[col].fillna("", inplace=True)
+
+    return df_fitted
+
+
+def _format_pdb_atom_line(atom_data: dict) -> str:
+    """Formats a dictionary of atom data into a PDB ATOM/HETATM line."""
+    # PDB format specification:
+    # COLUMNS   DATA TYPE     FIELD        DEFINITION
+    # -----------------------------------------------------------------------
+    #  1 -  6   Record name   "ATOM  " or "HETATM"
+    #  7 - 11   Integer       serial       Atom serial number.
+    # 13 - 16   Atom          name         Atom name.
+    # 17        Character     altLoc       Alternate location indicator.
+    # 18 - 20   Residue name  resName      Residue name.
+    # 22        Character     chainID      Chain identifier.
+    # 23 - 26   Integer       resSeq       Residue sequence number.
+    # 27        AChar         iCode        Code for insertion of residues.
+    # 31 - 38   Real(8.3)     x            Orthogonal coordinates for X.
+    # 39 - 46   Real(8.3)     y            Orthogonal coordinates for Y.
+    # 47 - 54   Real(8.3)     z            Orthogonal coordinates for Z.
+    # 55 - 60   Real(6.2)     occupancy    Occupancy.
+    # 61 - 66   Real(6.2)     tempFactor   Temperature factor.
+    # 77 - 78   LString(2)    element      Element symbol, right-justified.
+    # 79 - 80   LString(2)    charge       Charge on the atom.
+
+    # Record name (ATOM/HETATM)
+    record_name = atom_data.get("record_name", "ATOM").ljust(6)
+
+    # Serial number
+    serial = str(atom_data.get("serial", 0)).rjust(5)
+
+    # Atom name - special alignment rules
+    atom_name = atom_data.get("name", "")
+    if len(atom_name) < 4 and atom_name[:1].isalpha():
+        # Pad with space on left for 1-3 char names starting with a letter
+        atom_name_fmt = (" " + atom_name).ljust(4)
+    else:
+        # Use as is, left-justified, for 4-char names or those starting with a digit
+        atom_name_fmt = atom_name.ljust(4)
+
+    # Alternate location indicator
+    alt_loc = atom_data.get("altLoc", "")[:1].ljust(1)  # Max 1 char
+
+    # Residue name
+    res_name = atom_data.get("resName", "").rjust(
+        3
+    )  # Spec says "Residue name", examples often right-justified
+
+    # Chain identifier
+    chain_id = atom_data.get("chainID", "")[:1].ljust(1)  # Max 1 char
+
+    # Residue sequence number
+    res_seq = str(atom_data.get("resSeq", 0)).rjust(4)
+
+    # Insertion code
+    icode = atom_data.get("iCode", "")[:1].ljust(1)  # Max 1 char
+
+    # Coordinates
+    x = f"{atom_data.get('x', 0.0):8.3f}"
+    y = f"{atom_data.get('y', 0.0):8.3f}"
+    z = f"{atom_data.get('z', 0.0):8.3f}"
+
+    # Occupancy
+    occupancy = f"{atom_data.get('occupancy', 1.0):6.2f}"
+
+    # Temperature factor
+    temp_factor = f"{atom_data.get('tempFactor', 0.0):6.2f}"
+
+    # Element symbol
+    element = atom_data.get("element", "").rjust(2)
+
+    # Charge
+    charge_val = atom_data.get("charge", "")
+    charge_fmt = ""
+    if charge_val:
+        try:
+            # Try converting numeric charge (e.g., +1, -2) to PDB format (1+, 2-)
+            charge_int = int(float(charge_val))  # Use float first for cases like "1.0"
+            if charge_int != 0:
+                charge_fmt = f"{abs(charge_int)}{'+' if charge_int > 0 else '-'}"
+        except ValueError:
+            # If already formatted (e.g., "1+", "FE2+"), use its string representation
+            charge_fmt = str(charge_val)
+        # Ensure it fits and is right-justified
+        charge_fmt = charge_fmt.strip()[:2].rjust(2)
+    else:
+        charge_fmt = "  "  # Blank if no charge
+
+    # Construct the full line
+    # Ensure spacing is correct according to the spec
+    # 1-6 Record name | 7-11 Serial | 12 Space | 13-16 Name | 17 AltLoc | 18-20 ResName | 21 Space | 22 ChainID | 23-26 ResSeq | 27 iCode | 28-30 Spaces | 31-38 X | 39-46 Y | 47-54 Z | 55-60 Occupancy | 61-66 TempFactor | 67-76 Spaces | 77-78 Element | 79-80 Charge
+    line = (
+        f"{record_name}{serial} {atom_name_fmt}{alt_loc}{res_name} {chain_id}{res_seq}{icode}   "
+        f"{x}{y}{z}{occupancy}{temp_factor}          "  # 10 spaces
+        f"{element}{charge_fmt}"
+    )
+
+    # Ensure the line is exactly 80 characters long
+    return line.ljust(80)
+
+
 def write_pdb(
     df: pd.DataFrame, output: Union[str, TextIO, None] = None
 ) -> Union[str, None]:
@@ -235,7 +813,8 @@ def write_pdb(
     Parameters:
     -----------
     df : pd.DataFrame
-        DataFrame containing atom records, as created by parse_pdb_atoms or parse_cif_atoms
+        DataFrame containing atom records, as created by parse_pdb_atoms or parse_cif_atoms.
+        Must contain columns mappable to PDB format fields.
     output : Union[str, TextIO, None], optional
         Output file path or file-like object. If None, returns the PDB content as a string.
 
@@ -244,218 +823,162 @@ def write_pdb(
     Union[str, None]
         If output is None, returns the PDB content as a string. Otherwise, returns None.
     """
-    # Create a buffer to store the PDB content
     buffer = io.StringIO()
+    format_type = df.attrs.get("format", "PDB")  # Assume PDB if not specified
 
-
-    format_type = df.attrs.get("format", "PDB")
-
-    # Variables to track chain changes for TER records
+    last_model_num = None
     last_chain_id = None
-
-
-
-
-
-
-
-
+    last_res_info = None  # Tuple (resSeq, iCode, resName) for TER record
+    last_serial = 0
+
+    # Check if DataFrame is empty
+    if df.empty:
+        buffer.write("END\n")
+        content = buffer.getvalue()
+        buffer.close()
+        if output is not None:
+            if isinstance(output, str):
+                with open(output, "w") as f:
+                    f.write(content)
+            else:
+                output.write(content)
+            return None
+        return content
+
+    for _, row in df.iterrows():
+        atom_data = {}
+
+        # --- Data Extraction ---
         if format_type == "PDB":
-
-
-
+            # Pre-process PDB values, converting None to empty strings for optional fields
+            raw_alt_loc = row.get("altLoc")
+            pdb_alt_loc = "" if pd.isna(raw_alt_loc) else str(raw_alt_loc)
+
+            raw_icode = row.get("iCode")
+            pdb_icode = "" if pd.isna(raw_icode) else str(raw_icode)
+
+            raw_element = row.get("element")
+            pdb_element = "" if pd.isna(raw_element) else str(raw_element)
+
+            raw_charge = row.get("charge")
+            pdb_charge = "" if pd.isna(raw_charge) else str(raw_charge)
+
+            atom_data = {
+                "record_name": row.get("record_type", "ATOM"),
+                "serial": int(row.get("serial", 0)),
+                "name": str(row.get("name", "")),
+                "altLoc": pdb_alt_loc,
+                "resName": str(row.get("resName", "")),
+                "chainID": str(row.get("chainID", "")),
+                "resSeq": int(row.get("resSeq", 0)),
+                "iCode": pdb_icode,
+                "x": float(row.get("x", 0.0)),
+                "y": float(row.get("y", 0.0)),
+                "z": float(row.get("z", 0.0)),
+                "occupancy": float(row.get("occupancy", 1.0)),
+                "tempFactor": float(row.get("tempFactor", 0.0)),
+                "element": pdb_element,
+                "charge": pdb_charge,
+                "model": int(row.get("model", 1)),
+            }
+        elif format_type == "mmCIF":
+            # Pre-process mmCIF values to PDB compatible format, converting None to empty strings
+            raw_alt_loc = row.get("label_alt_id")
+            pdb_alt_loc = "" if pd.isna(raw_alt_loc) else str(raw_alt_loc)
+
+            raw_icode = row.get("pdbx_PDB_ins_code")
+            pdb_icode = "" if pd.isna(raw_icode) else str(raw_icode)
+
+            raw_element = row.get("type_symbol")
+            pdb_element = "" if pd.isna(raw_element) else str(raw_element)
+
+            raw_charge = row.get("pdbx_formal_charge")
+            pdb_charge = "" if pd.isna(raw_charge) else str(raw_charge)
+
+            atom_data = {
+                "record_name": row.get("group_PDB", "ATOM"),
+                "serial": int(row.get("id", 0)),
+                "name": str(row.get("auth_atom_id", row.get("label_atom_id", ""))),
+                "altLoc": pdb_alt_loc,
+                "resName": str(row.get("auth_comp_id", row.get("label_comp_id", ""))),
+                "chainID": str(row.get("auth_asym_id", row.get("label_asym_id"))),
+                "resSeq": int(row.get("auth_seq_id", row.get("label_seq_id", 0))),
+                "iCode": pdb_icode,
+                "x": float(row.get("Cartn_x", 0.0)),
+                "y": float(row.get("Cartn_y", 0.0)),
+                "z": float(row.get("Cartn_z", 0.0)),
+                "occupancy": float(row.get("occupancy", 1.0)),
+                "tempFactor": float(row.get("B_iso_or_equiv", 0.0)),
+                "element": pdb_element,
+                "charge": pdb_charge,
+                "model": int(row.get("pdbx_PDB_model_num", 1)),
+            }
+        else:
+            raise ValueError(f"Unsupported DataFrame format: {format_type}")
+
+        # --- MODEL/ENDMDL Records ---
+        current_model_num = atom_data["model"]
+        if current_model_num != last_model_num:
+            if last_model_num is not None:
+                buffer.write("ENDMDL\n")
+            buffer.write(f"MODEL {current_model_num:>4}\n")
+            last_model_num = current_model_num
+            # Reset chain/residue tracking for the new model
+            last_chain_id = None
+            last_res_info = None
+
+        # --- TER Records ---
+        current_chain_id = atom_data["chainID"]
+        current_res_info = (
+            atom_data["resSeq"],
+            atom_data["iCode"],
+            atom_data["resName"],
+        )
 
-        # Write TER
+        # Write TER if chain ID changes within the same model
         if last_chain_id is not None and current_chain_id != last_chain_id:
-            # Format TER record according to PDB specification
-            # Columns:
-            # 1-6: "TER "
-            # 7-11: Serial number (right-justified)
-            # 18-20: Residue name (right-justified)
-            # 22: Chain ID
-            # 23-26: Residue sequence number (right-justified)
-            # 27: Insertion code
             ter_serial = str(last_serial + 1).rjust(5)
-            ter_res_name =
+            ter_res_name = last_res_info[2].strip().rjust(3)  # Use last residue's name
             ter_chain_id = last_chain_id
-            ter_res_seq =
-            ter_icode =
+            ter_res_seq = str(last_res_info[0]).rjust(4)  # Use last residue's seq num
+            ter_icode = (
+                last_res_info[1] if last_res_info[1] else ""
+            )  # Use last residue's icode
 
-            # Construct the TER line ensuring correct spacing for all fields
-            # TER (1-6), serial (7-11), space (12-17), resName (18-20), space (21),
-            # chainID (22), resSeq (23-26), iCode (27)
             ter_line = f"TER {ter_serial} {ter_res_name} {ter_chain_id}{ter_res_seq}{ter_icode}"
             buffer.write(ter_line.ljust(80) + "\n")
 
-        #
-
-
-        # Set record type (ATOM or HETATM)
-        if format_type == "PDB":
-            record_type = row["record_type"]
-        else:  # mmCIF
-            record_type = row.get("group_PDB", "ATOM")
-        line = record_type.ljust(6) + line[6:]
-
-        # Set atom serial number
-        if format_type == "PDB":
-            serial = str(int(row["serial"]))
-        else:  # mmCIF
-            serial = str(int(row["id"]))
-        line = line[:6] + serial.rjust(5) + line[11:]
-
-        # Set atom name
-        if format_type == "PDB":
-            atom_name = row["name"]
-        else:  # mmCIF
-            atom_name = row.get("auth_atom_id", row.get("label_atom_id", ""))
-
-        # Right-justify atom name if it starts with a number
-        if atom_name and atom_name[0].isdigit():
-            line = line[:12] + atom_name.ljust(4) + line[16:]
-        else:
-            line = line[:12] + " " + atom_name.ljust(3) + line[16:]
-
-        # Set alternate location indicator
-        if format_type == "PDB":
-            alt_loc = row.get("altLoc", "")
-        else:  # mmCIF
-            alt_loc = row.get("label_alt_id", "")
-        line = line[:16] + alt_loc + line[17:]
-
-        # Set residue name
-        if format_type == "PDB":
-            res_name = row["resName"]
-        else:  # mmCIF
-            res_name = row.get("auth_comp_id", row.get("label_comp_id", ""))
-        line = line[:17] + res_name.ljust(3) + line[20:]
-
-        # Set chain identifier
-        if format_type == "PDB":
-            chain_id = row["chainID"]
-        else:  # mmCIF
-            chain_id = row.get("auth_asym_id", row.get("label_asym_id", ""))
-        line = line[:21] + chain_id + line[22:]
-
-        # Set residue sequence number
-        if format_type == "PDB":
-            res_seq = str(int(row["resSeq"]))
-        else:  # mmCIF
-            res_seq = str(int(row.get("auth_seq_id", row.get("label_seq_id", 0))))
-        line = line[:22] + res_seq.rjust(4) + line[26:]
-
-        # Set insertion code
-        if format_type == "PDB":
-            icode = row["iCode"] if pd.notna(row["iCode"]) else ""
-        else:  # mmCIF
-            icode = (
-                row.get("pdbx_PDB_ins_code", "")
-                if pd.notna(row.get("pdbx_PDB_ins_code", ""))
-                else ""
-            )
-        line = line[:26] + icode + line[27:]
-
-        # Set X coordinate
-        if format_type == "PDB":
-            x = float(row["x"])
-        else:  # mmCIF
-            x = float(row["Cartn_x"])
-        line = line[:30] + f"{x:8.3f}" + line[38:]
+        # --- Format and Write ATOM/HETATM Line ---
+        pdb_line = _format_pdb_atom_line(atom_data)
+        buffer.write(pdb_line + "\n")
 
-        #
-
-
-
-            y = float(row["Cartn_y"])
-        line = line[:38] + f"{y:8.3f}" + line[46:]
-
-        # Set Z coordinate
-        if format_type == "PDB":
-            z = float(row["z"])
-        else:  # mmCIF
-            z = float(row["Cartn_z"])
-        line = line[:46] + f"{z:8.3f}" + line[54:]
-
-        # Set occupancy
-        if format_type == "PDB":
-            occupancy = float(row["occupancy"])
-        else:  # mmCIF
-            occupancy = float(row.get("occupancy", 1.0))
-        line = line[:54] + f"{occupancy:6.2f}" + line[60:]
+        # --- Update Tracking Variables ---
+        last_serial = atom_data["serial"]
+        last_chain_id = current_chain_id
+        last_res_info = current_res_info
 
-
-
-            temp_factor = float(row["tempFactor"])
-        else:  # mmCIF
-            temp_factor = float(row.get("B_iso_or_equiv", 0.0))
-        line = line[:60] + f"{temp_factor:6.2f}" + line[66:]
-
-        # Set element symbol
-        if format_type == "PDB":
-            element = row["element"]
-        else:  # mmCIF
-            element = row.get("type_symbol", "")
-        line = line[:76] + element.rjust(2) + line[78:]
-
-        # Set charge
-        if format_type == "PDB":
-            charge = row["charge"]
-        else:  # mmCIF
-            charge = row.get("pdbx_formal_charge", "")
-        if charge and charge not in ["?", "."]:
-            # Convert numeric charge to PDB format (e.g., "1+" or "2-")
-            try:
-                charge_val = int(charge)
-                if charge_val != 0:
-                    charge = f"{abs(charge_val)}{'+' if charge_val > 0 else '-'}"
-                else:
-                    charge = ""
-            except ValueError:
-                pass
-        line = line[:78] + charge + line[80:]
-
-        # Write the line to the buffer
-        buffer.write(line.rstrip() + "\n")
-
-        # Update last atom info for potential TER record
-        if format_type == "PDB":
-            last_serial = int(row["serial"])
-            last_res_name = row["resName"]
-            last_chain_id = row["chainID"]
-            last_res_seq = str(int(row["resSeq"]))
-            last_icode = row["iCode"] if pd.notna(row["iCode"]) else ""
-        else:  # mmCIF
-            last_serial = int(row["id"])
-            last_res_name = row.get("auth_comp_id", row.get("label_comp_id", ""))
-            last_chain_id = row.get("auth_asym_id", row.get("label_asym_id", ""))
-            last_res_seq = str(int(row.get("auth_seq_id", row.get("label_seq_id", 0))))
-            last_icode = (
-                row.get("pdbx_PDB_ins_code", "")
-                if pd.notna(row.get("pdbx_PDB_ins_code", ""))
-                else ""
-            )
-
-    # Add TER record for the last chain
+    # --- Final Records ---
+    # Add TER record for the very last chain in the last model
     if last_chain_id is not None:
-        # Format TER record according to PDB specification
         ter_serial = str(last_serial + 1).rjust(5)
-        ter_res_name =
+        ter_res_name = last_res_info[2].strip().rjust(3)
         ter_chain_id = last_chain_id
-        ter_res_seq =
-        ter_icode =
+        ter_res_seq = str(last_res_info[0]).rjust(4)
+        ter_icode = last_res_info[1] if last_res_info[1] else ""
 
-        # Construct the TER line ensuring correct spacing for all fields
         ter_line = f"TER {ter_serial} {ter_res_name} {ter_chain_id}{ter_res_seq}{ter_icode}"
         buffer.write(ter_line.ljust(80) + "\n")
 
-    # Add
+    # Add ENDMDL if models were used
+    if last_model_num is not None:
+        buffer.write("ENDMDL\n")
+
     buffer.write("END\n")
 
-    #
+    # --- Output Handling ---
     content = buffer.getvalue()
     buffer.close()
 
-    # Write to output if provided
     if output is not None:
         if isinstance(output, str):
             with open(output, "w") as f:
|
|
463
986
|
else:
|
464
987
|
output.write(content)
|
465
988
|
return None
|
466
|
-
|
467
|
-
|
468
|
-
return content
|
989
|
+
else:
|
990
|
+
return content
|
469
991
|
|
470
992
|
|
471
993
|
def write_cif(
|
@@ -490,7 +1012,7 @@ def write_cif(
     format_type = df.attrs.get("format", "PDB")
 
     # Create a new DataContainer
-    data_container = DataContainer("
+    data_container = DataContainer("rnapolis")
 
     # Define the attributes for atom_site category
     if format_type == "mmCIF":
@@ -519,7 +1041,7 @@ def write_cif(
             "auth_comp_id",  # resName
             "auth_asym_id",  # chainID
             "auth_atom_id",  # name
-            "pdbx_PDB_model_num",  #
+            "pdbx_PDB_model_num",  # model
         ]
 
     # Prepare rows for the atom_site category
@@ -527,32 +1049,44 @@ def write_cif(
 
     for _, row in df.iterrows():
         if format_type == "mmCIF":
-            # Use existing mmCIF data
-            row_data = [
+            # Use existing mmCIF data, converting None to '?' universally
+            row_data = []
+            for attr in attributes:
+                value = row.get(attr)
+                if pd.isna(value):
+                    # Use '?' as the standard placeholder for missing values
+                    row_data.append("?")
+                else:
+                    # Ensure all non-missing values are converted to string
+                    row_data.append(str(value))
         else:  # PDB format
-            # Map PDB data to mmCIF format
+            # Map PDB data to mmCIF format, converting None to '.' or '?'
             entity_id = "1"  # Default entity ID
-            model_num = "
+            model_num = str(int(row["model"]))
+
+            # Pre-process optional fields for mmCIF placeholders
+            element_val = "?" if pd.isna(row.get("element")) else str(row["element"])
+            altloc_val = "." if pd.isna(row.get("altLoc")) else str(row["altLoc"])
+            icode_val = "." if pd.isna(row.get("iCode")) else str(row["iCode"])
+            charge_val = "." if pd.isna(row.get("charge")) else str(row["charge"])
 
             row_data = [
                 str(row["record_type"]),  # group_PDB
                 str(int(row["serial"])),  # id
-
+                element_val,  # type_symbol
                 str(row["name"]),  # label_atom_id
-
+                altloc_val,  # label_alt_id
                 str(row["resName"]),  # label_comp_id
                 str(row["chainID"]),  # label_asym_id
                 entity_id,  # label_entity_id
                 str(int(row["resSeq"])),  # label_seq_id
-
-                if pd.notna(row["iCode"])
-                else "?",  # pdbx_PDB_ins_code
+                icode_val,  # pdbx_PDB_ins_code
                 f"{float(row['x']):.3f}",  # Cartn_x
                 f"{float(row['y']):.3f}",  # Cartn_y
                 f"{float(row['z']):.3f}",  # Cartn_z
                 f"{float(row['occupancy']):.2f}",  # occupancy
                 f"{float(row['tempFactor']):.2f}",  # B_iso_or_equiv
-
+                charge_val,  # pdbx_formal_charge
                 str(int(row["resSeq"])),  # auth_seq_id
                 str(row["resName"]),  # auth_comp_id
                 str(row["chainID"]),  # auth_asym_id
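
The functions introduced in this file (can_write_pdb, fit_to_pdb, _format_pdb_atom_line) are designed to be combined with the existing parse_*/write_* helpers: parse a structure into a DataFrame, check or coerce it against PDB field-width limits, then serialize. A minimal sketch of that flow, assuming a local file named example.cif (the path is a placeholder, not part of the package):

    from rnapolis.parser_v2 import parse_cif_atoms, can_write_pdb, fit_to_pdb, write_pdb

    with open("example.cif") as f:   # any file-like object with a .name attribute works
        df = parse_cif_atoms(f)
    if not can_write_pdb(df):        # serials, chain IDs, or residue numbers exceed PDB limits
        df = fit_to_pdb(df)          # renumbers atoms/residues and renames chains, or raises ValueError
    pdb_text = write_pdb(df)         # with output=None the PDB content is returned as a string
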
rnapolis/splitter.py
CHANGED
@@ -3,10 +3,14 @@ import argparse
 import os
 import sys
 
-import pandas as pd
-
 from rnapolis.parser import is_cif
-from rnapolis.parser_v2 import
+from rnapolis.parser_v2 import (
+    fit_to_pdb,
+    parse_cif_atoms,
+    parse_pdb_atoms,
+    write_cif,
+    write_pdb,
+)
 
 
 def main():
@@ -97,12 +101,21 @@ def main():
 
         try:
             if output_format == "PDB":
-
+                df_to_write = fit_to_pdb(model_df)
+                write_pdb(df_to_write, output_path)
             else:  # mmCIF
                 write_cif(model_df, output_path)
+        except ValueError as e:
+            # Handle errors specifically from fit_to_pdb
+            print(
+                f"Error fitting model {model_num} from {args.file} to PDB: {e}. Skipping model.",
+                file=sys.stderr,
+            )
+            continue
         except Exception as e:
+            # Handle general writing errors
             print(
-                f"Error writing file {output_path}: {e}",
+                f"Error writing file {output_path} for model {model_num}: {e}",
                 file=sys.stderr,
             )
             # Optionally continue to next model or exit
rnapolis/unifier.py
CHANGED
@@ -7,7 +7,13 @@ from collections import Counter
 import pandas as pd
 
 from rnapolis.parser import is_cif
-from rnapolis.parser_v2 import
+from rnapolis.parser_v2 import (
+    fit_to_pdb,
+    parse_cif_atoms,
+    parse_pdb_atoms,
+    write_cif,
+    write_pdb,
+)
 from rnapolis.tertiary_v2 import Structure
 
 
@@ -140,13 +146,22 @@ def main():
 
         ext = ".pdb" if format == "PDB" else ".cif"
 
-
-        df = pd.concat([residue.atoms for residue in residues])
+        df = pd.concat([residue.atoms for residue in residues])
 
+        try:
             if format == "PDB":
-
+                df_to_write = fit_to_pdb(df)
+                with open(f"{args.output}/{base}{ext}", "w") as f:
+                    write_pdb(df_to_write, f)
             else:
-
+                with open(f"{args.output}/{base}{ext}", "w") as f:
+                    write_cif(df, f)
+        except ValueError as e:
+            print(
+                f"Error processing {path} for PDB output: {e}. Skipping file.",
+                file=sys.stderr,
+            )
+            continue
 
 
 if __name__ == "__main__":
{rnapolis-0.8.0.dist-info → rnapolis-0.8.1.dist-info}/RECORD
CHANGED
@@ -12,17 +12,17 @@ rnapolis/mmcif_pdbx_v50.dic,sha256=5QFx1ssDaehR4_DQ-tS9VQux262SiLXaqcwmwwejF5c,5
 rnapolis/molecule_filter.py,sha256=jgcpJxx_oXEBX0d30v4k_FdwRouRUPUsEtCYWgLGpD4,7310
 rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
 rnapolis/parser.py,sha256=3g4mtFvpiEENFcSBBtx_E_x1vJPF9BujWnts0kb9XjE,16340
-rnapolis/parser_v2.py,sha256=
+rnapolis/parser_v2.py,sha256=qG6CO3or7zmuJu368g9Nzokiqdeip4yjD14F163uH6w,40618
 rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
-rnapolis/splitter.py,sha256=
+rnapolis/splitter.py,sha256=x-Zn21mkiMgvYPptUFD9BbdNIvoaM6b8GzGf6uYXEwE,4052
 rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
 rnapolis/tertiary_v2.py,sha256=I1uyHWIUePNGO5m-suoL4ibtz02qAJUMvYm0BUKUygY,22480
 rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
-rnapolis/unifier.py,sha256=
+rnapolis/unifier.py,sha256=2ge7IB9FdRgzSAiVD39U_ciwtdDJ2fGzf8mUIudbrqY,5820
 rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
-rnapolis-0.8.
-rnapolis-0.8.
-rnapolis-0.8.
-rnapolis-0.8.
-rnapolis-0.8.
-rnapolis-0.8.
+rnapolis-0.8.1.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
+rnapolis-0.8.1.dist-info/METADATA,sha256=NOg9-s2n313HElku8z06JiBvEhPf6oV9RR7ur20hwys,54537
+rnapolis-0.8.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+rnapolis-0.8.1.dist-info/entry_points.txt,sha256=H00KoN54wU3dFOofAu3H_3PADmZOBTB1hXf5TUU2uzo,438
+rnapolis-0.8.1.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
+rnapolis-0.8.1.dist-info/RECORD,,
File without changes
File without changes
File without changes
File without changes