PyPI - rc-foundry - Versions diffs - 0.1.6__py3-none-any.whl → 0.1.9__py3-none-any.whl - Mend

rc-foundry 0.1.6__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

foundry/inference_engines/checkpoint_registry.py +58 -11
foundry/utils/alignment.py +10 -2
foundry/utils/ddp.py +1 -1
foundry/utils/logging.py +1 -1
foundry/version.py +2 -2
foundry_cli/download_checkpoints.py +66 -66
{rc_foundry-0.1.6.dist-info → rc_foundry-0.1.9.dist-info}/METADATA +30 -21
{rc_foundry-0.1.6.dist-info → rc_foundry-0.1.9.dist-info}/RECORD +31 -31
rf3/cli.py +13 -4
rf3/inference.py +3 -1
rfd3/configs/datasets/train/pdb/af3_train_interface.yaml +1 -1
rfd3/configs/inference_engine/rfdiffusion3.yaml +2 -2
rfd3/configs/model/samplers/symmetry.yaml +1 -1
rfd3/engine.py +28 -12
rfd3/inference/datasets.py +1 -1
rfd3/inference/input_parsing.py +32 -1
rfd3/inference/legacy_input_parsing.py +17 -1
rfd3/inference/parsing.py +1 -0
rfd3/inference/symmetry/atom_array.py +78 -13
rfd3/inference/symmetry/checks.py +62 -29
rfd3/inference/symmetry/frames.py +256 -5
rfd3/inference/symmetry/symmetry_utils.py +39 -61
rfd3/model/inference_sampler.py +11 -1
rfd3/model/layers/block_utils.py +33 -33
rfd3/model/layers/chunked_pairwise.py +84 -82
rfd3/run_inference.py +3 -1
rfd3/transforms/symmetry.py +16 -7
rfd3/utils/inference.py +21 -22
{rc_foundry-0.1.6.dist-info → rc_foundry-0.1.9.dist-info}/WHEEL +0 -0
{rc_foundry-0.1.6.dist-info → rc_foundry-0.1.9.dist-info}/entry_points.txt +0 -0
{rc_foundry-0.1.6.dist-info → rc_foundry-0.1.9.dist-info}/licenses/LICENSE.md +0 -0

rf3/cli.py CHANGED Viewed

@@ -23,10 +23,19 @@ def fold(
         configure_minimal_inference_logging()
     # Find the RF3 configs directory relative to this file
-    # This file is at: models/rf3/src/rf3/cli.py
-    # Configs are at: models/rf3/configs/
-    rf3_package_dir = Path(__file__).parent.parent.parent  # Go up to models/rf3/
-    config_path = str(rf3_package_dir / "configs")
+    # In development: models/rf3/src/rf3/cli.py -> models/rf3/configs/
+    # When installed: site-packages/rf3/cli.py -> site-packages/rf3/configs/
+    rf3_file_dir = Path(__file__).parent
+    # Check if we're in installed mode (configs are sibling to this file)
+    # or development mode (configs are ../../../configs)
+    if (rf3_file_dir / "configs").exists():
+        # Installed mode
+        config_path = str(rf3_file_dir / "configs")
+    else:
+        # Development mode
+        rf3_package_dir = rf3_file_dir.parent.parent  # Go up to models/rf3/
+        config_path = str(rf3_package_dir / "configs")
     # Get all arguments
     args = ctx.params.get("args", []) + ctx.args

rf3/inference.py CHANGED Viewed

@@ -16,7 +16,9 @@ rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
 load_dotenv(override=True)
-_config_path = os.path.join(os.environ["PROJECT_ROOT"], "models/rf3/configs")
+_config_path = os.path.join(
+    os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "configs"
+)
 @hydra.main(

rfd3/configs/datasets/train/pdb/af3_train_interface.yaml CHANGED Viewed

@@ -7,7 +7,7 @@ dataset:
     base_dir: ${paths.data.pdb_data_dir}
   dataset:
     name: interface
-    data: ${paths.data.pdb_parquet_dir}/interfaces_df_train.parquet
+    data: ${paths.data.pdb_parquet_dir}/interfaces_df.parquet
     filters:
       # filters common across all PDB datasets
       - "deposition_date < '2021-09-30'"

rfd3/configs/inference_engine/rfdiffusion3.yaml CHANGED Viewed

@@ -7,7 +7,7 @@ _target_: rfd3.engine.RFD3InferenceEngine
 out_dir: ???
 inputs: ???  # null, json, pdb or
-ckpt_path: /projects/ml/aa_design/models/rfd3_latest_cleaned.ckpt
+ckpt_path: rfd3
 json_keys_subset: null
 skip_existing: True
@@ -61,5 +61,5 @@ global_prefix: null
 dump_prediction_metadata_json: True
 dump_trajectories: False
 align_trajectory_structures: False
-prevalidate_inputs: True
+prevalidate_inputs: False
 low_memory_mode: False # False for standard mode, True for memory efficient tokenization mode

rfd3/configs/model/samplers/symmetry.yaml CHANGED Viewed

@@ -4,7 +4,7 @@ defaults:
 kind: symmetry
 num_timesteps: 200
-gamma_0: 1.0  # 1.0 for SDE sampling
+gamma_0: 0.6  # 1.0 for SDE sampling
 gamma_min: 1.0
 gamma_min2: 0.0
 sym_step_frac: 0.9 # when 0.9, 90% of the trajectory from the start is symmetrized

rfd3/engine.py CHANGED Viewed

@@ -21,9 +21,14 @@ from rfd3.constants import SAVED_CONDITIONING_ANNOTATIONS
 from rfd3.inference.datasets import (
     assemble_distributed_inference_loader_from_json,
 )
-from rfd3.inference.input_parsing import DesignInputSpecification
+from rfd3.inference.input_parsing import (
+    DesignInputSpecification,
+    ensure_input_is_abspath,
+)
 from rfd3.model.inference_sampler import SampleDiffusionConfig
-from rfd3.utils.inference import ensure_input_is_abspath
+from rfd3.utils.inference import (
+    ensure_inference_sampler_matches_design_spec,
+)
 from rfd3.utils.io import (
     CIF_LIKE_EXTENSIONS,
     build_stack_from_atom_array_and_batched_coords,
@@ -171,6 +176,7 @@ class RFD3InferenceEngine(BaseInferenceEngine):
         )
         # save
         self.specification_overrides = dict(specification or {})
+        self.inference_sampler_overrides = dict(inference_sampler or {})
         # Setup output directories and args
         self.global_prefix = global_prefix
@@ -210,6 +216,9 @@ class RFD3InferenceEngine(BaseInferenceEngine):
             inputs=inputs,
             n_batches=n_batches,
         )
+        ensure_inference_sampler_matches_design_spec(
+            design_specifications, self.inference_sampler_overrides
+        )
         # init before
         self.initialize()
         outputs = self._run_multi(design_specifications)
@@ -383,6 +392,15 @@ class RFD3InferenceEngine(BaseInferenceEngine):
         # Based on inputs, construct the specifications to loop through
         design_specifications = {}
         for prefix, example_spec in inputs.items():
+            # Record task name in the specification
+            if isinstance(example_spec, DesignInputSpecification):
+                example_spec.extra = example_spec.extra or {}
+                example_spec.extra["task_name"] = prefix
+            else:
+                if "extra" not in example_spec:
+                    example_spec["extra"] = {}
+                example_spec["extra"]["task_name"] = prefix
             # ... Create n_batches for example
             for batch_id in range((n_batches) if exists(n_batches) else 1):
                 # ... Example ID
@@ -524,21 +542,19 @@ def process_input(
 def _reshape_trajectory(traj, align_structures: bool):
-    traj = [traj[i] for i in range(len(traj))]
-    n_steps = len(traj)
+    traj = [traj[i] for i in range(len(traj))]  # make list of arrays
     max_frames = 100
+    if len(traj) > max_frames:
+        selected_indices = torch.linspace(0, len(traj) - 1, max_frames).long().tolist()
+        traj = [traj[i] for i in selected_indices]
     if align_structures:
         # ... align the trajectories on the last prediction
-        for step in range(n_steps - 1):
+        for step in range(len(traj) - 1):
             traj[step] = weighted_rigid_align(
-                X_L=traj[-1],
-                X_gt_L=traj[step],
-            )
+                X_L=traj[-1][None],
+                X_gt_L=traj[step][None],
+            ).squeeze(0)
     traj = traj[::-1]  # reverse to go from noised -> denoised
-    if n_steps > max_frames:
-        selected_indices = torch.linspace(0, n_steps - 1, max_frames).long().tolist()
-        traj = [traj[i] for i in selected_indices]
     traj = torch.stack(traj).cpu().numpy()
     return traj

rfd3/inference/datasets.py CHANGED Viewed

@@ -14,8 +14,8 @@ from atomworks.ml.transforms.base import Compose, Transform
 from omegaconf import DictConfig, OmegaConf
 from rfd3.inference.input_parsing import (
     DesignInputSpecification,
+    ensure_input_is_abspath,
 )
-from rfd3.utils.inference import ensure_input_is_abspath
 from torch.utils.data import (
     DataLoader,
     SequentialSampler,

rfd3/inference/input_parsing.py CHANGED Viewed

@@ -5,6 +5,7 @@ import os
 import time
 import warnings
 from contextlib import contextmanager
+from os import PathLike
 from typing import Any, Dict, List, Optional, Union
 import numpy as np
@@ -696,7 +697,7 @@ class DesignInputSpecification(BaseModel):
             # Partial diffusion: use COM, keep all coordinates
             if exists(self.symmetry) and self.symmetry.id:
                 # For symmetric structures, avoid COM centering that would collapse chains
-                ranked_logger.info(
+                logger.info(
                     "Partial diffusion with symmetry: skipping COM centering to preserve chain spacing"
                 )
             else:
@@ -1121,3 +1122,33 @@ def accumulate_components(
     if atom_array_accum.bonds is None:
         atom_array_accum.bonds = BondList(atom_array_accum.array_length())
     return atom_array_accum
+def ensure_input_is_abspath(args: Dict[str, Any], path: PathLike | None):
+    """
+    Ensures the input source is an absolute path if exists, if not it will convert
+    args:
+        args: Inference specification for atom array
+        path: None or file to which the input is relative to.
+    """
+    if isinstance(args, str):
+        raise ValueError(
+            "Expected args to be a dictionary, got a string: {}. If you are using an input JSON ensure it contains dictionaries of arguments".format(
+                args
+            )
+        )
+    if "input" not in args or not exists(args["input"]):
+        return args
+    input = str(args["input"])
+    if not os.path.isabs(input):
+        if path is None:
+            raise ValueError(
+                "Input path is relative, but no base path was provided to resolve it against."
+            )
+        input = os.path.abspath(os.path.join(os.path.dirname(str(path)), input))
+        logger.info(
+            f"Input source path is relative, converted to absolute path: {input}"
+        )
+        args["input"] = input
+    return args

rfd3/inference/legacy_input_parsing.py CHANGED Viewed

@@ -139,13 +139,18 @@ def fetch_motif_residue_(
         subarray, motif=True, unindexed=False, dtype=int
     )  # all values init to True (fix all)
+    to_unindex = f"{src_chain}{src_resid}" in unindexed_components
+    to_index = f"{src_chain}{src_resid}" in components
     # Assign is motif atom and sequence
     if exists(atoms := fixed_atoms.get(f"{src_chain}{src_resid}")):
+        # If specified, we set fixed atoms in the residue to be motif atoms
         atom_mask = get_name_mask(subarray.atom_name, atoms, res_name)
         subarray.set_annotation("is_motif_atom", atom_mask)
         # subarray.set_annotation("is_motif_atom_with_fixed_coord", atom_mask)  # BUGFIX: uncomment
     elif redesign_motif_sidechains and res_name in STANDARD_AA:
+        # If redesign_motif_sidechains is True, we only make the backbone atoms to be motif atoms
         n_atoms = subarray.shape[0]
         diffuse_oxygen = False
         if n_atoms < 3:
@@ -178,6 +183,18 @@ def fetch_motif_residue_(
         subarray.set_annotation(
             "is_motif_atom_with_fixed_seq", np.zeros(subarray.shape[0], dtype=int)
         )
+    elif to_index or to_unindex:
+        # If the residue is in the contig or unindexed components,
+        # we set all atoms in the residue to be motif atoms
+        subarray.set_annotation("is_motif_atom", np.ones(subarray.shape[0], dtype=int))
+    else:
+        if to_unindex and not (
+            unfix_all or f"{src_chain}{src_resid}" in unfix_residues
+        ):
+            raise ValueError(
+                f"{src_chain}{src_resid} is not found in fixed_atoms, contig or unindex contig."
+                "Please check your input and contig specification."
+            )
     if unfix_all or f"{src_chain}{src_resid}" in unfix_residues:
         subarray.set_annotation(
             "is_motif_atom_with_fixed_coord", np.zeros(subarray.shape[0], dtype=int)
@@ -197,7 +214,6 @@ def fetch_motif_residue_(
         subarray.set_annotation(
             "is_flexible_motif_atom", np.zeros(subarray.shape[0], dtype=bool)
         )
-    to_unindex = f"{src_chain}{src_resid}" in unindexed_components
     if to_unindex:
         subarray.set_annotation(
             "is_motif_atom_unindexed", subarray.is_motif_atom.copy()

rfd3/inference/parsing.py CHANGED Viewed

@@ -117,6 +117,7 @@ def from_any_(v: Any, atom_array: AtomArray):
         # Split to atom names
         data_split[idx] = token.atom_name[comp_mask_subset].tolist()
+        # TODO: there is a bug where when you select specifc atoms within a ligand, output ligand is fragmented
         # Update mask & token dictionary
         mask[comp_mask] = comp_mask_subset

rfd3/inference/symmetry/atom_array.py CHANGED Viewed

@@ -1,14 +1,74 @@
+import string
 import numpy as np
 from rfd3.inference.symmetry.frames import (
     decompose_symmetry_frame,
     get_symmetry_frames_from_symmetry_id,
 )
-from foundry.utils.ddp import RankedLogger
 FIXED_TRANSFORM_ID = -1
 FIXED_ENTITY_ID = -1
-ranked_logger = RankedLogger(__name__, rank_zero_only=True)
+# Alphabet for chain ID generation (uppercase letters only, per wwPDB convention)
+_CHAIN_ALPHABET = string.ascii_uppercase
+def index_to_chain_id(index: int) -> str:
+    """
+    Convert a zero-based index to a chain ID following wwPDB convention.
+    The naming follows the wwPDB-assigned chain ID system:
+    - 0-25: A-Z (single letter)
+    - 26-701: AA-ZZ (double letter)
+    - 702-18277: AAA-ZZZ (triple letter)
+    - And so on...
+    This is similar to Excel column naming (A, B, ..., Z, AA, AB, ...).
+    Arguments:
+        index: zero-based index (0 -> 'A', 25 -> 'Z', 26 -> 'AA', etc.)
+    Returns:
+        chain_id: string chain identifier
+    """
+    if index < 0:
+        raise ValueError(f"Chain index must be non-negative, got {index}")
+    result = ""
+    remaining = index
+    # Convert to bijective base-26 (like Excel columns)
+    while True:
+        result = _CHAIN_ALPHABET[remaining % 26] + result
+        remaining = remaining // 26 - 1
+        if remaining < 0:
+            break
+    return result
+def chain_id_to_index(chain_id: str) -> int:
+    """
+    Convert a chain ID back to a zero-based index.
+    Inverse of index_to_chain_id.
+    Arguments:
+        chain_id: string chain identifier (e.g., 'A', 'Z', 'AA', 'AB')
+    Returns:
+        index: zero-based index
+    """
+    if not chain_id or not all(c in _CHAIN_ALPHABET for c in chain_id):
+        raise ValueError(f"Invalid chain ID: {chain_id}")
+    # Offset for all shorter chain IDs (26 + 26^2 + ... + 26^(len-1))
+    offset = sum(26**k for k in range(1, len(chain_id)))
+    # Value within the current length group (standard base-26)
+    value = 0
+    for char in chain_id:
+        value = value * 26 + _CHAIN_ALPHABET.index(char)
+    return offset + value
 ########################################################
@@ -28,7 +88,7 @@ def add_sym_annotations(atom_array, sym_conf):
     is_asu = np.full(n, True, dtype=np.bool_)
     atom_array.set_annotation("is_sym_asu", is_asu)
     # symmetry_id
-    symmetry_ids = np.full(n, sym_conf.get("id"), dtype="U6")
+    symmetry_ids = np.full(n, sym_conf.id, dtype="U6")
     atom_array.set_annotation("symmetry_id", symmetry_ids)
     return atom_array
@@ -251,11 +311,13 @@ def reset_chain_ids(atom_array, start_id):
     Reset the chain ids and pn_unit_iids of an atom array to start from the given id.
     Arguments:
         atom_array: atom array with chain_ids and pn_unit_iids annotated
+        start_id: starting chain ID (e.g., 'A')
     """
     chain_ids = np.unique(atom_array.chain_id)
-    new_chain_range = range(ord(start_id), ord(start_id) + len(chain_ids))
-    for new_id, old_id in zip(new_chain_range, chain_ids):
-        atom_array.chain_id[atom_array.chain_id == old_id] = chr(new_id)
+    start_index = chain_id_to_index(start_id)
+    for i, old_id in enumerate(chain_ids):
+        new_id = index_to_chain_id(start_index + i)
+        atom_array.chain_id[atom_array.chain_id == old_id] = new_id
     atom_array.pn_unit_iid = atom_array.chain_id
     return atom_array
@@ -263,15 +325,18 @@ def reset_chain_ids(atom_array, start_id):
 def reannotate_chain_ids(atom_array, offset, multiplier=0):
     """
     Reannotate the chain ids and pn_unit_iids of an atom array.
+    Uses wwPDB-style chain IDs (A-Z, AA-ZZ, AAA-ZZZ, ...) to support
+    any number of chains.
     Arguments:
         atom_array: protein atom array with chain_ids and pn_unit_iids annotated
-        offset: offset to add to the chain ids
-        multiplier: multiplier to add to the chain ids
+        offset: offset to add to the chain ids (typically num_chains in ASU)
+        multiplier: multiplier for the offset (typically transform index)
     """
-    chain_ids_int = (
-        np.array([ord(c) for c in atom_array.chain_id]) + offset * multiplier
-    )
-    chain_ids = np.array([chr(id) for id in chain_ids_int], dtype=str)
+    chain_ids_indices = np.array([chain_id_to_index(c) for c in atom_array.chain_id])
+    new_indices = chain_ids_indices + offset * multiplier
+    chain_ids = np.array([index_to_chain_id(idx) for idx in new_indices], dtype="U4")
     atom_array.chain_id = chain_ids
     atom_array.pn_unit_iid = chain_ids
     return atom_array

rfd3/inference/symmetry/checks.py CHANGED Viewed

@@ -1,10 +1,13 @@
 import numpy as np
-from rfd3.inference.symmetry.contigs import expand_contig_unsym_motif
+from rfd3.inference.symmetry.contigs import (
+    expand_contig_unsym_motif,
+    get_unsym_motif_mask,
+)
 from rfd3.transforms.conditioning_base import get_motif_features
 from foundry.utils.ddp import RankedLogger
-MIN_ATOMS_ALIGN = 100
+MIN_ATOMS_ALIGN = 30
 MAX_TRANSFORMS = 10
 RMSD_CUT = 1.0  # Angstroms
@@ -18,32 +21,44 @@ def check_symmetry_config(
     Check if the symmetry configuration is valid. Add all basic checks here.
     """
-    assert sym_conf.get("id"), "symmetry_id is required. e.g. {'id': 'C2'}"
+    assert sym_conf.id, "symmetry_id is required. e.g. {'id': 'C2'}"
     # if unsym motif is provided, check that each motif name is in the atom array
-    if sym_conf.get("is_unsym_motif"):
+    is_motif_atom = get_motif_features(atom_array)["is_motif_atom"]
+    is_unsym_motif = np.zeros(atom_array.shape[0], dtype=bool)
+    if not is_motif_atom.any():
+        sym_conf.is_symmetric_motif = None
+        ranked_logger.warning(
+            "No motifs found in atom array. Setting is_symmetric_motif to None."
+        )
+        return sym_conf
+    if sym_conf.is_unsym_motif:
         assert (
             src_atom_array is not None
         ), "Source atom array must be provided for symmetric motifs"
-        unsym_motif_names = sym_conf["is_unsym_motif"].split(",")
+        unsym_motif_names = sym_conf.is_unsym_motif.split(",")
         unsym_motif_names = expand_contig_unsym_motif(unsym_motif_names)
+        is_unsym_motif = get_unsym_motif_mask(atom_array, unsym_motif_names)
         for n in unsym_motif_names:
             if (sm and n not in sm.split(",")) and (n not in atom_array.src_component):
                 raise ValueError(f"Unsym motif {n} not found in atom_array")
     if (
-        get_motif_features(atom_array)["is_motif_token"].any()
-        and not sym_conf.get("is_symmetric_motif")
+        is_motif_atom[~is_unsym_motif].any()
+        and not sym_conf.is_symmetric_motif
         and not has_dist_cond
     ):
         raise ValueError(
-            "Asymmetric motif inputs should be distance constrained. "
-            "Use atomwise_fixed_dist to constrain the distance between the motif atoms."
+            "Asymmetric motif inputs are not supported yet. Please provide a symmetric motif."
         )
-    # else: if unconditional symmetry, no need to have symmetric input motif
-    if partial and not sym_conf.get("is_symmetric_motif"):
+    if partial and not sym_conf.is_symmetric_motif:
         raise ValueError(
             "Partial diffusion with symmetry is only supported for symmetric inputs."
         )
+    return sym_conf
 def check_atom_array_is_symmetric(atom_array):
@@ -54,9 +69,6 @@ def check_atom_array_is_symmetric(atom_array):
     Returns:
         bool: True if the atom array is symmetric, False otherwise
     """
-    # TODO: Implement something like this https://github.com/baker-laboratory/ipd/blob/main/ipd/sym/sym_detect.py#L303
-    #       and maybe this https://github.com/baker-laboratory/ipd/blob/main/ipd/sym/sym_detect.py#L231
     import biotite.structure as struc
     from rfd3.inference.symmetry.atom_array import (
         apply_symmetry_to_atomarray_coord,
@@ -68,8 +80,10 @@ def check_atom_array_is_symmetric(atom_array):
     # remove hetero atoms
     atom_array = atom_array[~atom_array.hetero]
     if len(atom_array) == 0:
-        ranked_logger.info("Atom array has no protein chains. Please check your input.")
-        return False
+        ranked_logger.warning(
+            "Atom array has no protein chains. Please check your input."
+        )
+        return True
     chains = np.unique(atom_array.chain_id)
     asu_mask = atom_array.chain_id == chains[0]
@@ -162,16 +176,22 @@ def find_optimal_rotation(coords1, coords2, max_points=1000):
         return None
-def check_input_frames_match_symmetry_frames(computed_frames, original_frames) -> None:
+def check_input_frames_match_symmetry_frames(
+    computed_frames, original_frames, nids_by_entity
+) -> None:
     """
     Check if the atom array matches the symmetry_id.
     Arguments:
         computed_frames: list of computed frames
         original_frames: list of original frames
     """
-    assert len(computed_frames) == len(
-        original_frames
-    ), "Number of computed frames does not match number of original frames"
+    assert len(computed_frames) == len(original_frames), (
+        "Number of computed frames does not match number of original frames.\n"
+        f"Computed Frames: {len(computed_frames)}. Original Frames: {len(original_frames)}.\n"
+        "If the computed frames are not as expected, please check if you have one-to-one mapping "
+        "(size, sequence, folding) of an entity across all chains.\n"
+        f"Computed Entity Mapping: {nids_by_entity}."
+    )
 def check_valid_multiplicity(nids_by_entity) -> None:
@@ -184,25 +204,35 @@ def check_valid_multiplicity(nids_by_entity) -> None:
     multiplicity = min([len(i) for i in nids_by_entity.values()])
     if multiplicity == 1:  # no possible symmetry
         raise ValueError(
-            "Input has no possible symmetry. If asymmetric motif, please use 2D conditioning inference instead."
+            "Input has no possible symmetry. If asymmetric motif, please use 2D conditioning inference instead.\n"
+            "Multiplicity: 1"
         )
     # Check that the input is not asymmetric
     multiplicity_good = [len(i) % multiplicity == 0 for i in nids_by_entity.values()]
     if not all(multiplicity_good):
-        raise ValueError("Invalid multiplicities of subunits. Please check your input.")
+        raise ValueError(
+            "Expected multiplicity does not match for some entities.\n"
+            "Please modify your input to have one-to-one mapping (size, sequence, folding) of an entity across all chains.\n"
+            f"Expected Multiplicity: {multiplicity}.\n"
+            f"Computed Entity Mapping: {nids_by_entity}."
+        )
 def check_valid_subunit_size(nids_by_entity, pn_unit_id) -> None:
     """
     Check that the subunits in the input are of the same size.
     Arguments:
-        nids_by_entity: dict mapping entity to ids
+        nids_by_entity: dict mapping entity to ids. e.g. {0: (['A_1', 'B_1', 'C_1']), 1: (['A_2', 'B_2', 'C_2'])}
+        pn_unit_id: array of ids. e.g. ['A_1', 'B_1', 'C_1', 'A_2', 'B_2', 'C_2']
     """
-    for i, js in nids_by_entity.items():
-        for j in js[1:]:
-            if (pn_unit_id == js[0]).sum() != (pn_unit_id == j).sum():
-                raise ValueError("Size mismatch in the input. Please check your file.")
+    for js in nids_by_entity.values():
+        for js_i in js[1:]:
+            if (pn_unit_id == js[0]).sum() != (pn_unit_id == js_i).sum():
+                raise ValueError(
+                    f"Size mismatch between chain {js[0]} ({(pn_unit_id == js[0]).sum()} atoms) "
+                    f"and chain {js_i} ({(pn_unit_id == js_i).sum()} atoms). Please check your input file."
+                )
 def check_min_atoms_to_align(natm_per_unique, reference_entity) -> None:
@@ -212,7 +242,10 @@ def check_min_atoms_to_align(natm_per_unique, reference_entity) -> None:
         nids_by_entity: dict mapping entity to ids
     """
     if natm_per_unique[reference_entity] < MIN_ATOMS_ALIGN:
-        raise ValueError("Not enough atoms to align. Please check your input.")
+        raise ValueError(
+            f"Not enough atoms to align < {MIN_ATOMS_ALIGN} atoms."
+            f"Please provide a input with at least {MIN_ATOMS_ALIGN} atoms."
+        )
 def check_max_transforms(chains_to_consider) -> None:
@@ -224,7 +257,7 @@ def check_max_transforms(chains_to_consider) -> None:
     """
     if len(chains_to_consider) > MAX_TRANSFORMS:
         raise ValueError(
-            "Number of transforms exceeds the max number of transforms (10)"
+            f"Number of transforms exceeds the max number of transforms ({MAX_TRANSFORMS})."
         )

rc-foundry 0.1.6__py3-none-any.whl → 0.1.9__py3-none-any.whl

rc-foundry 0.1.6__py3-none-any.whl → 0.1.9__py3-none-any.whl