rc-foundry 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- foundry/__init__.py +57 -0
- foundry/callbacks/__init__.py +5 -0
- foundry/callbacks/callback.py +116 -0
- foundry/callbacks/health_logging.py +419 -0
- foundry/callbacks/metrics_logging.py +211 -0
- foundry/callbacks/timing_logging.py +67 -0
- foundry/callbacks/train_logging.py +278 -0
- foundry/common.py +108 -0
- foundry/constants.py +28 -0
- foundry/hydra/resolvers.py +77 -0
- foundry/inference_engines/base.py +235 -0
- foundry/inference_engines/checkpoint_registry.py +66 -0
- foundry/metrics/__init__.py +12 -0
- foundry/metrics/losses.py +30 -0
- foundry/metrics/metric.py +319 -0
- foundry/model/layers/blocks.py +47 -0
- foundry/testing/__init__.py +6 -0
- foundry/testing/fixtures.py +19 -0
- foundry/testing/pytest_hooks.py +15 -0
- foundry/trainers/fabric.py +923 -0
- foundry/training/EMA.py +67 -0
- foundry/training/checkpoint.py +61 -0
- foundry/training/schedulers.py +91 -0
- foundry/utils/alignment.py +86 -0
- foundry/utils/components.py +415 -0
- foundry/utils/datasets.py +405 -0
- foundry/utils/ddp.py +103 -0
- foundry/utils/instantiators.py +72 -0
- foundry/utils/logging.py +279 -0
- foundry/utils/rigid.py +1460 -0
- foundry/utils/rotation_augmentation.py +65 -0
- foundry/utils/squashfs.py +172 -0
- foundry/utils/torch.py +317 -0
- foundry/utils/weights.py +271 -0
- foundry/version.py +34 -0
- foundry_cli/__init__.py +3 -0
- foundry_cli/download_checkpoints.py +281 -0
- mpnn/__init__.py +1 -0
- mpnn/collate/feature_collator.py +265 -0
- mpnn/inference.py +53 -0
- mpnn/inference_engines/mpnn.py +549 -0
- mpnn/loss/nll_loss.py +122 -0
- mpnn/metrics/nll.py +369 -0
- mpnn/metrics/sequence_recovery.py +440 -0
- mpnn/model/layers/graph_embeddings.py +2372 -0
- mpnn/model/layers/message_passing.py +332 -0
- mpnn/model/layers/position_wise_feed_forward.py +44 -0
- mpnn/model/layers/positional_encoding.py +98 -0
- mpnn/model/mpnn.py +2632 -0
- mpnn/pipelines/mpnn.py +162 -0
- mpnn/samplers/samplers.py +167 -0
- mpnn/train.py +341 -0
- mpnn/trainers/mpnn.py +193 -0
- mpnn/transforms/feature_aggregation/mpnn.py +184 -0
- mpnn/transforms/feature_aggregation/polymer_ligand_interface.py +76 -0
- mpnn/transforms/feature_aggregation/token_encodings.py +132 -0
- mpnn/transforms/feature_aggregation/user_settings.py +347 -0
- mpnn/transforms/polymer_ligand_interface.py +164 -0
- mpnn/utils/inference.py +2397 -0
- mpnn/utils/probability.py +37 -0
- mpnn/utils/weights.py +309 -0
- rc_foundry-0.1.1.dist-info/METADATA +239 -0
- rc_foundry-0.1.1.dist-info/RECORD +180 -0
- rc_foundry-0.1.1.dist-info/WHEEL +4 -0
- rc_foundry-0.1.1.dist-info/entry_points.txt +5 -0
- rc_foundry-0.1.1.dist-info/licenses/LICENSE.md +28 -0
- rf3/__init__.py +3 -0
- rf3/_version.py +33 -0
- rf3/alignment.py +79 -0
- rf3/callbacks/dump_validation_structures.py +101 -0
- rf3/callbacks/metrics_logging.py +324 -0
- rf3/chemical.py +1529 -0
- rf3/cli.py +77 -0
- rf3/data/cyclic_transform.py +78 -0
- rf3/data/extra_xforms.py +36 -0
- rf3/data/ground_truth_template.py +463 -0
- rf3/data/paired_msa.py +206 -0
- rf3/data/pipeline_utils.py +128 -0
- rf3/data/pipelines.py +558 -0
- rf3/diffusion_samplers/inference_sampler.py +222 -0
- rf3/inference.py +65 -0
- rf3/inference_engines/__init__.py +5 -0
- rf3/inference_engines/rf3.py +735 -0
- rf3/kinematics.py +354 -0
- rf3/loss/af3_confidence_loss.py +515 -0
- rf3/loss/af3_losses.py +655 -0
- rf3/loss/loss.py +179 -0
- rf3/metrics/chiral.py +179 -0
- rf3/metrics/clashing_chains.py +68 -0
- rf3/metrics/distogram.py +421 -0
- rf3/metrics/lddt.py +523 -0
- rf3/metrics/metadata.py +43 -0
- rf3/metrics/metric_utils.py +192 -0
- rf3/metrics/predicted_error.py +134 -0
- rf3/metrics/rasa.py +108 -0
- rf3/metrics/selected_distances.py +91 -0
- rf3/model/RF3.py +527 -0
- rf3/model/RF3_blocks.py +92 -0
- rf3/model/RF3_structure.py +303 -0
- rf3/model/layers/af3_auxiliary_heads.py +255 -0
- rf3/model/layers/af3_diffusion_transformer.py +544 -0
- rf3/model/layers/attention.py +313 -0
- rf3/model/layers/layer_utils.py +127 -0
- rf3/model/layers/mlff.py +118 -0
- rf3/model/layers/outer_product.py +59 -0
- rf3/model/layers/pairformer_layers.py +783 -0
- rf3/model/layers/structure_bias.py +56 -0
- rf3/scoring.py +1787 -0
- rf3/symmetry/resolve.py +284 -0
- rf3/train.py +194 -0
- rf3/trainers/rf3.py +570 -0
- rf3/util_module.py +47 -0
- rf3/utils/frames.py +109 -0
- rf3/utils/inference.py +665 -0
- rf3/utils/io.py +198 -0
- rf3/utils/loss.py +72 -0
- rf3/utils/predict_and_score.py +165 -0
- rf3/utils/predicted_error.py +673 -0
- rf3/utils/recycling.py +42 -0
- rf3/validate.py +140 -0
- rfd3/.gitignore +7 -0
- rfd3/Makefile +76 -0
- rfd3/__init__.py +12 -0
- rfd3/callbacks.py +66 -0
- rfd3/cli.py +41 -0
- rfd3/constants.py +212 -0
- rfd3/engine.py +543 -0
- rfd3/inference/datasets.py +193 -0
- rfd3/inference/input_parsing.py +1123 -0
- rfd3/inference/legacy_input_parsing.py +717 -0
- rfd3/inference/parsing.py +165 -0
- rfd3/inference/symmetry/atom_array.py +298 -0
- rfd3/inference/symmetry/checks.py +241 -0
- rfd3/inference/symmetry/contigs.py +63 -0
- rfd3/inference/symmetry/frames.py +355 -0
- rfd3/inference/symmetry/symmetry_utils.py +398 -0
- rfd3/metrics/design_metrics.py +465 -0
- rfd3/metrics/hbonds_hbplus_metrics.py +308 -0
- rfd3/metrics/hbonds_metrics.py +389 -0
- rfd3/metrics/losses.py +325 -0
- rfd3/metrics/metrics_utils.py +118 -0
- rfd3/metrics/sidechain_metrics.py +349 -0
- rfd3/model/RFD3.py +105 -0
- rfd3/model/RFD3_diffusion_module.py +387 -0
- rfd3/model/cfg_utils.py +81 -0
- rfd3/model/inference_sampler.py +635 -0
- rfd3/model/layers/attention.py +577 -0
- rfd3/model/layers/block_utils.py +580 -0
- rfd3/model/layers/blocks.py +777 -0
- rfd3/model/layers/chunked_pairwise.py +377 -0
- rfd3/model/layers/encoders.py +417 -0
- rfd3/model/layers/layer_utils.py +197 -0
- rfd3/model/layers/pairformer_layers.py +128 -0
- rfd3/run_inference.py +45 -0
- rfd3/testing/debug.py +139 -0
- rfd3/testing/debug_utils.py +73 -0
- rfd3/testing/testing_utils.py +356 -0
- rfd3/train.py +194 -0
- rfd3/trainer/dump_validation_structures.py +154 -0
- rfd3/trainer/fabric_trainer.py +923 -0
- rfd3/trainer/recycling.py +42 -0
- rfd3/trainer/rfd3.py +485 -0
- rfd3/trainer/trainer_utils.py +502 -0
- rfd3/transforms/conditioning_base.py +508 -0
- rfd3/transforms/conditioning_utils.py +200 -0
- rfd3/transforms/design_transforms.py +807 -0
- rfd3/transforms/dna_crop.py +523 -0
- rfd3/transforms/hbonds.py +407 -0
- rfd3/transforms/hbonds_hbplus.py +246 -0
- rfd3/transforms/ncaa_transforms.py +153 -0
- rfd3/transforms/pipelines.py +632 -0
- rfd3/transforms/ppi_transforms.py +541 -0
- rfd3/transforms/rasa.py +116 -0
- rfd3/transforms/symmetry.py +76 -0
- rfd3/transforms/training_conditions.py +552 -0
- rfd3/transforms/util_transforms.py +498 -0
- rfd3/transforms/virtual_atoms.py +305 -0
- rfd3/utils/inference.py +648 -0
- rfd3/utils/io.py +245 -0
- rfd3/utils/vizualize.py +276 -0
|
@@ -0,0 +1,415 @@
|
|
|
1
|
+
import random
|
|
2
|
+
import re
|
|
3
|
+
from typing import List
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
from atomworks.ml.encoding_definitions import AF3SequenceEncoding
|
|
7
|
+
from biotite.structure import AtomArray
|
|
8
|
+
from rfd3.constants import (
|
|
9
|
+
TIP_BY_RESTYPE,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
from foundry.common import exists
|
|
13
|
+
from foundry.utils.ddp import RankedLogger
|
|
14
|
+
|
|
15
|
+
# Logger that reports on every rank (rank_zero_only=False) so per-rank
# contig-parsing problems stay visible in distributed runs.
global_logger = RankedLogger(__name__, rank_zero_only=False)
# Shared AF3 sequence encoding, used below to tell amino-acid-like residues
# apart from ligands / other components.
sequence_encoding = AF3SequenceEncoding()
# Residue names the encoding flags as amino-acid-like; used to list the
# available non-protein component names in error messages.
_aa_like_res_names = sequence_encoding.all_res_names[sequence_encoding.is_aa_like]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
#################################################################################
|
|
21
|
+
# Component / contig parsing
|
|
22
|
+
#################################################################################
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ComponentValidationError(ValueError):
|
|
26
|
+
"""Raised when contig/component inputs cannot be parsed or validated."""
|
|
27
|
+
|
|
28
|
+
def __init__(
|
|
29
|
+
self,
|
|
30
|
+
message: str,
|
|
31
|
+
*,
|
|
32
|
+
component: str | None = None,
|
|
33
|
+
details: dict | None = None,
|
|
34
|
+
):
|
|
35
|
+
self.component = component
|
|
36
|
+
self.details = details or {}
|
|
37
|
+
prefix = f"[component={component}] " if component else ""
|
|
38
|
+
suffix = f" Details: {self.details}" if self.details else ""
|
|
39
|
+
super().__init__(f"{prefix}{message}{suffix}")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ComponentStr(str):
    """Component identifier string, e.g. "A1" or "B12" (chain ID followed by a residue index).

    Plain ``str`` subclass used purely as a semantic marker for type hints;
    it adds no behavior. Previously named `contig_string`.
    """
|
|
44
|
+
|
|
45
|
+
def split_component(v):
    """Split a component string such as "A11" into ``[chain, index]``.

    Alias kept for the newer "component" terminology; delegates directly to
    :func:`split_contig` and raises whatever it raises.
    """
    return split_contig(v)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def split_contig(x):
    """Split a contig string like "A20" into ``[chain, residue_index]``.

    Args:
        x: Contig string — a single-character chain ID followed by a
            non-negative integer residue index (e.g. "A20").

    Returns:
        ``[chain, idx]`` where ``chain`` is a one-character string and
        ``idx`` is an ``int``.

    Raises:
        ComponentValidationError: If the string cannot be parsed, or if the
            residue index is negative.
    """
    try:
        chain = str(x[0])
        idx = int(x[1:])
    except Exception as e:
        # Malformed input (empty string, non-numeric index, ...).
        raise ComponentValidationError(
            f"Invalid contig format: '{x}'. Expected format is 'ChainIDResID' (e.g. 'A20').",
            component=str(x),
        ) from e
    if idx < 0:
        # Validated outside the try-block: previously this specific error was
        # swallowed by the blanket `except Exception` above and re-raised with
        # the generic format message, hiding the real cause.
        raise ComponentValidationError(
            "Residue index must be a non-negative integer.", component=str(x)
        )
    return [chain, idx]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def extract_pn_unit_info(contig):
    """Split a chain/range token like "A20-21" (or "A20") into parts.

    Args:
        contig: Token of the form ``<letter><start>[-<end>]``. A missing end
            defaults to ``start`` (single residue).

    Returns:
        Tuple ``(pn_unit_id, start, end)`` with ``start``/``end`` as ints.

    Raises:
        ComponentValidationError: If the token does not match the pattern.
    """
    # NOTE: re.match only anchors at the start (historical behavior) — a
    # trailing suffix after the range is ignored rather than rejected.
    matched = re.match(r"([A-Za-z])(\d+)(?:-(\d+))?", contig)
    if matched is None:
        raise ComponentValidationError(
            "Invalid contig format. Expected 'ChainIDStart-Stop' or 'ChainIDIdx'.",
            component=contig,
        )
    pn_unit_id, start_str, end_str = matched.groups()
    start = int(start_str)
    end = int(end_str) if end_str else start
    return pn_unit_id, start, end
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def get_design_pattern_with_constraints(contig, length=None):
    """
    Convert the contig string to separate modules.
    e.g. '1-5,A20-21,1-5,A25-25,1-5,A30-30,/0,1-5' with length = 10-10 may be converted to [2, A20, A21, 2, A25, 3, A30, /0, 3]
    Integers represent number of free residues to put there.

    Args:
        contig: Comma-separated specification. Parts containing a letter
            (e.g. "A20-21") are fixed motif segments; "/0" is a chain-break
            marker; purely numeric parts ("1-5" or "3") are variable-length
            runs of free residues.
        length: Optional total length constraint, "min-max" or a single int
            string; it includes motif residues. When None, free counts are
            only bounded per-part (effective max 9999 overall).

    Returns:
        List mixing ints (sampled free-residue counts), component strings
        like "A20", and "/0" markers, in contig order. Sampling uses the
        global `random` state, so output is stochastic.

    Raises:
        ComponentValidationError: If a fixed part cannot be parsed, or if no
            free-residue assignment can satisfy `length`.
    """
    contig_parts = contig.split(",")

    # Separate fixed segments (e.g., "A1051-1051") and variable ranges (e.g., "0-40")
    variable_ranges = []
    fixed_parts = []
    # Per-part tag: 1 = fixed motif, 2 = chain break "/0", 0 = variable run.
    pos_to_put_motif = []

    for part in contig_parts:
        if any(c.isalpha() for c in part):  # Detect parts containing letters as fixed
            pn_unit_id, pn_unit_start, pn_unit_end = extract_pn_unit_info(part)
            fixed_parts.append([pn_unit_id, pn_unit_start, pn_unit_end])
            pos_to_put_motif.append(1)
        elif part == "/0":
            pos_to_put_motif.append(2)
        else:
            if "-" in part:
                start, end = map(int, part.split("-"))
            else:
                start = end = int(part)
            variable_ranges.append([start, end])
            pos_to_put_motif.append(0)

    # adjust the total length to solely for free residues
    num_motif_residues = sum([i[2] - i[1] + 1 for i in fixed_parts])

    if length is None:
        length_min, length_max = 0, 9999
    else:
        if "-" in length:
            length_min, length_max = map(int, length.split("-"))
        else:
            length_min = length_max = int(length)

    length_min -= num_motif_residues
    length_max -= num_motif_residues

    # Running budget of free residues still to be distributed.
    remaining_length_min = length_min
    remaining_length_max = length_max

    num_free_atoms = []
    for range_limits in variable_ranges:
        min_value = range_limits[0]
        max_value = range_limits[1]

        # Calculate the valid range for the current segment
        # (clamp by what later segments can still absorb: their summed
        # maxima bound the floor, their summed minima bound the ceiling).
        valid_min = max(
            min_value,
            remaining_length_min
            - sum(r[1] for r in variable_ranges[len(num_free_atoms) + 1 :]),
        )
        valid_max = min(
            max_value,
            remaining_length_max
            - sum(r[0] for r in variable_ranges[len(num_free_atoms) + 1 :]),
        )

        if valid_min > valid_max and length is not None:
            raise ComponentValidationError(
                "No valid selections possible with the given constraints."
            )

        # Randomly select a value for the current segment
        # NOTE(review): when `length is None` the guard above is skipped, so
        # an inverted range would make random.randint raise ValueError —
        # confirm whether that can occur for unconstrained inputs.
        selected_value = random.randint(valid_min, valid_max)
        num_free_atoms.append(selected_value)

        # Update remaining lengths
        remaining_length_min -= selected_value
        remaining_length_max -= selected_value

    # Re-assemble output in original part order, expanding motif ranges into
    # individual component strings and consuming sampled counts in order.
    atoms_with_motif = []
    for idx in range(len(pos_to_put_motif)):
        if pos_to_put_motif[idx] == 1:
            motif = fixed_parts.pop(0)
            pn_unit_id, pn_unit_start, pn_unit_end = motif[0], motif[1], motif[2]
            for index in range(pn_unit_start, pn_unit_end + 1):
                atoms_with_motif.append(f"{pn_unit_id}{index}")
        elif pos_to_put_motif[idx] == 0:
            free_atom = num_free_atoms.pop(0)
            atoms_with_motif.append(free_atom)
        elif pos_to_put_motif[idx] == 2:
            atoms_with_motif.append("/0")

    return atoms_with_motif
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def get_motif_components_and_breaks(unindexed_contig, index_all=False):
    """
    Convert a contig string into its components and breaks in motif
    This way you can specify in your contigs where the breaks in the motif should be, so that,
    say, residues aren't glued together by the model. Used for parsing unindexed inputs.

    e.g.:
    contig="A14,A15,A16" -> components=[A14, A15, A16] breaks=[True, True, True]
    contig="A14-15,A16" -> components=[A14, A15, A16] breaks=[True, False, True]

    args:
        unindexed_contig: Contig string for unindexed tokens, see above for example on how positional
            encodings between contigs can be selectively leaked
        index_all: No breaks are used, allows for full indexing of concatenated tokens
            Can use cleanup if this is the desired way to provide motif tokens.

    returns:
        (components, breaks) — parallel lists; breaks entries are True/False
        for residue components and None for non-residue parts ("/0", numbers).

    raises:
        ComponentValidationError: On unparseable residue tokens, or numeric
            ranges (partial unindexing) which are unsupported here.
    """
    components = []
    breaks = []

    contig_parts = unindexed_contig.split(",")
    for part in contig_parts:
        if any(c.isalpha() for c in part):
            # ... Parse possibilities: A11 | A11-12 | A11-11
            pn_unit_id, pn_unit_start, pn_unit_end = extract_pn_unit_info(part)

            if pn_unit_start == pn_unit_end:
                # ... For single residues, append and break
                components.append(f"{pn_unit_id}{pn_unit_start}")
                breaks.append(True)
            else:
                # ... For multiple residues, break and then append without breaks
                # (only the first residue of a range starts a new break).
                for index in range(pn_unit_start, pn_unit_end + 1):
                    components.append(f"{pn_unit_id}{index}")
                    if index == pn_unit_start:
                        breaks.append(True)
                    else:
                        breaks.append(False)
        elif part == "/0":
            # Chain-break marker: carried through with a None break flag.
            components.append(part)
            breaks.append(None)
        else:
            if "-" in part:
                raise ComponentValidationError(
                    "Partial unindexing without fixed length is not supported.",
                    component=part,
                )
            # Bare number of free residues: carried through, no break flag.
            components.append(part)
            breaks.append(None)

    # NOTE(review): this overwrites the first entry even when it is a
    # non-residue part (None) — confirm that is intended for such inputs.
    breaks[0] = True  # Decouple unindexed region from global index
    if index_all:
        global_logger.info("Unindexing all residues")
        # Drop all breaks (keep None markers) so tokens index contiguously.
        breaks = [(False if b is not None else None) for b in breaks]
    return components, breaks
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
#################################################################################
|
|
234
|
+
# Mask getters
|
|
235
|
+
#################################################################################
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def get_name_mask(
    source_names: np.ndarray,
    query_names: str | list[str],
    source_resname: str | None = None,
) -> np.ndarray:
    """
    Args:
        source_names: list of all names to match in current token
        query_names: specifier of names to get:
            "ALL" - All atom names in token are matched
            "BKBN" - Only backbone atoms (not CB)
            "TIP" - 2 farthest atoms from the backbone are fixed with any
                    additional atoms that automatically constrain geometries
                    (e.g. 4 atoms for carboxylates/amides). See `constants.py`.
            Comma-separated string - e.g. "N,CA,C,O,CB" for exact queries
            List of names - e.g. ["N", "CA", "C", "O"] for exact queries
        source_resname: residue name is required when specifying just to grab the names for a "TIP"

    Raises error if not all exact atom names are found and unique

    Returns:
        mask of atoms corresponding to token
    """
    # Resolve the query into an explicit list of atom names.
    if isinstance(query_names, list):
        names = query_names
    elif isinstance(query_names, str):
        if query_names.upper() == "ALL":
            # Short-circuit: everything matches, no validation needed.
            return np.ones(source_names.shape[0], dtype=bool)
        elif query_names.upper() == "BKBN":
            names = ["N", "CA", "C", "O"]
        elif query_names.upper() == "TIP":
            if not exists(source_resname):
                raise ComponentValidationError(
                    "TIP selection requires a residue name.",
                    component=str(source_resname),
                )
            # Per-residue tip-atom table from rfd3.constants.
            names = TIP_BY_RESTYPE[source_resname]
            if not exists(names):
                raise ComponentValidationError(
                    "Residue does not define TIP atoms; use ALL, BKBN, or explicit names.",
                    component=str(source_resname),
                )
        elif query_names == "":
            names = []
        else:
            names = query_names.split(",")
    else:
        raise ComponentValidationError(
            "query_names must be a string or list of strings.",
            details={"got_type": str(type(query_names))},
        )

    # Guards against e.g. trailing commas producing empty name tokens.
    if any(n == "" for n in names):
        raise ComponentValidationError(
            f"Empty atom name found in selection '{query_names}'.",
            component=str(source_resname),
        )
    mask = np.isin(source_names, names)

    # Empty selection is valid and yields an all-False mask.
    if len(names) == 0:
        return mask

    if not len(set(names)) == len(names):
        raise ComponentValidationError(
            f"Atom names in '{query_names}' must be unique.",
            details={"duplicates": names},
        )
    if not mask.any():
        raise ComponentValidationError(
            f"Could not find requested atoms '{query_names}' in atom array.",
            details={"source_names": np.asarray(source_names).tolist()},
        )
    # Fewer matches than requested names: warn but proceed (some names may be
    # absent, or duplicated across residues — see modulo check below).
    if mask.sum() != len(names):
        global_logger.warning(
            "Not all atoms found in atom array. Are you expecting multiple residues/ligands with the same names? "
            + "If not, check your input pdb file. "
            + "Atom array requested to contain names {}. Got: {}. Requested {}".format(
                query_names,
                np.asarray(source_names).tolist(),
                np.asarray(names).tolist(),
            )
        )
    if mask.sum() % len(names) != 0:
        # for the case where source_names are originated from multiple residues with the same names
        # (e.g. two ORO ligands in the input pdb: {ligand: "ORO", fixed_atoms: {ORO:"N3,C2,C4,N1"}})
        raise ComponentValidationError(
            "Number of atoms must be a multiple of the requested names.",
            details={
                "query": query_names,
                "source_names": np.asarray(source_names).tolist(),
                "requested": np.asarray(names).tolist(),
            },
        )

    return mask
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def fetch_mask_from_idx(contig_str, *, atom_array):
    """Return a boolean atom mask selecting one residue from `atom_array`.

    Args:
        contig_str: Single contig token such as "A11" (chain ID + residue id).
        atom_array: Structure whose `chain_id` and `res_id` annotations are
            compared against the parsed contig.

    Raises:
        ComponentValidationError: If no atom matches the chain/residue pair.
    """
    chain, res_id = split_contig(contig_str)
    label = f"{chain}{res_id}"
    on_chain = atom_array.chain_id == chain
    at_residue = atom_array.res_id == res_id
    mask = on_chain & at_residue
    if np.any(mask):
        return mask
    raise ComponentValidationError(
        f"Residue {label} not found in atom array.",
        component=label,
    )
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def fetch_mask_from_name(name, *, atom_array):
    """Return a boolean atom mask for atoms whose residue name equals `name`.

    Args:
        name: Residue/ligand name, e.g. "LIG".
        atom_array: Structure providing a `res_name` annotation array.

    Raises:
        ComponentValidationError: If no atom carries that residue name; the
            error lists the non-protein residue names that are present.
    """
    mask = atom_array.res_name == name
    if np.any(mask):
        return mask
    # Not found: report which non-amino-acid component names exist instead.
    aa_like = np.isin(atom_array.res_name, _aa_like_res_names)
    non_protein_res_names = np.unique(atom_array.res_name[~aa_like])
    raise ComponentValidationError(
        "Component not found in input atom array.",
        component=name,
        details={"available_non_protein": non_protein_res_names.tolist()},
    )
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def fetch_mask_from_component(component, *, atom_array):
    """Resolve `component` to an atom mask, trying name lookup before contig.

    Catch-all: `component` may be a non-protein residue name (e.g. "LIG")
    or a contig string (e.g. "A11").

    Args:
        component: Component identifier — residue/ligand name or "A11"-style
            contig.
        atom_array: Structure the component is resolved against.

    Returns:
        Boolean atom mask for the matched component.
    """
    try:
        return fetch_mask_from_name(component, atom_array=atom_array)
    except ComponentValidationError:
        # Not a residue/ligand name; fall back to chain+index parsing.
        return fetch_mask_from_idx(component, atom_array=atom_array)
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def unravel_components(
    v: str, *, atom_array: AtomArray = None, allow_multiple_matches: bool = False
) -> List[str]:
    """Safely unravel components from a string input.

    Args:
        v: Either a contig/design pattern containing "," or "-" (delegated to
            `get_design_pattern_with_constraints`), or a single component such
            as "A11" or a ligand name (resolved against `atom_array`).
        atom_array: Structure used to resolve single components; required in
            the single-component branch (it is dereferenced there).
        allow_multiple_matches: If True, a component matching several residues
            is expanded to all matches (with a warning) instead of raising.

    Returns:
        List of canonical component strings like "A11" (the design-pattern
        branch may also yield ints and "/0" markers).

    Raises:
        ComponentValidationError: If a single component maps to multiple
            residues while `allow_multiple_matches` is False, or cannot be
            resolved at all.
    """
    components = []
    if "," in v or "-" in v:
        components.extend(get_design_pattern_with_constraints(v))
    else:
        # Safely canonicalize to single component
        mask = fetch_mask_from_component(v, atom_array=atom_array)
        if mask.sum() > 0:
            res_ids, chain_ids = atom_array.res_id[mask], atom_array.chain_id[mask]
            # assert unique resids for component
            if len(set(zip(chain_ids, res_ids))) != 1:
                if not allow_multiple_matches:
                    raise ComponentValidationError(
                        f"Component '{v}' maps to multiple residues.",
                        component=v,
                    )
                else:
                    global_logger.warning(
                        f"Component '{v}' maps to multiple residues. If you are using Symmetry this is OK."
                    )
                    components.extend([f"{c}{r}" for c, r in zip(chain_ids, res_ids)])
                    # NOTE(review): list(set(...)) discards ordering —
                    # confirm no caller relies on component order here.
                    components = list(set(components))  # unique components
                    return components
            # Exactly one (chain, res_id) pair: canonicalize to "A11" form.
            res_id, chain_id = res_ids[0], chain_ids[0]

            component = f"{chain_id}{res_id}"
            global_logger.debug(
                "Canonicalized component string: %s -> %s", v, component
            )
            components.append(component)
    return components
|