rc-foundry 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. foundry/__init__.py +57 -0
  2. foundry/callbacks/__init__.py +5 -0
  3. foundry/callbacks/callback.py +116 -0
  4. foundry/callbacks/health_logging.py +419 -0
  5. foundry/callbacks/metrics_logging.py +211 -0
  6. foundry/callbacks/timing_logging.py +67 -0
  7. foundry/callbacks/train_logging.py +278 -0
  8. foundry/common.py +108 -0
  9. foundry/constants.py +28 -0
  10. foundry/hydra/resolvers.py +77 -0
  11. foundry/inference_engines/base.py +235 -0
  12. foundry/inference_engines/checkpoint_registry.py +66 -0
  13. foundry/metrics/__init__.py +12 -0
  14. foundry/metrics/losses.py +30 -0
  15. foundry/metrics/metric.py +319 -0
  16. foundry/model/layers/blocks.py +47 -0
  17. foundry/testing/__init__.py +6 -0
  18. foundry/testing/fixtures.py +19 -0
  19. foundry/testing/pytest_hooks.py +15 -0
  20. foundry/trainers/fabric.py +923 -0
  21. foundry/training/EMA.py +67 -0
  22. foundry/training/checkpoint.py +61 -0
  23. foundry/training/schedulers.py +91 -0
  24. foundry/utils/alignment.py +86 -0
  25. foundry/utils/components.py +415 -0
  26. foundry/utils/datasets.py +405 -0
  27. foundry/utils/ddp.py +103 -0
  28. foundry/utils/instantiators.py +72 -0
  29. foundry/utils/logging.py +279 -0
  30. foundry/utils/rigid.py +1460 -0
  31. foundry/utils/rotation_augmentation.py +65 -0
  32. foundry/utils/squashfs.py +172 -0
  33. foundry/utils/torch.py +317 -0
  34. foundry/utils/weights.py +271 -0
  35. foundry/version.py +34 -0
  36. foundry_cli/__init__.py +3 -0
  37. foundry_cli/download_checkpoints.py +281 -0
  38. mpnn/__init__.py +1 -0
  39. mpnn/collate/feature_collator.py +265 -0
  40. mpnn/inference.py +53 -0
  41. mpnn/inference_engines/mpnn.py +549 -0
  42. mpnn/loss/nll_loss.py +122 -0
  43. mpnn/metrics/nll.py +369 -0
  44. mpnn/metrics/sequence_recovery.py +440 -0
  45. mpnn/model/layers/graph_embeddings.py +2372 -0
  46. mpnn/model/layers/message_passing.py +332 -0
  47. mpnn/model/layers/position_wise_feed_forward.py +44 -0
  48. mpnn/model/layers/positional_encoding.py +98 -0
  49. mpnn/model/mpnn.py +2632 -0
  50. mpnn/pipelines/mpnn.py +162 -0
  51. mpnn/samplers/samplers.py +167 -0
  52. mpnn/train.py +341 -0
  53. mpnn/trainers/mpnn.py +193 -0
  54. mpnn/transforms/feature_aggregation/mpnn.py +184 -0
  55. mpnn/transforms/feature_aggregation/polymer_ligand_interface.py +76 -0
  56. mpnn/transforms/feature_aggregation/token_encodings.py +132 -0
  57. mpnn/transforms/feature_aggregation/user_settings.py +347 -0
  58. mpnn/transforms/polymer_ligand_interface.py +164 -0
  59. mpnn/utils/inference.py +2397 -0
  60. mpnn/utils/probability.py +37 -0
  61. mpnn/utils/weights.py +309 -0
  62. rc_foundry-0.1.1.dist-info/METADATA +239 -0
  63. rc_foundry-0.1.1.dist-info/RECORD +180 -0
  64. rc_foundry-0.1.1.dist-info/WHEEL +4 -0
  65. rc_foundry-0.1.1.dist-info/entry_points.txt +5 -0
  66. rc_foundry-0.1.1.dist-info/licenses/LICENSE.md +28 -0
  67. rf3/__init__.py +3 -0
  68. rf3/_version.py +33 -0
  69. rf3/alignment.py +79 -0
  70. rf3/callbacks/dump_validation_structures.py +101 -0
  71. rf3/callbacks/metrics_logging.py +324 -0
  72. rf3/chemical.py +1529 -0
  73. rf3/cli.py +77 -0
  74. rf3/data/cyclic_transform.py +78 -0
  75. rf3/data/extra_xforms.py +36 -0
  76. rf3/data/ground_truth_template.py +463 -0
  77. rf3/data/paired_msa.py +206 -0
  78. rf3/data/pipeline_utils.py +128 -0
  79. rf3/data/pipelines.py +558 -0
  80. rf3/diffusion_samplers/inference_sampler.py +222 -0
  81. rf3/inference.py +65 -0
  82. rf3/inference_engines/__init__.py +5 -0
  83. rf3/inference_engines/rf3.py +735 -0
  84. rf3/kinematics.py +354 -0
  85. rf3/loss/af3_confidence_loss.py +515 -0
  86. rf3/loss/af3_losses.py +655 -0
  87. rf3/loss/loss.py +179 -0
  88. rf3/metrics/chiral.py +179 -0
  89. rf3/metrics/clashing_chains.py +68 -0
  90. rf3/metrics/distogram.py +421 -0
  91. rf3/metrics/lddt.py +523 -0
  92. rf3/metrics/metadata.py +43 -0
  93. rf3/metrics/metric_utils.py +192 -0
  94. rf3/metrics/predicted_error.py +134 -0
  95. rf3/metrics/rasa.py +108 -0
  96. rf3/metrics/selected_distances.py +91 -0
  97. rf3/model/RF3.py +527 -0
  98. rf3/model/RF3_blocks.py +92 -0
  99. rf3/model/RF3_structure.py +303 -0
  100. rf3/model/layers/af3_auxiliary_heads.py +255 -0
  101. rf3/model/layers/af3_diffusion_transformer.py +544 -0
  102. rf3/model/layers/attention.py +313 -0
  103. rf3/model/layers/layer_utils.py +127 -0
  104. rf3/model/layers/mlff.py +118 -0
  105. rf3/model/layers/outer_product.py +59 -0
  106. rf3/model/layers/pairformer_layers.py +783 -0
  107. rf3/model/layers/structure_bias.py +56 -0
  108. rf3/scoring.py +1787 -0
  109. rf3/symmetry/resolve.py +284 -0
  110. rf3/train.py +194 -0
  111. rf3/trainers/rf3.py +570 -0
  112. rf3/util_module.py +47 -0
  113. rf3/utils/frames.py +109 -0
  114. rf3/utils/inference.py +665 -0
  115. rf3/utils/io.py +198 -0
  116. rf3/utils/loss.py +72 -0
  117. rf3/utils/predict_and_score.py +165 -0
  118. rf3/utils/predicted_error.py +673 -0
  119. rf3/utils/recycling.py +42 -0
  120. rf3/validate.py +140 -0
  121. rfd3/.gitignore +7 -0
  122. rfd3/Makefile +76 -0
  123. rfd3/__init__.py +12 -0
  124. rfd3/callbacks.py +66 -0
  125. rfd3/cli.py +41 -0
  126. rfd3/constants.py +212 -0
  127. rfd3/engine.py +543 -0
  128. rfd3/inference/datasets.py +193 -0
  129. rfd3/inference/input_parsing.py +1123 -0
  130. rfd3/inference/legacy_input_parsing.py +717 -0
  131. rfd3/inference/parsing.py +165 -0
  132. rfd3/inference/symmetry/atom_array.py +298 -0
  133. rfd3/inference/symmetry/checks.py +241 -0
  134. rfd3/inference/symmetry/contigs.py +63 -0
  135. rfd3/inference/symmetry/frames.py +355 -0
  136. rfd3/inference/symmetry/symmetry_utils.py +398 -0
  137. rfd3/metrics/design_metrics.py +465 -0
  138. rfd3/metrics/hbonds_hbplus_metrics.py +308 -0
  139. rfd3/metrics/hbonds_metrics.py +389 -0
  140. rfd3/metrics/losses.py +325 -0
  141. rfd3/metrics/metrics_utils.py +118 -0
  142. rfd3/metrics/sidechain_metrics.py +349 -0
  143. rfd3/model/RFD3.py +105 -0
  144. rfd3/model/RFD3_diffusion_module.py +387 -0
  145. rfd3/model/cfg_utils.py +81 -0
  146. rfd3/model/inference_sampler.py +635 -0
  147. rfd3/model/layers/attention.py +577 -0
  148. rfd3/model/layers/block_utils.py +580 -0
  149. rfd3/model/layers/blocks.py +777 -0
  150. rfd3/model/layers/chunked_pairwise.py +377 -0
  151. rfd3/model/layers/encoders.py +417 -0
  152. rfd3/model/layers/layer_utils.py +197 -0
  153. rfd3/model/layers/pairformer_layers.py +128 -0
  154. rfd3/run_inference.py +45 -0
  155. rfd3/testing/debug.py +139 -0
  156. rfd3/testing/debug_utils.py +73 -0
  157. rfd3/testing/testing_utils.py +356 -0
  158. rfd3/train.py +194 -0
  159. rfd3/trainer/dump_validation_structures.py +154 -0
  160. rfd3/trainer/fabric_trainer.py +923 -0
  161. rfd3/trainer/recycling.py +42 -0
  162. rfd3/trainer/rfd3.py +485 -0
  163. rfd3/trainer/trainer_utils.py +502 -0
  164. rfd3/transforms/conditioning_base.py +508 -0
  165. rfd3/transforms/conditioning_utils.py +200 -0
  166. rfd3/transforms/design_transforms.py +807 -0
  167. rfd3/transforms/dna_crop.py +523 -0
  168. rfd3/transforms/hbonds.py +407 -0
  169. rfd3/transforms/hbonds_hbplus.py +246 -0
  170. rfd3/transforms/ncaa_transforms.py +153 -0
  171. rfd3/transforms/pipelines.py +632 -0
  172. rfd3/transforms/ppi_transforms.py +541 -0
  173. rfd3/transforms/rasa.py +116 -0
  174. rfd3/transforms/symmetry.py +76 -0
  175. rfd3/transforms/training_conditions.py +552 -0
  176. rfd3/transforms/util_transforms.py +498 -0
  177. rfd3/transforms/virtual_atoms.py +305 -0
  178. rfd3/utils/inference.py +648 -0
  179. rfd3/utils/io.py +245 -0
  180. rfd3/utils/vizualize.py +276 -0
@@ -0,0 +1,265 @@
1
+ """
2
+ Collation utilities for PyTorch data loading.
3
+
4
+ This module provides collation functions for batching examples with
5
+ variable-length features, including padding and scalar feature
6
+ consistency checks.
7
+ """
8
+
9
import copy
from typing import Any, Dict, List, Optional

import torch
from atomworks.constants import UNKNOWN_AA
from mpnn.transforms.feature_aggregation.token_encodings import MPNN_TOKEN_ENCODING
15
+
16
+ MPNN_DEFAULT_PADDING = {
17
+ # Tensor features that require padding
18
+ "X": 0.0,
19
+ "X_m": False,
20
+ "S": MPNN_TOKEN_ENCODING.token_to_idx[UNKNOWN_AA],
21
+ "R_idx": -100,
22
+ "chain_labels": -1,
23
+ "residue_mask": False,
24
+ "Y": 0.0,
25
+ "Y_m": 0,
26
+ "Y_t": 0,
27
+ "designed_residue_mask": False,
28
+ "symmetry_equivalence_group": -1,
29
+ "symmetry_weight": 0,
30
+ "bias": 0,
31
+ "pair_bias": 0,
32
+ "temperature": 1,
33
+ }
34
+
35
+
36
class FeatureCollator:
    """
    Generic PyTorch collation class for handling variable-length features with
    padding.

    This collator processes batches of examples where each example contains
    features that may have different shapes (requiring padding along dimension
    0) or scalar/non-tensor values (requiring consistency checks across the
    batch).
    """

    def __init__(self, default_padding: Optional[Dict[str, Any]] = None):
        """
        Initialize the FeatureCollator.

        Args:
            default_padding: Maps each feature key to a scalar or tensor to use
                for padding/missing values. Each value should be appropriate
                for the expected feature type (e.g., 0 for missing integers,
                0.0 for missing floats, or a tensor for missing tensor
                features). If None, uses MPNN-specific defaults.
        """
        if default_padding is None:
            default_padding = MPNN_DEFAULT_PADDING
        self.default_padding = default_padding

    def __call__(self, pipeline_outputs: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Collate a batch of pipeline outputs into network inputs.

        Args:
            pipeline_outputs: List of examples; each example is a dict with
                keys "input_features" (feature dict) and "atom_array" (raw
                atom array).

        Returns:
            Dict[str, Any]: dict with keys 'input_features' (collated features)
            and 'atom_array' (list of atom_arrays from each example).

        Raises:
            ValueError: If pipeline_outputs is empty, if feature values differ
                across examples, or if feature types are unsupported or
                mismatched.
        """
        if not pipeline_outputs:
            raise ValueError("Cannot collate empty batch")

        # Only collate keys present in the first example's input_features.
        keys_to_collate = pipeline_outputs[0]["input_features"].keys()
        input_features = {}
        for key in keys_to_collate:
            # Every example must carry every collated key.
            for i, example in enumerate(pipeline_outputs):
                if key not in example["input_features"]:
                    raise ValueError(
                        f"Feature '{key}' not found in example {i} input_features"
                    )

            values = [example["input_features"][key] for example in pipeline_outputs]
            if all(isinstance(v, torch.Tensor) for v in values):
                input_features[key] = self._collate_tensors(key, values)
            else:
                input_features[key] = self._collate_scalars(key, values)

        atom_arrays = [example["atom_array"] for example in pipeline_outputs]

        return {"input_features": input_features, "atom_array": atom_arrays}

    def _collate_tensors(
        self, key: str, values: List[torch.Tensor]
    ) -> torch.Tensor:
        """Stack tensors for one feature, padding along dim 0 when lengths vary.

        Raises:
            ValueError: On mismatched ranks or trailing shapes, or when padding
                is required but no default padding value is configured.
        """
        ndims = [v.ndim for v in values]
        if len(set(ndims)) != 1:
            raise ValueError(
                f"Tensors for feature '{key}' have mismatched dimensions: {ndims}"
            )

        # 0-d tensors, and >=1-d tensors of equal leading length, can be
        # stacked directly with no padding.
        if ndims[0] == 0:
            return torch.stack(values, dim=0)
        lengths = [v.shape[0] for v in values]
        if len(set(lengths)) == 1:
            return torch.stack(values, dim=0)

        # Lengths vary along dim 0: pad each example up to the longest one.
        if key not in self.default_padding:
            raise ValueError(
                f"No default padding value for feature '{key}' required for "
                "tensor padding."
            )
        # All dimensions beyond the first must agree for padding to make sense.
        other_shapes = [v.shape[1:] for v in values]
        if len(set(other_shapes)) != 1:
            raise ValueError(
                f"Tensors for feature '{key}' have mismatched shapes beyond "
                f"dimension 0: {[v.shape for v in values]}"
            )

        padded = torch.full(
            (len(values), max(lengths), *values[0].shape[1:]),
            self.default_padding[key],
            dtype=values[0].dtype,
            device=values[0].device,
        )
        for i, v in enumerate(values):
            padded[i, : v.shape[0]] = v
        return padded

    def _collate_scalars(self, key: str, values: List[Any]) -> Any:
        """Validate non-tensor values agree across the batch; return one copy.

        Raises:
            ValueError: If the values have mismatched types or differ in value.
        """
        first_type = type(values[0])
        if not all(isinstance(v, first_type) for v in values):
            raise ValueError(
                f"Feature '{key}' has mismatched "
                f"types: {[type(v).__name__ for v in values]}"
            )

        first_val = values[0]
        if not all(self._deep_equal(first_val, v) for v in values):
            raise ValueError(f"Feature '{key}' differs across examples: {values}")
        # Deep-copy so the collated batch does not alias the example's data.
        return copy.deepcopy(first_val)

    def _deep_equal(self, a, b):
        """Recursively compare tensors, dicts, sequences, and scalars."""
        if isinstance(a, torch.Tensor) and isinstance(b, torch.Tensor):
            return torch.equal(a, b)
        if isinstance(a, dict) and isinstance(b, dict):
            if a.keys() != b.keys():
                return False
            return all(self._deep_equal(a[k], b[k]) for k in a)
        if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)):
            return len(a) == len(b) and all(
                self._deep_equal(x, y) for x, y in zip(a, b)
            )
        return a == b
185
+
186
+
187
class TokenBudgetAwareFeatureCollator(FeatureCollator):
    """
    Feature collator that enforces token budget constraints before collation.

    This collator sorts pipeline outputs by sequence length and removes the
    largest examples first if the batch would exceed the token budget when
    padded to the maximum length.

    Args:
        max_tokens_with_padding: Maximum number of tokens allowed per batch,
            including padding. The constraint is
            max(batch_lengths) * len(batch) <= max_tokens_with_padding.
        default_padding: Default padding values for features.
    """

    def __init__(
        self,
        max_tokens_with_padding: int,
        default_padding: Optional[Dict[str, Any]] = None,
    ):
        # Fail fast on an invalid budget before doing any other setup.
        if max_tokens_with_padding <= 0:
            raise ValueError("max_tokens_with_padding must be greater than 0")

        super().__init__(default_padding)
        self.max_tokens_with_padding = max_tokens_with_padding

    def __call__(self, pipeline_outputs: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Collate pipeline outputs while respecting token budget constraints.

        Sorts examples by length and removes largest examples first if needed
        to stay within the token budget.

        Args:
            pipeline_outputs: List of pipeline output dictionaries.

        Returns:
            Dict containing collated features and atom arrays.

        Raises:
            ValueError: If pipeline_outputs is empty, or if no example fits
                within the token budget.
        """
        if not pipeline_outputs:
            raise ValueError("Cannot collate empty batch")

        # Pair each example with its length (leading dimension of the "S"
        # feature) and sort ascending so the smallest examples are kept first.
        examples_with_L = sorted(
            (
                (example["input_features"]["S"].shape[0], example)
                for example in pipeline_outputs
            ),
            key=lambda pair: pair[0],
        )

        # Greedily admit examples while the padded batch fits the budget.
        # Because examples are sorted ascending, the first violation implies
        # every later (longer) example would also violate, so stop there.
        kept = []
        max_length = 0
        for length, example in examples_with_L:
            potential_max_length = max(length, max_length)
            if potential_max_length * (len(kept) + 1) > self.max_tokens_with_padding:
                break
            kept.append(example)
            max_length = potential_max_length

        # If no examples remain after filtering, raise an error.
        if not kept:
            raise ValueError(
                "No examples remain after applying token budget constraint. "
                "All examples exceed max_tokens_with_padding="
                f"{self.max_tokens_with_padding}"
            )

        # Delegate actual collation (padding, consistency checks) to the base.
        return super().__call__(kept)
mpnn/inference.py ADDED
@@ -0,0 +1,53 @@
1
+ #!/usr/bin/env -S /bin/sh -c '"$(dirname "$0")/../../../../.ipd/shebang/mpnn_exec.sh" "$0" "$@"'
2
+
3
+ from mpnn.inference_engines.mpnn import MPNNInferenceEngine
4
+ from mpnn.utils.inference import (
5
+ MPNN_GLOBAL_INFERENCE_DEFAULTS,
6
+ build_arg_parser,
7
+ cli_to_json,
8
+ )
9
+
10
+
11
def main() -> None:
    """Top-level CLI entry point for MPNN inference.

    This script wires together:
    - CLI / arg parsing
    - JSON config building (or loading an existing JSON)
    - Execution of the MPNNInferenceEngine
    """
    # Parse command-line arguments.
    args = build_arg_parser().parse_args()

    # Build (or load) the JSON inference config from the parsed arguments.
    config = cli_to_json(args)

    # Engine-level (global) settings fall back to the shipped defaults when
    # the config does not override them.
    engine_keys = (
        "model_type",
        "checkpoint_path",
        "is_legacy_weights",
        "out_directory",
        "write_fasta",
        "write_structures",
    )
    engine_kwargs = {
        key: config.get(key, MPNN_GLOBAL_INFERENCE_DEFAULTS[key])
        for key in engine_keys
    }
    engine = MPNNInferenceEngine(**engine_kwargs)

    # In this case, structures are resolved from inputs; no explicit atom arrays
    _ = engine.run(input_dicts=config["inputs"], atom_arrays=None)


if __name__ == "__main__":
    main()