rc-foundry 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- foundry/__init__.py +57 -0
- foundry/callbacks/__init__.py +5 -0
- foundry/callbacks/callback.py +116 -0
- foundry/callbacks/health_logging.py +419 -0
- foundry/callbacks/metrics_logging.py +211 -0
- foundry/callbacks/timing_logging.py +67 -0
- foundry/callbacks/train_logging.py +278 -0
- foundry/common.py +108 -0
- foundry/constants.py +28 -0
- foundry/hydra/resolvers.py +77 -0
- foundry/inference_engines/base.py +235 -0
- foundry/inference_engines/checkpoint_registry.py +66 -0
- foundry/metrics/__init__.py +12 -0
- foundry/metrics/losses.py +30 -0
- foundry/metrics/metric.py +319 -0
- foundry/model/layers/blocks.py +47 -0
- foundry/testing/__init__.py +6 -0
- foundry/testing/fixtures.py +19 -0
- foundry/testing/pytest_hooks.py +15 -0
- foundry/trainers/fabric.py +923 -0
- foundry/training/EMA.py +67 -0
- foundry/training/checkpoint.py +61 -0
- foundry/training/schedulers.py +91 -0
- foundry/utils/alignment.py +86 -0
- foundry/utils/components.py +415 -0
- foundry/utils/datasets.py +405 -0
- foundry/utils/ddp.py +103 -0
- foundry/utils/instantiators.py +72 -0
- foundry/utils/logging.py +279 -0
- foundry/utils/rigid.py +1460 -0
- foundry/utils/rotation_augmentation.py +65 -0
- foundry/utils/squashfs.py +172 -0
- foundry/utils/torch.py +317 -0
- foundry/utils/weights.py +271 -0
- foundry/version.py +34 -0
- foundry_cli/__init__.py +3 -0
- foundry_cli/download_checkpoints.py +281 -0
- mpnn/__init__.py +1 -0
- mpnn/collate/feature_collator.py +265 -0
- mpnn/inference.py +53 -0
- mpnn/inference_engines/mpnn.py +549 -0
- mpnn/loss/nll_loss.py +122 -0
- mpnn/metrics/nll.py +369 -0
- mpnn/metrics/sequence_recovery.py +440 -0
- mpnn/model/layers/graph_embeddings.py +2372 -0
- mpnn/model/layers/message_passing.py +332 -0
- mpnn/model/layers/position_wise_feed_forward.py +44 -0
- mpnn/model/layers/positional_encoding.py +98 -0
- mpnn/model/mpnn.py +2632 -0
- mpnn/pipelines/mpnn.py +162 -0
- mpnn/samplers/samplers.py +167 -0
- mpnn/train.py +341 -0
- mpnn/trainers/mpnn.py +193 -0
- mpnn/transforms/feature_aggregation/mpnn.py +184 -0
- mpnn/transforms/feature_aggregation/polymer_ligand_interface.py +76 -0
- mpnn/transforms/feature_aggregation/token_encodings.py +132 -0
- mpnn/transforms/feature_aggregation/user_settings.py +347 -0
- mpnn/transforms/polymer_ligand_interface.py +164 -0
- mpnn/utils/inference.py +2397 -0
- mpnn/utils/probability.py +37 -0
- mpnn/utils/weights.py +309 -0
- rc_foundry-0.1.1.dist-info/METADATA +239 -0
- rc_foundry-0.1.1.dist-info/RECORD +180 -0
- rc_foundry-0.1.1.dist-info/WHEEL +4 -0
- rc_foundry-0.1.1.dist-info/entry_points.txt +5 -0
- rc_foundry-0.1.1.dist-info/licenses/LICENSE.md +28 -0
- rf3/__init__.py +3 -0
- rf3/_version.py +33 -0
- rf3/alignment.py +79 -0
- rf3/callbacks/dump_validation_structures.py +101 -0
- rf3/callbacks/metrics_logging.py +324 -0
- rf3/chemical.py +1529 -0
- rf3/cli.py +77 -0
- rf3/data/cyclic_transform.py +78 -0
- rf3/data/extra_xforms.py +36 -0
- rf3/data/ground_truth_template.py +463 -0
- rf3/data/paired_msa.py +206 -0
- rf3/data/pipeline_utils.py +128 -0
- rf3/data/pipelines.py +558 -0
- rf3/diffusion_samplers/inference_sampler.py +222 -0
- rf3/inference.py +65 -0
- rf3/inference_engines/__init__.py +5 -0
- rf3/inference_engines/rf3.py +735 -0
- rf3/kinematics.py +354 -0
- rf3/loss/af3_confidence_loss.py +515 -0
- rf3/loss/af3_losses.py +655 -0
- rf3/loss/loss.py +179 -0
- rf3/metrics/chiral.py +179 -0
- rf3/metrics/clashing_chains.py +68 -0
- rf3/metrics/distogram.py +421 -0
- rf3/metrics/lddt.py +523 -0
- rf3/metrics/metadata.py +43 -0
- rf3/metrics/metric_utils.py +192 -0
- rf3/metrics/predicted_error.py +134 -0
- rf3/metrics/rasa.py +108 -0
- rf3/metrics/selected_distances.py +91 -0
- rf3/model/RF3.py +527 -0
- rf3/model/RF3_blocks.py +92 -0
- rf3/model/RF3_structure.py +303 -0
- rf3/model/layers/af3_auxiliary_heads.py +255 -0
- rf3/model/layers/af3_diffusion_transformer.py +544 -0
- rf3/model/layers/attention.py +313 -0
- rf3/model/layers/layer_utils.py +127 -0
- rf3/model/layers/mlff.py +118 -0
- rf3/model/layers/outer_product.py +59 -0
- rf3/model/layers/pairformer_layers.py +783 -0
- rf3/model/layers/structure_bias.py +56 -0
- rf3/scoring.py +1787 -0
- rf3/symmetry/resolve.py +284 -0
- rf3/train.py +194 -0
- rf3/trainers/rf3.py +570 -0
- rf3/util_module.py +47 -0
- rf3/utils/frames.py +109 -0
- rf3/utils/inference.py +665 -0
- rf3/utils/io.py +198 -0
- rf3/utils/loss.py +72 -0
- rf3/utils/predict_and_score.py +165 -0
- rf3/utils/predicted_error.py +673 -0
- rf3/utils/recycling.py +42 -0
- rf3/validate.py +140 -0
- rfd3/.gitignore +7 -0
- rfd3/Makefile +76 -0
- rfd3/__init__.py +12 -0
- rfd3/callbacks.py +66 -0
- rfd3/cli.py +41 -0
- rfd3/constants.py +212 -0
- rfd3/engine.py +543 -0
- rfd3/inference/datasets.py +193 -0
- rfd3/inference/input_parsing.py +1123 -0
- rfd3/inference/legacy_input_parsing.py +717 -0
- rfd3/inference/parsing.py +165 -0
- rfd3/inference/symmetry/atom_array.py +298 -0
- rfd3/inference/symmetry/checks.py +241 -0
- rfd3/inference/symmetry/contigs.py +63 -0
- rfd3/inference/symmetry/frames.py +355 -0
- rfd3/inference/symmetry/symmetry_utils.py +398 -0
- rfd3/metrics/design_metrics.py +465 -0
- rfd3/metrics/hbonds_hbplus_metrics.py +308 -0
- rfd3/metrics/hbonds_metrics.py +389 -0
- rfd3/metrics/losses.py +325 -0
- rfd3/metrics/metrics_utils.py +118 -0
- rfd3/metrics/sidechain_metrics.py +349 -0
- rfd3/model/RFD3.py +105 -0
- rfd3/model/RFD3_diffusion_module.py +387 -0
- rfd3/model/cfg_utils.py +81 -0
- rfd3/model/inference_sampler.py +635 -0
- rfd3/model/layers/attention.py +577 -0
- rfd3/model/layers/block_utils.py +580 -0
- rfd3/model/layers/blocks.py +777 -0
- rfd3/model/layers/chunked_pairwise.py +377 -0
- rfd3/model/layers/encoders.py +417 -0
- rfd3/model/layers/layer_utils.py +197 -0
- rfd3/model/layers/pairformer_layers.py +128 -0
- rfd3/run_inference.py +45 -0
- rfd3/testing/debug.py +139 -0
- rfd3/testing/debug_utils.py +73 -0
- rfd3/testing/testing_utils.py +356 -0
- rfd3/train.py +194 -0
- rfd3/trainer/dump_validation_structures.py +154 -0
- rfd3/trainer/fabric_trainer.py +923 -0
- rfd3/trainer/recycling.py +42 -0
- rfd3/trainer/rfd3.py +485 -0
- rfd3/trainer/trainer_utils.py +502 -0
- rfd3/transforms/conditioning_base.py +508 -0
- rfd3/transforms/conditioning_utils.py +200 -0
- rfd3/transforms/design_transforms.py +807 -0
- rfd3/transforms/dna_crop.py +523 -0
- rfd3/transforms/hbonds.py +407 -0
- rfd3/transforms/hbonds_hbplus.py +246 -0
- rfd3/transforms/ncaa_transforms.py +153 -0
- rfd3/transforms/pipelines.py +632 -0
- rfd3/transforms/ppi_transforms.py +541 -0
- rfd3/transforms/rasa.py +116 -0
- rfd3/transforms/symmetry.py +76 -0
- rfd3/transforms/training_conditions.py +552 -0
- rfd3/transforms/util_transforms.py +498 -0
- rfd3/transforms/virtual_atoms.py +305 -0
- rfd3/utils/inference.py +648 -0
- rfd3/utils/io.py +245 -0
- rfd3/utils/vizualize.py +276 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
from rfd3.model.layers.layer_utils import (
|
|
3
|
+
MultiDimLinear,
|
|
4
|
+
RMSNorm,
|
|
5
|
+
Transition,
|
|
6
|
+
linearNoBias,
|
|
7
|
+
)
|
|
8
|
+
from torch import nn
|
|
9
|
+
|
|
10
|
+
from foundry.training.checkpoint import activation_checkpointing
|
|
11
|
+
from foundry.utils.torch import device_of
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class AttentionPairBiasPairformerDeepspeed(nn.Module):
    """AF3-style pair-biased multi-head attention over the single representation.

    Attention logits between tokens i and j are the scaled Q·K dot product
    plus a per-head bias projected from the pair representation ``Z_II``
    (and an optional additive ``Beta_II`` bias). The attended values are
    gated with a sigmoid gate before the output projection, matching
    AlphaFold-3's AttentionPairBias. The adaptive layer-norm / ``S_I``
    conditioning path is not implemented here (``S_I`` must be None).
    """

    def __init__(self, c_a, c_s, c_pair, n_head, kq_norm=False):
        """
        Args:
            c_a: channel dim of the single (token) representation.
            c_s: accepted for signature compatibility with the AdaLN
                variant; unused here since ``S_I`` is asserted to be None.
            c_pair: channel dim of the pair representation.
            n_head: number of attention heads; must divide ``c_a``.
            kq_norm: if True, normalize the K/V projections (handled by
                ``MultiDimLinear``'s ``norm`` flag).
        """
        super().__init__()
        self.n_head = n_head
        self.c_a = c_a
        self.c_pair = c_pair
        self.c = c_a // n_head  # per-head channel dim

        self.to_q = MultiDimLinear(c_a, (n_head, self.c))
        self.to_k = MultiDimLinear(c_a, (n_head, self.c), bias=False, norm=kq_norm)
        self.to_v = MultiDimLinear(c_a, (n_head, self.c), bias=False, norm=kq_norm)
        self.to_b = linearNoBias(c_pair, n_head)  # pair -> per-head attention bias
        self.to_g = nn.Sequential(
            MultiDimLinear(c_a, (n_head, self.c), bias=False),
            nn.Sigmoid(),
        )
        self.to_a = linearNoBias(c_a, c_a)  # output projection
        self.ln_0 = RMSNorm((c_pair,))
        self.ln_1 = RMSNorm((c_a,))
        self.use_deepspeed_evo = False
        # Cast activations to bfloat16 even without the deepspeed kernels.
        self.force_bfloat16 = True

    def forward(
        self,
        A_I,  # [I, C_a]
        S_I,  # must be None (AdaLN conditioning not supported)
        Z_II,  # [I, I, C_z]
        Beta_II=None,  # optional additive [I, I] attention bias, broadcast over heads
    ):
        # Input projections
        assert S_I is None
        A_I = self.ln_1(A_I)

        if self.use_deepspeed_evo or self.force_bfloat16:
            A_I = A_I.to(torch.bfloat16)

        Q_IH = self.to_q(A_I)
        K_IH = self.to_k(A_I)
        V_IH = self.to_v(A_I)
        # Fix: the declared default ``Beta_II=None`` used to crash on
        # ``None[..., None]``; make the additive bias genuinely optional.
        B_IIH = self.to_b(self.ln_0(Z_II))
        if Beta_II is not None:
            B_IIH = B_IIH + Beta_II[..., None]
        G_IH = self.to_g(A_I)

        B, L = B_IIH.shape[:2]

        if not self.use_deepspeed_evo or L <= 24:
            # Scale queries by sqrt(per-head dim), in bfloat16.
            Q_IH = Q_IH / torch.sqrt(
                torch.tensor(self.c).to(Q_IH.device, torch.bfloat16)
            )
            # Attention weights: softmax over j of (Q·K^T + bias)
            A_IIH = torch.softmax(
                torch.einsum("...ihd,...jhd->...ijh", Q_IH, K_IH) + B_IIH, dim=-2
            )  # softmax over j
            ## G_IH: [I, H, C]
            ## A_IIH: [I, I, H]
            ## V_IH: [I, H, C]
            A_I = torch.einsum("...ijh,...jhc->...ihc", A_IIH, V_IH)
            A_I = G_IH * A_I  # [B, I, H, C]
            A_I = A_I.flatten(start_dim=-2)  # [B, I, Ca]
        else:
            # Deepspeed evoformer attention kernel path is not wired up.
            raise NotImplementedError

        A_I = self.to_a(A_I)

        return A_I
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class PairformerBlock(nn.Module):
    """One Pairformer block: a transition over the pair representation plus
    (when ``c_s > 0``) pair-biased attention and a transition over the
    single representation.

    Attempt to replicate the AF3 architecture from scratch. The
    ``p_drop`` / ``triangle_*`` / ``use_*`` arguments are accepted for
    config compatibility but are not currently used by this implementation.
    """

    def __init__(
        self,
        c_s,
        c_z,
        attention_pair_bias,
        p_drop=0.1,
        triangle_multiplication=None,
        triangle_attention=None,
        n_transition=4,
        use_deepspeed_evo=True,
        use_triangle_mult=False,
        use_triangle_attn=False,
    ):
        super().__init__()

        # The pair-representation transition is always present.
        self.z_transition = Transition(c=c_z, n=n_transition)

        # The single-representation path only exists when c_s > 0.
        if c_s > 0:
            self.s_transition = Transition(c=c_s, n=n_transition)
            self.attention_pair_bias = AttentionPairBiasPairformerDeepspeed(
                c_a=c_s, c_s=0, c_pair=c_z, **attention_pair_bias
            )

    @activation_checkpointing
    def forward(self, S_I, Z_II):
        autocast_ctx = torch.amp.autocast(
            device_type=device_of(self).type, enabled=True, dtype=torch.bfloat16
        )
        with autocast_ctx:
            # Pair update first, so the attention bias sees the fresh Z_II.
            Z_II = Z_II + self.z_transition(Z_II)
            if S_I is not None:
                zero_bias = torch.tensor([0.0], device=Z_II.device)
                S_I = S_I + self.attention_pair_bias(S_I, None, Z_II, Beta_II=zero_bias)
                S_I = S_I + self.s_transition(S_I)
        return S_I, Z_II
|
rfd3/run_inference.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/usr/bin/env -S /bin/sh -c '"$(dirname "$0")/../../../../.ipd/shebang/rf3_exec.sh" "$0" "$@"'
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
import hydra
|
|
6
|
+
import rootutils
|
|
7
|
+
from dotenv import load_dotenv
|
|
8
|
+
from omegaconf import DictConfig, OmegaConf
|
|
9
|
+
|
|
10
|
+
from rfd3.engine import RFD3InferenceConfig, RFD3InferenceEngine
|
|
11
|
+
|
|
12
|
+
# Setup root dir and environment variables (more info: https://github.com/ashleve/rootutils)
|
|
13
|
+
# NOTE: Sets the `PROJECT_ROOT` environment variable to the root directory of the project (where `.project-root` is located)
|
|
14
|
+
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
|
|
15
|
+
|
|
16
|
+
load_dotenv(override=True)
|
|
17
|
+
|
|
18
|
+
# Build the Hydra config path from `PROJECT_ROOT` (set by rootutils above)
|
|
19
|
+
_config_path = os.path.join(os.environ["PROJECT_ROOT"], "models/rfd3/configs")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@hydra.main(
    config_path=_config_path,
    config_name="inference",
    version_base="1.3",
)
def run_inference(cfg: DictConfig) -> None:
    """Execute the specified inference pipeline"""
    # Keys consumed by engine.run(); everything else configures __init__.
    run_params_set = {"inputs", "n_batches", "out_dir"}
    run_params = {key: cfg[key] for key in cfg if key in run_params_set}

    # Resolve the config and keep only the constructor arguments
    # (drop the run params and hydra's instantiation target).
    cfg_dict = OmegaConf.to_container(cfg, resolve=True)
    excluded = run_params_set | {"_target_"}
    init_cfg_dict = {key: val for key, val in cfg_dict.items() if key not in excluded}

    # Build the engine and run it.
    engine = RFD3InferenceEngine(**RFD3InferenceConfig(**init_cfg_dict))
    engine.run(**run_params)


if __name__ == "__main__":
    run_inference()
|
rfd3/testing/debug.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
#!/usr/bin/env -S /bin/sh -c '"$(dirname "$0")/../../../scripts/shebang/modelhub_exec.sh" "$0" "$@"'
|
|
2
|
+
# JBs debugging file, please create your own and go crazy!
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
import time
|
|
7
|
+
|
|
8
|
+
import hydra
|
|
9
|
+
import ipdb # noqa: F401
|
|
10
|
+
import numpy as np
|
|
11
|
+
import rootutils
|
|
12
|
+
import torch
|
|
13
|
+
import tree
|
|
14
|
+
from atomworks.ml.utils.token import (
|
|
15
|
+
get_token_starts,
|
|
16
|
+
)
|
|
17
|
+
from rfd3.testing.testing_utils import (
|
|
18
|
+
TEST_CFG_TRAIN,
|
|
19
|
+
TEST_JSON_DATA,
|
|
20
|
+
build_pipelines,
|
|
21
|
+
instantiate_example,
|
|
22
|
+
load_train_or_val_cfg,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
from foundry.utils.ddp import set_accelerator_based_on_availability
|
|
26
|
+
|
|
27
|
+
logging.basicConfig(level=logging.INFO)
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
# Same as train.py
|
|
31
|
+
rootutils.setup_root(__file__ + "/../..", indicator=".project-root", pythonpath=True)
|
|
32
|
+
_config_path = os.path.join(
|
|
33
|
+
os.environ.get("PROJECT_PATH", os.environ.get("PROJECT_ROOT", "../..")), "configs"
|
|
34
|
+
)
|
|
35
|
+
print(f"Config path: {_config_path}")
|
|
36
|
+
print(f"Project root: {os.environ.get('PROJECT_ROOT', '../..')}")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
is_inference = True
|
|
40
|
+
args = TEST_JSON_DATA["1qys-1-refactored"]
|
|
41
|
+
input = instantiate_example(args, is_inference=is_inference)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
TEST_CFG_TRAIN = (
|
|
45
|
+
load_train_or_val_cfg(name=sys.argv[1].split("=")[-1])
|
|
46
|
+
if len(sys.argv) > 1
|
|
47
|
+
else TEST_CFG_TRAIN
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def forward(example, trainer, model, is_inference=is_inference):
    """Run one forward pass of *model* on a processed pipeline *example*.

    Assembles network inputs via the trainer, moves them to ``cuda:0``
    (except the ``("f", "msa_stack")`` entry, which stays where it is),
    puts the model in eval or train mode, and disables gradients for
    inference. Returns the raw network output.
    """
    import contextlib

    network_input = trainer._assemble_network_inputs(example)

    # Forward pass
    device = "cuda:0"

    def _inmap(path, x):
        # Keep the (potentially huge) MSA stack off-device; move the rest.
        if hasattr(x, "cpu") and path != ("f", "msa_stack"):
            return x.to(device)
        else:
            return x

    network_input = tree.map_structure_with_path(_inmap, network_input)
    model.eval() if is_inference else model.train()

    # The call is identical in both modes; inference just wraps it in
    # no_grad (previously the whole call was duplicated in each branch).
    grad_ctx = torch.no_grad() if is_inference else contextlib.nullcontext()
    with grad_ctx:
        network_output = model.forward(
            input=network_input,
            n_cycle=1,
            coord_atom_lvl_to_be_noised=example["coord_atom_lvl_to_be_noised"].to(
                device
            ),
        )
    return network_output
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def prep_forward(cfg):
    """Instantiate the trainer from *cfg*, launch fabric, and build the model.

    Returns ``(model, trainer)``. If the first fabric launch fails (e.g.
    the rendezvous port is already taken), retries once on a random port.
    """
    trainer = hydra.utils.instantiate(
        cfg.trainer,
        loggers=None,
        callbacks=None,
        _convert_="partial",
        _recursive_=False,
    )
    set_accelerator_based_on_availability(cfg)
    trainer.initialize_or_update_trainer_state({"train_cfg": cfg})

    # Debug runs are single-device, single-node.
    cfg.trainer.devices_per_node = 1
    cfg.trainer.num_nodes = 1

    try:
        trainer.fabric.launch()
    except Exception as err:
        print(f"Error: {err}")
        print("Switching port")
        # Pick a random unprivileged port (1024..65535) and retry once.
        os.environ["MASTER_PORT"] = str(1024 + np.random.randint(64512))
        trainer.fabric.launch()

    trainer.construct_model()
    model = trainer.state["model"]
    return model, trainer
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def test_conditional_forward():
    """End-to-end smoke test: build pipelines with island conditioning
    forced on, process one example, dump it, and run a forward pass."""
    cfg = load_train_or_val_cfg("test-unindexed")
    island = cfg.datasets.global_transform_args.train_conditions.island
    island.frequency = 1e10  # force the island condition to always fire
    island.p_unindex_motif_tokens = 1.0
    pipes = build_pipelines(composed_config=cfg)

    start = time.time()
    example = pipes[is_inference](input)
    example["example_id"] = "debug_example"
    print(f"Time taken to process example: {time.time() - start}")

    aa = example["atom_array"]
    t_aa = aa[get_token_starts(aa)]  # noqa: F841

    from rfd3.testing.debug_utils import pipe_out_to_file

    pipe_out_to_file(example, save=True)

    print("Preparing model")
    model, trainer = prep_forward(TEST_CFG_TRAIN)
    if is_inference:
        model.eval()
        trainer.state["model"].eval()
    network_output = forward(example, trainer, model, is_inference=is_inference)  # noqa: F841


if __name__ == "__main__":
    test_conditional_forward()
    print("Finished main")
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from atomworks.common import sum_string_arrays
|
|
3
|
+
from atomworks.io.utils.io_utils import to_cif_file
|
|
4
|
+
from atomworks.ml.transforms.center_random_augmentation import CenterRandomAugmentation
|
|
5
|
+
from biotite.structure import AtomArrayStack
|
|
6
|
+
from rfd3.trainer.rfd3 import _reassign_unindexed_token_chains
|
|
7
|
+
from rfd3.transforms.design_transforms import (
|
|
8
|
+
MotifCenterRandomAugmentation,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def pipe_out_to_file(output, save=True):
    """Dump the noised diffusion frames in *output* as an AtomArrayStack.

    Frames are ordered by ascending diffusion time ``t``; the first
    (cleanest) frame gets zero noise. When ``save`` is True the stack is
    written to a CIF file named after the example id; otherwise the stack
    is returned for inspection.
    """
    atom_array = output["atom_array"]
    coords = output["coord_atom_lvl_to_be_noised"]

    # Sort frames by diffusion time and gather the matching noise.
    order = np.argsort(output["t"].numpy())
    noise = output["noise"].numpy()[order]
    noise[0] = noise[0] * 0  # first frame stays clean

    stack = AtomArrayStack(coords.shape[0], coords.shape[1])
    stack.coord = coords[order].numpy() + noise

    n_atoms = stack.shape[-1]
    stack.set_annotation("chain_id", ["A"] * coords.shape[1])
    stack.set_annotation("atom_name", [f"C{i}" for i in range(n_atoms)])
    stack.set_annotation("res_id", output["feats"]["atom_to_token_map"])
    stack.set_annotation("element", ["C"] * n_atoms)
    stack.set_annotation(
        "res_name", [atom_array.res_name[i] for i in range(n_atoms)]
    )

    if not save:
        return stack

    path = f"{output.get('example_id', 'example')}_debug_out.cif"
    to_cif_file(
        stack,
        path,
        id="x",
    )
    print("Saved cif file to:", path)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def save_pipe_out(atom_array):
    """Write *atom_array* to ``debug_out.cif`` after reassigning the
    chains of unindexed tokens."""
    fixed = _reassign_unindexed_token_chains(atom_array)

    out_path = "debug_out.cif"
    to_cif_file(
        fixed,
        out_path,
        id="x",
    )
    print("Saved cif file to:", out_path)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def to_debug_pipe(pipe):
    """Strip random-augmentation transforms from *pipe* (mutated in place)
    so debug runs keep atoms at reproducible coordinates; returns the pipe."""
    skipped = (CenterRandomAugmentation, MotifCenterRandomAugmentation)
    pipe.transforms = [xf for xf in pipe.transforms if not isinstance(xf, skipped)]
    return pipe
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Debugging helper: disambiguates atoms that share identifiers so the
# array can be saved and viewed without friction.
def save_debug_cif(atom_array, filepath, name="debug_out.cif"):
    """Save a copy of *atom_array* to ``filepath + name`` with unique chain ids.

    Chain ids are suffixed with the transformation id so symmetric copies
    don't collide when the structure is opened in a viewer.
    """
    disambiguated = atom_array.copy()
    disambiguated.chain_id = sum_string_arrays(
        disambiguated.chain_id, "-", disambiguated.transformation_id.astype(str)
    )

    out_path = filepath + name
    to_cif_file(
        disambiguated,
        out_path,
    )
    print("Saved cif file to:", out_path)
|