PyPI - rc-foundry - Versions diffs - 0.1.1__py3-none-any.whl - Mend

rc-foundry 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (180) hide show

foundry/__init__.py +57 -0
foundry/callbacks/__init__.py +5 -0
foundry/callbacks/callback.py +116 -0
foundry/callbacks/health_logging.py +419 -0
foundry/callbacks/metrics_logging.py +211 -0
foundry/callbacks/timing_logging.py +67 -0
foundry/callbacks/train_logging.py +278 -0
foundry/common.py +108 -0
foundry/constants.py +28 -0
foundry/hydra/resolvers.py +77 -0
foundry/inference_engines/base.py +235 -0
foundry/inference_engines/checkpoint_registry.py +66 -0
foundry/metrics/__init__.py +12 -0
foundry/metrics/losses.py +30 -0
foundry/metrics/metric.py +319 -0
foundry/model/layers/blocks.py +47 -0
foundry/testing/__init__.py +6 -0
foundry/testing/fixtures.py +19 -0
foundry/testing/pytest_hooks.py +15 -0
foundry/trainers/fabric.py +923 -0
foundry/training/EMA.py +67 -0
foundry/training/checkpoint.py +61 -0
foundry/training/schedulers.py +91 -0
foundry/utils/alignment.py +86 -0
foundry/utils/components.py +415 -0
foundry/utils/datasets.py +405 -0
foundry/utils/ddp.py +103 -0
foundry/utils/instantiators.py +72 -0
foundry/utils/logging.py +279 -0
foundry/utils/rigid.py +1460 -0
foundry/utils/rotation_augmentation.py +65 -0
foundry/utils/squashfs.py +172 -0
foundry/utils/torch.py +317 -0
foundry/utils/weights.py +271 -0
foundry/version.py +34 -0
foundry_cli/__init__.py +3 -0
foundry_cli/download_checkpoints.py +281 -0
mpnn/__init__.py +1 -0
mpnn/collate/feature_collator.py +265 -0
mpnn/inference.py +53 -0
mpnn/inference_engines/mpnn.py +549 -0
mpnn/loss/nll_loss.py +122 -0
mpnn/metrics/nll.py +369 -0
mpnn/metrics/sequence_recovery.py +440 -0
mpnn/model/layers/graph_embeddings.py +2372 -0
mpnn/model/layers/message_passing.py +332 -0
mpnn/model/layers/position_wise_feed_forward.py +44 -0
mpnn/model/layers/positional_encoding.py +98 -0
mpnn/model/mpnn.py +2632 -0
mpnn/pipelines/mpnn.py +162 -0
mpnn/samplers/samplers.py +167 -0
mpnn/train.py +341 -0
mpnn/trainers/mpnn.py +193 -0
mpnn/transforms/feature_aggregation/mpnn.py +184 -0
mpnn/transforms/feature_aggregation/polymer_ligand_interface.py +76 -0
mpnn/transforms/feature_aggregation/token_encodings.py +132 -0
mpnn/transforms/feature_aggregation/user_settings.py +347 -0
mpnn/transforms/polymer_ligand_interface.py +164 -0
mpnn/utils/inference.py +2397 -0
mpnn/utils/probability.py +37 -0
mpnn/utils/weights.py +309 -0
rc_foundry-0.1.1.dist-info/METADATA +239 -0
rc_foundry-0.1.1.dist-info/RECORD +180 -0
rc_foundry-0.1.1.dist-info/WHEEL +4 -0
rc_foundry-0.1.1.dist-info/entry_points.txt +5 -0
rc_foundry-0.1.1.dist-info/licenses/LICENSE.md +28 -0
rf3/__init__.py +3 -0
rf3/_version.py +33 -0
rf3/alignment.py +79 -0
rf3/callbacks/dump_validation_structures.py +101 -0
rf3/callbacks/metrics_logging.py +324 -0
rf3/chemical.py +1529 -0
rf3/cli.py +77 -0
rf3/data/cyclic_transform.py +78 -0
rf3/data/extra_xforms.py +36 -0
rf3/data/ground_truth_template.py +463 -0
rf3/data/paired_msa.py +206 -0
rf3/data/pipeline_utils.py +128 -0
rf3/data/pipelines.py +558 -0
rf3/diffusion_samplers/inference_sampler.py +222 -0
rf3/inference.py +65 -0
rf3/inference_engines/__init__.py +5 -0
rf3/inference_engines/rf3.py +735 -0
rf3/kinematics.py +354 -0
rf3/loss/af3_confidence_loss.py +515 -0
rf3/loss/af3_losses.py +655 -0
rf3/loss/loss.py +179 -0
rf3/metrics/chiral.py +179 -0
rf3/metrics/clashing_chains.py +68 -0
rf3/metrics/distogram.py +421 -0
rf3/metrics/lddt.py +523 -0
rf3/metrics/metadata.py +43 -0
rf3/metrics/metric_utils.py +192 -0
rf3/metrics/predicted_error.py +134 -0
rf3/metrics/rasa.py +108 -0
rf3/metrics/selected_distances.py +91 -0
rf3/model/RF3.py +527 -0
rf3/model/RF3_blocks.py +92 -0
rf3/model/RF3_structure.py +303 -0
rf3/model/layers/af3_auxiliary_heads.py +255 -0
rf3/model/layers/af3_diffusion_transformer.py +544 -0
rf3/model/layers/attention.py +313 -0
rf3/model/layers/layer_utils.py +127 -0
rf3/model/layers/mlff.py +118 -0
rf3/model/layers/outer_product.py +59 -0
rf3/model/layers/pairformer_layers.py +783 -0
rf3/model/layers/structure_bias.py +56 -0
rf3/scoring.py +1787 -0
rf3/symmetry/resolve.py +284 -0
rf3/train.py +194 -0
rf3/trainers/rf3.py +570 -0
rf3/util_module.py +47 -0
rf3/utils/frames.py +109 -0
rf3/utils/inference.py +665 -0
rf3/utils/io.py +198 -0
rf3/utils/loss.py +72 -0
rf3/utils/predict_and_score.py +165 -0
rf3/utils/predicted_error.py +673 -0
rf3/utils/recycling.py +42 -0
rf3/validate.py +140 -0
rfd3/.gitignore +7 -0
rfd3/Makefile +76 -0
rfd3/__init__.py +12 -0
rfd3/callbacks.py +66 -0
rfd3/cli.py +41 -0
rfd3/constants.py +212 -0
rfd3/engine.py +543 -0
rfd3/inference/datasets.py +193 -0
rfd3/inference/input_parsing.py +1123 -0
rfd3/inference/legacy_input_parsing.py +717 -0
rfd3/inference/parsing.py +165 -0
rfd3/inference/symmetry/atom_array.py +298 -0
rfd3/inference/symmetry/checks.py +241 -0
rfd3/inference/symmetry/contigs.py +63 -0
rfd3/inference/symmetry/frames.py +355 -0
rfd3/inference/symmetry/symmetry_utils.py +398 -0
rfd3/metrics/design_metrics.py +465 -0
rfd3/metrics/hbonds_hbplus_metrics.py +308 -0
rfd3/metrics/hbonds_metrics.py +389 -0
rfd3/metrics/losses.py +325 -0
rfd3/metrics/metrics_utils.py +118 -0
rfd3/metrics/sidechain_metrics.py +349 -0
rfd3/model/RFD3.py +105 -0
rfd3/model/RFD3_diffusion_module.py +387 -0
rfd3/model/cfg_utils.py +81 -0
rfd3/model/inference_sampler.py +635 -0
rfd3/model/layers/attention.py +577 -0
rfd3/model/layers/block_utils.py +580 -0
rfd3/model/layers/blocks.py +777 -0
rfd3/model/layers/chunked_pairwise.py +377 -0
rfd3/model/layers/encoders.py +417 -0
rfd3/model/layers/layer_utils.py +197 -0
rfd3/model/layers/pairformer_layers.py +128 -0
rfd3/run_inference.py +45 -0
rfd3/testing/debug.py +139 -0
rfd3/testing/debug_utils.py +73 -0
rfd3/testing/testing_utils.py +356 -0
rfd3/train.py +194 -0
rfd3/trainer/dump_validation_structures.py +154 -0
rfd3/trainer/fabric_trainer.py +923 -0
rfd3/trainer/recycling.py +42 -0
rfd3/trainer/rfd3.py +485 -0
rfd3/trainer/trainer_utils.py +502 -0
rfd3/transforms/conditioning_base.py +508 -0
rfd3/transforms/conditioning_utils.py +200 -0
rfd3/transforms/design_transforms.py +807 -0
rfd3/transforms/dna_crop.py +523 -0
rfd3/transforms/hbonds.py +407 -0
rfd3/transforms/hbonds_hbplus.py +246 -0
rfd3/transforms/ncaa_transforms.py +153 -0
rfd3/transforms/pipelines.py +632 -0
rfd3/transforms/ppi_transforms.py +541 -0
rfd3/transforms/rasa.py +116 -0
rfd3/transforms/symmetry.py +76 -0
rfd3/transforms/training_conditions.py +552 -0
rfd3/transforms/util_transforms.py +498 -0
rfd3/transforms/virtual_atoms.py +305 -0
rfd3/utils/inference.py +648 -0
rfd3/utils/io.py +245 -0
rfd3/utils/vizualize.py +276 -0

rfd3/transforms/virtual_atoms.py ADDED Viewed

@@ -0,0 +1,305 @@
+"""
+Virtual-atom transforms for Atom14
+"""
+import biotite.structure as struc
+import numpy as np
+from atomworks.io.utils.atom_array_plus import insert_atoms
+from atomworks.ml.transforms.base import (
+    Transform,
+)
+from atomworks.ml.utils.token import get_token_starts
+from rfd3.constants import (
+    ATOM14_ATOM_NAME_TO_ELEMENT,
+    ATOM14_ATOM_NAMES,
+    VIRTUAL_ATOM_ELEMENT_NAME,
+    association_schemes,
+    association_schemes_stripped,
+    ccd_ordering_atomchar,
+)
+from rfd3.transforms.conditioning_base import (
+    UnindexFlaggedTokens,
+)
+from rfd3.transforms.util_transforms import (
+    assert_single_representative,
+    get_af3_token_representative_masks,
+)
+from foundry.common import exists
+def map_to_association_scheme(atom_names: list | str, res_name: str, scheme="atom14"):
+    """
+    Maps a list of names to the atom14 naming scheme for that particular name (within a specific residue)
+    NB this function is a bit more general since it is used to handle tipatoms too.
+    """
+    if scheme not in association_schemes_stripped:
+        raise ValueError(
+            f"Scheme {scheme} not found in association_schemes_stripped. Available schemes: {list(association_schemes_stripped.keys())}"
+        )
+    atom_names = (
+        [str(atom_names)] if isinstance(atom_names, (str, np.str_)) else atom_names
+    )
+    idxs = np.array(
+        [
+            association_schemes_stripped[scheme][res_name].index(name)
+            for name in atom_names
+        ]
+    )
+    return ATOM14_ATOM_NAMES[idxs]
+def map_names_to_elements(
+    atom_names: list | str, default=VIRTUAL_ATOM_ELEMENT_NAME
+) -> np.ndarray:
+    """
+    Maps ATOM14 atom names to their corresponding elements.
+    If a name is not in ATOM14_ATOM_NAMES (e.g. if atom name is VX - virtual atom),
+    then it returns the default value
+    """
+    atom_names = [atom_names] if isinstance(atom_names, str) else atom_names
+    elements = [ATOM14_ATOM_NAME_TO_ELEMENT.get(name, default) for name in atom_names]
+    return np.array(elements)
+def generate_atom_mappings_(scheme="atom14"):
+    scheme = association_schemes[scheme]
+    atom_mapping = {}
+    symmetry_mapping = {}
+    for aaa, atom14_names in ccd_ordering_atomchar.items():
+        mapping = list(range(14))
+        scheme_names = scheme[aaa]
+        for ccd_index in range(len(atom14_names)):
+            atom14_name = atom14_names[ccd_index]
+            if atom14_name is not None:
+                assert (
+                    atom14_name in scheme_names
+                ), f"{atom14_name} not in CCD ordering for {aaa}"
+                scheme_index = scheme_names.index(atom14_name)
+                scheme_index_in_cur_mapping = mapping.index(scheme_index)
+                mapping[ccd_index], mapping[scheme_index_in_cur_mapping] = (
+                    mapping[scheme_index_in_cur_mapping],
+                    mapping[ccd_index],
+                )
+        assert set(mapping) == set(range(len(scheme_names)))
+        # atom_mapping[aaa] = mapping
+        atom_mapping[aaa] = mapping
+        ##################################################################
+        # Temporarily comment this out
+        # if aaa in symmetric_atomchar:
+        #     symmetry_mapping[aaa] = []
+        #     for group in symmetric_atomchar[aaa]:
+        #         indices = [atom14_names.index(name) for name in group]
+        #         symmetry_mapping[aaa].append(indices)
+        symmetry_mapping = {}
+        ##################################################################
+    # Test that the mapping is valid
+    for aaa in atom_mapping.keys():
+        idxs = atom_mapping[aaa]
+        assert len(idxs) == len(set(idxs)), f"Duplicate indices in mapping for {aaa}"
+        atom_mapping_expected = np.array(scheme[aaa])[idxs]
+        atom_mapping_actual = np.array(ccd_ordering_atomchar[aaa])
+        assert np.array_equal(
+            atom_mapping_expected, atom_mapping_actual
+        ), f"Mapping mismatch for {aaa}: {atom_mapping_expected} != {atom_mapping_actual}"
+    return atom_mapping, symmetry_mapping
+def permute_symmetric_atom_names_(
+    atom_names: list, res_name: str, association_map: dict, symmetry_map: dict
+) -> list:
+    # NB: Can leak GT sequence if the model receives the canconical ordering of atoms as input
+    # With the structure-local atom attention it will not unless N_keys(n_attn_seq_neighbours) > n_atom_attn_queries.
+    if res_name in association_map:
+        idx_to_swap = association_map[res_name]
+        atom_names = atom_names[idx_to_swap]
+        if res_name in symmetry_map:
+            for group in symmetry_map[res_name]:
+                if np.random.rand() < 0.5:  # random swap
+                    atom_names[group] = atom_names[group[::-1]]
+    return atom_names
+#####################################################################################################
+# Virtual atom transforms
+#####################################################################################################
+class PadTokensWithVirtualAtoms(Transform):
+    """
+    Pads tokens with virtual atoms to ensure all residue tokens have a fixed number of atoms.
+
+    Padding is applied only to residue tokens that do not have a fixed sequence
+    ("paddable" tokens); their atom names are overwritten with ATOM14_ATOM_NAMES,
+    permuted through the association map when training with an association scheme.
+    Residue tokens with a fixed sequence, and unindexed motif tokens, are not padded;
+    their atom names are translated with `map_to_association_scheme` instead.
+
+    A `gt_atom_name` annotation is filled for every token: with the newly assigned
+    names for paddable tokens, and with the pre-existing atom names otherwise.
+    """
+    requires_previous_transforms = [UnindexFlaggedTokens]
+    def __init__(
+        self,
+        n_atoms_per_token,
+        atom_to_pad_from,
+        association_scheme,
+    ):
+        """
+        Args:
+            n_atoms_per_token: Number of atoms each paddable token is padded up to.
+            atom_to_pad_from: Passed as `central_atom` to
+                `get_af3_token_representative_masks` to select the atom that the
+                padding atoms are copied from.
+            association_scheme: Scheme key handed to `generate_atom_mappings_` and
+                `map_to_association_scheme`. When `exists(association_scheme)` is
+                false, no association/symmetry maps are built.
+        """
+        self.n_atoms_per_token = n_atoms_per_token
+        self.atom_to_pad_from = atom_to_pad_from
+        self.association_scheme = association_scheme
+        # The maps are only defined when a scheme is given; `forward` guards its use of
+        # them with the same `exists(...)` check.
+        if exists(association_scheme):
+            self.association_map_, self.symmetry_map_ = generate_atom_mappings_(
+                association_scheme
+            )
+    def forward(self, data: dict) -> dict:
+        """
+        Pad paddable tokens with virtual atoms and (re)assign atom names.
+
+        Reads `data["atom_array"]` and `data["is_inference"]`; replaces
+        `data["atom_array"]` with the padded/renamed array and returns `data`.
+        """
+        atom_array = data["atom_array"]
+        # `starts` has one extra trailing entry (exclusive stop), so consecutive pairs
+        # delimit each token.
+        starts = get_token_starts(atom_array, add_exclusive_stop=True)
+        token_starts = starts[:-1]
+        # One atom per token (the first atom of each token) carrying the token-level flags.
+        token_level_array = atom_array[token_starts]
+        is_motif_atom_with_fixed_seq = token_level_array.is_motif_atom_with_fixed_seq
+        is_motif_token_unindexed = token_level_array.is_motif_atom_unindexed
+        # Sanity check: number of distinct token ids must equal the number of token starts.
+        token_ids = np.unique(atom_array.token_id)
+        assert len(token_ids) == len(
+            is_motif_atom_with_fixed_seq
+        ), "Token ids and token level array have different lengths!"
+        # Unindexed tokens are never fully atomized, but may be assigned as atomized to have repr atoms:
+        is_residue = (
+            token_level_array.is_protein & ~token_level_array.atomize
+        ) | is_motif_token_unindexed
+        # Unindexed tokens are never padded, and so are treated as residues with fixed sequence.
+        is_paddable = is_residue & ~(
+            is_motif_atom_with_fixed_seq | is_motif_token_unindexed
+        )
+        is_non_paddable_residue = is_residue & (
+            is_motif_atom_with_fixed_seq | is_motif_token_unindexed
+        )
+        # Collect virtual atoms to insert (we will insert them all at once)
+        virtual_atoms_to_insert = []
+        insert_positions = []
+        # First pass: collect virtual atoms for insertion
+        for token_id, (start, end) in enumerate(zip(starts[:-1], starts[1:])):
+            if is_paddable[token_id]:
+                token = atom_array[start:end]
+                # First, pad with virtual atoms if needed
+                # NOTE(review): tokens that already have >= n_atoms_per_token atoms get no
+                # padding here; the second pass still assigns ATOM14_ATOM_NAMES to them,
+                # which presumably requires the token length to match - confirm.
+                n_pad = self.n_atoms_per_token - len(token)
+                if n_pad > 0:
+                    mask = get_af3_token_representative_masks(
+                        token, central_atom=self.atom_to_pad_from
+                    )
+                    assert_single_representative(token)
+                    # ... Create virtual atoms
+                    # Copy the masked atom(s) and keep the first one as the padding template.
+                    pad_atoms = token[mask].copy()
+                    pad_atoms = (
+                        pad_atoms[0]
+                        if isinstance(pad_atoms, struc.AtomArray)
+                        else pad_atoms
+                    )
+                    pad_atoms.element = VIRTUAL_ATOM_ELEMENT_NAME
+                    # ... Expand to desired number of atoms
+                    pad_array = struc.array([pad_atoms] * n_pad)
+                    # ... Change occupancy | if any atom in the token has occupancy, set to 1.0
+                    # NOTE(review): at this point `pad_atoms` is the single template atom,
+                    # so only that atom's occupancy is inspected, not the whole token's.
+                    occ = 1.0 if pad_atoms.occupancy.sum() > 0.0 else 0.0
+                    pad_array.occupancy = np.full(n_pad, occ)
+                    # ... Even if the input pad_atoms are all motif, we don't ever want padded atoms to be motif
+                    pad_array.is_motif_atom = np.zeros(n_pad, dtype=bool)
+                    # Handle multidimensional annotations
+                    # NOTE(review): this helper is redefined on every loop iteration; it does
+                    # not capture loop state and could be hoisted.
+                    def _fix_multidimensional_annotations_in_pad_array(
+                        atomarray, padarray
+                    ):
+                        # For every annotation that is multidimensional in `atomarray`,
+                        # re-stack the per-atom values of `padarray` into one array and
+                        # replace the annotation with it.
+                        # NOTE(review): the stacked values are always cast to float,
+                        # whatever the original dtype was - confirm this is intended.
+                        for annotation in atomarray.get_annotation_categories():
+                            if len(atomarray.get_annotation(annotation).shape) > 1:
+                                stacked = np.stack(
+                                    padarray.get_annotation(annotation)
+                                ).astype(float)
+                                padarray.del_annotation(annotation)
+                                padarray.set_annotation(annotation, stacked)
+                        return padarray
+                    pad_array = _fix_multidimensional_annotations_in_pad_array(
+                        token, pad_array
+                    )
+                    # Collect virtual atoms for later insertion
+                    # The padding is inserted at the token's exclusive end index.
+                    virtual_atoms_to_insert.append(pad_array)
+                    insert_positions.append(end)
+        # Insert all virtual atoms at once using insert_atoms
+        if virtual_atoms_to_insert:
+            atom_array_padded = insert_atoms(
+                atom_array, virtual_atoms_to_insert, insert_positions
+            )
+        else:
+            # Nothing to pad: continue with the input array itself (no copy is made here).
+            atom_array_padded = atom_array
+        # Initialize gt_atom_name annotation if it doesn't exist
+        if "gt_atom_name" not in atom_array_padded.get_annotation_categories():
+            atom_array_padded.set_annotation(
+                "gt_atom_name", np.empty(len(atom_array_padded), dtype="U4")
+            )
+        # Second pass: process tokens with proper atom name assignment after padding
+        # Get updated token starts after padding
+        starts_padded = get_token_starts(atom_array_padded, add_exclusive_stop=True)
+        # The token-level masks computed before padding are indexed by position here, so
+        # this relies on padding leaving the number and order of tokens unchanged.
+        for token_id, (start, end) in enumerate(
+            zip(starts_padded[:-1], starts_padded[1:])
+        ):
+            if is_paddable[token_id]:
+                # ... Permutation of atom names during training
+                if not data["is_inference"] and exists(self.association_scheme):
+                    atom_names = permute_symmetric_atom_names_(
+                        ATOM14_ATOM_NAMES,
+                        atom_array_padded.res_name[start],
+                        association_map=self.association_map_,
+                        symmetry_map=self.symmetry_map_,
+                    )
+                else:
+                    atom_names = ATOM14_ATOM_NAMES
+                # Both the visible names and gt_atom_name receive the same assigned names.
+                atom_array_padded.atom_name[start:end] = atom_names
+                atom_array_padded.get_annotation("gt_atom_name")[start:end] = atom_names
+            elif is_non_paddable_residue[token_id]:
+                # When sequence-constrained, we want to directly map the residue name based on the sequence
+                atom_names, res_name = (
+                    atom_array_padded.atom_name[start:end],
+                    atom_array_padded.res_name[start],
+                )
+                # Record the pre-existing names before they are translated to the scheme.
+                atom_array_padded.get_annotation("gt_atom_name")[start:end] = atom_names
+                # Raises ValueError when self.association_scheme is not a known scheme key.
+                atom_names = map_to_association_scheme(
+                    atom_names, res_name, scheme=self.association_scheme
+                )
+                atom_array_padded.atom_name[start:end] = atom_names
+            else:
+                # ... Add gt_atom_name annotation to other tokens
+                atom_names = atom_array_padded.atom_name[start:end]
+                atom_array_padded.get_annotation("gt_atom_name")[start:end] = atom_names
+            # ... Update atom array
+            # NOTE(review): this condition fails only when every atom of the token is a
+            # virtual atom, which does not match the wording of the message below -
+            # confirm the intended check.
+            assert {VIRTUAL_ATOM_ELEMENT_NAME} != set(
+                atom_array_padded.element[start:end].tolist()
+            ), (
+                "Padded atoms should be virtual atoms, but found: "
+                f"{set(atom_array_padded.element[start:end].tolist())}"
+            )
+        data["atom_array"] = atom_array_padded
+        return data