rc-foundry 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. foundry/__init__.py +57 -0
  2. foundry/callbacks/__init__.py +5 -0
  3. foundry/callbacks/callback.py +116 -0
  4. foundry/callbacks/health_logging.py +419 -0
  5. foundry/callbacks/metrics_logging.py +211 -0
  6. foundry/callbacks/timing_logging.py +67 -0
  7. foundry/callbacks/train_logging.py +278 -0
  8. foundry/common.py +108 -0
  9. foundry/constants.py +28 -0
  10. foundry/hydra/resolvers.py +77 -0
  11. foundry/inference_engines/base.py +235 -0
  12. foundry/inference_engines/checkpoint_registry.py +66 -0
  13. foundry/metrics/__init__.py +12 -0
  14. foundry/metrics/losses.py +30 -0
  15. foundry/metrics/metric.py +319 -0
  16. foundry/model/layers/blocks.py +47 -0
  17. foundry/testing/__init__.py +6 -0
  18. foundry/testing/fixtures.py +19 -0
  19. foundry/testing/pytest_hooks.py +15 -0
  20. foundry/trainers/fabric.py +923 -0
  21. foundry/training/EMA.py +67 -0
  22. foundry/training/checkpoint.py +61 -0
  23. foundry/training/schedulers.py +91 -0
  24. foundry/utils/alignment.py +86 -0
  25. foundry/utils/components.py +415 -0
  26. foundry/utils/datasets.py +405 -0
  27. foundry/utils/ddp.py +103 -0
  28. foundry/utils/instantiators.py +72 -0
  29. foundry/utils/logging.py +279 -0
  30. foundry/utils/rigid.py +1460 -0
  31. foundry/utils/rotation_augmentation.py +65 -0
  32. foundry/utils/squashfs.py +172 -0
  33. foundry/utils/torch.py +317 -0
  34. foundry/utils/weights.py +271 -0
  35. foundry/version.py +34 -0
  36. foundry_cli/__init__.py +3 -0
  37. foundry_cli/download_checkpoints.py +281 -0
  38. mpnn/__init__.py +1 -0
  39. mpnn/collate/feature_collator.py +265 -0
  40. mpnn/inference.py +53 -0
  41. mpnn/inference_engines/mpnn.py +549 -0
  42. mpnn/loss/nll_loss.py +122 -0
  43. mpnn/metrics/nll.py +369 -0
  44. mpnn/metrics/sequence_recovery.py +440 -0
  45. mpnn/model/layers/graph_embeddings.py +2372 -0
  46. mpnn/model/layers/message_passing.py +332 -0
  47. mpnn/model/layers/position_wise_feed_forward.py +44 -0
  48. mpnn/model/layers/positional_encoding.py +98 -0
  49. mpnn/model/mpnn.py +2632 -0
  50. mpnn/pipelines/mpnn.py +162 -0
  51. mpnn/samplers/samplers.py +167 -0
  52. mpnn/train.py +341 -0
  53. mpnn/trainers/mpnn.py +193 -0
  54. mpnn/transforms/feature_aggregation/mpnn.py +184 -0
  55. mpnn/transforms/feature_aggregation/polymer_ligand_interface.py +76 -0
  56. mpnn/transforms/feature_aggregation/token_encodings.py +132 -0
  57. mpnn/transforms/feature_aggregation/user_settings.py +347 -0
  58. mpnn/transforms/polymer_ligand_interface.py +164 -0
  59. mpnn/utils/inference.py +2397 -0
  60. mpnn/utils/probability.py +37 -0
  61. mpnn/utils/weights.py +309 -0
  62. rc_foundry-0.1.1.dist-info/METADATA +239 -0
  63. rc_foundry-0.1.1.dist-info/RECORD +180 -0
  64. rc_foundry-0.1.1.dist-info/WHEEL +4 -0
  65. rc_foundry-0.1.1.dist-info/entry_points.txt +5 -0
  66. rc_foundry-0.1.1.dist-info/licenses/LICENSE.md +28 -0
  67. rf3/__init__.py +3 -0
  68. rf3/_version.py +33 -0
  69. rf3/alignment.py +79 -0
  70. rf3/callbacks/dump_validation_structures.py +101 -0
  71. rf3/callbacks/metrics_logging.py +324 -0
  72. rf3/chemical.py +1529 -0
  73. rf3/cli.py +77 -0
  74. rf3/data/cyclic_transform.py +78 -0
  75. rf3/data/extra_xforms.py +36 -0
  76. rf3/data/ground_truth_template.py +463 -0
  77. rf3/data/paired_msa.py +206 -0
  78. rf3/data/pipeline_utils.py +128 -0
  79. rf3/data/pipelines.py +558 -0
  80. rf3/diffusion_samplers/inference_sampler.py +222 -0
  81. rf3/inference.py +65 -0
  82. rf3/inference_engines/__init__.py +5 -0
  83. rf3/inference_engines/rf3.py +735 -0
  84. rf3/kinematics.py +354 -0
  85. rf3/loss/af3_confidence_loss.py +515 -0
  86. rf3/loss/af3_losses.py +655 -0
  87. rf3/loss/loss.py +179 -0
  88. rf3/metrics/chiral.py +179 -0
  89. rf3/metrics/clashing_chains.py +68 -0
  90. rf3/metrics/distogram.py +421 -0
  91. rf3/metrics/lddt.py +523 -0
  92. rf3/metrics/metadata.py +43 -0
  93. rf3/metrics/metric_utils.py +192 -0
  94. rf3/metrics/predicted_error.py +134 -0
  95. rf3/metrics/rasa.py +108 -0
  96. rf3/metrics/selected_distances.py +91 -0
  97. rf3/model/RF3.py +527 -0
  98. rf3/model/RF3_blocks.py +92 -0
  99. rf3/model/RF3_structure.py +303 -0
  100. rf3/model/layers/af3_auxiliary_heads.py +255 -0
  101. rf3/model/layers/af3_diffusion_transformer.py +544 -0
  102. rf3/model/layers/attention.py +313 -0
  103. rf3/model/layers/layer_utils.py +127 -0
  104. rf3/model/layers/mlff.py +118 -0
  105. rf3/model/layers/outer_product.py +59 -0
  106. rf3/model/layers/pairformer_layers.py +783 -0
  107. rf3/model/layers/structure_bias.py +56 -0
  108. rf3/scoring.py +1787 -0
  109. rf3/symmetry/resolve.py +284 -0
  110. rf3/train.py +194 -0
  111. rf3/trainers/rf3.py +570 -0
  112. rf3/util_module.py +47 -0
  113. rf3/utils/frames.py +109 -0
  114. rf3/utils/inference.py +665 -0
  115. rf3/utils/io.py +198 -0
  116. rf3/utils/loss.py +72 -0
  117. rf3/utils/predict_and_score.py +165 -0
  118. rf3/utils/predicted_error.py +673 -0
  119. rf3/utils/recycling.py +42 -0
  120. rf3/validate.py +140 -0
  121. rfd3/.gitignore +7 -0
  122. rfd3/Makefile +76 -0
  123. rfd3/__init__.py +12 -0
  124. rfd3/callbacks.py +66 -0
  125. rfd3/cli.py +41 -0
  126. rfd3/constants.py +212 -0
  127. rfd3/engine.py +543 -0
  128. rfd3/inference/datasets.py +193 -0
  129. rfd3/inference/input_parsing.py +1123 -0
  130. rfd3/inference/legacy_input_parsing.py +717 -0
  131. rfd3/inference/parsing.py +165 -0
  132. rfd3/inference/symmetry/atom_array.py +298 -0
  133. rfd3/inference/symmetry/checks.py +241 -0
  134. rfd3/inference/symmetry/contigs.py +63 -0
  135. rfd3/inference/symmetry/frames.py +355 -0
  136. rfd3/inference/symmetry/symmetry_utils.py +398 -0
  137. rfd3/metrics/design_metrics.py +465 -0
  138. rfd3/metrics/hbonds_hbplus_metrics.py +308 -0
  139. rfd3/metrics/hbonds_metrics.py +389 -0
  140. rfd3/metrics/losses.py +325 -0
  141. rfd3/metrics/metrics_utils.py +118 -0
  142. rfd3/metrics/sidechain_metrics.py +349 -0
  143. rfd3/model/RFD3.py +105 -0
  144. rfd3/model/RFD3_diffusion_module.py +387 -0
  145. rfd3/model/cfg_utils.py +81 -0
  146. rfd3/model/inference_sampler.py +635 -0
  147. rfd3/model/layers/attention.py +577 -0
  148. rfd3/model/layers/block_utils.py +580 -0
  149. rfd3/model/layers/blocks.py +777 -0
  150. rfd3/model/layers/chunked_pairwise.py +377 -0
  151. rfd3/model/layers/encoders.py +417 -0
  152. rfd3/model/layers/layer_utils.py +197 -0
  153. rfd3/model/layers/pairformer_layers.py +128 -0
  154. rfd3/run_inference.py +45 -0
  155. rfd3/testing/debug.py +139 -0
  156. rfd3/testing/debug_utils.py +73 -0
  157. rfd3/testing/testing_utils.py +356 -0
  158. rfd3/train.py +194 -0
  159. rfd3/trainer/dump_validation_structures.py +154 -0
  160. rfd3/trainer/fabric_trainer.py +923 -0
  161. rfd3/trainer/recycling.py +42 -0
  162. rfd3/trainer/rfd3.py +485 -0
  163. rfd3/trainer/trainer_utils.py +502 -0
  164. rfd3/transforms/conditioning_base.py +508 -0
  165. rfd3/transforms/conditioning_utils.py +200 -0
  166. rfd3/transforms/design_transforms.py +807 -0
  167. rfd3/transforms/dna_crop.py +523 -0
  168. rfd3/transforms/hbonds.py +407 -0
  169. rfd3/transforms/hbonds_hbplus.py +246 -0
  170. rfd3/transforms/ncaa_transforms.py +153 -0
  171. rfd3/transforms/pipelines.py +632 -0
  172. rfd3/transforms/ppi_transforms.py +541 -0
  173. rfd3/transforms/rasa.py +116 -0
  174. rfd3/transforms/symmetry.py +76 -0
  175. rfd3/transforms/training_conditions.py +552 -0
  176. rfd3/transforms/util_transforms.py +498 -0
  177. rfd3/transforms/virtual_atoms.py +305 -0
  178. rfd3/utils/inference.py +648 -0
  179. rfd3/utils/io.py +245 -0
  180. rfd3/utils/vizualize.py +276 -0
rf3/utils/recycling.py ADDED
@@ -0,0 +1,42 @@
1
+ import math
2
+
3
+ import torch
4
+ from atomworks.ml.utils.rng import create_rng_state_from_seeds, rng_state
5
+
6
+
7
def get_recycle_schedule(
    max_cycle: int,
    n_epochs: int,
    n_train: int,
    world_size: int,
    seed: int = 42,
) -> torch.Tensor:
    """Generate a schedule for recycling iterations over multiple epochs.

    Used to ensure that each GPU has the same number of recycles within a given batch.

    Args:
        max_cycle (int): Maximum number of recycling iterations (n_recycle).
        n_epochs (int): Number of training epochs.
        n_train (int): The total number of training examples per epoch (across all GPUs).
        world_size (int): The number of distributed training processes.
        seed (int, optional): The seed for random number generation. Defaults to 42.

    Returns:
        torch.Tensor: A tensor containing the recycling schedule for each epoch,
            with dimensions `(n_epochs, n_train // world_size)`.

    References:
        AF-2 Supplement, Algorithm 31
    """
    # Number of examples each rank processes per epoch (rounded up so that
    # every example is covered even when n_train is not divisible).
    examples_per_rank = math.ceil(n_train / world_size)

    # Sample inside a seeded RNG context so the global torch RNG state is
    # left untouched; one row of recycle counts per epoch.
    with rng_state(create_rng_state_from_seeds(torch_seed=seed)):
        per_epoch_schedules = [
            torch.randint(1, max_cycle + 1, (examples_per_rank,))
            for _ in range(n_epochs)
        ]

    return torch.stack(per_epoch_schedules, dim=0)
rf3/validate.py ADDED
@@ -0,0 +1,140 @@
1
+ #!/usr/bin/env -S /bin/sh -c '"$(dirname "$0")/../../../../.ipd/shebang/rf3_exec.sh" "$0" "$@"'
2
+
3
+ import logging
4
+ import os
5
+
6
+ import hydra
7
+ import rootutils
8
+ from dotenv import load_dotenv
9
+ from omegaconf import DictConfig
10
+
11
+ from foundry.utils.logging import suppress_warnings
12
+
13
# Load environment variables from any `.env` file, letting it override existing values.
load_dotenv(override=True)

# Setup root dir and environment variables (more info: https://github.com/ashleve/rootutils)
# NOTE: Sets the `PROJECT_ROOT` environment variable to the root directory of the project (where `.project-root` is located)
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)

# Hydra config directory for RF3, resolved relative to the project root set above.
_config_path = os.path.join(os.environ["PROJECT_ROOT"], "models/rf3/configs")

# Logger for the spawning (pre-launch) process; per-rank logging is set up
# later inside `validate` once the heavy dependencies have been imported.
_spawning_process_logger = logging.getLogger(__name__)
22
+
23
+
24
@hydra.main(config_path=_config_path, config_name="validate", version_base="1.3")
def validate(cfg: DictConfig) -> None:
    """Validate a model checkpoint against the configured validation datasets.

    Heavy dependencies are imported lazily inside the function so that Hydra
    config generation (e.g. ``--help`` / tab completion) stays fast.

    Args:
        cfg: Resolved Hydra configuration (config name ``validate``).

    Raises:
        ValueError: If neither ``cfg.ckpt_path`` nor ``cfg.ckpt_config.path``
            provides a checkpoint to validate.
    """
    # ==============================================================================
    # Import dependencies and resolve Hydra configuration
    # ==============================================================================

    _spawning_process_logger.info("Importing dependencies...")

    # Lazy imports to make config generation fast
    import torch
    from lightning.fabric import seed_everything
    from lightning.fabric.loggers import Logger

    # If training on DIGS L40, set precision of matrix multiplication to balance speed and accuracy
    # Reference: https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
    torch.set_float32_matmul_precision("medium")

    from foundry.callbacks.callback import BaseCallback  # noqa
    from foundry.utils.instantiators import instantiate_loggers, instantiate_callbacks  # noqa
    from foundry.utils.logging import print_config_tree  # noqa
    from foundry.utils.ddp import RankedLogger, set_accelerator_based_on_availability  # noqa
    from foundry.utils.ddp import is_rank_zero  # noqa
    from foundry.utils.datasets import assemble_val_loader_dict  # noqa

    set_accelerator_based_on_availability(cfg)

    ranked_logger = RankedLogger(__name__, rank_zero_only=True)
    _spawning_process_logger.info("Completed dependency imports ...")

    # ... print the configuration tree (NOTE: Only prints for rank 0)
    print_config_tree(cfg, resolve=True)

    # ==============================================================================
    # Logging and Callback instantiation
    # ==============================================================================

    # Reduce the logging level for all dataset and sampler loggers (unless rank 0)
    # We will still see messages from Rank 0; they are identical, since all ranks load and sample from the same datasets
    if not is_rank_zero():
        dataset_logger = logging.getLogger("datasets")
        sampler_logger = logging.getLogger("atomworks.ml.samplers")
        dataset_logger.setLevel(logging.WARNING)
        sampler_logger.setLevel(logging.ERROR)

    # ... seed everything (NOTE: By setting `workers=True`, we ensure that the dataloaders are seeded as well)
    # (`PL_GLOBAL_SEED` environment variable will be passed to the spawned subprocesses; e.g., through `ddp_spawn` backend)
    if cfg.get("seed"):
        ranked_logger.info(f"Seeding everything with seed={cfg.seed}...")
        seed_everything(cfg.seed, workers=True, verbose=True)
    else:
        ranked_logger.warning("No seed provided - Not seeding anything!")

    ranked_logger.info("Instantiating loggers...")
    loggers: list[Logger] = instantiate_loggers(cfg.get("logger"))

    ranked_logger.info("Instantiating callbacks...")
    callbacks: list[BaseCallback] = instantiate_callbacks(cfg.get("callbacks"))

    # ==============================================================================
    # Trainer and model instantiation
    # ==============================================================================

    # ... instantiate the trainer
    trainer = hydra.utils.instantiate(
        cfg.trainer,
        loggers=loggers or None,
        callbacks=callbacks or None,
        _convert_="partial",
        _recursive_=False,
    )
    # (Store the Hydra configuration in the trainer state)
    trainer.initialize_or_update_trainer_state({"train_cfg": cfg})

    # ... spawn processes for distributed training
    # (We spawn here, rather than within `fit`, so we can use Fabric's `init_module` to efficiently initialize the model on the appropriate device)
    ranked_logger.info(
        f"Spawning {trainer.fabric.world_size} processes from {trainer.fabric.global_rank}..."
    )
    trainer.fabric.launch()

    # ... construct the model
    trainer.construct_model()

    # ==============================================================================
    # Dataset instantiation
    # ==============================================================================

    # Compose the validation loader(s)
    val_loaders = assemble_val_loader_dict(
        cfg=cfg.datasets.val,
        rank=trainer.fabric.global_rank,
        world_size=trainer.fabric.world_size,
        loader_cfg=cfg.dataloader["val"],
    )

    # ... load the checkpoint configuration, regardless of whether it's a path or a config
    if "ckpt_path" in cfg and cfg.ckpt_path:
        ckpt_path = cfg.ckpt_path
    elif "ckpt_config" in cfg and cfg.ckpt_config:
        assert (
            "path" in cfg.ckpt_config
        ), "No checkpoint path provided in `ckpt_config`!"
        ckpt_path = cfg.ckpt_config.path
    else:
        # BUGFIX: `ckpt_path` was previously left unbound on this branch, which
        # surfaced as an opaque NameError at the `trainer.validate` call below.
        # Fail fast with an actionable message instead.
        raise ValueError(
            "No checkpoint provided: set `ckpt_path` or `ckpt_config.path` in the config."
        )

    # ... validate the model
    ranked_logger.info("Validating model...")
    with suppress_warnings():
        trainer.validate(
            val_loaders=val_loaders,
            ckpt_path=ckpt_path,
        )

    ranked_logger.info("Validation complete!")


if __name__ == "__main__":
    validate()
rfd3/.gitignore ADDED
@@ -0,0 +1,7 @@
1
# Local test outputs, legacy configs/transforms, and benchmark artifacts
# that should never be committed.
tests/outs
tests/test_data/mcsa_41/
configs/datasets/val/data
configs/model/old
transforms/old
benchmarks
old
rfd3/Makefile ADDED
@@ -0,0 +1,76 @@
1
.PHONY: clean format

#################################################################################
# COMMANDS                                                                      #
#################################################################################

## Delete all compiled Python files
clean:
	find . -type f -name "*.py[co]" -delete
	find . -type d -name "__pycache__" -delete

## Format src directory using ruff
format:
	ruff format .
	ruff check --fix .

#################################################################################
# Self Documenting Commands                                                     #
#################################################################################

# Running bare `make` prints the help listing below.
.DEFAULT_GOAL := help

# `make help` collects every `## ` doc comment that precedes a target in this
# Makefile, sorts the entries, and word-wraps them to the terminal width.
# Inspired by <http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html>
# sed script explained:
# /^##/:
# 	* save line in hold space
# 	* purge line
# 	* Loop:
# 		* append newline + line to hold space
# 		* go to next line
# 		* if line starts with doc comment, strip comment character off and loop
# 	* remove target prerequisites
# 	* append hold space (+ newline) to line
# 	* replace newline plus comments by `---`
# 	* print line
# Separate expressions are necessary because labels cannot be delimited by
# semicolon; see <http://stackoverflow.com/a/11799865/1968>
.PHONY: help
help:
	@echo "$$(tput bold)Available rules:$$(tput sgr0)"
	@echo
	@sed -n -e "/^## / { \
		h; \
		s/.*//; \
		:doc" \
		-e "H; \
		n; \
		s/^## //; \
		t doc" \
		-e "s/:.*//; \
		G; \
		s/\\n## /---/; \
		s/\\n/ /g; \
		p; \
	}" ${MAKEFILE_LIST} \
	| LC_ALL='C' sort --ignore-case \
	| awk -F '---' \
		-v ncol=$$(tput cols) \
		-v indent=19 \
		-v col_on="$$(tput setaf 6)" \
		-v col_off="$$(tput sgr0)" \
	'{ \
		printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
		n = split($$2, words, " "); \
		line_length = ncol - indent; \
		for (i = 1; i <= n; i++) { \
			line_length -= length(words[i]) + 1; \
			if (line_length <= 0) { \
				line_length = ncol - indent - length(words[i]) - 1; \
				printf "\n%*s ", -indent, " "; \
			} \
			printf "%s ", words[i]; \
		} \
		printf "\n"; \
	}' \
	| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')
rfd3/__init__.py ADDED
@@ -0,0 +1,12 @@
1
+ """RFD3 - RosettaFold-diffusion model implementation."""
2
+
3
+ import pydantic
4
+ from packaging.version import Version
5
+
6
# Guard against pydantic v1: the rest of the package relies on the v2 API.
_installed_pydantic = Version(pydantic.__version__)
_minimum_pydantic = Version("2.0")

if _installed_pydantic < _minimum_pydantic:
    raise RuntimeError(
        f"Pydantic >=2.0 is required; found {pydantic.__version__}. "
        "Pin pydantic>=2,<3 and upgrade dependent packages."
    )

__version__ = "0.1.0"
rfd3/callbacks.py ADDED
@@ -0,0 +1,66 @@
1
+ import pandas as pd
2
+ from beartype.typing import Any
3
+
4
+ from foundry.callbacks.callback import BaseCallback
5
+ from foundry.utils.ddp import RankedLogger
6
+ from foundry.utils.logging import print_df_as_table
7
+
8
+ ranked_logger = RankedLogger(__name__, rank_zero_only=True)
9
+
10
+
11
class LogDesignValidationMetricsCallback(BaseCallback):
    """Pretty-print and log per-dataset design validation metrics.

    Reads the CSV written to ``trainer.validation_results_path`` (produced by
    StoreValidationMetricsInDFCallback), restricts it to the most recent
    epoch, and for each dataset prints a table of per-metric means and logs
    them via ``trainer.fabric``. Runs only on the global-zero rank.
    """

    def on_validation_epoch_end(self, trainer: Any):
        # Only log metrics to disk if this is the global zero rank
        if not trainer.fabric.is_global_zero:
            return

        assert hasattr(
            trainer, "validation_results_path"
        ), "Results path not found! Ensure that StoreValidationMetricsInDFCallback is called first."
        df = pd.read_csv(trainer.validation_results_path)

        # ... filter to most recent epoch, drop epoch column
        # BUGFIX: take an explicit `.copy()` so the in-place drop below mutates
        # a real frame, not a view of the filtered slice (this previously
        # triggered pandas' SettingWithCopyWarning / chained-assignment hazard).
        df = df[df["epoch"] == df["epoch"].max()].copy()
        df.drop(columns=["epoch"], inplace=True)

        for dataset in df["dataset"].unique():
            dataset_df = df[df["dataset"] == dataset].copy()
            dataset_df.drop(columns=["dataset"], inplace=True)

            print(f"\n+{' ' + dataset + ' ':-^150}+\n")

            # Aggregate over all columns except the example identifier
            remaining_cols = [
                col for col in dataset_df.columns if col not in ["example_id"]
            ]
            remaining_df = dataset_df[remaining_cols].copy()
            remaining_df = remaining_df.dropna(how="all")
            numeric_cols = remaining_df.select_dtypes(include="number").columns

            # Compute means and non-NaN counts for numeric columns
            final_means = remaining_df[numeric_cols].mean()
            non_nan_counts = remaining_df[numeric_cols].count()

            # Convert the Series to a DataFrame and add the count as a new column
            final_means_df = final_means.to_frame(name="mean")
            final_means_df["Count"] = non_nan_counts

            print_df_as_table(
                final_means_df.reset_index(),
                f"{dataset} — {trainer.state['current_epoch']} — Design Validation Metrics",
            )
            # (fabric is necessarily truthy here — the rank check above already
            # dereferenced it — so this guard is defensive only)
            if trainer.fabric:
                trainer.fabric.log_dict(
                    {f"val/{dataset}/{col}": final_means[col] for col in numeric_cols},
                    step=trainer.state["current_epoch"],
                )

            # For small validation sets, additionally log per-example means
            if len(dataset_df["example_id"].unique()) <= 25:
                for eid, df_ in dataset_df.groupby("example_id"):
                    df_ = df_[numeric_cols].mean()
                    trainer.fabric.log_dict(
                        {
                            f"val/{dataset}/{col}/{eid}": df_[col]
                            for col in numeric_cols
                        },
                        step=trainer.state["current_epoch"],
                    )
rfd3/cli.py ADDED
@@ -0,0 +1,41 @@
1
+ from pathlib import Path
2
+
3
+ import typer
4
+ from hydra import compose, initialize_config_dir
5
+
6
+ app = typer.Typer()
7
+
8
+
9
@app.command(
    context_settings={"allow_extra_args": True, "ignore_unknown_options": True}
)
def design(ctx: typer.Context):
    """Run design using hydra config overrides and input files."""
    # Locate the RFD3 configs directory relative to this file:
    # this file lives at models/rfd3/src/rfd3/cli.py and the configs
    # live at models/rfd3/configs/, three levels up.
    package_root = Path(__file__).parent.parent.parent
    config_dir = str(package_root / "configs")

    # Collect every CLI token as a hydra override, dropping the subcommand words.
    overrides = [
        token
        for token in ctx.params.get("args", []) + ctx.args
        if token not in ["design", "fold"]
    ]

    # Fall back to the default inference engine when none was pinned explicitly.
    if not any(token.startswith("inference_engine=") for token in overrides):
        overrides.append("inference_engine=rfdiffusion3")

    with initialize_config_dir(config_dir=config_dir, version_base="1.3"):
        cfg = compose(config_name="inference", overrides=overrides)

    # Lazy import to avoid loading heavy dependencies at CLI startup
    from foundry.utils.logging import suppress_warnings
    from rfd3.run_inference import run_inference

    with suppress_warnings(is_inference=True):
        run_inference(cfg)


if __name__ == "__main__":
    app()
rfd3/constants.py ADDED
@@ -0,0 +1,212 @@
1
+ import numpy as np
2
+
3
+ from foundry.constants import TIP_BY_RESTYPE
4
+
5
+ TIP_BY_RESTYPE
6
+
7
# Annot: default (diffused default)
REQUIRED_CONDITIONING_ANNOTATION_VALUES = {
    "is_motif_atom_with_fixed_seq": True,
    "is_motif_atom_with_fixed_coord": True,
    "is_motif_atom_unindexed": False,
    "is_motif_atom_unindexed_motif_breakpoint": False,
}
REQUIRED_CONDITIONING_ANNOTATIONS = [*REQUIRED_CONDITIONING_ANNOTATION_VALUES]
REQUIRED_INFERENCE_ANNOTATIONS = [*REQUIRED_CONDITIONING_ANNOTATIONS, "src_component"]
"""Annotations assigned to every valid atom array"""

OPTIONAL_CONDITIONING_VALUES = {
    "is_atom_level_hotspot": 0,
    "is_helix_conditioning": 0,
    "is_sheet_conditioning": 0,
    "is_loop_conditioning": 0,
    "active_donor": 0,
    "active_acceptor": 0,
    "rasa_bin": 3,
    "ref_plddt": 0,
    "is_non_loopy": 0,
    "partial_t": np.nan,
    # kept for legacy reasons
    "is_motif_token": 1,
    "is_motif_atom": 1,
}
"""Optional conditioning annotations and their default values if not provided."""

CONDITIONING_VALUES = {
    **REQUIRED_CONDITIONING_ANNOTATION_VALUES,
    **OPTIONAL_CONDITIONING_VALUES,
}
"""Annotations that must be present in the AtomArray at inference time."""

INFERENCE_ANNOTATIONS = [
    *REQUIRED_INFERENCE_ANNOTATIONS,
    *OPTIONAL_CONDITIONING_VALUES,
]
"""All annotations that might be desired at inference time. Determines what AtomArray annotations will be preserved."""

SAVED_CONDITIONING_ANNOTATIONS = [
    # "is_motif_atom_with_fixed_coord",
    "is_motif_atom_with_fixed_seq",
]
"""Annotations for conditioning to save in output files"""
50
+
51
# fmt: off
# NOTE: Atom names below are stored in fixed-width (4-character, PDB-style)
# padded form; `None` marks an unused slot in the fixed-length tuple.
ccd_ordering_atomchar = {
    'TRP': (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2"," NE1"," CE2"," CE3"," CZ2"," CZ3"," CH2"), # trp
    'HIS': (" N "," CA "," C "," O "," CB "," CG "," ND1"," CD2"," CE1"," NE2", None, None, None, None), # his
    'TYR': (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2"," CE1"," CE2"," CZ "," OH ", None, None), # tyr
    'PHE': (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2"," CE1"," CE2"," CZ ", None, None, None), # phe
    'ASN': (" N "," CA "," C "," O "," CB "," CG "," OD1"," ND2", None, None, None, None, None, None), # asn
    'ASP': (" N "," CA "," C "," O "," CB "," CG "," OD1"," OD2", None, None, None, None, None, None), # asp
    'GLN': (" N "," CA "," C "," O "," CB "," CG "," CD "," OE1"," NE2", None, None, None, None, None), # gln
    'GLU': (" N "," CA "," C "," O "," CB "," CG "," CD "," OE1"," OE2", None, None, None, None, None), # glu
    'CYS': (" N "," CA "," C "," O "," CB "," SG ", None, None, None, None, None, None, None, None), # cys
    'SER': (" N "," CA "," C "," O "," CB "," OG ", None, None, None, None, None, None, None, None), # ser
    'THR': (" N "," CA "," C "," O "," CB "," OG1"," CG2", None, None, None, None, None, None, None), # thr
    'LEU': (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2", None, None, None, None, None, None), # leu
    'VAL': (" N "," CA "," C "," O "," CB "," CG1"," CG2", None, None, None, None, None, None, None), # val
    'ILE': (" N "," CA "," C "," O "," CB "," CG1"," CG2"," CD1", None, None, None, None, None, None), # ile
    'MET': (" N "," CA "," C "," O "," CB "," CG "," SD "," CE ", None, None, None, None, None, None), # met
    'LYS': (" N "," CA "," C "," O "," CB "," CG "," CD "," CE "," NZ ", None, None, None, None, None), # lys
    'ARG': (" N "," CA "," C "," O "," CB "," CG "," CD "," NE "," CZ "," NH1"," NH2", None, None, None), # arg
    'PRO': (" N "," CA "," C "," O "," CB "," CG "," CD ", None, None, None, None, None, None, None), # pro
    'ALA': (" N "," CA "," C "," O "," CB ", None, None, None, None, None, None, None, None, None), # ala
    'GLY': (" N "," CA "," C "," O ", None, None, None, None, None, None, None, None, None, None), # gly
    'UNK': (" N "," CA "," C "," O "," CB ", None, None, None, None, None, None, None, None, None), # unk
    'MSK': (" N "," CA "," C "," O "," CB ", None, None, None, None, None, None, None, None, None), # mask
}
"""Canonical ordering of amino acid atom names in the CCD."""

symmetric_atomchar = {
    "TYR": [[" CE1", " CE2"], [" CD1", " CD2"]],
    "PHE": [[" CE1", " CE2"], [" CD1", " CD2"]],
    "ASP": [[" OD1", " OD2"]],
    "GLU": [[" OE1", " OE2"]],
    "LEU": [[" CD1", " CD2"]],
    "VAL": [[" CG1", " CG2"]],
}
"""Maps residues to their pairs of atom names corresponding to symmetric atoms."""

# Alternative slot assignments ("association schemes") mapping residues to
# fixed-length atom-name tuples; each scheme groups chemically analogous
# atoms into the same slot index across residues.
association_schemes = {
    'atom14': {
        # | Backbone atoms |sp2-L1|sp2-R1|sp2-L2|sp2-R2|sp2-CZ|O-/S-|beta-OH|sp3-CG|sp2-CG|
        #  0     1     2     3     4     V0    V1    V2    V3    V4    V5    V6    V7    V8
        # Aromatics
        'TRP': (" N "," CA "," C "," O "," CB "," CD1"," CD2"," NE1"," CE2"," CE3"," CZ2"," CZ3"," CH2"," CG "), # trp
        'HIS': (" N "," CA "," C "," O "," CB "," ND1"," CD2"," CE1"," NE2", None, None, None, None," CG "), # his
        'TYR': (" N "," CA "," C "," O "," CB "," CD1"," CD2"," CE1"," CE2"," CZ "," OH ", None, None," CG "), # tyr*
        'PHE': (" N "," CA "," C "," O "," CB "," CD1"," CD2"," CE1"," CE2"," CZ ", None, None, None," CG "), # phe*

        # Carboxylates & amines
        'ASN': (" N "," CA "," C "," O "," CB ", None, None, None, None," ND2"," OD1", None, None," CG "), # asn
        'ASP': (" N "," CA "," C "," O "," CB ", None, None," OD1"," OD2", None, None, None, None," CG "), # asp*
        'GLN': (" N "," CA "," C "," O "," CB ", None, None, None, None," NE2"," OE1", None," CD "," CG "), # gln
        'GLU': (" N "," CA "," C "," O "," CB ", None, None," OE2"," OE1", None, None, None," CD "," CG "), # glu*

        # CB-OH and CB-SG
        'CYS': (" N "," CA "," C "," O "," CB ", None, None, None, None, None," SG ", None, None, None), # cys
        'SER': (" N "," CA "," C "," O "," CB ", None, None, None, None, None, None," OG ", None, None), # ser
        'THR': (" N "," CA "," C "," O "," CB "," CG2", None, None, None, None, None," OG1", None, None), # thr

        # Ile/Leu/Val have a common C backbone but different placements of branching C
        'LEU': (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2", None, None, None, None, None, None), # leu*
        'VAL': (" N "," CA "," C "," O "," CB "," CG1", None, None," CG2", None, None, None, None, None), # val*
        'ILE': (" N "," CA "," C "," O "," CB "," CG1"," CD1", None," CG2", None, None, None, None, None), # ile

        # MET / LYS have a common C backbone but heteroatoms inbetween
        'MET': (" N "," CA "," C "," O "," CB "," CG ", None," CE ", None, None," SD ", None, None, None), # met
        'LYS': (" N "," CA "," C "," O "," CB "," CG "," CD "," CE ", None," NZ ", None, None, None, None), # lys

        # Weird ones
        'ARG': (" N "," CA "," C "," O "," CB "," CG "," CD "," NE "," NH1"," CZ "," NH2", None, None, None), # arg*
        'PRO': (" N "," CA "," C "," O "," CB "," CG ", None, None, None, None, None, None," CD ", None), # pro

        # Other
        'UNK': (" N "," CA "," C "," O "," CB ", None, None, None, None, None, None, None, None, None), # unk
        'ALA': (" N "," CA "," C "," O "," CB ", None, None, None, None, None, None, None, None, None), # ala
        'MSK': (" N "," CA "," C "," O "," CB ", None, None, None, None, None, None, None, None, None), # mask
        'GLY': (" N "," CA "," C "," O ", None, None, None, None, None, None, None, None, None, None), # gly
    },

    "permute_ambiguous_only": {
        # "CYS": [6, 5,], # SER | Permute *CB and SG (*CB and OG) # CB = next virtual atom since otherwise things get messy
        # "ASP": [8, 7], # [6, 5], # ASN | Permute CG and OD2 (CG and OD1)
        # "GLU": [9, 8], # [7, 6], # GLN | Permute CD and OE2 (CD and OE1)

        # Ambiguous, modified
        'CYS': (" N "," CA "," C "," O "," CB ", None, " SG ", None, None, None, None, None, None, None), # cys
        'ASP': (" N "," CA "," C "," O "," CB "," CG "," OD1", None, " OD2", None, None, None, None, None), # asp
        'GLU': (" N "," CA "," C "," O "," CB "," CG "," CD "," OE1", None, " OE2", None, None, None, None), # glu

        # Ambiguous, unmodified
        'SER': (" N "," CA "," C "," O "," CB "," OG ", None, None, None, None, None, None, None, None), # ser
        'ASN': (" N "," CA "," C "," O "," CB "," CG "," OD1"," ND2", None, None, None, None, None, None), # asn
        'GLN': (" N "," CA "," C "," O "," CB "," CG "," CD "," OE1"," NE2", None, None, None, None, None), # gln

        # Unambiguous
        'TRP': (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2"," NE1"," CE2"," CE3"," CZ2"," CZ3"," CH2"), # trp
        'HIS': (" N "," CA "," C "," O "," CB "," CG "," ND1"," CD2"," CE1"," NE2", None, None, None, None), # his
        'TYR': (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2"," CE1"," CE2"," CZ "," OH ", None, None), # tyr
        'PHE': (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2"," CE1"," CE2"," CZ ", None, None, None), # phe
        'THR': (" N "," CA "," C "," O "," CB "," OG1"," CG2", None, None, None, None, None, None, None), # thr
        'LEU': (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2", None, None, None, None, None, None), # leu
        'VAL': (" N "," CA "," C "," O "," CB "," CG1"," CG2", None, None, None, None, None, None, None), # val
        'ILE': (" N "," CA "," C "," O "," CB "," CG1"," CG2"," CD1", None, None, None, None, None, None), # ile
        'MET': (" N "," CA "," C "," O "," CB "," CG "," SD "," CE ", None, None, None, None, None, None), # met
        'LYS': (" N "," CA "," C "," O "," CB "," CG "," CD "," CE "," NZ ", None, None, None, None, None), # lys
        'ARG': (" N "," CA "," C "," O "," CB "," CG "," CD "," NE "," CZ "," NH1"," NH2", None, None, None), # arg
        'PRO': (" N "," CA "," C "," O "," CB "," CG "," CD ", None, None, None, None, None, None, None), # pro
        'ALA': (" N "," CA "," C "," O "," CB ", None, None, None, None, None, None, None, None, None), # ala
        'GLY': (" N "," CA "," C "," O ", None, None, None, None, None, None, None, None, None, None), # gly
        'UNK': (" N "," CA "," C "," O "," CB ", None, None, None, None, None, None, None, None, None), # unk
        'MSK': (" N "," CA "," C "," O "," CB ", None, None, None, None, None, None, None, None, None), # mask
    },

    'ccd': ccd_ordering_atomchar,
}
# 'atom14-new' starts from 'atom14' and overrides a handful of residues.
association_schemes['atom14-new'] = association_schemes['atom14'].copy()
association_schemes['atom14-new'] |= {
    # Optional: Break TYR oxygen from GLN / ASN groups - not implemented for rfd3 since it might be useful for people to use
    # 'TYR': (" N "," CA "," C "," O "," CB "," CD1"," CD2"," CE1"," CE2"," CZ ", None, None," OH "," CG "), # tyr*
    # Fixed carboxylate / amide groups:
    'GLN': (" N "," CA "," C "," O "," CB ", None, None, None, None," NE2"," OE1", None," CG "," CD "), # gln
    'GLU': (" N "," CA "," C "," O "," CB ", None, None," OE2"," OE1", None, None, None," CG "," CD "), # glu*
    # Break connection with carboxylates
    'HIS': (" N "," CA "," C "," O "," CB "," ND1"," CD2"," CE1", None, None, None," NE2", None," CG "), # his
}
# 'dense' is currently an alias-copy of 'permute_ambiguous_only'.
association_schemes['dense'] = association_schemes['permute_ambiguous_only'].copy()

# fmt: on
178
VIRTUAL_ATOM_ELEMENT_NAME = "VX"
"""The element name annotation that will be assigned to virtual atoms"""

# Atom14 layout: 5 concrete slots (N, CA, C, O, CB) followed by 9 virtual slots.
ATOM14_ATOM_NAMES = np.array(
    ["N", "CA", "C", "O", "CB"] + [f"V{slot}" for slot in range(9)]
)
"""Atom14 atom names (e.g. CA, V1)"""

ATOM14_ATOM_ELEMENTS = np.array(
    ["N", "C", "C", "O", "C"] + [VIRTUAL_ATOM_ELEMENT_NAME] * 9
)
"""Atom14 element names (e.g. C, VX)"""

ATOM14_ATOM_NAME_TO_ELEMENT = dict(zip(ATOM14_ATOM_NAMES, ATOM14_ATOM_ELEMENTS))
"""Mapping from atom14 atom names (e.g. CA, V1) to their corresponding element names (e.g. C, VX)"""
195
+
196
def strip_list(names):
    """Strip the fixed-width padding from each atom name in ``names``.

    ``None`` placeholders are preserved as-is. Replaces the previous
    lambda assignment (ruff E731), whose comprehension also shadowed the
    argument with its own loop variable (``for x in x``).
    """
    return [(name.strip() if name is not None else None) for name in names]


# Same schemes as `association_schemes`, but with whitespace-free atom names.
association_schemes_stripped = {
    name: {k: strip_list(v) for k, v in scheme.items()}
    for name, scheme in association_schemes.items()
}
202
# Polymer-type labels treated as protein chains.
# NOTE(review): these look like mmCIF/CCD entity polymer-type strings — confirm
# against the code that reads them before extending either list.
SELECTION_PROTEIN = ["POLYPEPTIDE(D)", "POLYPEPTIDE(L)"]
# Every other supported polymer/entity type (nucleic acids, ligands,
# macrocycles, hybrids) — i.e. the non-protein complement of the above.
SELECTION_NONPROTEIN = [
    "POLYDEOXYRIBONUCLEOTIDE",
    "POLYRIBONUCLEOTIDE",
    "PEPTIDE NUCLEIC ACID",
    "OTHER",
    "NON-POLYMER",
    "CYCLIC-PSEUDO-PEPTIDE",
    "MACROLIDE",
    "POLYDEOXYRIBONUCLEOTIDE/POLYRIBONUCLEOTIDE HYBRID",
]