rc_foundry-0.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- foundry/__init__.py +57 -0
- foundry/callbacks/__init__.py +5 -0
- foundry/callbacks/callback.py +116 -0
- foundry/callbacks/health_logging.py +419 -0
- foundry/callbacks/metrics_logging.py +211 -0
- foundry/callbacks/timing_logging.py +67 -0
- foundry/callbacks/train_logging.py +278 -0
- foundry/common.py +108 -0
- foundry/constants.py +28 -0
- foundry/hydra/resolvers.py +77 -0
- foundry/inference_engines/base.py +235 -0
- foundry/inference_engines/checkpoint_registry.py +66 -0
- foundry/metrics/__init__.py +12 -0
- foundry/metrics/losses.py +30 -0
- foundry/metrics/metric.py +319 -0
- foundry/model/layers/blocks.py +47 -0
- foundry/testing/__init__.py +6 -0
- foundry/testing/fixtures.py +19 -0
- foundry/testing/pytest_hooks.py +15 -0
- foundry/trainers/fabric.py +923 -0
- foundry/training/EMA.py +67 -0
- foundry/training/checkpoint.py +61 -0
- foundry/training/schedulers.py +91 -0
- foundry/utils/alignment.py +86 -0
- foundry/utils/components.py +415 -0
- foundry/utils/datasets.py +405 -0
- foundry/utils/ddp.py +103 -0
- foundry/utils/instantiators.py +72 -0
- foundry/utils/logging.py +279 -0
- foundry/utils/rigid.py +1460 -0
- foundry/utils/rotation_augmentation.py +65 -0
- foundry/utils/squashfs.py +172 -0
- foundry/utils/torch.py +317 -0
- foundry/utils/weights.py +271 -0
- foundry/version.py +34 -0
- foundry_cli/__init__.py +3 -0
- foundry_cli/download_checkpoints.py +281 -0
- mpnn/__init__.py +1 -0
- mpnn/collate/feature_collator.py +265 -0
- mpnn/inference.py +53 -0
- mpnn/inference_engines/mpnn.py +549 -0
- mpnn/loss/nll_loss.py +122 -0
- mpnn/metrics/nll.py +369 -0
- mpnn/metrics/sequence_recovery.py +440 -0
- mpnn/model/layers/graph_embeddings.py +2372 -0
- mpnn/model/layers/message_passing.py +332 -0
- mpnn/model/layers/position_wise_feed_forward.py +44 -0
- mpnn/model/layers/positional_encoding.py +98 -0
- mpnn/model/mpnn.py +2632 -0
- mpnn/pipelines/mpnn.py +162 -0
- mpnn/samplers/samplers.py +167 -0
- mpnn/train.py +341 -0
- mpnn/trainers/mpnn.py +193 -0
- mpnn/transforms/feature_aggregation/mpnn.py +184 -0
- mpnn/transforms/feature_aggregation/polymer_ligand_interface.py +76 -0
- mpnn/transforms/feature_aggregation/token_encodings.py +132 -0
- mpnn/transforms/feature_aggregation/user_settings.py +347 -0
- mpnn/transforms/polymer_ligand_interface.py +164 -0
- mpnn/utils/inference.py +2397 -0
- mpnn/utils/probability.py +37 -0
- mpnn/utils/weights.py +309 -0
- rc_foundry-0.1.1.dist-info/METADATA +239 -0
- rc_foundry-0.1.1.dist-info/RECORD +180 -0
- rc_foundry-0.1.1.dist-info/WHEEL +4 -0
- rc_foundry-0.1.1.dist-info/entry_points.txt +5 -0
- rc_foundry-0.1.1.dist-info/licenses/LICENSE.md +28 -0
- rf3/__init__.py +3 -0
- rf3/_version.py +33 -0
- rf3/alignment.py +79 -0
- rf3/callbacks/dump_validation_structures.py +101 -0
- rf3/callbacks/metrics_logging.py +324 -0
- rf3/chemical.py +1529 -0
- rf3/cli.py +77 -0
- rf3/data/cyclic_transform.py +78 -0
- rf3/data/extra_xforms.py +36 -0
- rf3/data/ground_truth_template.py +463 -0
- rf3/data/paired_msa.py +206 -0
- rf3/data/pipeline_utils.py +128 -0
- rf3/data/pipelines.py +558 -0
- rf3/diffusion_samplers/inference_sampler.py +222 -0
- rf3/inference.py +65 -0
- rf3/inference_engines/__init__.py +5 -0
- rf3/inference_engines/rf3.py +735 -0
- rf3/kinematics.py +354 -0
- rf3/loss/af3_confidence_loss.py +515 -0
- rf3/loss/af3_losses.py +655 -0
- rf3/loss/loss.py +179 -0
- rf3/metrics/chiral.py +179 -0
- rf3/metrics/clashing_chains.py +68 -0
- rf3/metrics/distogram.py +421 -0
- rf3/metrics/lddt.py +523 -0
- rf3/metrics/metadata.py +43 -0
- rf3/metrics/metric_utils.py +192 -0
- rf3/metrics/predicted_error.py +134 -0
- rf3/metrics/rasa.py +108 -0
- rf3/metrics/selected_distances.py +91 -0
- rf3/model/RF3.py +527 -0
- rf3/model/RF3_blocks.py +92 -0
- rf3/model/RF3_structure.py +303 -0
- rf3/model/layers/af3_auxiliary_heads.py +255 -0
- rf3/model/layers/af3_diffusion_transformer.py +544 -0
- rf3/model/layers/attention.py +313 -0
- rf3/model/layers/layer_utils.py +127 -0
- rf3/model/layers/mlff.py +118 -0
- rf3/model/layers/outer_product.py +59 -0
- rf3/model/layers/pairformer_layers.py +783 -0
- rf3/model/layers/structure_bias.py +56 -0
- rf3/scoring.py +1787 -0
- rf3/symmetry/resolve.py +284 -0
- rf3/train.py +194 -0
- rf3/trainers/rf3.py +570 -0
- rf3/util_module.py +47 -0
- rf3/utils/frames.py +109 -0
- rf3/utils/inference.py +665 -0
- rf3/utils/io.py +198 -0
- rf3/utils/loss.py +72 -0
- rf3/utils/predict_and_score.py +165 -0
- rf3/utils/predicted_error.py +673 -0
- rf3/utils/recycling.py +42 -0
- rf3/validate.py +140 -0
- rfd3/.gitignore +7 -0
- rfd3/Makefile +76 -0
- rfd3/__init__.py +12 -0
- rfd3/callbacks.py +66 -0
- rfd3/cli.py +41 -0
- rfd3/constants.py +212 -0
- rfd3/engine.py +543 -0
- rfd3/inference/datasets.py +193 -0
- rfd3/inference/input_parsing.py +1123 -0
- rfd3/inference/legacy_input_parsing.py +717 -0
- rfd3/inference/parsing.py +165 -0
- rfd3/inference/symmetry/atom_array.py +298 -0
- rfd3/inference/symmetry/checks.py +241 -0
- rfd3/inference/symmetry/contigs.py +63 -0
- rfd3/inference/symmetry/frames.py +355 -0
- rfd3/inference/symmetry/symmetry_utils.py +398 -0
- rfd3/metrics/design_metrics.py +465 -0
- rfd3/metrics/hbonds_hbplus_metrics.py +308 -0
- rfd3/metrics/hbonds_metrics.py +389 -0
- rfd3/metrics/losses.py +325 -0
- rfd3/metrics/metrics_utils.py +118 -0
- rfd3/metrics/sidechain_metrics.py +349 -0
- rfd3/model/RFD3.py +105 -0
- rfd3/model/RFD3_diffusion_module.py +387 -0
- rfd3/model/cfg_utils.py +81 -0
- rfd3/model/inference_sampler.py +635 -0
- rfd3/model/layers/attention.py +577 -0
- rfd3/model/layers/block_utils.py +580 -0
- rfd3/model/layers/blocks.py +777 -0
- rfd3/model/layers/chunked_pairwise.py +377 -0
- rfd3/model/layers/encoders.py +417 -0
- rfd3/model/layers/layer_utils.py +197 -0
- rfd3/model/layers/pairformer_layers.py +128 -0
- rfd3/run_inference.py +45 -0
- rfd3/testing/debug.py +139 -0
- rfd3/testing/debug_utils.py +73 -0
- rfd3/testing/testing_utils.py +356 -0
- rfd3/train.py +194 -0
- rfd3/trainer/dump_validation_structures.py +154 -0
- rfd3/trainer/fabric_trainer.py +923 -0
- rfd3/trainer/recycling.py +42 -0
- rfd3/trainer/rfd3.py +485 -0
- rfd3/trainer/trainer_utils.py +502 -0
- rfd3/transforms/conditioning_base.py +508 -0
- rfd3/transforms/conditioning_utils.py +200 -0
- rfd3/transforms/design_transforms.py +807 -0
- rfd3/transforms/dna_crop.py +523 -0
- rfd3/transforms/hbonds.py +407 -0
- rfd3/transforms/hbonds_hbplus.py +246 -0
- rfd3/transforms/ncaa_transforms.py +153 -0
- rfd3/transforms/pipelines.py +632 -0
- rfd3/transforms/ppi_transforms.py +541 -0
- rfd3/transforms/rasa.py +116 -0
- rfd3/transforms/symmetry.py +76 -0
- rfd3/transforms/training_conditions.py +552 -0
- rfd3/transforms/util_transforms.py +498 -0
- rfd3/transforms/virtual_atoms.py +305 -0
- rfd3/utils/inference.py +648 -0
- rfd3/utils/io.py +245 -0
- rfd3/utils/vizualize.py +276 -0
@@ -0,0 +1,303 @@
+import logging
+
+import torch
+import torch.nn as nn
+from rf3.model.layers.af3_diffusion_transformer import (
+    AtomAttentionEncoderDiffusion,
+    AtomTransformer,
+    DiffusionTransformer,
+)
+from rf3.model.layers.layer_utils import Transition, linearNoBias
+from rf3.model.layers.pairformer_layers import (
+    MSAModule,
+    PairformerBlock,
+    RelativePositionEncoding,
+    RF3TemplateEmbedder,
+)
+
+from foundry.model.layers.blocks import FourierEmbedding
+from foundry.training.checkpoint import activation_checkpointing
+
+logger = logging.getLogger(__name__)
+
+"""
+Glossary:
+    I: # tokens (coarse representation)
+    L: # atoms (fine representation)
+    M: # msa
+    T: # templates
+    D: # diffusion structure batch dim
+"""
+
+
+class AtomAttentionDecoder(nn.Module):
+    def __init__(self, c_token, c_atom, c_atompair, atom_transformer):
+        super().__init__()
+        self.atom_transformer = AtomTransformer(
+            c_atom=c_atom, c_atompair=c_atompair, **atom_transformer
+        )
+        self.linear_1 = linearNoBias(c_token, c_atom)
+        self.to_r_update = nn.Sequential(
+            nn.LayerNorm((c_atom,)), linearNoBias(c_atom, 3)
+        )
+
+    def forward(
+        self,
+        f,
+        Ai,  # [L, C_token]
+        Ql_skip,  # [L, C_atom]
+        Cl_skip,  # [L, C_atom]
+        Plm_skip,  # [L, L, C_atompair]
+    ):
+        tok_idx = f["atom_to_token_map"]
+
+        @activation_checkpointing
+        def atom_decoder(Ai, Ql_skip, Cl_skip, Plm_skip, tok_idx):
+            # Broadcast per-token activations to per-atom activations and add the skip connection
+            Ql = self.linear_1(Ai[..., tok_idx, :]) + Ql_skip
+
+            # Cross attention transformer.
+            Ql = self.atom_transformer(Ql, Cl_skip, Plm_skip)
+
+            # Map to positions update
+            Rl_update = self.to_r_update(Ql)
+
+            return Rl_update
+
+        return atom_decoder(Ai, Ql_skip, Cl_skip, Plm_skip, tok_idx)
+
+
+class DiffusionModule(nn.Module):
+    def __init__(
+        self,
+        sigma_data,
+        c_atom,
+        c_atompair,
+        c_token,
+        c_s,
+        c_z,
+        f_pred,
+        diffusion_conditioning,
+        atom_attention_encoder,
+        diffusion_transformer,
+        atom_attention_decoder,
+    ):
+        super().__init__()
+        self.sigma_data = sigma_data
+        self.c_atom = c_atom
+        self.c_atompair = c_atompair
+        self.c_token = c_token
+        self.c_s = c_s
+        self.f_pred = f_pred
+
+        self.diffusion_conditioning = DiffusionConditioning(
+            sigma_data=sigma_data, c_s=c_s, c_z=c_z, **diffusion_conditioning
+        )
+        self.atom_attention_encoder = AtomAttentionEncoderDiffusion(
+            c_token=c_token,
+            c_s=c_s,
+            c_atom=c_atom,
+            c_atompair=c_atompair,
+            **atom_attention_encoder,
+        )
+        self.process_s = nn.Sequential(
+            nn.LayerNorm((c_s,)),
+            linearNoBias(c_s, c_token),
+        )
+        self.diffusion_transformer = DiffusionTransformer(
+            c_token=c_token, c_s=c_s, c_tokenpair=c_z, **diffusion_transformer
+        )
+        self.layer_norm_1 = nn.LayerNorm(c_token)
+        self.atom_attention_decoder = AtomAttentionDecoder(
+            c_token=c_token,
+            c_atom=c_atom,
+            c_atompair=c_atompair,
+            **atom_attention_decoder,
+        )
+
+    def forward(
+        self,
+        X_noisy_L,  # [B, L, 3]
+        t,  # [B] (0 is ground truth)
+        f,  # Dict (Input feature dictionary)
+        S_inputs_I,  # [B, I, C_S_input]
+        S_trunk_I,  # [B, I, C_S_trunk]
+        Z_trunk_II,  # [B, I, I, C_Z]
+    ):
+        # Conditioning
+        S_I, Z_II = self.diffusion_conditioning(
+            t, f, S_inputs_I.float(), S_trunk_I.float(), Z_trunk_II.float()
+        )
+
+        # Scale positions to dimensionless vectors with approximately unit variance
+        if self.f_pred == "edm":
+            R_noisy_L = X_noisy_L / torch.sqrt(
+                t[..., None, None] ** 2 + self.sigma_data**2
+            )
+        elif self.f_pred == "unconditioned":
+            R_noisy_L = torch.zeros_like(X_noisy_L)
+        elif self.f_pred == "noise_pred":
+            R_noisy_L = X_noisy_L
+        else:
+            raise Exception(f"{self.f_pred=} unrecognized")
+        # Sequence-local Atom Attention and aggregation to coarse-grained tokens
+        A_I, Q_skip_L, C_skip_L, P_skip_LL = self.atom_attention_encoder(
+            f, R_noisy_L, S_trunk_I.float(), Z_II
+        )
+        # Full self-attention on token level
+
+        A_I = A_I + self.process_s(S_I)
+        A_I = self.diffusion_transformer(A_I, S_I, Z_II, Beta_II=None)
+        A_I = self.layer_norm_1(A_I)
+
+        # Broadcast token activations to atoms and run Sequence-local Atom Attention
+        R_update_L = self.atom_attention_decoder(
+            f, A_I.float(), Q_skip_L, C_skip_L, P_skip_LL
+        )
+        # Rescale updates to positions and combine with input positions
+        if self.f_pred == "edm":
+            X_out_L = (self.sigma_data**2 / (self.sigma_data**2 + t**2))[
+                ..., None, None
+            ] * X_noisy_L + (self.sigma_data * t / (self.sigma_data**2 + t**2) ** 0.5)[
+                ..., None, None
+            ] * R_update_L
+        elif self.f_pred == "unconditioned":
+            X_out_L = R_update_L
+        elif self.f_pred == "noise_pred":
+            X_out_L = X_noisy_L + R_update_L
+        else:
+            raise Exception(f"{self.f_pred=} unrecognized")
+
+        return X_out_L
+
+
+class DiffusionConditioning(nn.Module):
+    def __init__(
+        self, sigma_data, c_z, c_s, c_s_inputs, c_t_embed, relative_position_encoding
+    ):
+        super().__init__()
+        self.sigma_data = sigma_data
+        self.relative_position_encoding = RelativePositionEncoding(
+            c_z=c_z, **relative_position_encoding
+        )
+        self.to_zii = nn.Sequential(
+            nn.LayerNorm(
+                c_z * 2
+            ),  # Operates on concatenated (z_ij_trunk: [..., c_z], RelativePositionalEncoding: [..., c_z])
+            linearNoBias(c_z * 2, c_z),
+        )
+        self.transition_1 = nn.ModuleList(
+            [
+                Transition(c=c_z, n=2),
+                Transition(c=c_z, n=2),
+            ]
+        )
+        self.to_si = nn.Sequential(
+            nn.LayerNorm(c_s + c_s_inputs), linearNoBias(c_s + c_s_inputs, c_s)
+        )
+        c_t_embed = 256
+        self.fourier_embedding = FourierEmbedding(c_t_embed)
+        self.process_n = nn.Sequential(
+            nn.LayerNorm(c_t_embed), linearNoBias(c_t_embed, c_s)
+        )
+        self.transition_2 = nn.ModuleList(
+            [
+                Transition(c=c_s, n=2),
+                Transition(c=c_s, n=2),
+            ]
+        )
+
+    def forward(self, t, f, S_inputs_I, S_trunk_I, Z_trunk_II):
+        # Pair conditioning
+        Z_II = torch.cat([Z_trunk_II, self.relative_position_encoding(f)], dim=-1)
+
+        @activation_checkpointing
+        def _run_conditioning(Z_II, S_trunk_I, S_inputs_I):
+            Z_II = self.to_zii(Z_II)
+            for b in range(2):
+                Z_II = Z_II + self.transition_1[b](Z_II)
+
+            # Single conditioning
+            S_I = torch.cat([S_trunk_I, S_inputs_I], dim=-1)
+            S_I = self.to_si(S_I)
+            N_D = self.fourier_embedding(1 / 4 * torch.log(t / self.sigma_data))
+            S_I = self.process_n(N_D).unsqueeze(-2) + S_I
+            for b in range(2):
+                S_I = S_I + self.transition_2[b](S_I)
+
+            return S_I, Z_II
+
+        return _run_conditioning(Z_II, S_trunk_I, S_inputs_I)
+
+
+class DistogramHead(nn.Module):
+    def __init__(
+        self,
+        c_z,
+        bins,
+    ):
+        super().__init__()
+        self.predictor = nn.Linear(c_z, bins)
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        # initialize linear layer for final logit prediction
+        nn.init.zeros_(self.predictor.weight)
+        nn.init.zeros_(self.predictor.bias)
+
+    def forward(
+        self,
+        Z_II,
+    ):
+        return self.predictor(
+            Z_II + Z_II.transpose(-2, -3)  # symmetrize pair features
+        )
+
+
+class Recycler(nn.Module):
+    def __init__(
+        self,
+        c_s,
+        c_z,
+        template_embedder,
+        msa_module,
+        n_pairformer_blocks,
+        pairformer_block,
+    ):
+        super().__init__()
+        self.c_z = c_z
+        self.process_zh = nn.Sequential(
+            nn.LayerNorm(c_z),
+            linearNoBias(c_z, c_z),
+        )
+        self.template_embedder = RF3TemplateEmbedder(c_z=c_z, **template_embedder)
+        self.msa_module = MSAModule(**msa_module)
+        self.process_sh = nn.Sequential(
+            nn.LayerNorm(c_s),
+            linearNoBias(c_s, c_s),
+        )
+        self.pairformer_stack = nn.ModuleList(
+            [
+                PairformerBlock(c_s=c_s, c_z=c_z, **pairformer_block)
+                for _ in range(n_pairformer_blocks)
+            ]
+        )
+
+    def forward(
+        self,
+        f,
+        S_inputs_I,
+        S_init_I,
+        Z_init_II,
+        S_I,
+        Z_II,
+    ):
+        Z_II = Z_init_II + self.process_zh(Z_II)
+        Z_II = Z_II + self.template_embedder(f, Z_II)
+        # NOTE: Implementing bugfix from the Protenix Technical Report, where residual-connecting the MSA module is redundant
+        # Reference: https://github.com/bytedance/Protenix/blob/main/Protenix_Technical_Report.pdf
+        Z_II = self.msa_module(f, Z_II, S_inputs_I)
+        S_I = S_init_I + self.process_sh(S_I)
+        for block in self.pairformer_stack:
+            S_I, Z_II = block(S_I, Z_II)
+        return S_I, Z_II
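Note on the coordinate scaling above: when f_pred == "edm", DiffusionModule.forward applies Karras-style EDM preconditioning, scaling the noisy coordinates before the network and recombining the network output with the noisy input afterwards. The sketch below isolates just those two scalings for readability; the helper name edm_precondition is ours and is not part of the package.

import torch


def edm_precondition(X_noisy_L, R_update_L, t, sigma_data):
    """Illustrative only: the input/output scalings used by the "edm" branch above."""
    # Scale noisy coordinates to roughly unit variance before the network.
    c_in = 1.0 / torch.sqrt(t[..., None, None] ** 2 + sigma_data**2)
    R_noisy_L = c_in * X_noisy_L

    # Recombine the network's coordinate update with the noisy input.
    c_skip = (sigma_data**2 / (sigma_data**2 + t**2))[..., None, None]
    c_out = (sigma_data * t / (sigma_data**2 + t**2) ** 0.5)[..., None, None]
    X_out_L = c_skip * X_noisy_L + c_out * R_update_L
    return R_noisy_L, X_out_L

At t = 0 (the ground-truth case noted in the forward signature) the skip coefficient is 1 and the output coefficient is 0, so clean coordinates pass through unchanged.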
@@ -0,0 +1,255 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from rf3.model.RF3_structure import PairformerBlock, linearNoBias
+
+# TODO: Get from RF2AA encoding instead
+CHEM_DATA_LEGACY = {"NHEAVY": 23, "aa2num": {"UNK": 20, "GLY": 7, "MAS": 21}}
+
+
+def discretize_distance_matrix(
+    distance_matrix, num_bins=38, min_distance=3.25, max_distance=50.75
+):
+    # Calculate the bin width
+    bin_width = (max_distance - min_distance) / num_bins
+    bins = (
+        torch.arange(num_bins, device=distance_matrix.device) * bin_width + min_distance
+    )
+
+    # Discretize distances into bins (bucketize automatically places out-of-range values in the last bin)
+    binned_distances = torch.bucketize(distance_matrix, bins)
+
+    return binned_distances
+
+
+class ConfidenceHead(nn.Module):
+    """Algorithm 31"""
+
+    def __init__(
+        self,
+        c_s,
+        c_z,
+        n_pairformer_layers,
+        pairformer,
+        n_bins_pae,
+        n_bins_pde,
+        n_bins_plddt,
+        n_bins_exp_resolved,
+        use_Cb_distances=False,
+        use_af3_style_binning_and_final_layer_norms=False,
+        symmetrize_Cb_logits=True,
+        layer_norm_along_feature_dimension=False,
+    ):
+        super(ConfidenceHead, self).__init__()
+        self.process_s_inputs_right = linearNoBias(449, c_z)
+        self.process_s_inputs_left = linearNoBias(449, c_z)
+        self.use_af3_style_binning_and_final_layer_norms = (
+            use_af3_style_binning_and_final_layer_norms
+        )
+        self.layer_norm_along_feature_dimension = layer_norm_along_feature_dimension
+        if self.use_af3_style_binning_and_final_layer_norms:
+            self.layernorm_pde = nn.LayerNorm(c_z)
+            self.layernorm_pae = nn.LayerNorm(c_z)
+            self.layernorm_plddt = nn.LayerNorm(c_s)
+            self.layernorm_exp_resolved = nn.LayerNorm(c_s)
+            self.process_pred_distances = linearNoBias(40, c_z)
+        else:
+            self.process_pred_distances = linearNoBias(11, c_z)
+
+        self.pairformer = nn.ModuleList(
+            [
+                PairformerBlock(c_s=c_s, c_z=c_z, **pairformer)
+                for _ in range(n_pairformer_layers)
+            ]
+        )
+
+        self.predict_pae = linearNoBias(c_z, n_bins_pae)
+        self.predict_pde = linearNoBias(c_z, n_bins_pde)
+        self.predict_plddt = linearNoBias(
+            c_s, CHEM_DATA_LEGACY["NHEAVY"] * n_bins_plddt
+        )
+        self.predict_exp_resolved = linearNoBias(
+            c_s, CHEM_DATA_LEGACY["NHEAVY"] * n_bins_exp_resolved
+        )
+        self.use_Cb_distances = use_Cb_distances
+        if self.use_Cb_distances:
+            self.process_Cb_distances = linearNoBias(25, c_z)
+        self.symmetrize_Cb_logits = symmetrize_Cb_logits
+
+    def reset_parameters(self):
+        for m in self.modules():
+            if isinstance(m, nn.Linear):
+                nn.init.xavier_uniform_(m.weight)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+
+    def forward(
+        self,
+        S_inputs_I,
+        S_trunk_I,
+        Z_trunk_II,
+        X_pred_L,
+        seq,
+        rep_atoms,
+        frame_atom_idxs=None,
+    ):
+        # stopgrad on S_trunk_I, Z_trunk_II, X_pred_L but not S_inputs_I (4.3.5)
+        S_trunk_I = S_trunk_I.detach().float()  # B, L, 384
+        Z_trunk_II = Z_trunk_II.detach().float()  # B, L, L, 128
+        if X_pred_L is not None:
+            X_pred_L = X_pred_L.detach().float()  # B, n_atoms, 3
+        S_inputs_I = S_inputs_I.detach().float()  # B, L, 384
+        seq = seq.detach()
+
+        if self.layer_norm_along_feature_dimension:
+            # do a layer norm on S_trunk_I
+            S_trunk_I = F.layer_norm(S_trunk_I, normalized_shape=(S_trunk_I.shape[-1],))
+            # do a layer norm on Z_trunk_II
+            Z_trunk_II = F.layer_norm(
+                Z_trunk_II, normalized_shape=(Z_trunk_II.shape[-1],)
+            )
+            # do a layer norm on S_inputs_I
+            S_inputs_I = F.layer_norm(
+                S_inputs_I, normalized_shape=(S_inputs_I.shape[-1],)
+            )
+        else:
+            S_trunk_I = F.layer_norm(S_trunk_I, normalized_shape=(S_trunk_I.shape))
+            Z_trunk_II = F.layer_norm(Z_trunk_II, normalized_shape=(Z_trunk_II.shape))
+            S_inputs_I = F.layer_norm(S_inputs_I, normalized_shape=(S_inputs_I.shape))
+
+        # embed S_inputs_I twice
+        S_inputs_I_right = self.process_s_inputs_right(S_inputs_I)
+        S_inputs_I_left = self.process_s_inputs_left(S_inputs_I)
+        # add outer sum of two linear embeddings of S_inputs_I to Z_II
+        # TODO: check the unsqueezed dimension is the correct one
+        Z_trunk_II = Z_trunk_II + (
+            S_inputs_I_right.unsqueeze(-2) + S_inputs_I_left.unsqueeze(-3)
+        )
+
+        # embed distances of representative atom from every token
+        # in the pair representation
+        # if no coords are input, skip this connection
+        if X_pred_L is not None:
+            X_pred_rep_I = X_pred_L.index_select(1, rep_atoms)
+            dist = torch.cdist(X_pred_rep_I, X_pred_rep_I)
+            if not self.use_af3_style_binning_and_final_layer_norms:
+                # bins are 3.375 to 20.875 in 1.75 increments according to pseudocode
+                dist_one_hot = F.one_hot(
+                    discretize_distance_matrix(
+                        dist, min_distance=3.375, max_distance=20.875, num_bins=10
+                    ),
+                    num_classes=11,
+                )
+            else:
+                # published code is 3.25 to 50.75, with 39 bins
+                dist_one_hot = F.one_hot(
+                    discretize_distance_matrix(
+                        dist, min_distance=3.25, max_distance=50.75, num_bins=39
+                    ),
+                    num_classes=40,
+                )
+
+            Z_trunk_II = Z_trunk_II + self.process_pred_distances(dist_one_hot.float())
+
+            if self.use_Cb_distances:
+                # embed difference between observed Cb and ideal Cb positions
+                Cb_distances = calc_Cb_distances(
+                    X_pred_L, seq, rep_atoms, frame_atom_idxs
+                )
+                Cb_distances_one_hot = F.one_hot(
+                    discretize_distance_matrix(
+                        Cb_distances,
+                        min_distance=0.0001,
+                        max_distance=0.25,
+                        num_bins=24,
+                    ),
+                    num_classes=25,
+                )
+                Cb_logits = self.process_Cb_distances(Cb_distances_one_hot.float())
+                # symmetrize the logits
+                if self.symmetrize_Cb_logits:
+                    Cb_logits = Cb_logits[:, None, :, :] + Cb_logits[:, :, None, :]
+                else:
+                    Cb_logits = Cb_logits[:, None, :, :]
+
+                Z_trunk_II = Z_trunk_II + Cb_logits
+
+        if not self.use_af3_style_binning_and_final_layer_norms:
+            S_trunk_residual_I = S_trunk_I.clone()
+            Z_trunk_residual_II = Z_trunk_II.clone()
+
+        # process with pairformer stack
+        for n in range(len(self.pairformer)):
+            S_trunk_I, Z_trunk_II = self.pairformer[n](S_trunk_I, Z_trunk_II)
+
+        # despite doing so in their pseudocode, af3's published code does not add the residual back
+        if not self.use_af3_style_binning_and_final_layer_norms:
+            S_trunk_I = S_trunk_residual_I + S_trunk_I
+            Z_trunk_II = Z_trunk_residual_II + Z_trunk_II
+
+            # linearly project for each prediction task
+            pde_logits = self.predict_pde(
+                Z_trunk_II + Z_trunk_II.transpose(-2, -3)
+            )  # BUG: needs to be symmetrized correctly
+
+            pae_logits = self.predict_pae(Z_trunk_II)
+
+            plddt_logits = self.predict_plddt(S_trunk_I)
+            exp_resolved_logits = self.predict_exp_resolved(S_trunk_I)
+
+        # af3's published code does not add the residual back and has some additional layernorms before the linear projections
+        # they also do the pde slightly differently, adding the transpose after the linear projection
+        else:
+            left_distance_logits = self.predict_pde(self.layernorm_pde(Z_trunk_II))
+            right_distance_logits = left_distance_logits.transpose(-2, -3)
+            pde_logits = left_distance_logits + right_distance_logits
+
+            pae_logits = self.predict_pae(self.layernorm_pae(Z_trunk_II))
+            plddt_logits = self.predict_plddt(self.layernorm_plddt(S_trunk_I))
+            exp_resolved_logits = self.predict_exp_resolved(
+                self.layernorm_exp_resolved(S_trunk_I)
+            )
+
+        return dict(
+            pde_logits=pde_logits,
+            pae_logits=pae_logits,
+            plddt_logits=plddt_logits,
+            exp_resolved_logits=exp_resolved_logits,
+        )
+
+
+def calc_Cb_distances(X_pred_L, seq, rep_atoms, frame_atom_idxs):
+    frame_atom_idxs = frame_atom_idxs.unsqueeze(0).expand(X_pred_L.shape[0], -1, -1)
+
+    N = torch.gather(
+        X_pred_L, 1, frame_atom_idxs[..., 0].unsqueeze(-1).expand(-1, -1, 3)
+    )
+    Ca = torch.gather(
+        X_pred_L, 1, frame_atom_idxs[..., 1].unsqueeze(-1).expand(-1, -1, 3)
+    )
+    C = torch.gather(
+        X_pred_L, 1, frame_atom_idxs[..., 2].unsqueeze(-1).expand(-1, -1, 3)
+    )
+    Cb = X_pred_L.index_select(1, rep_atoms)
+
+    is_valid_Cb = (
+        (seq != CHEM_DATA_LEGACY["aa2num"]["UNK"])
+        & (seq != CHEM_DATA_LEGACY["aa2num"]["GLY"])
+        & (seq != CHEM_DATA_LEGACY["aa2num"]["MAS"])
+    )
+
+    def _legacy_is_protein(seq):
+        return (seq >= 0).all() & (seq < 20).all()
+
+    is_valid_Cb = is_valid_Cb & _legacy_is_protein(seq)
+
+    b = Ca - N
+    c = C - Ca
+    a = torch.cross(b, c, dim=-1)
+
+    ideal_Cb = -0.58273431 * a + 0.56802827 * b - 0.54067466 * c + Ca
+
+    Cb_distances = torch.norm(Cb - ideal_Cb, dim=-1)
+    Cb_distances[:, ~is_valid_Cb] = 0.0
+
+    return Cb_distances
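For reference, the distance featurization used by ConfidenceHead buckets pairwise distances with discretize_distance_matrix and one-hot encodes the bucket index; torch.bucketize sends values below the first bin edge to index 0 and values above the last edge to index num_bins, which is why each one-hot uses num_bins + 1 classes (11 and 40 above). A minimal usage sketch follows, assuming the hunk above corresponds to rf3/model/layers/af3_auxiliary_heads.py (the only 255-line module in the listing) and that the import path resolves; the tensor values are made up.

import torch
import torch.nn.functional as F

# Assumed import path, inferred from the file listing above.
from rf3.model.layers.af3_auxiliary_heads import discretize_distance_matrix

# Made-up pairwise distances between three representative atoms (Angstroms).
dist = torch.tensor(
    [
        [0.0, 4.2, 25.0],
        [4.2, 0.0, 12.5],
        [25.0, 12.5, 0.0],
    ]
)

# Non-AF3-style path: 10 bins between 3.375 and 20.875, so bucket indices run
# 0..10 and the one-hot encoding has 11 classes (matching linearNoBias(11, c_z)).
binned = discretize_distance_matrix(
    dist, min_distance=3.375, max_distance=20.875, num_bins=10
)
dist_one_hot = F.one_hot(binned, num_classes=11)
print(binned)              # bucket index per pair, e.g. 0 for 0.0 A, 10 for 25.0 A
print(dist_one_hot.shape)  # torch.Size([3, 3, 11])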