boltz-vsynthes 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- boltz/__init__.py +7 -0
- boltz/data/__init__.py +0 -0
- boltz/data/const.py +1184 -0
- boltz/data/crop/__init__.py +0 -0
- boltz/data/crop/affinity.py +164 -0
- boltz/data/crop/boltz.py +296 -0
- boltz/data/crop/cropper.py +45 -0
- boltz/data/feature/__init__.py +0 -0
- boltz/data/feature/featurizer.py +1230 -0
- boltz/data/feature/featurizerv2.py +2208 -0
- boltz/data/feature/symmetry.py +602 -0
- boltz/data/filter/__init__.py +0 -0
- boltz/data/filter/dynamic/__init__.py +0 -0
- boltz/data/filter/dynamic/date.py +76 -0
- boltz/data/filter/dynamic/filter.py +24 -0
- boltz/data/filter/dynamic/max_residues.py +37 -0
- boltz/data/filter/dynamic/resolution.py +34 -0
- boltz/data/filter/dynamic/size.py +38 -0
- boltz/data/filter/dynamic/subset.py +42 -0
- boltz/data/filter/static/__init__.py +0 -0
- boltz/data/filter/static/filter.py +26 -0
- boltz/data/filter/static/ligand.py +37 -0
- boltz/data/filter/static/polymer.py +299 -0
- boltz/data/module/__init__.py +0 -0
- boltz/data/module/inference.py +307 -0
- boltz/data/module/inferencev2.py +429 -0
- boltz/data/module/training.py +684 -0
- boltz/data/module/trainingv2.py +660 -0
- boltz/data/mol.py +900 -0
- boltz/data/msa/__init__.py +0 -0
- boltz/data/msa/mmseqs2.py +235 -0
- boltz/data/pad.py +84 -0
- boltz/data/parse/__init__.py +0 -0
- boltz/data/parse/a3m.py +134 -0
- boltz/data/parse/csv.py +100 -0
- boltz/data/parse/fasta.py +138 -0
- boltz/data/parse/mmcif.py +1239 -0
- boltz/data/parse/mmcif_with_constraints.py +1607 -0
- boltz/data/parse/schema.py +1851 -0
- boltz/data/parse/yaml.py +68 -0
- boltz/data/sample/__init__.py +0 -0
- boltz/data/sample/cluster.py +283 -0
- boltz/data/sample/distillation.py +57 -0
- boltz/data/sample/random.py +39 -0
- boltz/data/sample/sampler.py +49 -0
- boltz/data/tokenize/__init__.py +0 -0
- boltz/data/tokenize/boltz.py +195 -0
- boltz/data/tokenize/boltz2.py +396 -0
- boltz/data/tokenize/tokenizer.py +24 -0
- boltz/data/types.py +777 -0
- boltz/data/write/__init__.py +0 -0
- boltz/data/write/mmcif.py +305 -0
- boltz/data/write/pdb.py +171 -0
- boltz/data/write/utils.py +23 -0
- boltz/data/write/writer.py +330 -0
- boltz/main.py +1292 -0
- boltz/model/__init__.py +0 -0
- boltz/model/layers/__init__.py +0 -0
- boltz/model/layers/attention.py +132 -0
- boltz/model/layers/attentionv2.py +111 -0
- boltz/model/layers/confidence_utils.py +231 -0
- boltz/model/layers/dropout.py +34 -0
- boltz/model/layers/initialize.py +100 -0
- boltz/model/layers/outer_product_mean.py +98 -0
- boltz/model/layers/pair_averaging.py +135 -0
- boltz/model/layers/pairformer.py +337 -0
- boltz/model/layers/relative.py +58 -0
- boltz/model/layers/transition.py +78 -0
- boltz/model/layers/triangular_attention/__init__.py +0 -0
- boltz/model/layers/triangular_attention/attention.py +189 -0
- boltz/model/layers/triangular_attention/primitives.py +409 -0
- boltz/model/layers/triangular_attention/utils.py +380 -0
- boltz/model/layers/triangular_mult.py +212 -0
- boltz/model/loss/__init__.py +0 -0
- boltz/model/loss/bfactor.py +49 -0
- boltz/model/loss/confidence.py +590 -0
- boltz/model/loss/confidencev2.py +621 -0
- boltz/model/loss/diffusion.py +171 -0
- boltz/model/loss/diffusionv2.py +134 -0
- boltz/model/loss/distogram.py +48 -0
- boltz/model/loss/distogramv2.py +105 -0
- boltz/model/loss/validation.py +1025 -0
- boltz/model/models/__init__.py +0 -0
- boltz/model/models/boltz1.py +1286 -0
- boltz/model/models/boltz2.py +1249 -0
- boltz/model/modules/__init__.py +0 -0
- boltz/model/modules/affinity.py +223 -0
- boltz/model/modules/confidence.py +481 -0
- boltz/model/modules/confidence_utils.py +181 -0
- boltz/model/modules/confidencev2.py +495 -0
- boltz/model/modules/diffusion.py +844 -0
- boltz/model/modules/diffusion_conditioning.py +116 -0
- boltz/model/modules/diffusionv2.py +677 -0
- boltz/model/modules/encoders.py +639 -0
- boltz/model/modules/encodersv2.py +565 -0
- boltz/model/modules/transformers.py +322 -0
- boltz/model/modules/transformersv2.py +261 -0
- boltz/model/modules/trunk.py +688 -0
- boltz/model/modules/trunkv2.py +828 -0
- boltz/model/modules/utils.py +303 -0
- boltz/model/optim/__init__.py +0 -0
- boltz/model/optim/ema.py +389 -0
- boltz/model/optim/scheduler.py +99 -0
- boltz/model/potentials/__init__.py +0 -0
- boltz/model/potentials/potentials.py +497 -0
- boltz/model/potentials/schedules.py +32 -0
- boltz_vsynthes-1.0.0.dist-info/METADATA +151 -0
- boltz_vsynthes-1.0.0.dist-info/RECORD +112 -0
- boltz_vsynthes-1.0.0.dist-info/WHEEL +5 -0
- boltz_vsynthes-1.0.0.dist-info/entry_points.txt +2 -0
- boltz_vsynthes-1.0.0.dist-info/licenses/LICENSE +21 -0
- boltz_vsynthes-1.0.0.dist-info/top_level.txt +1 -0
boltz/model/__init__.py
ADDED
File without changes
@@ -0,0 +1,132 @@
+import torch
+from einops.layers.torch import Rearrange
+from torch import Tensor, nn
+
+import boltz.model.layers.initialize as init
+
+
+class AttentionPairBias(nn.Module):
+    """Attention pair bias layer."""
+
+    def __init__(
+        self,
+        c_s: int,
+        c_z: int,
+        num_heads: int,
+        inf: float = 1e6,
+        initial_norm: bool = True,
+    ) -> None:
+        """Initialize the attention pair bias layer.
+
+        Parameters
+        ----------
+        c_s : int
+            The input sequence dimension.
+        c_z : int
+            The input pairwise dimension.
+        num_heads : int
+            The number of heads.
+        inf : float, optional
+            The inf value, by default 1e6
+        initial_norm: bool, optional
+            Whether to apply layer norm to the input, by default True
+
+        """
+        super().__init__()
+
+        assert c_s % num_heads == 0
+
+        self.c_s = c_s
+        self.num_heads = num_heads
+        self.head_dim = c_s // num_heads
+        self.inf = inf
+
+        self.initial_norm = initial_norm
+        if self.initial_norm:
+            self.norm_s = nn.LayerNorm(c_s)
+
+        self.proj_q = nn.Linear(c_s, c_s)
+        self.proj_k = nn.Linear(c_s, c_s, bias=False)
+        self.proj_v = nn.Linear(c_s, c_s, bias=False)
+        self.proj_g = nn.Linear(c_s, c_s, bias=False)
+
+        self.proj_z = nn.Sequential(
+            nn.LayerNorm(c_z),
+            nn.Linear(c_z, num_heads, bias=False),
+            Rearrange("b ... h -> b h ..."),
+        )
+
+        self.proj_o = nn.Linear(c_s, c_s, bias=False)
+        init.final_init_(self.proj_o.weight)
+
+    def forward(
+        self,
+        s: Tensor,
+        z: Tensor,
+        mask: Tensor,
+        multiplicity: int = 1,
+        to_keys=None,
+        model_cache=None,
+    ) -> Tensor:
+        """Forward pass.
+
+        Parameters
+        ----------
+        s : torch.Tensor
+            The input sequence tensor (B, S, D)
+        z : torch.Tensor
+            The input pairwise tensor (B, N, N, D)
+        mask : torch.Tensor
+            The pairwise mask tensor (B, N)
+        multiplicity : int, optional
+            The diffusion batch size, by default 1
+
+        Returns
+        -------
+        torch.Tensor
+            The output sequence tensor.
+
+        """
+        B = s.shape[0]
+
+        # Layer norms
+        if self.initial_norm:
+            s = self.norm_s(s)
+
+        if to_keys is not None:
+            k_in = to_keys(s)
+            mask = to_keys(mask.unsqueeze(-1)).squeeze(-1)
+        else:
+            k_in = s
+
+        # Compute projections
+        q = self.proj_q(s).view(B, -1, self.num_heads, self.head_dim)
+        k = self.proj_k(k_in).view(B, -1, self.num_heads, self.head_dim)
+        v = self.proj_v(k_in).view(B, -1, self.num_heads, self.head_dim)
+
+        # Caching z projection during diffusion roll-out
+        if model_cache is None or "z" not in model_cache:
+            z = self.proj_z(z)
+
+            if model_cache is not None:
+                model_cache["z"] = z
+        else:
+            z = model_cache["z"]
+        z = z.repeat_interleave(multiplicity, 0)
+
+        g = self.proj_g(s).sigmoid()
+
+        with torch.autocast("cuda", enabled=False):
+            # Compute attention weights
+            attn = torch.einsum("bihd,bjhd->bhij", q.float(), k.float())
+            attn = attn / (self.head_dim**0.5) + z.float()
+            # The pairwise mask tensor (B, N) is broadcasted to (B, 1, 1, N) and (B, H, N, N)
+            attn = attn + (1 - mask[:, None, None].float()) * -self.inf
+            attn = attn.softmax(dim=-1)
+
+            # Compute output
+            o = torch.einsum("bhij,bjhd->bihd", attn, v.float()).to(v.dtype)
+            o = o.reshape(B, -1, self.c_s)
+            o = self.proj_o(g * o)
+
+        return o
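
Judging by the matching +132 line count in the listing above, this hunk appears to correspond to boltz/model/layers/attention.py rather than to boltz/model/__init__.py. A minimal usage sketch of the layer, assuming that import path; the batch, token, and channel sizes below are illustrative, not taken from the package:

import torch

from boltz.model.layers.attention import AttentionPairBias

layer = AttentionPairBias(c_s=256, c_z=128, num_heads=8)
s = torch.randn(2, 16, 256)       # single representation (B, N, c_s)
z = torch.randn(2, 16, 16, 128)   # pair representation (B, N, N, c_z)
mask = torch.ones(2, 16)          # 1 = real token, 0 = padding
out = layer(s, z, mask)           # -> (2, 16, 256)

The model_cache argument lets the projected pair bias be computed once and reused across diffusion roll-out steps (per the in-code comment), and to_keys lets the caller remap the key/value inputs and their mask before projection.
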
@@ -0,0 +1,111 @@
+from typing import Optional
+
+import torch
+from einops.layers.torch import Rearrange
+from torch import Tensor, nn
+
+import boltz.model.layers.initialize as init
+
+
+class AttentionPairBias(nn.Module):
+    """Attention pair bias layer."""
+
+    def __init__(
+        self,
+        c_s: int,
+        c_z: Optional[int] = None,
+        num_heads: Optional[int] = None,
+        inf: float = 1e6,
+        compute_pair_bias: bool = True,
+    ) -> None:
+        """Initialize the attention pair bias layer.
+
+        Parameters
+        ----------
+        c_s : int
+            The input sequence dimension.
+        c_z : int
+            The input pairwise dimension.
+        num_heads : int
+            The number of heads.
+        inf : float, optional
+            The inf value, by default 1e6
+
+        """
+        super().__init__()
+
+        assert c_s % num_heads == 0
+
+        self.c_s = c_s
+        self.num_heads = num_heads
+        self.head_dim = c_s // num_heads
+        self.inf = inf
+
+        self.proj_q = nn.Linear(c_s, c_s)
+        self.proj_k = nn.Linear(c_s, c_s, bias=False)
+        self.proj_v = nn.Linear(c_s, c_s, bias=False)
+        self.proj_g = nn.Linear(c_s, c_s, bias=False)
+
+        self.compute_pair_bias = compute_pair_bias
+        if compute_pair_bias:
+            self.proj_z = nn.Sequential(
+                nn.LayerNorm(c_z),
+                nn.Linear(c_z, num_heads, bias=False),
+                Rearrange("b ... h -> b h ..."),
+            )
+        else:
+            self.proj_z = Rearrange("b ... h -> b h ...")
+
+        self.proj_o = nn.Linear(c_s, c_s, bias=False)
+        init.final_init_(self.proj_o.weight)
+
+    def forward(
+        self,
+        s: Tensor,
+        z: Tensor,
+        mask: Tensor,
+        k_in: Tensor,
+        multiplicity: int = 1,
+    ) -> Tensor:
+        """Forward pass.
+
+        Parameters
+        ----------
+        s : torch.Tensor
+            The input sequence tensor (B, S, D)
+        z : torch.Tensor
+            The input pairwise tensor or bias (B, N, N, D)
+        mask : torch.Tensor
+            The pairwise mask tensor (B, N, N)
+
+        Returns
+        -------
+        torch.Tensor
+            The output sequence tensor.
+
+        """
+        B = s.shape[0]
+
+        # Compute projections
+        q = self.proj_q(s).view(B, -1, self.num_heads, self.head_dim)
+        k = self.proj_k(k_in).view(B, -1, self.num_heads, self.head_dim)
+        v = self.proj_v(k_in).view(B, -1, self.num_heads, self.head_dim)
+
+        bias = self.proj_z(z)
+        bias = bias.repeat_interleave(multiplicity, 0)
+
+        g = self.proj_g(s).sigmoid()
+
+        with torch.autocast("cuda", enabled=False):
+            # Compute attention weights
+            attn = torch.einsum("bihd,bjhd->bhij", q.float(), k.float())
+            attn = attn / (self.head_dim**0.5) + bias.float()
+            attn = attn + (1 - mask[:, None, None].float()) * -self.inf
+            attn = attn.softmax(dim=-1)
+
+            # Compute output
+            o = torch.einsum("bhij,bjhd->bihd", attn, v.float()).to(v.dtype)
+            o = o.reshape(B, -1, self.c_s)
+            o = self.proj_o(g * o)
+
+        return o
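
This +111 line hunk lines up with boltz/model/layers/attentionv2.py. Compared with the layer above, keys and values are projected from a caller-supplied k_in tensor, and the pair-bias projection is optional: with compute_pair_bias=False, proj_z reduces to a Rearrange, so z is expected to already be a per-head bias. Note that the docstring lists the mask as (B, N, N), but the indexing mask[:, None, None] matches the per-token (B, N) mask of the layer above. A hedged sketch under that reading, with illustrative sizes and the import path inferred from the listing:

import torch

from boltz.model.layers.attentionv2 import AttentionPairBias

layer = AttentionPairBias(c_s=256, c_z=128, num_heads=8)
s = torch.randn(2, 16, 256)
z = torch.randn(2, 16, 16, 128)
mask = torch.ones(2, 16)
# Keys/values come from k_in; here it is simply the single representation itself.
out = layer(s, z, mask, k_in=s)   # -> (2, 16, 256)
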
@@ -0,0 +1,231 @@
+import torch
+from torch import nn
+
+from boltz.data import const
+
+
+def compute_collinear_mask(v1, v2):
+    norm1 = torch.norm(v1, dim=1, keepdim=True)
+    norm2 = torch.norm(v2, dim=1, keepdim=True)
+    v1 = v1 / (norm1 + 1e-6)
+    v2 = v2 / (norm2 + 1e-6)
+    mask_angle = torch.abs(torch.sum(v1 * v2, dim=1)) < 0.9063
+    mask_overlap1 = norm1.reshape(-1) > 1e-2
+    mask_overlap2 = norm2.reshape(-1) > 1e-2
+    return mask_angle & mask_overlap1 & mask_overlap2
+
+
+def compute_frame_pred(
+    pred_atom_coords,
+    frames_idx_true,
+    feats,
+    multiplicity,
+    resolved_mask=None,
+    inference=False,
+):
+    with torch.amp.autocast("cuda", enabled=False):
+        asym_id_token = feats["asym_id"]
+        asym_id_atom = torch.bmm(
+            feats["atom_to_token"].float(), asym_id_token.unsqueeze(-1).float()
+        ).squeeze(-1)
+
+    B, N, _ = pred_atom_coords.shape
+    pred_atom_coords = pred_atom_coords.reshape(B // multiplicity, multiplicity, -1, 3)
+    frames_idx_pred = (
+        frames_idx_true.clone()
+        .repeat_interleave(multiplicity, 0)
+        .reshape(B // multiplicity, multiplicity, -1, 3)
+    )
+
+    # Iterate through the batch and modify the frames for nonpolymers
+    for i, pred_atom_coord in enumerate(pred_atom_coords):
+        token_idx = 0
+        atom_idx = 0
+        for id in torch.unique(asym_id_token[i]):
+            mask_chain_token = (asym_id_token[i] == id) * feats["token_pad_mask"][i]
+            mask_chain_atom = (asym_id_atom[i] == id) * feats["atom_pad_mask"][i]
+            num_tokens = int(mask_chain_token.sum().item())
+            num_atoms = int(mask_chain_atom.sum().item())
+            if (
+                feats["mol_type"][i, token_idx] != const.chain_type_ids["NONPOLYMER"]
+                or num_atoms < 3
+            ):
+                token_idx += num_tokens
+                atom_idx += num_atoms
+                continue
+            dist_mat = (
+                (
+                    pred_atom_coord[:, mask_chain_atom.bool()][:, None, :, :]
+                    - pred_atom_coord[:, mask_chain_atom.bool()][:, :, None, :]
+                )
+                ** 2
+            ).sum(-1) ** 0.5
+            if inference:
+                resolved_pair = 1 - (
+                    feats["atom_pad_mask"][i][mask_chain_atom.bool()][None, :]
+                    * feats["atom_pad_mask"][i][mask_chain_atom.bool()][:, None]
+                ).to(torch.float32)
+                resolved_pair[resolved_pair == 1] = torch.inf
+                indices = torch.sort(dist_mat + resolved_pair, axis=2).indices
+            else:
+                if resolved_mask is None:
+                    resolved_mask = feats["atom_resolved_mask"]
+                resolved_pair = 1 - (
+                    resolved_mask[i][mask_chain_atom.bool()][None, :]
+                    * resolved_mask[i][mask_chain_atom.bool()][:, None]
+                ).to(torch.float32)
+                resolved_pair[resolved_pair == 1] = torch.inf
+                indices = torch.sort(dist_mat + resolved_pair, axis=2).indices
+            frames = (
+                torch.cat(
+                    [
+                        indices[:, :, 1:2],
+                        indices[:, :, 0:1],
+                        indices[:, :, 2:3],
+                    ],
+                    dim=2,
+                )
+                + atom_idx
+            )
+            try:
+                frames_idx_pred[i, :, token_idx : token_idx + num_atoms, :] = frames
+            except Exception as e:
+                print(f"Failed to process {feats['pdb_id']} due to {e}")
+            token_idx += num_tokens
+            atom_idx += num_atoms
+
+    frames_expanded = pred_atom_coords[
+        torch.arange(0, B // multiplicity, 1)[:, None, None, None].to(
+            frames_idx_pred.device
+        ),
+        torch.arange(0, multiplicity, 1)[None, :, None, None].to(
+            frames_idx_pred.device
+        ),
+        frames_idx_pred,
+    ].reshape(-1, 3, 3)
+
+    # Compute masks for collinearity / overlap
+    mask_collinear_pred = compute_collinear_mask(
+        frames_expanded[:, 1] - frames_expanded[:, 0],
+        frames_expanded[:, 1] - frames_expanded[:, 2],
+    ).reshape(B // multiplicity, multiplicity, -1)
+    return frames_idx_pred, mask_collinear_pred * feats["token_pad_mask"][:, None, :]
+
+
+def compute_aggregated_metric(logits, end=1.0):
+    # Compute aggregated metric from logits
+    num_bins = logits.shape[-1]
+    bin_width = end / num_bins
+    bounds = torch.arange(
+        start=0.5 * bin_width, end=end, step=bin_width, device=logits.device
+    )
+    probs = nn.functional.softmax(logits, dim=-1)
+    plddt = torch.sum(
+        probs * bounds.view(*((1,) * len(probs.shape[:-1])), *bounds.shape),
+        dim=-1,
+    )
+    return plddt
+
+
+def tm_function(d, Nres):
+    d0 = 1.24 * (torch.clip(Nres, min=19) - 15) ** (1 / 3) - 1.8
+    return 1 / (1 + (d / d0) ** 2)
+
+
+def compute_ptms(logits, x_preds, feats, multiplicity):
+    # It needs to take as input the mask of the frames as they are not used to compute the PTM
+    _, mask_collinear_pred = compute_frame_pred(
+        x_preds, feats["frames_idx"], feats, multiplicity, inference=True
+    )
+    # mask overlapping, collinear tokens and ions (invalid frames)
+    mask_pad = feats["token_pad_mask"].repeat_interleave(multiplicity, 0)
+    maski = mask_collinear_pred.reshape(-1, mask_collinear_pred.shape[-1])
+    pair_mask_ptm = maski[:, :, None] * mask_pad[:, None, :] * mask_pad[:, :, None]
+    asym_id = feats["asym_id"].repeat_interleave(multiplicity, 0)
+    pair_mask_iptm = (
+        maski[:, :, None]
+        * (asym_id[:, None, :] != asym_id[:, :, None])
+        * mask_pad[:, None, :]
+        * mask_pad[:, :, None]
+    )
+    num_bins = logits.shape[-1]
+    bin_width = 32.0 / num_bins
+    end = 32.0
+    pae_value = torch.arange(
+        start=0.5 * bin_width, end=end, step=bin_width, device=logits.device
+    ).unsqueeze(0)
+    N_res = mask_pad.sum(dim=-1, keepdim=True)
+    tm_value = tm_function(pae_value, N_res).unsqueeze(1).unsqueeze(2)
+    probs = nn.functional.softmax(logits, dim=-1)
+    tm_expected_value = torch.sum(
+        probs * tm_value,
+        dim=-1,
+    )  # shape (B, N, N)
+    ptm = torch.max(
+        torch.sum(tm_expected_value * pair_mask_ptm, dim=-1)
+        / (torch.sum(pair_mask_ptm, dim=-1) + 1e-5),
+        dim=1,
+    ).values
+    iptm = torch.max(
+        torch.sum(tm_expected_value * pair_mask_iptm, dim=-1)
+        / (torch.sum(pair_mask_iptm, dim=-1) + 1e-5),
+        dim=1,
+    ).values
+
+    # compute ligand and protein iPTM
+    token_type = feats["mol_type"]
+    token_type = token_type.repeat_interleave(multiplicity, 0)
+    is_ligand_token = (token_type == const.chain_type_ids["NONPOLYMER"]).float()
+    is_protein_token = (token_type == const.chain_type_ids["PROTEIN"]).float()
+
+    ligand_iptm_mask = (
+        maski[:, :, None]
+        * (asym_id[:, None, :] != asym_id[:, :, None])
+        * mask_pad[:, None, :]
+        * mask_pad[:, :, None]
+        * (
+            (is_ligand_token[:, :, None] * is_protein_token[:, None, :])
+            + (is_protein_token[:, :, None] * is_ligand_token[:, None, :])
+        )
+    )
+    protein_ipmt_mask = (
+        maski[:, :, None]
+        * (asym_id[:, None, :] != asym_id[:, :, None])
+        * mask_pad[:, None, :]
+        * mask_pad[:, :, None]
+        * (is_protein_token[:, :, None] * is_protein_token[:, None, :])
+    )
+
+    ligand_iptm = torch.max(
+        torch.sum(tm_expected_value * ligand_iptm_mask, dim=-1)
+        / (torch.sum(ligand_iptm_mask, dim=-1) + 1e-5),
+        dim=1,
+    ).values
+    protein_iptm = torch.max(
+        torch.sum(tm_expected_value * protein_ipmt_mask, dim=-1)
+        / (torch.sum(protein_ipmt_mask, dim=-1) + 1e-5),
+        dim=1,
+    ).values
+
+    # Compute pair chain ipTM
+    chain_pair_iptm = {}
+    asym_ids_list = torch.unique(asym_id).tolist()
+    for idx1 in asym_ids_list:
+        chain_iptm = {}
+        for idx2 in asym_ids_list:
+            mask_pair_chain = (
+                maski[:, :, None]
+                * (asym_id[:, None, :] == idx1)
+                * (asym_id[:, :, None] == idx2)
+                * mask_pad[:, None, :]
+                * mask_pad[:, :, None]
+            )
+
+            chain_iptm[idx2] = torch.max(
+                torch.sum(tm_expected_value * mask_pair_chain, dim=-1)
+                / (torch.sum(mask_pair_chain, dim=-1) + 1e-5),
+                dim=1,
+            ).values
+        chain_pair_iptm[idx1] = chain_iptm
+
+    return ptm, iptm, ligand_iptm, protein_iptm, chain_pair_iptm
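
This +231 line hunk matches boltz/model/layers/confidence_utils.py. compute_aggregated_metric turns binned confidence logits into the expectation of the bin centers (the pLDDT-style aggregation), tm_function is the TM-score kernel whose d0 depends only on the clipped residue count, and compute_ptms combines them with frame-validity and chain masks to produce pTM, ipTM, ligand/protein ipTM, and per-chain-pair ipTM. A small sketch of the two standalone helpers, with the import path inferred from the listing and illustrative shapes:

import torch

from boltz.model.layers.confidence_utils import compute_aggregated_metric, tm_function

# Expectation over bin centers: with 50 bins on [0, 1], centers are 0.01, 0.03, ..., 0.99.
plddt_logits = torch.randn(2, 16, 50)            # (B, N, num_bins)
plddt = compute_aggregated_metric(plddt_logits)  # -> (2, 16), values in (0, 1)

# TM-score kernel used by compute_ptms: larger predicted errors d give lower scores.
d = torch.tensor([1.0, 5.0, 10.0])
print(tm_function(d, Nres=torch.tensor(100.0)))
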
@@ -0,0 +1,34 @@
+import torch
+from torch import Tensor
+
+
+def get_dropout_mask(
+    dropout: float,
+    z: Tensor,
+    training: bool,
+    columnwise: bool = False,
+) -> Tensor:
+    """Get the dropout mask.
+
+    Parameters
+    ----------
+    dropout : float
+        The dropout rate
+    z : torch.Tensor
+        The tensor to apply dropout to
+    training : bool
+        Whether the model is in training mode
+    columnwise : bool, optional
+        Whether to apply dropout columnwise
+
+    Returns
+    -------
+    torch.Tensor
+        The dropout mask
+
+    """
+    dropout = dropout * training
+    v = z[:, 0:1, :, 0:1] if columnwise else z[:, :, 0:1, 0:1]
+    d = torch.rand_like(v) > dropout
+    d = d * 1.0 / (1.0 - dropout)
+    return d
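
This +34 line hunk matches boltz/model/layers/dropout.py. The helper implements shared row- or column-wise inverted dropout for pair tensors: it draws one Bernoulli sample per row (or column), rescales by 1/(1 - p), and the caller multiplies the mask into a pair update so it broadcasts over the remaining dimensions. A sketch, with the import path inferred from the listing:

import torch

from boltz.model.layers.dropout import get_dropout_mask

z_update = torch.randn(2, 16, 16, 128)  # pair update (B, N, N, c_z)

# Row-wise mask of shape (B, N, 1, 1); columnwise=True gives (B, 1, N, 1) instead.
mask = get_dropout_mask(0.25, z_update, training=True)
z_update = z_update * mask  # zeroes whole rows, scaled to preserve the expectation
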
@@ -0,0 +1,100 @@
+"""Utility functions for initializing weights and biases."""
+
+# Copyright 2021 AlQuraishi Laboratory
+# Copyright 2021 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+
+import numpy as np
+import torch
+from scipy.stats import truncnorm
+
+
+def _prod(nums):
+    out = 1
+    for n in nums:
+        out = out * n
+    return out
+
+
+def _calculate_fan(linear_weight_shape, fan="fan_in"):
+    fan_out, fan_in = linear_weight_shape
+
+    if fan == "fan_in":
+        f = fan_in
+    elif fan == "fan_out":
+        f = fan_out
+    elif fan == "fan_avg":
+        f = (fan_in + fan_out) / 2
+    else:
+        raise ValueError("Invalid fan option")
+
+    return f
+
+
+def trunc_normal_init_(weights, scale=1.0, fan="fan_in"):
+    shape = weights.shape
+    f = _calculate_fan(shape, fan)
+    scale = scale / max(1, f)
+    a = -2
+    b = 2
+    std = math.sqrt(scale) / truncnorm.std(a=a, b=b, loc=0, scale=1)
+    size = _prod(shape)
+    samples = truncnorm.rvs(a=a, b=b, loc=0, scale=std, size=size)
+    samples = np.reshape(samples, shape)
+    with torch.no_grad():
+        weights.copy_(torch.tensor(samples, device=weights.device))
+
+
+def lecun_normal_init_(weights):
+    trunc_normal_init_(weights, scale=1.0)
+
+
+def he_normal_init_(weights):
+    trunc_normal_init_(weights, scale=2.0)
+
+
+def glorot_uniform_init_(weights):
+    torch.nn.init.xavier_uniform_(weights, gain=1)
+
+
+def final_init_(weights):
+    with torch.no_grad():
+        weights.fill_(0.0)
+
+
+def gating_init_(weights):
+    with torch.no_grad():
+        weights.fill_(0.0)
+
+
+def bias_init_zero_(bias):
+    with torch.no_grad():
+        bias.fill_(0.0)
+
+
+def bias_init_one_(bias):
+    with torch.no_grad():
+        bias.fill_(1.0)
+
+
+def normal_init_(weights):
+    torch.nn.init.kaiming_normal_(weights, nonlinearity="linear")
+
+
+def ipa_point_weights_init_(weights):
+    with torch.no_grad():
+        softplus_inverse_1 = 0.541324854612918
+        weights.fill_(softplus_inverse_1)
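
The final +100 line hunk is the initializer module imported as init by the attention layers above; by line count it corresponds to boltz/model/layers/initialize.py. It carries the OpenFold/AlphaFold-style initializers: truncated-normal LeCun/He init, zero "final" init for output projections, gating init, and the inverse-softplus constant for IPA point weights. A short sketch of typical use; the layer sizes are illustrative:

from torch import nn

import boltz.model.layers.initialize as init

linear = nn.Linear(128, 128)
init.lecun_normal_init_(linear.weight)  # truncated normal with variance ~ 1 / fan_in
init.bias_init_zero_(linear.bias)

gate = nn.Linear(128, 128)
init.gating_init_(gate.weight)          # zero weight: the gate depends only on its bias at start
init.bias_init_one_(gate.bias)          # sigmoid(1) ~= 0.73, so gates start mostly open

out_proj = nn.Linear(128, 128, bias=False)
init.final_init_(out_proj.weight)       # zero output projection, as used for proj_o above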