PyPI - crfm-helm - Versions diffs - 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (546) hide show

helm/clients/image_generation/mindalle/models/__init__.py ADDED Viewed

@@ -0,0 +1,216 @@
+# ------------------------------------------------------------------------------------
+# minDALL-E
+# Copyright (c) 2021 Kakao Brain Corp. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------
+import os
+import torch
+import torch.nn as nn
+from typing import Optional, Tuple
+from torch.cuda.amp import autocast
+from torch.optim.lr_scheduler import CosineAnnealingLR
+from torch.nn import functional as F
+from .stage1.vqgan import VQGAN
+from .stage2.transformer import Transformer1d, iGPT
+from .. import utils
+from ..utils.config import get_base_config
+from ..utils.sampling import sampling, sampling_igpt
+from .tokenizer import build_tokenizer
+from helm.common.optional_dependencies import handle_module_not_found_error
+try:
+    import pytorch_lightning as pl
+    from omegaconf import OmegaConf
+except ModuleNotFoundError as e:
+    handle_module_not_found_error(e, ["heim"])
+# Maps a pretrained-model alias to the URL of its archive. Dalle.from_pretrained
+# substitutes the URL whenever the alias is passed as ``path``.
+_MODELS = {
+    "minDALL-E/1.3B": "https://arena.kakaocdn.net/brainrepo/models/minDALL-E/57b008f02ceaa02b779c8b7463143315/1.3B.tar.gz"
+}
+class Dalle(nn.Module):
+    def __init__(self, config: OmegaConf) -> None:
+        super().__init__()
+        self.tokenizer = None
+        self.stage1 = VQGAN(
+            n_embed=config.stage1.n_embed, embed_dim=config.stage1.embed_dim, hparams=config.stage1.hparams
+        )
+        self.stage2 = Transformer1d(
+            vocab_size_txt=config.stage2.vocab_size_txt,
+            vocab_size_img=config.stage2.vocab_size_img,
+            hparams=config.stage2.hparams,
+        )
+        self.config_stage1 = config.stage1
+        self.config_stage2 = config.stage2
+        self.config_dataset = config.dataset
+    @classmethod
+    def from_pretrained(cls, path: str) -> nn.Module:
+        path = _MODELS[path] if path in _MODELS else path
+        path = utils.realpath_url_or_path(path, root=os.path.expanduser(".helm_cache/minDALL-E"))
+        config_base = get_base_config()
+        config_new = OmegaConf.load(os.path.join(path, "config.yaml"))
+        config_update = OmegaConf.merge(config_base, config_new)
+        model = cls(config_update)
+        model.tokenizer = build_tokenizer(
+            os.path.join(path, "tokenizer"),
+            context_length=model.config_dataset.context_length,
+            lowercase=True,
+            dropout=None,
+        )
+        model.stage1.from_ckpt(os.path.join(path, "stage1_last.ckpt"))
+        model.stage2.from_ckpt(os.path.join(path, "stage2_last.ckpt"))
+        return model
+    @torch.no_grad()
+    def sampling(
+        self,
+        prompt: str,
+        top_k: int = 256,
+        top_p: Optional[float] = None,
+        softmax_temperature: float = 1.0,
+        num_candidates: int = 96,
+        device: str = "cuda:0",
+        use_fp16: bool = True,
+    ) -> torch.FloatTensor:
+        self.stage1.eval()
+        self.stage2.eval()
+        tokens = self.tokenizer.encode(prompt)
+        tokens = torch.LongTensor(tokens.ids)
+        tokens = torch.repeat_interleave(tokens.unsqueeze(0), num_candidates, dim=0)
+        # Check if the encoding works as intended
+        # print(self.tokenizer.decode_batch(tokens.tolist(), skip_special_tokens=True)[0])
+        tokens = tokens.to(device)
+        codes = sampling(
+            self.stage2, tokens, top_k=top_k, top_p=top_p, softmax_temperature=softmax_temperature, use_fp16=use_fp16
+        )
+        codes = codes.view(num_candidates, 16, 16)  # [B, 16, 16]
+        pixels = torch.clamp(self.stage1.decode_code(codes) * 0.5 + 0.5, 0, 1)  # [B, 256, 256]
+        return pixels
+class ImageGPT(pl.LightningModule):
+    def __init__(self, config: OmegaConf) -> None:
+        super().__init__()
+        self.stage1 = VQGAN(
+            n_embed=config.stage1.n_embed, embed_dim=config.stage1.embed_dim, hparams=config.stage1.hparams
+        )
+        self.stage2 = iGPT(
+            vocab_size_img=config.stage2.vocab_size_img,
+            use_cls_cond=config.stage2.use_cls_cond,
+            hparams=config.stage2.hparams,
+        )
+        self.config = config
+        self.use_cls_cond = config.stage2.use_cls_cond
+        # make the parameters in stage 1 not trainable
+        self.stage1.eval()
+        for p in self.stage1.parameters():
+            p.requires_grad = False
+    @classmethod
+    def from_pretrained(cls, path_upstream: str, path_downstream: str) -> Tuple[nn.Module, OmegaConf]:
+        config_base = get_base_config(use_default=False)
+        config_down = OmegaConf.load(path_downstream)
+        config_down = OmegaConf.merge(config_base, config_down)
+        model = cls(config_down)
+        model.stage1.from_ckpt(os.path.join(path_upstream, "stage1_last.ckpt"), strict=True)
+        model.stage2.from_ckpt(os.path.join(path_upstream, "stage2_last.ckpt"), strict=False)
+        return model, config_down
+    def sample(
+        self,
+        cls_idx: Optional[int] = None,
+        top_k: int = 256,
+        top_p: Optional[float] = None,
+        softmax_temperature: float = 1.0,
+        num_candidates: int = 16,
+        device: str = "cuda:0",
+        use_fp16: bool = True,
+        is_tqdm: bool = True,
+    ) -> torch.FloatTensor:
+        self.stage1.eval()
+        self.stage2.eval()
+        if cls_idx is None:
+            sos = self.stage2.sos.repeat(num_candidates, 1, 1)
+        else:
+            sos = torch.LongTensor([cls_idx]).to(device=device)
+            sos = sos.repeat(num_candidates)
+            sos = self.stage2.sos(sos).unsqueeze(1)
+        codes = sampling_igpt(
+            self.stage2,
+            sos=sos,
+            top_k=top_k,
+            top_p=top_p,
+            softmax_temperature=softmax_temperature,
+            use_fp16=use_fp16,
+            is_tqdm=is_tqdm,
+        )
+        codes = codes.view(num_candidates, 16, 16)  # [B, 16, 16]
+        pixels = torch.clamp(self.stage1.decode_code(codes) * 0.5 + 0.5, 0, 1)  # [B, 256, 256]
+        return pixels
+    def forward(self, images: torch.FloatTensor, labels: Optional[torch.LongTensor] = None) -> torch.FloatTensor:
+        B, C, H, W = images.shape
+        with torch.no_grad():
+            with autocast(enabled=False):
+                codes = self.stage1.get_codes(images).detach()
+        logits = self.stage2(codes, labels)
+        return logits, codes
+    def training_step(self, batch, batch_idx):
+        images, labels = batch
+        logits, codes = self(images, labels=labels if self.use_cls_cond else None)
+        loss = F.cross_entropy(logits.view(-1, logits.shape[-1]), codes.view(-1))
+        self.log("train/loss", loss, on_step=True, on_epoch=True, prog_bar=False, logger=True)
+        return loss
+    def validation_step(self, batch, batch_idx):
+        images, labels = batch
+        logits, codes = self(images, labels=labels if self.use_cls_cond else None)
+        loss = F.cross_entropy(logits.view(-1, logits.shape[-1]), codes.view(-1))
+        self.log("val/loss", loss, on_step=False, on_epoch=True, prog_bar=False, logger=True)
+        return loss
+    def configure_optimizers(self):
+        assert self.config.optimizer.opt_type == "adamW"
+        assert self.config.optimizer.sched_type == "cosine"
+        opt = torch.optim.AdamW(
+            self.parameters(),
+            lr=self.config.optimizer.base_lr,
+            betas=self.config.optimizer.betas,
+            weight_decay=self.config.optimizer.weight_decay,
+        )
+        sched = CosineAnnealingLR(opt, T_max=self.config.optimizer.max_steps, eta_min=self.config.optimizer.min_lr)
+        sched = {"scheduler": sched, "name": "cosine"}
+        return [opt], [sched]
+    def optimizer_step(
+        self,
+        epoch,
+        batch_idx,
+        optimizer,
+        optimizer_idx,
+        optimizer_closure,
+        on_tpu=False,
+        using_native_amp=False,
+        using_lbfgs=False,
+    ):
+        optimizer.step(closure=optimizer_closure)
+        self.lr_schedulers().step()
+        self.log("lr", self.lr_schedulers().get_last_lr()[0], on_step=True, on_epoch=False, prog_bar=True, logger=True)
+    def on_epoch_start(self):
+        self.stage1.eval()

helm/clients/image_generation/mindalle/models/stage1/__init__.py ADDED Viewed

File without changes

helm/clients/image_generation/mindalle/models/stage1/layers.py ADDED Viewed

@@ -0,0 +1,312 @@
+# ------------------------------------------------------------------------------------
+# Modified from VQGAN (https://github.com/CompVis/taming-transformers)
+# Copyright (c) 2020 Patrick Esser and Robin Rombach and Björn Ommer. All Rights Reserved.
+# ------------------------------------------------------------------------------------
+import torch
+import torch.nn as nn
+from typing import Tuple, Optional
+def nonlinearity(x):
+    # swish
+    return x * torch.sigmoid(x)
+def Normalize(in_channels):
+    return torch.nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True)
+class Upsample(nn.Module):
+    def __init__(self, in_channels, with_conv):
+        super().__init__()
+        self.with_conv = with_conv
+        if self.with_conv:
+            self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
+    def forward(self, x):
+        x = torch.nn.functional.interpolate(x, scale_factor=2.0, mode="nearest")
+        if self.with_conv:
+            x = self.conv(x)
+        return x
+class Downsample(nn.Module):
+    def __init__(self, in_channels, with_conv):
+        super().__init__()
+        self.with_conv = with_conv
+        if self.with_conv:
+            # no asymmetric padding in torch conv, must do it ourselves
+            self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2, padding=0)
+    def forward(self, x):
+        if self.with_conv:
+            pad = (0, 1, 0, 1)
+            x = torch.nn.functional.pad(x, pad, mode="constant", value=0)
+            x = self.conv(x)
+        else:
+            x = torch.nn.functional.avg_pool2d(x, kernel_size=2, stride=2)
+        return x
+class ResnetBlock(nn.Module):
+    def __init__(self, *, in_channels, out_channels=None, conv_shortcut=False, dropout, temb_channels=512):
+        assert temb_channels == 0
+        super().__init__()
+        self.in_channels = in_channels
+        out_channels = in_channels if out_channels is None else out_channels
+        self.out_channels = out_channels
+        self.use_conv_shortcut = conv_shortcut
+        self.norm1 = Normalize(in_channels)
+        self.conv1 = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
+        self.norm2 = Normalize(out_channels)
+        self.dropout = torch.nn.Dropout(dropout)
+        self.conv2 = torch.nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
+        if self.in_channels != self.out_channels:
+            if self.use_conv_shortcut:
+                self.conv_shortcut = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
+            else:
+                self.nin_shortcut = torch.nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
+    def forward(self, x, temb=None):
+        assert temb is None
+        h = x
+        h = self.norm1(h)
+        h = nonlinearity(h)
+        h = self.conv1(h)
+        h = self.norm2(h)
+        h = nonlinearity(h)
+        h = self.dropout(h)
+        h = self.conv2(h)
+        if self.in_channels != self.out_channels:
+            if self.use_conv_shortcut:
+                x = self.conv_shortcut(x)
+            else:
+                x = self.nin_shortcut(x)
+        return x + h
+class AttnBlock(nn.Module):
+    def __init__(self, in_channels):
+        super().__init__()
+        self.in_channels = in_channels
+        self.norm = Normalize(in_channels)
+        self.q = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0)
+        self.k = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0)
+        self.v = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0)
+        self.proj_out = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0)
+    def forward(self, x):
+        h_ = x
+        h_ = self.norm(h_)
+        q = self.q(h_)
+        k = self.k(h_)
+        v = self.v(h_)
+        # compute attention
+        b, c, h, w = q.shape
+        q = q.reshape(b, c, h * w)
+        q = q.permute(0, 2, 1)  # b,hw,c
+        k = k.reshape(b, c, h * w)  # b,c,hw
+        w_ = torch.bmm(q, k)  # b,hw,hw    w[b,i,j]=sum_c q[b,i,c]k[b,c,j]
+        w_ = w_ * (int(c) ** (-0.5))
+        w_ = torch.nn.functional.softmax(w_, dim=2)
+        # attend to values
+        v = v.reshape(b, c, h * w)
+        w_ = w_.permute(0, 2, 1)  # b,hw,hw (first hw of k, second of q)
+        h_ = torch.bmm(v, w_)  # b, c,hw (hw of q) h_[b,c,j] = sum_i v[b,c,i] w_[b,i,j]
+        h_ = h_.reshape(b, c, h, w)
+        h_ = self.proj_out(h_)
+        return x + h_
+class Encoder(nn.Module):
+    """Convolutional encoder: conv_in, one ResnetBlock/AttnBlock stack per resolution
+    level with a Downsample between levels, a middle block, then norm + swish + conv_out.
+    """
+    def __init__(
+        self,
+        *,  # forced to use named arguments
+        ch: int,
+        out_ch: int,  # accepted but not read by the encoder
+        ch_mult: Tuple[int, ...] = (1, 2, 4, 8),
+        num_res_blocks: int,
+        attn_resolutions: Tuple[int, ...],
+        pdrop: float = 0.0,
+        resamp_with_conv: bool = True,
+        in_channels: int,
+        resolution: int,
+        z_channels: int,
+        double_z: Optional[bool] = None
+    ) -> None:
+        super().__init__()
+        self.ch = ch
+        self.temb_ch = 0
+        self.num_resolutions = len(ch_mult)
+        self.num_res_blocks = num_res_blocks
+        self.resolution = resolution
+        self.in_channels = in_channels
+        # downsampling
+        self.conv_in = torch.nn.Conv2d(in_channels, self.ch, kernel_size=3, stride=1, padding=1)
+        curr_res = resolution
+        # Level i maps ch * in_ch_mult[i] channels to ch * ch_mult[i] channels.
+        in_ch_mult = (1,) + tuple(ch_mult)
+        self.down = nn.ModuleList()
+        for i_level in range(self.num_resolutions):
+            block = nn.ModuleList()
+            attn = nn.ModuleList()
+            block_in = ch * in_ch_mult[i_level]
+            block_out = ch * ch_mult[i_level]
+            for i_block in range(self.num_res_blocks):
+                block.append(
+                    ResnetBlock(in_channels=block_in, out_channels=block_out, temb_channels=self.temb_ch, dropout=pdrop)
+                )
+                block_in = block_out
+                # One attention block per res block, only at the listed resolutions.
+                if curr_res in attn_resolutions:
+                    attn.append(AttnBlock(block_in))
+            down = nn.Module()
+            down.block = block
+            down.attn = attn
+            # Every level except the last halves the resolution.
+            if i_level != self.num_resolutions - 1:
+                down.downsample = Downsample(block_in, resamp_with_conv)
+                curr_res = curr_res // 2
+            self.down.append(down)
+        # middle
+        self.mid = nn.Module()
+        self.mid.block_1 = ResnetBlock(
+            in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=pdrop
+        )
+        self.mid.attn_1 = AttnBlock(block_in)
+        self.mid.block_2 = ResnetBlock(
+            in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=pdrop
+        )
+        # end
+        self.norm_out = Normalize(block_in)
+        # The output has 2 * z_channels channels when double_z is truthy, else z_channels.
+        self.conv_out = torch.nn.Conv2d(
+            block_in, 2 * z_channels if double_z else z_channels, kernel_size=3, stride=1, padding=1
+        )
+    def forward(self, x):
+        """Encode ``x``, whose dims 2 and 3 must both equal ``self.resolution``."""
+        assert x.shape[2] == x.shape[3] == self.resolution, "{}, {}".format(x.shape, self.resolution)
+        # downsampling
+        h = self.conv_in(x)
+        for i_level in range(self.num_resolutions):
+            for i_block in range(self.num_res_blocks):
+                h = self.down[i_level].block[i_block](h)
+                if len(self.down[i_level].attn) > 0:
+                    h = self.down[i_level].attn[i_block](h)
+            if i_level != self.num_resolutions - 1:
+                h = self.down[i_level].downsample(h)
+        # middle
+        h = self.mid.block_1(h)
+        h = self.mid.attn_1(h)
+        h = self.mid.block_2(h)
+        # end
+        h = self.norm_out(h)
+        h = nonlinearity(h)
+        h = self.conv_out(h)
+        return h
+class Decoder(nn.Module):
+    """Convolutional decoder: conv_in, a middle block, one ResnetBlock/AttnBlock stack
+    per resolution level with an Upsample between levels, then norm + swish + conv_out.
+    """
+    def __init__(
+        self,
+        *,  # forced to use named arguments
+        ch: int,
+        out_ch: int,
+        ch_mult: Tuple[int, ...] = (1, 2, 4, 8),
+        num_res_blocks: int,
+        attn_resolutions: Tuple[int, ...],
+        pdrop: float = 0.0,
+        resamp_with_conv: bool = True,
+        in_channels: int,
+        resolution: int,
+        z_channels: int,
+        double_z: bool  # accepted but not read by the decoder
+    ) -> None:
+        super().__init__()
+        self.ch = ch
+        self.temb_ch = 0
+        self.num_resolutions = len(ch_mult)
+        self.num_res_blocks = num_res_blocks
+        self.resolution = resolution
+        self.in_channels = in_channels
+        # compute in_ch_mult, block_in and curr_res at lowest res
+        block_in = ch * ch_mult[self.num_resolutions - 1]
+        curr_res = resolution // 2 ** (self.num_resolutions - 1)
+        # Expected input shape; forward() checks every dim except the batch dim.
+        self.z_shape = (1, z_channels, curr_res, curr_res)
+        # z to block_in
+        self.conv_in = torch.nn.Conv2d(z_channels, block_in, kernel_size=3, stride=1, padding=1)
+        # middle
+        self.mid = nn.Module()
+        self.mid.block_1 = ResnetBlock(
+            in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=pdrop
+        )
+        self.mid.attn_1 = AttnBlock(block_in)
+        self.mid.block_2 = ResnetBlock(
+            in_channels=block_in, out_channels=block_in, temb_channels=self.temb_ch, dropout=pdrop
+        )
+        # upsampling
+        self.up = nn.ModuleList()
+        # Levels are built from the lowest resolution upwards.
+        for i_level in reversed(range(self.num_resolutions)):
+            block = nn.ModuleList()
+            attn = nn.ModuleList()
+            block_out = ch * ch_mult[i_level]
+            # Each level holds num_res_blocks + 1 res blocks.
+            for i_block in range(self.num_res_blocks + 1):
+                block.append(
+                    ResnetBlock(in_channels=block_in, out_channels=block_out, temb_channels=self.temb_ch, dropout=pdrop)
+                )
+                block_in = block_out
+                # One attention block per res block, only at the listed resolutions.
+                if curr_res in attn_resolutions:
+                    attn.append(AttnBlock(block_in))
+            up = nn.Module()
+            up.block = block
+            up.attn = attn
+            # Every level except level 0 doubles the resolution.
+            if i_level != 0:
+                up.upsample = Upsample(block_in, resamp_with_conv)
+                curr_res = curr_res * 2
+            self.up.insert(0, up)  # prepend to get consistent order
+        # end
+        self.norm_out = Normalize(block_in)
+        self.conv_out = torch.nn.Conv2d(block_in, out_ch, kernel_size=3, stride=1, padding=1)
+    def forward(self, z):
+        """Decode ``z``, whose non-batch dims must match ``self.z_shape``; also records ``z.shape``."""
+        assert z.shape[1:] == self.z_shape[1:]
+        self.last_z_shape = z.shape
+        # z to block_in
+        h = self.conv_in(z)
+        # middle
+        h = self.mid.block_1(h)
+        h = self.mid.attn_1(h)
+        h = self.mid.block_2(h)
+        # upsampling
+        for i_level in reversed(range(self.num_resolutions)):
+            for i_block in range(self.num_res_blocks + 1):
+                h = self.up[i_level].block[i_block](h)
+                if len(self.up[i_level].attn) > 0:
+                    h = self.up[i_level].attn[i_block](h)
+            if i_level != 0:
+                h = self.up[i_level].upsample(h)
+        h = self.norm_out(h)
+        h = nonlinearity(h)
+        h = self.conv_out(h)
+        return h

helm/clients/image_generation/mindalle/models/stage1/vqgan.py ADDED Viewed

@@ -0,0 +1,103 @@
+# ------------------------------------------------------------------------------------
+# Modified from VQGAN (https://github.com/CompVis/taming-transformers)
+# Copyright (c) 2020 Patrick Esser and Robin Rombach and Björn Ommer. All Rights Reserved.
+# ------------------------------------------------------------------------------------
+import torch
+import torch.nn as nn
+from typing import List, Tuple, Optional
+from .layers import Encoder, Decoder
+from helm.common.optional_dependencies import handle_module_not_found_error
+class VectorQuantizer(nn.Module):
+    """
+    Simplified VectorQuantizer in the original VQGAN repository
+    by removing unncessary modules for sampling
+    """
+    def __init__(self, dim: int, n_embed: int, beta: float) -> None:
+        super().__init__()
+        self.n_embed = n_embed
+        self.dim = dim
+        self.beta = beta
+        self.embedding = nn.Embedding(self.n_embed, self.dim)
+        self.embedding.weight.data.uniform_(-1.0 / self.n_embed, 1.0 / self.n_embed)
+    def forward(self, z: torch.FloatTensor) -> Tuple[torch.FloatTensor, torch.LongTensor]:
+        try:
+            from einops import rearrange
+        except ModuleNotFoundError as e:
+            handle_module_not_found_error(e, ["heim"])
+        z = rearrange(z, "b c h w -> b h w c").contiguous()  # [B,C,H,W] -> [B,H,W,C]
+        z_flattened = z.view(-1, self.dim)
+        d = (
+            torch.sum(z_flattened**2, dim=1, keepdim=True)
+            + torch.sum(self.embedding.weight**2, dim=1)
+            - 2 * torch.einsum("bd,dn->bn", z_flattened, rearrange(self.embedding.weight, "n d -> d n"))
+        )
+        min_encoding_indices = torch.argmin(d, dim=1)
+        z_q = self.embedding(min_encoding_indices).view(z.shape)
+        return z_q, min_encoding_indices
+    def get_codebook_entry(self, indices: torch.LongTensor, shape: Optional[List[int]] = None) -> torch.FloatTensor:
+        z_q = self.embedding(indices)
+        if shape is not None:
+            z_q = z_q.view(shape)
+            z_q = z_q.permute(0, 3, 1, 2).contiguous()
+        return z_q
+class VQGAN(nn.Module):
+    def __init__(self, n_embed: int, embed_dim: int, hparams) -> None:
+        super().__init__()
+        self.encoder = Encoder(**hparams)
+        self.decoder = Decoder(**hparams)
+        self.quantize = VectorQuantizer(dim=embed_dim, n_embed=n_embed, beta=0.25)
+        self.quant_conv = torch.nn.Conv2d(hparams.z_channels, embed_dim, 1)
+        self.post_quant_conv = torch.nn.Conv2d(embed_dim, hparams.z_channels, 1)
+        self.latent_dim = hparams.attn_resolutions[0]
+    def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:
+        quant = self.encode(x)
+        dec = self.decode(quant)
+        return dec
+    def encode(self, x: torch.FloatTensor) -> torch.FloatTensor:
+        try:
+            from einops import rearrange
+        except ModuleNotFoundError as e:
+            handle_module_not_found_error(e, ["heim"])
+        h = self.encoder(x)
+        h = self.quant_conv(h)
+        quant = self.quantize(h)[0]
+        quant = rearrange(quant, "b h w c -> b c h w").contiguous()
+        return quant
+    def decode(self, quant: torch.FloatTensor) -> torch.FloatTensor:
+        quant = self.post_quant_conv(quant)
+        dec = self.decoder(quant)
+        return dec
+    def decode_code(self, code: torch.LongTensor) -> torch.FloatTensor:
+        quant = self.quantize.get_codebook_entry(code)
+        quant = quant.permute(0, 3, 1, 2)
+        dec = self.decode(quant)
+        return dec
+    def get_codes(self, x: torch.FloatTensor) -> torch.LongTensor:
+        h = self.encoder(x)
+        h = self.quant_conv(h)
+        codes = self.quantize(h)[1].view(x.shape[0], self.latent_dim**2)
+        return codes
+    def from_ckpt(self, path: str, strict: bool = True) -> None:
+        ckpt = torch.load(path, map_location="cpu")["state_dict"]
+        self.load_state_dict(ckpt, strict=strict)
+        print(f"{path} successfully restored..")

helm/clients/image_generation/mindalle/models/stage2/__init__.py ADDED Viewed

File without changes

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl