flaxdiff 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flaxdiff/data/dataloaders.py +23 -19
- flaxdiff/data/dataset_map.py +2 -1
- flaxdiff/data/sources/base.py +12 -0
- flaxdiff/data/sources/images.py +75 -3
- flaxdiff/data/sources/videos.py +5 -0
- flaxdiff/inference/utils.py +7 -1
- flaxdiff/models/common.py +1 -70
- flaxdiff/models/hilbert.py +617 -0
- flaxdiff/models/simple_dit.py +275 -0
- flaxdiff/models/simple_mmdit.py +730 -0
- flaxdiff/models/simple_vit.py +405 -145
- flaxdiff/models/vit_common.py +262 -0
- flaxdiff/trainer/general_diffusion_trainer.py +30 -10
- flaxdiff/trainer/simple_trainer.py +113 -19
- {flaxdiff-0.2.7.dist-info → flaxdiff-0.2.9.dist-info}/METADATA +1 -1
- {flaxdiff-0.2.7.dist-info → flaxdiff-0.2.9.dist-info}/RECORD +18 -15
- {flaxdiff-0.2.7.dist-info → flaxdiff-0.2.9.dist-info}/WHEEL +1 -1
- flaxdiff/models/better_uvit.py +0 -380
- {flaxdiff-0.2.7.dist-info → flaxdiff-0.2.9.dist-info}/top_level.txt +0 -0
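
The new `flaxdiff/models/hilbert.py` module backs the `use_hilbert` option added to `simple_vit.py` below: patch tokens are gathered into Hilbert-curve order before the transformer blocks (`x_patches[:, idx, :]`) and scattered back at the output via `inverse_permutation` / `hilbert_unpatchify`. The snippet below is only a generic sketch of that permute/inverse-permute pattern, not the package's `hilbert.py` implementation; the index values and shapes are invented for illustration.

```python
# Generic permute / inverse-permute pattern (illustrative only, not flaxdiff's hilbert.py).
import jax.numpy as jnp

def inverse_permutation(idx, total_size):
    # inv[idx[k]] = k, so gathering with inv undoes gathering with idx.
    return jnp.zeros((total_size,), dtype=jnp.int32).at[idx].set(
        jnp.arange(total_size, dtype=jnp.int32))

idx = jnp.array([2, 0, 3, 1])                 # stand-in for hilbert_indices(H_P, W_P)
tokens = jnp.arange(4 * 8).reshape(1, 4, 8)   # (B, num_patches, features)
reordered = tokens[:, idx, :]                 # curve order seen by the transformer
restored = reordered[:, inverse_permutation(idx, 4), :]
assert jnp.array_equal(restored, tokens)
```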
flaxdiff/models/simple_vit.py
CHANGED
@@ -10,177 +10,437 @@ from flaxdiff.models.simple_unet import FourierEmbedding, TimeProjection, ConvLayer
 import einops
 from flax.typing import Dtype, PrecisionLike
 from functools import partial
-from .
+from .hilbert import hilbert_indices, inverse_permutation, hilbert_patchify, hilbert_unpatchify
+from .vit_common import _rotate_half, unpatchify, PatchEmbedding, apply_rotary_embedding, RotaryEmbedding, RoPEAttention, AdaLNZero, AdaLNParams
+from .simple_dit import DiTBlock
 
-def unpatchify(x, channels=3):
-    patch_size = int((x.shape[2] // channels) ** 0.5)
-    h = w = int(x.shape[1] ** .5)
-    assert h * w == x.shape[1] and patch_size ** 2 * channels == x.shape[2], f"Invalid shape: {x.shape}, should be {h*w}, {patch_size**2*channels}"
-    x = einops.rearrange(x, 'B (h w) (p1 p2 C) -> B (h p1) (w p2) C', h=h, p1=patch_size, p2=patch_size)
-    return x
-
-class PatchEmbedding(nn.Module):
-    patch_size: int
-    embedding_dim: int
-    dtype: Any = jnp.float32
-    precision: Any = jax.lax.Precision.HIGH
-
-    @nn.compact
-    def __call__(self, x):
-        batch, height, width, channels = x.shape
-        assert height % self.patch_size == 0 and width % self.patch_size == 0, "Image dimensions must be divisible by patch size"
-
-        x = nn.Conv(features=self.embedding_dim,
-                    kernel_size=(self.patch_size, self.patch_size),
-                    strides=(self.patch_size, self.patch_size),
-                    dtype=self.dtype,
-                    precision=self.precision)(x)
-        x = jnp.reshape(x, (batch, -1, self.embedding_dim))
-        return x
-
-class PositionalEncoding(nn.Module):
-    max_len: int
-    embedding_dim: int
-
-    @nn.compact
-    def __call__(self, x):
-        pe = self.param('pos_encoding',
-                        jax.nn.initializers.zeros,
-                        (1, self.max_len, self.embedding_dim))
-        return x + pe[:, :x.shape[1], :]
 
 class UViT(nn.Module):
-    output_channels:int=3
+    output_channels: int = 3
     patch_size: int = 16
-    emb_features:int=768
-    num_layers: int = 12
+    emb_features: int = 768
+    num_layers: int = 12  # Should be even for U-Net structure
     num_heads: int = 12
-    dropout_rate: float = 0.1
-    use_projection: bool = False
-    use_flash_attention: bool = False
+    dropout_rate: float = 0.1  # Dropout is often 0 in diffusion models
+    use_projection: bool = False  # In TransformerBlock MLP
+    use_flash_attention: bool = False  # Passed to TransformerBlock
+    # Passed to TransformerBlock (likely False for UViT)
     use_self_and_cross: bool = False
-    force_fp32_for_softmax: bool = True
-
-
-
+    force_fp32_for_softmax: bool = True  # Passed to TransformerBlock
+    # Used in final convs if add_residualblock_output
+    activation: Callable = jax.nn.swish
+    norm_groups: int = 8
+    dtype: Optional[Dtype] = None  # e.g., jnp.float32 or jnp.bfloat16
     precision: PrecisionLike = None
     add_residualblock_output: bool = False
-    norm_inputs: bool = False
-    explicitly_add_residual: bool = True
-    norm_epsilon: float = 1e-
-    use_hilbert: bool = False
+    norm_inputs: bool = False  # Passed to TransformerBlock
+    explicitly_add_residual: bool = True  # Passed to TransformerBlock
+    norm_epsilon: float = 1e-5  # Adjusted default
+    use_hilbert: bool = False  # Toggle Hilbert patch reorder
+    use_remat: bool = False  # Add flag to use remat
 
     def setup(self):
+        assert self.num_layers % 2 == 0, "num_layers must be even for U-Net structure"
+        half_layers = self.num_layers // 2
+
+        # --- Norm Layer ---
         if self.norm_groups > 0:
-
+            print(f"Warning: norm_groups > 0 not fully supported with standard LayerNorm fallback in UViT setup. Using LayerNorm.")
+            self.norm_factory = partial(
+                nn.LayerNorm, epsilon=self.norm_epsilon, dtype=self.dtype)
         else:
-            self.
-
+            self.norm_factory = partial(
+                nn.LayerNorm, epsilon=self.norm_epsilon, dtype=self.dtype)
+
+        # --- Input Path ---
+        self.patch_embed = PatchEmbedding(
+            patch_size=self.patch_size,
+            embedding_dim=self.emb_features,
+            dtype=self.dtype,
+            precision=self.precision,
+            name="patch_embed"
+        )
+        if self.use_hilbert:
+            self.hilbert_proj = nn.Dense(
+                features=self.emb_features,
+                dtype=self.dtype,
+                precision=self.precision,
+                name="hilbert_projection"
+            )
+
+        max_patches = (512 // self.patch_size)**2
+        self.pos_encoding = self.param('pos_encoding',
+                                       jax.nn.initializers.normal(stddev=0.02),
+                                       (1, max_patches, self.emb_features))
+
+        # --- Conditioning ---
+        self.time_embed = nn.Sequential([
+            FourierEmbedding(features=self.emb_features),
+            TimeProjection(features=self.emb_features)
+        ], name="time_embed")
+
+        self.text_proj = nn.DenseGeneral(
+            features=self.emb_features,
+            dtype=self.dtype,
+            precision=self.precision,
+            name="text_proj"
+        )
+
+        # --- Transformer Blocks ---
+        BlockClass = TransformerBlock
+
+        self.down_blocks = [
+            BlockClass(
+                heads=self.num_heads,
+                dim_head=self.emb_features // self.num_heads,
+                dtype=self.dtype, precision=self.precision, use_projection=self.use_projection,
+                use_flash_attention=self.use_flash_attention, use_self_and_cross=self.use_self_and_cross,
+                force_fp32_for_softmax=self.force_fp32_for_softmax,
+                only_pure_attention=False, norm_inputs=self.norm_inputs,
+                explicitly_add_residual=self.explicitly_add_residual,
+                norm_epsilon=self.norm_epsilon,
+                name=f"down_block_{i}"
+            ) for i in range(half_layers)
+        ]
+
+        self.mid_block = BlockClass(
+            heads=self.num_heads,
+            dim_head=self.emb_features // self.num_heads,
+            dtype=self.dtype, precision=self.precision, use_projection=self.use_projection,
+            use_flash_attention=self.use_flash_attention, use_self_and_cross=self.use_self_and_cross,
+            force_fp32_for_softmax=self.force_fp32_for_softmax,
+            only_pure_attention=False, norm_inputs=self.norm_inputs,
+            explicitly_add_residual=self.explicitly_add_residual,
+            norm_epsilon=self.norm_epsilon,
+            name="mid_block"
+        )
+
+        self.up_dense = [
+            nn.DenseGeneral(
+                features=self.emb_features,
+                dtype=self.dtype,
+                precision=self.precision,
+                name=f"up_dense_{i}"
+            ) for i in range(half_layers)
+        ]
+        self.up_blocks = [
+            BlockClass(
+                heads=self.num_heads,
+                dim_head=self.emb_features // self.num_heads,
+                dtype=self.dtype, precision=self.precision, use_projection=self.use_projection,
+                use_flash_attention=self.use_flash_attention, use_self_and_cross=self.use_self_and_cross,
+                force_fp32_for_softmax=self.force_fp32_for_softmax,
+                only_pure_attention=False, norm_inputs=self.norm_inputs,
+                explicitly_add_residual=self.explicitly_add_residual,
+                norm_epsilon=self.norm_epsilon,
+                name=f"up_block_{i}"
+            ) for i in range(half_layers)
+        ]
+
+        # --- Output Path ---
+        self.final_norm = self.norm_factory(name="final_norm")
+
+        patch_dim = self.patch_size ** 2 * self.output_channels
+        self.final_proj = nn.Dense(
+            features=patch_dim,
+            dtype=self.dtype,
+            precision=self.precision,
+            kernel_init=nn.initializers.zeros,
+            name="final_proj"
+        )
+
+        if self.add_residualblock_output:
+            self.final_conv1 = ConvLayer(
+                "conv",
+                features=64, kernel_size=(3, 3), strides=(1, 1),
+                dtype=self.dtype, precision=self.precision, name="final_conv1"
+            )
+            self.final_norm_conv = self.norm_factory(
+                name="final_norm_conv")
+            self.final_conv2 = ConvLayer(
+                "conv",
+                features=self.output_channels, kernel_size=(3, 3), strides=(1, 1),
+                dtype=jnp.float32,
+                precision=self.precision, name="final_conv2"
+            )
+        else:
+            self.final_conv_direct = ConvLayer(
+                "conv",
+                features=self.output_channels, kernel_size=(1, 1), strides=(1, 1),
+                dtype=jnp.float32,
+                precision=self.precision, name="final_conv_direct"
+            )
+
     @nn.compact
     def __call__(self, x, temb, textcontext=None):
-        # Time embedding
-        temb = FourierEmbedding(features=self.emb_features)(temb)
-        temb = TimeProjection(features=self.emb_features)(temb)
-
         original_img = x
         B, H, W, C = original_img.shape
         H_P = H // self.patch_size
         W_P = W // self.patch_size
+        num_patches = H_P * W_P
+        assert H % self.patch_size == 0 and W % self.patch_size == 0, "Image dimensions must be divisible by patch size"
 
-
-        x = PatchEmbedding(patch_size=self.patch_size, embedding_dim=self.emb_features,
-                           dtype=self.dtype, precision=self.precision)(x)
-        num_patches = x.shape[1]
-
-        # Optional Hilbert reorder
+        hilbert_inv_idx = None
         if self.use_hilbert:
+            patches_raw, hilbert_inv_idx_calc = hilbert_patchify(
+                x, self.patch_size)
+            x_patches = self.hilbert_proj(patches_raw)
             idx = hilbert_indices(H_P, W_P)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            hilbert_inv_idx = inverse_permutation(
+                idx, total_size=num_patches)
+            x_patches = x_patches[:, idx, :]
+        else:
+            x_patches = self.patch_embed(x)
+
+        assert num_patches <= self.pos_encoding.shape[
+            1], f"Number of patches {num_patches} exceeds max_len {self.pos_encoding.shape[1]} in positional encoding"
+        x_patches = x_patches + self.pos_encoding[:, :num_patches, :]
+
+        time_token = self.time_embed(temb.astype(
+            jnp.float32))
+        time_token = jnp.expand_dims(time_token.astype(
+            self.dtype), axis=1)
+
+        if textcontext is not None:
+            text_tokens = self.text_proj(
+                textcontext.astype(self.dtype))
+            num_text_tokens = text_tokens.shape[1]
+            x = jnp.concatenate([x_patches, time_token, text_tokens], axis=1)
+        else:
+            num_text_tokens = 0
+            x = jnp.concatenate([x_patches, time_token], axis=1)
+
         skips = []
-        # In blocks
         for i in range(self.num_layers // 2):
-            x =
-                dtype=self.dtype, precision=self.precision, use_projection=self.use_projection,
-                use_flash_attention=self.use_flash_attention, use_self_and_cross=False, force_fp32_for_softmax=self.force_fp32_for_softmax,
-                only_pure_attention=False,
-                norm_inputs=self.norm_inputs,
-                explicitly_add_residual=self.explicitly_add_residual,
-                norm_epsilon=self.norm_epsilon, # Pass epsilon
-            )(x)
+            x = self.down_blocks[i](x)
             skips.append(x)
-
-
-
-            dtype=self.dtype, precision=self.precision, use_projection=self.use_projection,
-            use_flash_attention=self.use_flash_attention, use_self_and_cross=False, force_fp32_for_softmax=self.force_fp32_for_softmax,
-            only_pure_attention=False,
-            norm_inputs=self.norm_inputs,
-            explicitly_add_residual=self.explicitly_add_residual,
-            norm_epsilon=self.norm_epsilon, # Pass epsilon
-        )(x)
-
-        # Out blocks
+
+        x = self.mid_block(x)
+
         for i in range(self.num_layers // 2):
-
-            x =
-
-            x =
-
-
-
-
-
-
-
-
-        x = self.norm()(x) # Uses norm_epsilon defined in setup
-
-        patch_dim = self.patch_size ** 2 * self.output_channels
-        x = nn.Dense(features=patch_dim, dtype=self.dtype, precision=self.precision)(x)
-        # If Hilbert, restore original patch order
+            skip_conn = skips.pop()
+            x = jnp.concatenate([x, skip_conn], axis=-1)
+            x = self.up_dense[i](x)
+            x = self.up_blocks[i](x)
+
+        x = self.final_norm(x)
+
+        x_patches_out = x[:, :num_patches, :]
+
+        x_patches_out = self.final_proj(x_patches_out)
+
         if self.use_hilbert:
-
-
-
-
-
+            assert hilbert_inv_idx is not None, "Hilbert inverse index missing"
+            x_image = hilbert_unpatchify(
+                x_patches_out, hilbert_inv_idx, self.patch_size, H, W, self.output_channels)
+        else:
+            x_image = unpatchify(x_patches_out, channels=self.output_channels)
+
         if self.add_residualblock_output:
-
-
-
-
-
-
-
-
-
+            x_image = jnp.concatenate(
+                [original_img.astype(self.dtype), x_image], axis=-1)
+
+            x_image = self.final_conv1(x_image)
+            x_image = self.final_norm_conv(x_image)
+            x_image = self.activation(x_image)
+            x_image = self.final_conv2(x_image)
+        else:
+            pass
+
+        return x_image
+
+
+# --- Simple U-DiT ---
+
+class SimpleUDiT(nn.Module):
+    """
+    A Simple U-Net Diffusion Transformer (U-DiT) implementation.
+    Combines the U-Net structure with DiT blocks using RoPE and AdaLN-Zero conditioning.
+    Based on SimpleDiT and standard U-Net principles.
+    """
+    output_channels: int = 3
+    patch_size: int = 16
+    emb_features: int = 768
+    num_layers: int = 12  # Should be even for U-Net structure
+    num_heads: int = 12
+    mlp_ratio: int = 4
+    dropout_rate: float = 0.0  # Typically 0 for diffusion
+    dtype: Optional[Dtype] = None  # e.g., jnp.float32 or jnp.bfloat16
+    precision: PrecisionLike = None
+    use_flash_attention: bool = False  # Passed to DiTBlock -> RoPEAttention
+    force_fp32_for_softmax: bool = True  # Passed to DiTBlock -> RoPEAttention
+    norm_epsilon: float = 1e-5
+    learn_sigma: bool = False
+    use_hilbert: bool = False
+    norm_groups: int = 0
+    activation: Callable = jax.nn.swish
+
+    def setup(self):
+        assert self.num_layers % 2 == 0, "num_layers must be even for U-Net structure"
+        half_layers = self.num_layers // 2
+
+        self.patch_embed = PatchEmbedding(
+            patch_size=self.patch_size,
+            embedding_dim=self.emb_features,
+            dtype=self.dtype,
+            precision=self.precision,
+            name="patch_embed"
+        )
+        if self.use_hilbert:
+            self.hilbert_proj = nn.Dense(
+                features=self.emb_features,
                 dtype=self.dtype,
-                precision=self.precision
-
-
-
-
-
-
-
-
-
-
+                precision=self.precision,
+                name="hilbert_projection"
+            )
+
+        self.time_embed = nn.Sequential([
+            FourierEmbedding(features=self.emb_features),
+            TimeProjection(features=self.emb_features * self.mlp_ratio),
+            nn.Dense(features=self.emb_features, dtype=self.dtype, precision=self.precision)
+        ], name="time_embed")
+
+        self.text_proj = nn.Dense(
+            features=self.emb_features,
+            dtype=self.dtype,
+            precision=self.precision,
+            name="text_proj"
+        )
+
+        max_patches = (512 // self.patch_size)**2
+        self.rope = RotaryEmbedding(
+            dim=self.emb_features // self.num_heads,
+            max_seq_len=max_patches,
+            dtype=self.dtype,
+            name="rope_emb"
+        )
+
+        self.down_blocks = [
+            DiTBlock(
+                features=self.emb_features,
+                num_heads=self.num_heads,
+                mlp_ratio=self.mlp_ratio,
+                dropout_rate=self.dropout_rate,
+                dtype=self.dtype,
+                precision=self.precision,
+                use_flash_attention=self.use_flash_attention,
+                force_fp32_for_softmax=self.force_fp32_for_softmax,
+                norm_epsilon=self.norm_epsilon,
+                rope_emb=self.rope,
+                name=f"down_block_{i}"
+            ) for i in range(half_layers)
+        ]
+
+        self.mid_block = DiTBlock(
+            features=self.emb_features,
+            num_heads=self.num_heads,
+            mlp_ratio=self.mlp_ratio,
+            dropout_rate=self.dropout_rate,
             dtype=self.dtype,
-            precision=self.precision
-
-
+            precision=self.precision,
+            use_flash_attention=self.use_flash_attention,
+            force_fp32_for_softmax=self.force_fp32_for_softmax,
+            norm_epsilon=self.norm_epsilon,
+            rope_emb=self.rope,
+            name="mid_block"
+        )
+
+        self.up_dense = [
+            nn.DenseGeneral(
+                features=self.emb_features,
+                dtype=self.dtype,
+                precision=self.precision,
+                name=f"up_dense_{i}"
+            ) for i in range(half_layers)
+        ]
+        self.up_blocks = [
+            DiTBlock(
+                features=self.emb_features,
+                num_heads=self.num_heads,
+                mlp_ratio=self.mlp_ratio,
+                dropout_rate=self.dropout_rate,
+                dtype=self.dtype,
+                precision=self.precision,
+                use_flash_attention=self.use_flash_attention,
+                force_fp32_for_softmax=self.force_fp32_for_softmax,
+                norm_epsilon=self.norm_epsilon,
+                rope_emb=self.rope,
+                name=f"up_block_{i}"
+            ) for i in range(half_layers)
+        ]
+
+        self.final_norm = nn.LayerNorm(
+            epsilon=self.norm_epsilon, dtype=self.dtype, name="final_norm")
+
+        output_dim = self.patch_size * self.patch_size * self.output_channels
+        if self.learn_sigma:
+            output_dim *= 2
+
+        self.final_proj = nn.Dense(
+            features=output_dim,
+            dtype=jnp.float32,
+            precision=self.precision,
+            kernel_init=nn.initializers.zeros,
+            name="final_proj"
+        )
+
+    @nn.compact
+    def __call__(self, x, temb, textcontext=None):
+        B, H, W, C = x.shape
+        H_P = H // self.patch_size
+        W_P = W // self.patch_size
+        num_patches = H_P * W_P
+        assert H % self.patch_size == 0 and W % self.patch_size == 0, "Image dimensions must be divisible by patch size"
+
+        x = x.astype(self.dtype)
+
+        hilbert_inv_idx = None
+        if self.use_hilbert:
+            patches_raw, _ = hilbert_patchify(x, self.patch_size)
+            x_seq = self.hilbert_proj(patches_raw)
+            idx = hilbert_indices(H_P, W_P)
+            hilbert_inv_idx = inverse_permutation(idx, total_size=num_patches)
+        else:
+            x_seq = self.patch_embed(x)
+
+        t_emb = self.time_embed(temb.astype(jnp.float32))
+        t_emb = t_emb.astype(self.dtype)
+
+        cond_emb = t_emb
+        if textcontext is not None:
+            text_emb = self.text_proj(textcontext.astype(self.dtype))
+            if text_emb.ndim == 3:
+                text_emb = jnp.mean(text_emb, axis=1)
+            cond_emb = cond_emb + text_emb
+
+        skips = []
+        for i in range(self.num_layers // 2):
+            x_seq = self.down_blocks[i](x_seq, conditioning=cond_emb, freqs_cis=None)
+            skips.append(x_seq)
+
+        x_seq = self.mid_block(x_seq, conditioning=cond_emb, freqs_cis=None)
+
+        for i in range(self.num_layers // 2):
+            skip_conn = skips.pop()
+            x_seq = jnp.concatenate([x_seq, skip_conn], axis=-1)
+            x_seq = self.up_dense[i](x_seq)
+            x_seq = self.up_blocks[i](x_seq, conditioning=cond_emb, freqs_cis=None)
+
+        x_out = self.final_norm(x_seq)
+        x_out = self.final_proj(x_out)
+
+        if self.use_hilbert:
+            assert hilbert_inv_idx is not None, "Hilbert inverse index missing"
+            if self.learn_sigma:
+                x_mean, x_logvar = jnp.split(x_out, 2, axis=-1)
+                x_image = hilbert_unpatchify(x_mean, hilbert_inv_idx, self.patch_size, H, W, self.output_channels)
+            else:
+                x_image = hilbert_unpatchify(x_out, hilbert_inv_idx, self.patch_size, H, W, self.output_channels)
+        else:
+            if self.learn_sigma:
+                x_mean, x_logvar = jnp.split(x_out, 2, axis=-1)
+                x_image = unpatchify(x_mean, channels=self.output_channels)
+            else:
+                x_image = unpatchify(x_out, channels=self.output_channels)
+
+        return x_image.astype(jnp.float32)
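
For orientation, here is a minimal forward-pass sketch of the new `SimpleUDiT`, based only on the fields and `__call__(x, temb, textcontext)` signature visible in this diff. The import path, input shapes, and the assumption that `init`/`apply` need no extra RNG collections (with `dropout_rate=0.0`) are guesses, not taken from the package's documentation.

```python
# Minimal sketch (assumed import path, shapes, and RNG handling; not from flaxdiff docs).
import jax
import jax.numpy as jnp
from flaxdiff.models.simple_vit import SimpleUDiT

model = SimpleUDiT(
    output_channels=3,
    patch_size=16,
    emb_features=768,
    num_layers=12,      # must be even: 6 down blocks, 1 mid block, 6 up blocks
    num_heads=12,
    use_hilbert=True,   # reorder patch tokens along the Hilbert curve
    dtype=jnp.bfloat16,
)

x = jnp.ones((1, 256, 256, 3), dtype=jnp.float32)         # (B, H, W, C), divisible by patch_size
temb = jnp.ones((1,), dtype=jnp.float32)                   # diffusion timestep(s)
textcontext = jnp.ones((1, 77, 768), dtype=jnp.float32)    # optional text conditioning tokens

params = model.init(jax.random.PRNGKey(0), x, temb, textcontext)
out = model.apply(params, x, temb, textcontext)            # (1, 256, 256, 3), float32
```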