flaxdiff 0.1.37.7__tar.gz → 0.1.38.1__tar.gz
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/PKG-INFO +1 -1
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/data/sources/tfds.py +7 -7
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/models/attention.py +22 -16
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/models/autoencoder/diffusers.py +4 -4
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/models/common.py +8 -18
- flaxdiff-0.1.38.1/flaxdiff/models/general.py +21 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/models/simple_unet.py +1 -12
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/models/simple_vit.py +8 -12
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/trainer/diffusion_trainer.py +2 -2
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/trainer/simple_trainer.py +24 -6
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff.egg-info/PKG-INFO +1 -1
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff.egg-info/SOURCES.txt +1 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/pyproject.toml +1 -1
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/README.md +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/__init__.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/data/__init__.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/data/dataset_map.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/data/datasets.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/data/online_loader.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/data/sources/gcs.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/metrics/inception.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/metrics/psnr.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/metrics/ssim.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/metrics/utils.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/models/__init__.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/models/autoencoder/__init__.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/models/autoencoder/autoencoder.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/models/autoencoder/simple_autoenc.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/models/favor_fastattn.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/predictors/__init__.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/samplers/__init__.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/samplers/common.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/samplers/ddim.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/samplers/ddpm.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/samplers/euler.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/samplers/heun_sampler.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/samplers/multistep_dpm.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/samplers/rk4_sampler.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/schedulers/__init__.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/schedulers/common.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/schedulers/continuous.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/schedulers/cosine.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/schedulers/discrete.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/schedulers/exp.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/schedulers/karras.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/schedulers/linear.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/schedulers/sqrt.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/trainer/__init__.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/trainer/autoencoder_trainer.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/trainer/video_diffusion_trainer.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff/utils.py +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff.egg-info/dependency_links.txt +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff.egg-info/requires.txt +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/flaxdiff.egg-info/top_level.txt +0 -0
- {flaxdiff-0.1.37.7 → flaxdiff-0.1.38.1}/setup.cfg +0 -0
flaxdiff/data/sources/tfds.py

@@ -50,13 +50,12 @@ def tfds_augmenters(image_scale, method):
     else:
         interpolation = cv2.INTER_AREA
 
-
-        augmax.HorizontalFlip(0.5),
-        augmax.RandomContrast((-0.05, 0.05), 1.),
-        augmax.RandomBrightness((-0.2, 0.2), 1.)
-    )
+    from torchvision.transforms import v2
 
-    augments =
+    augments = v2.Compose([
+        v2.RandomHorizontalFlip(p=0.5),
+        v2.ColorJitter(brightness=0.2, contrast=0.05, saturation=0.2)
+    ])
 
     class augmenters(pygrain.MapTransform):
         def __init__(self, *args, **kwargs):
@@ -67,8 +66,9 @@ def tfds_augmenters(image_scale, method):
             image = element['image']
             image = cv2.resize(image, (image_scale, image_scale),
                                interpolation=interpolation)
-
+            image = augments(image)
             # image = (image - 127.5) / 127.5
+
             caption = labelizer(element)
             results = self.tokenize(caption)
             return {
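For context on the augmentation swap above (augmax to torchvision v2), here is a minimal sketch of how the new pipeline behaves on a single HWC uint8 frame. The explicit tensor conversion is an illustrative assumption, not taken from the package:

import numpy as np
import torch
from torchvision.transforms import v2

# Mirror of the new augmentation pipeline introduced in tfds.py
augments = v2.Compose([
    v2.RandomHorizontalFlip(p=0.5),
    v2.ColorJitter(brightness=0.2, contrast=0.05, saturation=0.2),
])

# torchvision v2 transforms operate on CHW tensors (or PIL images),
# so an HWC uint8 numpy frame is converted before and after.
image = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)
out = augments(torch.from_numpy(image).permute(2, 0, 1))
out = out.permute(1, 2, 0).numpy()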
flaxdiff/models/attention.py

@@ -23,7 +23,7 @@ class EfficientAttention(nn.Module):
     dtype: Optional[Dtype] = None
     precision: PrecisionLike = None
     use_bias: bool = True
-    kernel_init: Callable = kernel_init(1.0)
+    # kernel_init: Callable = kernel_init(1.0)
     force_fp32_for_softmax: bool = True
 
     def setup(self):
@@ -34,15 +34,21 @@ class EfficientAttention(nn.Module):
             self.heads * self.dim_head,
             precision=self.precision,
             use_bias=self.use_bias,
-            kernel_init=self.kernel_init,
+            # kernel_init=self.kernel_init,
             dtype=self.dtype
         )
         self.query = dense(name="to_q")
         self.key = dense(name="to_k")
         self.value = dense(name="to_v")
 
-        self.proj_attn = nn.DenseGeneral(
-
+        self.proj_attn = nn.DenseGeneral(
+            self.query_dim,
+            use_bias=False,
+            precision=self.precision,
+            # kernel_init=self.kernel_init,
+            dtype=self.dtype,
+            name="to_out_0"
+        )
         # self.attnfn = make_fast_generalized_attention(qkv_dim=inner_dim, lax_scan_unroll=16)
 
     def _reshape_tensor_to_head_dim(self, tensor):
@@ -115,7 +121,7 @@ class NormalAttention(nn.Module):
     dtype: Optional[Dtype] = None
     precision: PrecisionLike = None
     use_bias: bool = True
-    kernel_init: Callable = kernel_init(1.0)
+    # kernel_init: Callable = kernel_init(1.0)
     force_fp32_for_softmax: bool = True
 
     def setup(self):
@@ -126,7 +132,7 @@ class NormalAttention(nn.Module):
             axis=-1,
             precision=self.precision,
             use_bias=self.use_bias,
-            kernel_init=self.kernel_init,
+            # kernel_init=self.kernel_init,
             dtype=self.dtype
         )
         self.query = dense(name="to_q")
@@ -140,7 +146,7 @@ class NormalAttention(nn.Module):
             use_bias=self.use_bias,
             dtype=self.dtype,
             name="to_out_0",
-            kernel_init=self.kernel_init
+            # kernel_init=self.kernel_init
             # kernel_init=jax.nn.initializers.xavier_uniform()
         )
 
@@ -236,7 +242,7 @@ class BasicTransformerBlock(nn.Module):
     dtype: Optional[Dtype] = None
     precision: PrecisionLike = None
     use_bias: bool = True
-    kernel_init: Callable = kernel_init(1.0)
+    # kernel_init: Callable = kernel_init(1.0)
     use_flash_attention:bool = False
     use_cross_only:bool = False
     only_pure_attention:bool = False
@@ -256,7 +262,7 @@ class BasicTransformerBlock(nn.Module):
             precision=self.precision,
             use_bias=self.use_bias,
             dtype=self.dtype,
-            kernel_init=self.kernel_init,
+            # kernel_init=self.kernel_init,
             force_fp32_for_softmax=self.force_fp32_for_softmax
         )
         self.attention2 = attenBlock(
@@ -267,7 +273,7 @@ class BasicTransformerBlock(nn.Module):
             precision=self.precision,
             use_bias=self.use_bias,
             dtype=self.dtype,
-            kernel_init=self.kernel_init,
+            # kernel_init=self.kernel_init,
             force_fp32_for_softmax=self.force_fp32_for_softmax
         )
 
@@ -303,7 +309,7 @@ class TransformerBlock(nn.Module):
     use_self_and_cross:bool = True
     only_pure_attention:bool = False
     force_fp32_for_softmax: bool = True
-    kernel_init: Callable = kernel_init(1.0)
+    # kernel_init: Callable = kernel_init(1.0)
     norm_inputs: bool = True
     explicitly_add_residual: bool = True
 
@@ -317,12 +323,12 @@ class TransformerBlock(nn.Module):
         if self.use_linear_attention:
             projected_x = nn.Dense(features=inner_dim,
                                    use_bias=False, precision=self.precision,
-
+                                   # kernel_init=self.kernel_init,
                                    dtype=self.dtype, name=f'project_in')(x)
         else:
             projected_x = nn.Conv(
                 features=inner_dim, kernel_size=(1, 1),
-                kernel_init=self.kernel_init,
+                # kernel_init=self.kernel_init,
                 strides=(1, 1), padding='VALID', use_bias=False, dtype=self.dtype,
                 precision=self.precision, name=f'project_in_conv',
             )(x)
@@ -344,19 +350,19 @@ class TransformerBlock(nn.Module):
             use_cross_only=(not self.use_self_and_cross),
             only_pure_attention=self.only_pure_attention,
             force_fp32_for_softmax=self.force_fp32_for_softmax,
-            kernel_init=self.kernel_init
+            # kernel_init=self.kernel_init
         )(projected_x, context)
 
         if self.use_projection == True:
             if self.use_linear_attention:
                 projected_x = nn.Dense(features=C, precision=self.precision,
                                        dtype=self.dtype, use_bias=False,
-
+                                       # kernel_init=self.kernel_init,
                                        name=f'project_out')(projected_x)
             else:
                 projected_x = nn.Conv(
                     features=C, kernel_size=(1, 1),
-                    kernel_init=self.
+                    # kernel_init=self.kernel_init,
                     strides=(1, 1), padding='VALID', use_bias=False, dtype=self.dtype,
                     precision=self.precision, name=f'project_out_conv',
                 )(projected_x)
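The recurring change in this file is that the custom kernel_init is commented out everywhere, so nn.Dense, nn.DenseGeneral and nn.Conv fall back to Flax's default kernel initializer (lecun_normal). A small sketch of the resulting behaviour; the module and field names here are illustrative, not the file's actual definitions:

import jax
import jax.numpy as jnp
import flax.linen as nn

class OutProjection(nn.Module):
    features: int

    @nn.compact
    def __call__(self, x):
        # No kernel_init argument: Flax uses nn.initializers.lecun_normal()
        # for the kernel (bias is disabled here anyway).
        return nn.DenseGeneral(self.features, use_bias=False, name="to_out_0")(x)

params = OutProjection(features=64).init(jax.random.PRNGKey(0), jnp.ones((1, 16, 128)))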
flaxdiff/models/autoencoder/diffusers.py

@@ -14,15 +14,15 @@ class StableDiffusionVAE(AutoEncoder):
     def __init__(self, modelname = "CompVis/stable-diffusion-v1-4", revision="bf16", dtype=jnp.bfloat16):
 
         from diffusers.models.vae_flax import FlaxEncoder, FlaxDecoder
-        from diffusers import FlaxStableDiffusionPipeline
+        from diffusers import FlaxStableDiffusionPipeline, FlaxAutoencoderKL
 
-
+        vae, params = FlaxAutoencoderKL.from_pretrained(
             modelname,
-            revision=revision,
+            # revision=revision,
             dtype=dtype,
         )
 
-        vae = pipeline.vae
+        # vae = pipeline.vae
 
         enc = FlaxEncoder(
             in_channels=vae.config.in_channels,
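For reference, the new FlaxAutoencoderKL loading path can be exercised roughly as below; the subfolder argument is an assumption for pulling only the VAE out of a full Stable Diffusion checkpoint and is not part of this diff:

import jax.numpy as jnp
from diffusers import FlaxAutoencoderKL

# from_pretrained on diffusers' Flax model classes returns (module, params).
vae, params = FlaxAutoencoderKL.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    subfolder="vae",        # assumption: load the VAE weights only
    dtype=jnp.bfloat16,
)
print(vae.config.in_channels, vae.config.latent_channels)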
flaxdiff/models/common.py

@@ -108,13 +108,16 @@ class FourierEmbedding(nn.Module):
 class TimeProjection(nn.Module):
     features:int
     activation:Callable=jax.nn.gelu
-    kernel_init:Callable=kernel_init(1.0)
 
     @nn.compact
     def __call__(self, x):
-        x = nn.DenseGeneral(
+        x = nn.DenseGeneral(
+            self.features,
+        )(x)
         x = self.activation(x)
-        x = nn.DenseGeneral(
+        x = nn.DenseGeneral(
+            self.features,
+        )(x)
         x = self.activation(x)
         return x
 
@@ -123,7 +126,6 @@ class SeparableConv(nn.Module):
     kernel_size:tuple=(3, 3)
     strides:tuple=(1, 1)
     use_bias:bool=False
-    kernel_init:Callable=kernel_init(1.0)
     padding:str="SAME"
     dtype: Optional[Dtype] = None
     precision: PrecisionLike = None
@@ -133,7 +135,7 @@ class SeparableConv(nn.Module):
         in_features = x.shape[-1]
         depthwise = nn.Conv(
             features=in_features, kernel_size=self.kernel_size,
-            strides=self.strides,
+            strides=self.strides,
             feature_group_count=in_features, use_bias=self.use_bias,
             padding=self.padding,
             dtype=self.dtype,
@@ -141,7 +143,7 @@ class SeparableConv(nn.Module):
         )(x)
         pointwise = nn.Conv(
             features=self.features, kernel_size=(1, 1),
-            strides=(1, 1),
+            strides=(1, 1),
             use_bias=self.use_bias,
             dtype=self.dtype,
             precision=self.precision
@@ -153,7 +155,6 @@ class ConvLayer(nn.Module):
     features:int
     kernel_size:tuple=(3, 3)
     strides:tuple=(1, 1)
-    kernel_init:Callable=kernel_init(1.0)
     dtype: Optional[Dtype] = None
     precision: PrecisionLike = None
 
@@ -164,7 +165,6 @@ class ConvLayer(nn.Module):
             features=self.features,
             kernel_size=self.kernel_size,
             strides=self.strides,
-            kernel_init=self.kernel_init,
             dtype=self.dtype,
             precision=self.precision
         )
@@ -183,7 +183,6 @@ class ConvLayer(nn.Module):
             features=self.features,
             kernel_size=self.kernel_size,
             strides=self.strides,
-            kernel_init=self.kernel_init,
             dtype=self.dtype,
             precision=self.precision
         )
@@ -192,7 +191,6 @@ class ConvLayer(nn.Module):
             features=self.features,
             kernel_size=self.kernel_size,
             strides=self.strides,
-            kernel_init=self.kernel_init,
             dtype=self.dtype,
             precision=self.precision
         )
@@ -206,7 +204,6 @@ class Upsample(nn.Module):
     activation:Callable=jax.nn.swish
     dtype: Optional[Dtype] = None
     precision: PrecisionLike = None
-    kernel_init:Callable=kernel_init(1.0)
 
     @nn.compact
     def __call__(self, x, residual=None):
@@ -221,7 +218,6 @@ class Upsample(nn.Module):
             strides=(1, 1),
             dtype=self.dtype,
             precision=self.precision,
-            kernel_init=self.kernel_init
         )(out)
         if residual is not None:
             out = jnp.concatenate([out, residual], axis=-1)
@@ -233,7 +229,6 @@ class Downsample(nn.Module):
     activation:Callable=jax.nn.swish
     dtype: Optional[Dtype] = None
     precision: PrecisionLike = None
-    kernel_init:Callable=kernel_init(1.0)
 
     @nn.compact
     def __call__(self, x, residual=None):
@@ -244,7 +239,6 @@ class Downsample(nn.Module):
             strides=(2, 2),
             dtype=self.dtype,
             precision=self.precision,
-            kernel_init=self.kernel_init
         )(x)
         if residual is not None:
             if residual.shape[1] > out.shape[1]:
@@ -269,7 +263,6 @@ class ResidualBlock(nn.Module):
     direction:str=None
     res:int=2
     norm_groups:int=8
-    kernel_init:Callable=kernel_init(1.0)
     dtype: Optional[Dtype] = None
     precision: PrecisionLike = None
     named_norms:bool=False
@@ -296,7 +289,6 @@ class ResidualBlock(nn.Module):
             features=self.features,
             kernel_size=self.kernel_size,
             strides=self.strides,
-            kernel_init=self.kernel_init,
             name="conv1",
             dtype=self.dtype,
             precision=self.precision
@@ -321,7 +313,6 @@ class ResidualBlock(nn.Module):
             features=self.features,
             kernel_size=self.kernel_size,
             strides=self.strides,
-            kernel_init=self.kernel_init,
             name="conv2",
             dtype=self.dtype,
             precision=self.precision
@@ -333,7 +324,6 @@ class ResidualBlock(nn.Module):
             features=self.features,
             kernel_size=(1, 1),
             strides=1,
-            kernel_init=self.kernel_init,
             name="residual_conv",
             dtype=self.dtype,
             precision=self.precision
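After this change TimeProjection is simply two default-initialized DenseGeneral layers with the activation applied after each. A self-contained sketch equivalent to the new code, applied to a batch of hypothetical time embeddings:

from typing import Callable
import jax
import jax.numpy as jnp
import flax.linen as nn

class TimeProjection(nn.Module):
    features: int
    activation: Callable = jax.nn.gelu

    @nn.compact
    def __call__(self, x):
        x = nn.DenseGeneral(self.features)(x)   # default (lecun_normal) kernel init
        x = self.activation(x)
        x = nn.DenseGeneral(self.features)(x)
        x = self.activation(x)
        return x

temb = jnp.ones((4, 256))                        # e.g. Fourier time embeddings
out, _ = TimeProjection(features=512).init_with_output(jax.random.PRNGKey(0), temb)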
flaxdiff/models/general.py (new file)

@@ -0,0 +1,21 @@
+from flax import linen as nn
+import jax
+import jax.numpy as jnp
+
+class BCHWModelWrapper(nn.Module):
+    model: nn.Module
+
+    @nn.compact
+    def __call__(self, x, temb, textcontext):
+        # Reshape the input to BCHW format from BHWC
+        x = jnp.transpose(x, (0, 3, 1, 2))
+        # Pass the input through the UNet model
+        out = self.model(
+            sample=x,
+            timesteps=temb,
+            encoder_hidden_states=textcontext,
+        )
+        # Reshape the output back to BHWC format
+        out = jnp.transpose(out.sample, (0, 2, 3, 1))
+        return out
+
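The new BCHWModelWrapper adapts channel-first models (such as diffusers' Flax UNet, which takes sample, timesteps and encoder_hidden_states) to flaxdiff's channel-last inputs. A hypothetical usage sketch; the checkpoint and shapes are assumptions, and in practice the pretrained UNet params would be merged into the wrapper's variables rather than re-initialized as shown:

import jax
import jax.numpy as jnp
from diffusers import FlaxUNet2DConditionModel
from flaxdiff.models.general import BCHWModelWrapper

unet, unet_params = FlaxUNet2DConditionModel.from_pretrained(
    "CompVis/stable-diffusion-v1-4", subfolder="unet", dtype=jnp.bfloat16
)
wrapped = BCHWModelWrapper(model=unet)

x = jnp.ones((1, 64, 64, 4))      # BHWC latents
temb = jnp.ones((1,))             # diffusion timestep(s)
ctx = jnp.ones((1, 77, 768))      # text-encoder hidden states
variables = wrapped.init(jax.random.PRNGKey(0), x, temb, ctx)   # fresh params, shape check only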
flaxdiff/models/simple_unet.py

@@ -20,7 +20,6 @@ class Unet(nn.Module):
     dtype: Optional[Dtype] = None
     precision: PrecisionLike = None
     named_norms: bool = False # This is for backward compatibility reasons; older checkpoints have named norms
-    kernel_init: Callable = partial(kernel_init, dtype=jnp.float32)
 
     def setup(self):
         if self.norm_groups > 0:
@@ -50,7 +49,6 @@ class Unet(nn.Module):
             features=self.feature_depths[0],
             kernel_size=(3, 3),
             strides=(1, 1),
-            kernel_init=self.kernel_init(scale=1.0),
             dtype=self.dtype,
             precision=self.precision
         )(x)
@@ -65,7 +63,6 @@ class Unet(nn.Module):
                 down_conv_type,
                 name=f"down_{i}_residual_{j}",
                 features=dim_in,
-                kernel_init=self.kernel_init(scale=1.0),
                 kernel_size=(3, 3),
                 strides=(1, 1),
                 activation=self.activation,
@@ -85,7 +82,6 @@ class Unet(nn.Module):
                     force_fp32_for_softmax=attention_config.get("force_fp32_for_softmax", False),
                     norm_inputs=attention_config.get("norm_inputs", True),
                     explicitly_add_residual=attention_config.get("explicitly_add_residual", True),
-                    kernel_init=self.kernel_init(scale=1.0),
                     name=f"down_{i}_attention_{j}")(x, textcontext)
             # print("down residual for feature level", i, "is of shape", x.shape, "features", dim_in)
             downs.append(x)
@@ -108,7 +104,6 @@ class Unet(nn.Module):
                 middle_conv_type,
                 name=f"middle_res1_{j}",
                 features=middle_dim_out,
-                kernel_init=self.kernel_init(scale=1.0),
                 kernel_size=(3, 3),
                 strides=(1, 1),
                 activation=self.activation,
@@ -129,13 +124,11 @@ class Unet(nn.Module):
                     force_fp32_for_softmax=middle_attention.get("force_fp32_for_softmax", False),
                     norm_inputs=middle_attention.get("norm_inputs", True),
                     explicitly_add_residual=middle_attention.get("explicitly_add_residual", True),
-                    kernel_init=self.kernel_init(scale=1.0),
                     name=f"middle_attention_{j}")(x, textcontext)
             x = ResidualBlock(
                 middle_conv_type,
                 name=f"middle_res2_{j}",
                 features=middle_dim_out,
-                kernel_init=self.kernel_init(scale=1.0),
                 kernel_size=(3, 3),
                 strides=(1, 1),
                 activation=self.activation,
@@ -157,7 +150,6 @@ class Unet(nn.Module):
                 up_conv_type,# if j == 0 else "separable",
                 name=f"up_{i}_residual_{j}",
                 features=dim_out,
-                kernel_init=self.kernel_init(scale=1.0),
                 kernel_size=kernel_size,
                 strides=(1, 1),
                 activation=self.activation,
@@ -177,7 +169,6 @@ class Unet(nn.Module):
                     force_fp32_for_softmax=middle_attention.get("force_fp32_for_softmax", False),
                     norm_inputs=attention_config.get("norm_inputs", True),
                     explicitly_add_residual=attention_config.get("explicitly_add_residual", True),
-                    kernel_init=self.kernel_init(scale=1.0),
                     name=f"up_{i}_attention_{j}")(x, textcontext)
             # print("Upscaling ", i, x.shape)
             if i != len(feature_depths) - 1:
@@ -196,7 +187,6 @@ class Unet(nn.Module):
             features=self.feature_depths[0],
             kernel_size=(3, 3),
             strides=(1, 1),
-            kernel_init=self.kernel_init(scale=1.0),
             dtype=self.dtype,
             precision=self.precision
         )(x)
@@ -207,7 +197,6 @@ class Unet(nn.Module):
             conv_type,
             name="final_residual",
             features=self.feature_depths[0],
-            kernel_init=self.kernel_init(scale=1.0),
             kernel_size=(3,3),
             strides=(1, 1),
             activation=self.activation,
@@ -226,7 +215,7 @@ class Unet(nn.Module):
             kernel_size=(3, 3),
             strides=(1, 1),
             # activation=jax.nn.mish
-            kernel_init=self.kernel_init(scale=0.0),
+            # kernel_init=self.kernel_init(scale=0.0),
             dtype=self.dtype,
             precision=self.precision
         )(x)
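Beyond dropping the now-unused kernel_init field, the one behavioural change in this file is on the final 3x3 conv, where kernel_init(scale=0.0) (a zero-scale init so the UNet's output head starts near zero) is commented out in favour of Flax's default initializer. If zero initialization of the output head is still desired, it can be passed explicitly; a sketch with an assumed channel count:

import flax.linen as nn

final_conv = nn.Conv(
    features=3,                          # assumption: RGB output channels
    kernel_size=(3, 3), strides=(1, 1),
    kernel_init=nn.initializers.zeros,   # output head starts at zero
)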
flaxdiff/models/simple_vit.py

@@ -23,7 +23,6 @@ class PatchEmbedding(nn.Module):
     embedding_dim: int
     dtype: Any = jnp.float32
     precision: Any = jax.lax.Precision.HIGH
-    kernel_init: Callable = partial(kernel_init, 1.0)
 
     @nn.compact
     def __call__(self, x):
@@ -34,7 +33,6 @@ class PatchEmbedding(nn.Module):
                     kernel_size=(self.patch_size, self.patch_size),
                     strides=(self.patch_size, self.patch_size),
                     dtype=self.dtype,
-                    kernel_init=self.kernel_init(),
                     precision=self.precision)(x)
         x = jnp.reshape(x, (batch, -1, self.embedding_dim))
         return x
@@ -67,7 +65,7 @@ class UViT(nn.Module):
     norm_groups:int=8
     dtype: Optional[Dtype] = None
     precision: PrecisionLike = None
-    kernel_init: Callable = partial(kernel_init, scale=1.0)
+    # kernel_init: Callable = partial(kernel_init, scale=1.0)
     add_residualblock_output: bool = False
     norm_inputs: bool = False
     explicitly_add_residual: bool = True
@@ -88,10 +86,10 @@ class UViT(nn.Module):
 
         # Patch embedding
         x = PatchEmbedding(patch_size=self.patch_size, embedding_dim=self.emb_features,
-                           dtype=self.dtype, precision=self.precision
+                           dtype=self.dtype, precision=self.precision)(x)
         num_patches = x.shape[1]
 
-        context_emb = nn.DenseGeneral(features=self.emb_features,
+        context_emb = nn.DenseGeneral(features=self.emb_features,
                                       dtype=self.dtype, precision=self.precision)(textcontext)
         num_text_tokens = textcontext.shape[1]
 
@@ -116,7 +114,7 @@ class UViT(nn.Module):
                                  only_pure_attention=False,
                                  norm_inputs=self.norm_inputs,
                                  explicitly_add_residual=self.explicitly_add_residual,
-
+                                 )(x)
             skips.append(x)
 
         # Middle block
@@ -126,12 +124,12 @@ class UViT(nn.Module):
                              only_pure_attention=False,
                              norm_inputs=self.norm_inputs,
                              explicitly_add_residual=self.explicitly_add_residual,
-
+                             )(x)
 
         # # Out blocks
         for i in range(self.num_layers // 2):
             x = jnp.concatenate([x, skips.pop()], axis=-1)
-            x = nn.DenseGeneral(features=self.emb_features,
+            x = nn.DenseGeneral(features=self.emb_features,
                                 dtype=self.dtype, precision=self.precision)(x)
             x = TransformerBlock(heads=self.num_heads, dim_head=self.emb_features // self.num_heads,
                                  dtype=self.dtype, precision=self.precision, use_projection=self.use_projection,
@@ -139,13 +137,13 @@ class UViT(nn.Module):
                                  only_pure_attention=False,
                                  norm_inputs=self.norm_inputs,
                                  explicitly_add_residual=self.explicitly_add_residual,
-
+                                 )(x)
 
         # print(f'Shape of x after transformer blocks: {x.shape}')
         x = self.norm()(x)
 
         patch_dim = self.patch_size ** 2 * self.output_channels
-        x = nn.Dense(features=patch_dim, dtype=self.dtype, precision=self.precision
+        x = nn.Dense(features=patch_dim, dtype=self.dtype, precision=self.precision)(x)
         x = x[:, 1 + num_text_tokens:, :]
         x = unpatchify(x, channels=self.output_channels)
 
@@ -159,7 +157,6 @@ class UViT(nn.Module):
             kernel_size=(3, 3),
             strides=(1, 1),
             # activation=jax.nn.mish
-            kernel_init=self.kernel_init(scale=0.0),
             dtype=self.dtype,
             precision=self.precision
         )(x)
@@ -173,7 +170,6 @@ class UViT(nn.Module):
             kernel_size=(3, 3),
            strides=(1, 1),
             # activation=jax.nn.mish
-            kernel_init=self.kernel_init(scale=0.0),
             dtype=self.dtype,
             precision=self.precision
         )(x)
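patch_dim above is patch_size squared times output_channels because each output token is later folded back into a square pixel patch by unpatchify. A generic sketch of that operation for a square patch grid (not the package's exact implementation):

import jax.numpy as jnp

def unpatchify_sketch(x, channels=3):
    # x: (batch, num_patches, patch_size * patch_size * channels), square grid assumed
    b, n, d = x.shape
    p = int((d // channels) ** 0.5)
    h = w = int(n ** 0.5)
    x = x.reshape(b, h, w, p, p, channels)
    x = jnp.transpose(x, (0, 1, 3, 2, 4, 5))    # interleave patch rows and columns
    return x.reshape(b, h * p, w * p, channels)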
flaxdiff/trainer/diffusion_trainer.py

@@ -231,11 +231,11 @@ class DiffusionTrainer(SimpleTrainer):
                 ),
             )
 
-
+            new_state = new_state.apply_ema(self.ema_decay)
 
             if distributed_training:
                 loss = jax.lax.pmean(loss, "data")
-            return
+            return new_state, loss, rng_state
 
         if distributed_training:
             train_step = shard_map(
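apply_ema is defined on flaxdiff's train state elsewhere in the package; the fix here is to actually apply it every step and to return the updated state, loss and RNG state. A minimal sketch of the usual EMA update such a method performs (field names are assumptions, not the package's exact definition):

from typing import Any
import jax
from flax.training import train_state

class TrainState(train_state.TrainState):
    ema_params: Any = None

    def apply_ema(self, decay: float = 0.999):
        # ema <- decay * ema + (1 - decay) * params, leaf by leaf
        new_ema = jax.tree_util.tree_map(
            lambda e, p: decay * e + (1.0 - decay) * p,
            self.ema_params, self.params,
        )
        return self.replace(ema_params=new_ema)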
flaxdiff/trainer/simple_trainer.py

@@ -159,7 +159,7 @@ class SimpleTrainer:
         self.best_loss = 1e9
 
     def get_input_ones(self):
-        return {k: jnp.ones((1, *v)) for k, v in self.input_shapes.items()}
+        return {k: jnp.ones((1, *v), dtype=self.model.dtype) for k, v in self.input_shapes.items()}
 
     def generate_states(
         self,
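get_input_ones now builds the dummy initialization inputs in the model's dtype rather than the default float32, which keeps tracing consistent for mixed-precision models. A tiny illustration with an assumed model definition:

from typing import Any
import jax
import jax.numpy as jnp
import flax.linen as nn

class Tiny(nn.Module):
    dtype: Any = jnp.bfloat16

    @nn.compact
    def __call__(self, x):
        # Computation runs in self.dtype; parameters stay in param_dtype (float32).
        return nn.Dense(4, dtype=self.dtype)(x)

model = Tiny()
ones = jnp.ones((1, 8), dtype=model.dtype)     # mirrors the new get_input_ones()
params = model.init(jax.random.PRNGKey(0), ones)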
@@ -437,12 +437,30 @@ class SimpleTrainer:
                     # If the loss is too low, we can assume the model has diverged
                     print(colored(f"Loss too low at step {current_step} => {loss}", 'red'))
                     # Reset the model to the old state
-                    if self.best_state is not None:
-
-
-
+                    # if self.best_state is not None:
+                    #     print(colored(f"Resetting model to best state", 'red'))
+                    #     train_state = self.best_state
+                    #     loss = self.best_loss
+                    # else:
+                    #     exit(1)
+
+                    # Check if there are any NaN/inf values in the train_state.params
+                    params = train_state.params
+                    if isinstance(params, dict):
+                        for key, value in params.items():
+                            if isinstance(value, jnp.ndarray):
+                                if jnp.isnan(value).any() or jnp.isinf(value).any():
+                                    print(colored(f"NaN/inf values found in params at step {current_step}", 'red'))
+                                    # Reset the model to the old state
+                                    # train_state = self.best_state
+                                    # loss = self.best_loss
+                                    # break
+                                else:
+                                    print(colored(f"Params are fine at step {current_step}", 'green'))
                     else:
-
+                        print(colored(f"Params are not a dict at step {current_step}", 'red'))
+
+                    exit(1)
 
                 epoch_loss += loss
                 current_step += 1
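Flax parameters are typically a nested pytree of arrays rather than a flat dict, so the params.items() loop above usually only sees per-module sub-dicts at the top level and the isinstance(value, jnp.ndarray) branch rarely fires. A sketch of a check over every leaf, not part of this release:

import jax
import jax.numpy as jnp

def params_are_finite(params) -> bool:
    # Reduce over every leaf array of the (possibly nested) parameter pytree.
    leaf_ok = jax.tree_util.tree_map(lambda p: jnp.all(jnp.isfinite(p)), params)
    return bool(jax.tree_util.tree_reduce(jnp.logical_and, leaf_ok, jnp.bool_(True)))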
flaxdiff.egg-info/SOURCES.txt

@@ -21,6 +21,7 @@ flaxdiff/models/__init__.py
 flaxdiff/models/attention.py
 flaxdiff/models/common.py
 flaxdiff/models/favor_fastattn.py
+flaxdiff/models/general.py
 flaxdiff/models/simple_unet.py
 flaxdiff/models/simple_vit.py
 flaxdiff/models/autoencoder/__init__.py