flaxdiff 0.1.35.4__py3-none-any.whl → 0.1.35.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flaxdiff/models/attention.py +13 -6
- flaxdiff/models/simple_unet.py +17 -11
- flaxdiff/models/simple_vit.py +10 -2
- {flaxdiff-0.1.35.4.dist-info → flaxdiff-0.1.35.6.dist-info}/METADATA +1 -1
- {flaxdiff-0.1.35.4.dist-info → flaxdiff-0.1.35.6.dist-info}/RECORD +7 -7
- {flaxdiff-0.1.35.4.dist-info → flaxdiff-0.1.35.6.dist-info}/WHEEL +1 -1
- {flaxdiff-0.1.35.4.dist-info → flaxdiff-0.1.35.6.dist-info}/top_level.txt +0 -0
flaxdiff/models/attention.py
CHANGED
@@ -11,6 +11,7 @@ import einops
 import functools
 import math
 from .common import kernel_init
+import jax.experimental.pallas.ops.tpu.flash_attention

 class EfficientAttention(nn.Module):
     """
@@ -303,27 +304,30 @@ class TransformerBlock(nn.Module):
     only_pure_attention:bool = False
     force_fp32_for_softmax: bool = True
     kernel_init: Callable = kernel_init(1.0)
+    norm_inputs: bool = True
+    explicitly_add_residual: bool = True

     @nn.compact
     def __call__(self, x, context=None):
         inner_dim = self.heads * self.dim_head
         C = x.shape[-1]
-
+        if self.norm_inputs:
+            x = nn.RMSNorm(epsilon=1e-5, dtype=self.dtype)(x)
         if self.use_projection == True:
             if self.use_linear_attention:
                 projected_x = nn.Dense(features=inner_dim,
                                        use_bias=False, precision=self.precision,
                                        kernel_init=self.kernel_init,
-                                       dtype=self.dtype, name=f'project_in')(
+                                       dtype=self.dtype, name=f'project_in')(x)
             else:
                 projected_x = nn.Conv(
                     features=inner_dim, kernel_size=(1, 1),
                     kernel_init=self.kernel_init,
                     strides=(1, 1), padding='VALID', use_bias=False, dtype=self.dtype,
                     precision=self.precision, name=f'project_in_conv',
-                )(
+                )(x)
         else:
-            projected_x =
+            projected_x = x
             inner_dim = C

         context = projected_x if context is None else context
@@ -356,6 +360,9 @@ class TransformerBlock(nn.Module):
                 strides=(1, 1), padding='VALID', use_bias=False, dtype=self.dtype,
                 precision=self.precision, name=f'project_out_conv',
             )(projected_x)
-
-
+
+        if self.only_pure_attention or self.explicitly_add_residual:
+            projected_x = x + projected_x
+
+        out = projected_x
         return out
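Taken together, the attention.py changes add two new TransformerBlock fields, norm_inputs (RMSNorm on the block input) and explicitly_add_residual (add x back onto the projected output). A minimal usage sketch follows; only the two new flags and the (x, context) call signature come from this diff, while the other constructor values and the standard Flax init/apply flow are assumptions for illustration:

    # Hypothetical sketch: heads, dim_head, shapes and dtype are illustrative,
    # not taken from the package; norm_inputs / explicitly_add_residual are the
    # fields introduced in 0.1.35.6.
    import jax
    import jax.numpy as jnp
    from flaxdiff.models.attention import TransformerBlock

    x = jnp.ones((1, 16 * 16, 256))   # (batch, tokens, channels)
    context = jnp.ones((1, 77, 256))  # e.g. text-conditioning tokens

    block = TransformerBlock(
        heads=8,
        dim_head=32,                    # heads * dim_head matches the channel dim
        dtype=jnp.float32,
        use_projection=False,
        only_pure_attention=False,
        norm_inputs=True,               # new: RMSNorm the input before attention
        explicitly_add_residual=True,   # new: add the residual connection explicitly
    )
    params = block.init(jax.random.PRNGKey(0), x, context)
    out = block.apply(params, x, context)  # same shape as x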
flaxdiff/models/simple_unet.py
CHANGED
@@ -50,7 +50,7 @@ class Unet(nn.Module):
             features=self.feature_depths[0],
             kernel_size=(3, 3),
             strides=(1, 1),
-            kernel_init=self.kernel_init(1.0),
+            kernel_init=self.kernel_init(scale=1.0),
             dtype=self.dtype,
             precision=self.precision
         )(x)
@@ -65,7 +65,7 @@ class Unet(nn.Module):
                     down_conv_type,
                     name=f"down_{i}_residual_{j}",
                     features=dim_in,
-                    kernel_init=self.kernel_init(1.0),
+                    kernel_init=self.kernel_init(scale=1.0),
                     kernel_size=(3, 3),
                     strides=(1, 1),
                     activation=self.activation,
@@ -83,7 +83,9 @@ class Unet(nn.Module):
                         precision=attention_config.get("precision", self.precision),
                         only_pure_attention=attention_config.get("only_pure_attention", True),
                         force_fp32_for_softmax=attention_config.get("force_fp32_for_softmax", False),
-
+                        norm_inputs=attention_config.get("norm_inputs", True),
+                        explicitly_add_residual=attention_config.get("explicitly_add_residual", True),
+                        kernel_init=self.kernel_init(scale=1.0),
                         name=f"down_{i}_attention_{j}")(x, textcontext)
                 # print("down residual for feature level", i, "is of shape", x.shape, "features", dim_in)
             downs.append(x)
@@ -106,7 +108,7 @@ class Unet(nn.Module):
                 middle_conv_type,
                 name=f"middle_res1_{j}",
                 features=middle_dim_out,
-                kernel_init=self.kernel_init(1.0),
+                kernel_init=self.kernel_init(scale=1.0),
                 kernel_size=(3, 3),
                 strides=(1, 1),
                 activation=self.activation,
@@ -125,13 +127,15 @@ class Unet(nn.Module):
                     precision=middle_attention.get("precision", self.precision),
                     only_pure_attention=middle_attention.get("only_pure_attention", True),
                     force_fp32_for_softmax=middle_attention.get("force_fp32_for_softmax", False),
-
+                    norm_inputs=middle_attention.get("norm_inputs", True),
+                    explicitly_add_residual=middle_attention.get("explicitly_add_residual", True),
+                    kernel_init=self.kernel_init(scale=1.0),
                     name=f"middle_attention_{j}")(x, textcontext)
             x = ResidualBlock(
                 middle_conv_type,
                 name=f"middle_res2_{j}",
                 features=middle_dim_out,
-                kernel_init=self.kernel_init(1.0),
+                kernel_init=self.kernel_init(scale=1.0),
                 kernel_size=(3, 3),
                 strides=(1, 1),
                 activation=self.activation,
@@ -153,7 +157,7 @@ class Unet(nn.Module):
                     up_conv_type,# if j == 0 else "separable",
                     name=f"up_{i}_residual_{j}",
                     features=dim_out,
-                    kernel_init=self.kernel_init(1.0),
+                    kernel_init=self.kernel_init(scale=1.0),
                     kernel_size=kernel_size,
                     strides=(1, 1),
                     activation=self.activation,
@@ -171,7 +175,9 @@ class Unet(nn.Module):
                         precision=attention_config.get("precision", self.precision),
                         only_pure_attention=attention_config.get("only_pure_attention", True),
                         force_fp32_for_softmax=middle_attention.get("force_fp32_for_softmax", False),
-
+                        norm_inputs=attention_config.get("norm_inputs", True),
+                        explicitly_add_residual=attention_config.get("explicitly_add_residual", True),
+                        kernel_init=self.kernel_init(scale=1.0),
                         name=f"up_{i}_attention_{j}")(x, textcontext)
                 # print("Upscaling ", i, x.shape)
             if i != len(feature_depths) - 1:
@@ -190,7 +196,7 @@ class Unet(nn.Module):
             features=self.feature_depths[0],
             kernel_size=(3, 3),
             strides=(1, 1),
-            kernel_init=self.kernel_init(1.0),
+            kernel_init=self.kernel_init(scale=1.0),
             dtype=self.dtype,
             precision=self.precision
         )(x)
@@ -201,7 +207,7 @@ class Unet(nn.Module):
             conv_type,
             name="final_residual",
             features=self.feature_depths[0],
-            kernel_init=self.kernel_init(1.0),
+            kernel_init=self.kernel_init(scale=1.0),
             kernel_size=(3,3),
             strides=(1, 1),
             activation=self.activation,
@@ -220,7 +226,7 @@ class Unet(nn.Module):
             kernel_size=(3, 3),
             strides=(1, 1),
             # activation=jax.nn.mish
-            kernel_init=self.kernel_init(0.0),
+            kernel_init=self.kernel_init(scale=0.0),
             dtype=self.dtype,
             precision=self.precision
         )(x)
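Besides switching every kernel_init call to the keyword form kernel_init(scale=...), the U-Net now forwards two extra keys from each attention config dict into TransformerBlock. A hedged sketch of such a dict is below; it lists only keys this diff reads via .get(), and how the dict is wired into Unet (for example through an attention_configs argument) is an assumption, not confirmed here:

    # Hypothetical attention config; unspecified keys fall back to the
    # defaults shown in the .get() calls above.
    attention_config = {
        "precision": None,
        "only_pure_attention": True,
        "force_fp32_for_softmax": False,
        "norm_inputs": True,               # new key read in 0.1.35.6
        "explicitly_add_residual": True,   # new key read in 0.1.35.6
    }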
flaxdiff/models/simple_vit.py
CHANGED
@@ -69,6 +69,8 @@ class UViT(nn.Module):
     precision: PrecisionLike = None
     kernel_init: Callable = partial(kernel_init, scale=1.0)
     add_residualblock_output: bool = False
+    norm_inputs: bool = False
+    explicitly_add_residual: bool = True

     def setup(self):
         if self.norm_groups > 0:
@@ -110,16 +112,20 @@ class UViT(nn.Module):
         for i in range(self.num_layers // 2):
             x = TransformerBlock(heads=self.num_heads, dim_head=self.emb_features // self.num_heads,
                                  dtype=self.dtype, precision=self.precision, use_projection=self.use_projection,
-                                 use_flash_attention=self.use_flash_attention, use_self_and_cross=
+                                 use_flash_attention=self.use_flash_attention, use_self_and_cross=False, force_fp32_for_softmax=self.force_fp32_for_softmax,
                                  only_pure_attention=False,
+                                 norm_inputs=self.norm_inputs,
+                                 explicitly_add_residual=self.explicitly_add_residual,
                                  kernel_init=self.kernel_init())(x)
             skips.append(x)

         # Middle block
         x = TransformerBlock(heads=self.num_heads, dim_head=self.emb_features // self.num_heads,
                              dtype=self.dtype, precision=self.precision, use_projection=self.use_projection,
-                             use_flash_attention=self.use_flash_attention, use_self_and_cross=
+                             use_flash_attention=self.use_flash_attention, use_self_and_cross=False, force_fp32_for_softmax=self.force_fp32_for_softmax,
                              only_pure_attention=False,
+                             norm_inputs=self.norm_inputs,
+                             explicitly_add_residual=self.explicitly_add_residual,
                              kernel_init=self.kernel_init())(x)

         # # Out blocks
@@ -131,6 +137,8 @@ class UViT(nn.Module):
                                  dtype=self.dtype, precision=self.precision, use_projection=self.use_projection,
                                  use_flash_attention=self.use_flash_attention, use_self_and_cross=self.use_self_and_cross, force_fp32_for_softmax=self.force_fp32_for_softmax,
                                  only_pure_attention=False,
+                                 norm_inputs=self.norm_inputs,
+                                 explicitly_add_residual=self.explicitly_add_residual,
                                  kernel_init=self.kernel_init())(x)

             # print(f'Shape of x after transformer blocks: {x.shape}')
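UViT gains the matching norm_inputs and explicitly_add_residual fields and threads them into every TransformerBlock it builds. A hedged construction sketch follows; only the two new fields come from this diff, while the remaining field names and values are assumed from earlier flaxdiff releases and are illustrative only:

    # Hypothetical UViT construction; values are placeholders.
    from flaxdiff.models.simple_vit import UViT

    model = UViT(
        emb_features=256,
        num_layers=12,
        num_heads=8,
        norm_inputs=False,              # new field, defaults to False
        explicitly_add_residual=True,   # new field, defaults to True
    )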
{flaxdiff-0.1.35.4.dist-info → flaxdiff-0.1.35.6.dist-info}/RECORD
CHANGED
@@ -3,11 +3,11 @@ flaxdiff/utils.py,sha256=B0GcHlzlVYDNEIdh2v5qmP4u0neIT-FqexNohuyuCvg,2452
 flaxdiff/data/__init__.py,sha256=PM3PkHihyohT5SHVYKc8vQ4IeVfGPpCktkSVwvqMjQ4,52
 flaxdiff/data/online_loader.py,sha256=DoHrMZCi5gMd9tmkCpZIUU9lGxvfYtuaz58943_lCRc,11315
 flaxdiff/models/__init__.py,sha256=FAivVYXxM2JrCFIXf-C3374RB2Hth25dBrzOeNFhH1U,26
-flaxdiff/models/attention.py,sha256=
+flaxdiff/models/attention.py,sha256=JvrP7-09MV6IfRLRBhqjPmNUU-lkEMk9TOnJSBKcar8,13289
 flaxdiff/models/common.py,sha256=hWsSs2BP2J-JN1s4qLRr-h-KYkcVyl2hOp1Wsm_L-h8,10994
 flaxdiff/models/favor_fastattn.py,sha256=79Ew1nqarsNLPzZaBSd1ILORzJr74CupYeqGiCQK5E4,27689
-flaxdiff/models/simple_unet.py,sha256=
-flaxdiff/models/simple_vit.py,sha256=
+flaxdiff/models/simple_unet.py,sha256=L5m2j5580QP7pJ5VIme7U5xYA22PZiGP7qdvcKUnB38,11463
+flaxdiff/models/simple_vit.py,sha256=UCDDr0XVnpf6tbJWKFtEt3_nAqMqOoakXf5amyVWZNo,7929
 flaxdiff/models/autoencoder/__init__.py,sha256=qY-7MldZpsfkF-_T2LqlRK7VHbqfmosz0NmvzDlBkOk,78
 flaxdiff/models/autoencoder/autoencoder.py,sha256=27_hYl0yXAdH9Mx4Xu9J79mSNo-FEKr9SxhVaS3ffn4,591
 flaxdiff/models/autoencoder/diffusers.py,sha256=JHeFLCxiHhu-QHwhKiCuKsQJn4AZumquiuxgZkiYGQ0,3643
@@ -34,7 +34,7 @@ flaxdiff/trainer/__init__.py,sha256=T-vUVq4zHcMK6kpCsG4Gu8vn71q6lZD-lg-Ul7yKfEk,
 flaxdiff/trainer/autoencoder_trainer.py,sha256=al7AsZ7yeDMEiDD-gbcXf0ADq_xfk1VMxvg24GfA-XQ,7008
 flaxdiff/trainer/diffusion_trainer.py,sha256=wKkg63DWZjx2MoM3VQNCDIr40rWN8fUGxH9jWWxfZao,9373
 flaxdiff/trainer/simple_trainer.py,sha256=cawm6fZNQoLLATMneAU2gQ9j7kefqHnBPHuaIj3i_a4,18237
-flaxdiff-0.1.35.
-flaxdiff-0.1.35.
-flaxdiff-0.1.35.
-flaxdiff-0.1.35.
+flaxdiff-0.1.35.6.dist-info/METADATA,sha256=NVCk5V7Zc3iq-nrWTivzO17dQa1fIjYgjJb800ZrZhQ,22085
+flaxdiff-0.1.35.6.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+flaxdiff-0.1.35.6.dist-info/top_level.txt,sha256=-2-nXnfkJgSfkki1tjm5Faw6Dso7vhtdn2szwCdX5CQ,9
+flaxdiff-0.1.35.6.dist-info/RECORD,,
{flaxdiff-0.1.35.4.dist-info → flaxdiff-0.1.35.6.dist-info}/top_level.txt
File without changes