flaxdiff-0.1.37.3-py3-none-any.whl → flaxdiff-0.1.37.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -81,16 +81,16 @@ class KarrasPredictionTransform(DiffusionPredictionTransform):
         epsilon = (x_t - x_0 * signal_rate) / noise_rate
         return x_0, epsilon
 
-    def pred_transform(self, x_t, preds, rates: tuple[jnp.ndarray, jnp.ndarray]) -> jnp.ndarray:
+    def pred_transform(self, x_t, preds, rates: tuple[jnp.ndarray, jnp.ndarray], epsilon=1e-8) -> jnp.ndarray:
         _, sigma = rates
-        c_out = sigma * self.sigma_data / jnp.sqrt(self.sigma_data ** 2 + sigma ** 2)
-        c_skip = self.sigma_data ** 2 / (self.sigma_data ** 2 + sigma ** 2)
+        c_out = sigma * self.sigma_data / (jnp.sqrt(self.sigma_data ** 2 + sigma ** 2) + epsilon)
+        c_skip = self.sigma_data ** 2 / (self.sigma_data ** 2 + sigma ** 2 + epsilon)
         c_out = c_out.reshape((-1, 1, 1, 1))
         c_skip = c_skip.reshape((-1, 1, 1, 1))
         x_0 = c_out * preds + c_skip * x_t
         return x_0
 
-    def get_input_scale(self, rates: tuple[jnp.ndarray, jnp.ndarray]) -> jnp.ndarray:
+    def get_input_scale(self, rates: tuple[jnp.ndarray, jnp.ndarray], epsilon=1e-8) -> jnp.ndarray:
         _, sigma = rates
-        c_in = 1 / jnp.sqrt(self.sigma_data ** 2 + sigma ** 2)
+        c_in = 1 / (jnp.sqrt(self.sigma_data ** 2 + sigma ** 2) + epsilon)
         return c_in
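
The hunk above adds an epsilon guard to the Karras preconditioning coefficients so the divisions stay finite even in the degenerate case where both sigma_data and sigma approach zero. A minimal standalone sketch of the stabilized formulas, with illustrative values rather than the flaxdiff API:

```python
import jax.numpy as jnp

# Illustrative values; flaxdiff computes these inside
# KarrasPredictionTransform using self.sigma_data.
sigma_data = 0.5
sigma = jnp.array([0.0, 1e-4, 1.0, 80.0])
epsilon = 1e-8

denom = jnp.sqrt(sigma_data ** 2 + sigma ** 2) + epsilon
c_in = 1 / denom                     # input scaling
c_out = sigma * sigma_data / denom   # output scaling
c_skip = sigma_data ** 2 / (sigma_data ** 2 + sigma ** 2 + epsilon)

# With sigma_data > 0 the denominators are already positive; the epsilon
# only changes the result when both terms vanish simultaneously.
print(c_in, c_out, c_skip)
```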
@@ -167,7 +167,10 @@ class DiffusionTrainer(SimpleTrainer):
         noise_level, local_rng_state = noise_schedule.generate_timesteps(images.shape[0], local_rng_state)
 
         local_rng_state, rngs = local_rng_state.get_random_key()
-        noise: jax.Array = jax.random.normal(rngs, shape=images.shape)
+        noise: jax.Array = jax.random.normal(rngs, shape=images.shape, dtype=jnp.float32)
+
+        # Make sure the images are also float32
+        images = images.astype(jnp.float32)
 
         rates = noise_schedule.get_rates(noise_level)
         noisy_images, c_in, expected_output = model_output_transform.forward_diffusion(
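
This hunk pins both the sampled noise and the input images to float32 before the forward diffusion step. A hedged sketch of the effect, assuming a loader that yields half-precision batches (the batch shape and dtype below are illustrative):

```python
import jax
import jax.numpy as jnp

rng = jax.random.PRNGKey(0)
images = jnp.zeros((4, 32, 32, 3), dtype=jnp.bfloat16)  # e.g. a mixed-precision batch

# jax.random.normal already defaults to float32, but making the dtype
# explicit and upcasting the images guarantees the diffusion arithmetic
# and the loss run in full precision regardless of the loader's dtype.
noise = jax.random.normal(rng, shape=images.shape, dtype=jnp.float32)
images = images.astype(jnp.float32)

noisy = images + noise  # both operands are now float32
assert noisy.dtype == jnp.float32
```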
@@ -197,8 +200,23 @@ class DiffusionTrainer(SimpleTrainer):
         loss, grads = grad_fn(train_state.params)
         if distributed_training:
             grads = jax.lax.pmean(grads, "data")
+
+        # # check gradients for NaN/Inf
+        # has_nan_or_inf = jax.tree_util.tree_reduce(
+        #     lambda acc, x: jnp.logical_or(acc, jnp.logical_or(jnp.isnan(x).any(), jnp.isinf(x).any())),
+        #     grads,
+        #     initializer=False
+        # )
 
-        new_state = train_state.apply_gradients(grads=grads)
+        # # Only apply gradients if they're valid
+        # new_state = jax.lax.cond(
+        #     has_nan_or_inf,
+        #     lambda _: train_state,  # Skip gradient update
+        #     lambda _: train_state.apply_gradients(grads=grads),
+        #     operand=None
+        # )
+
+        # new_state = train_state.apply_gradients(grads=grads)
 
         if train_state.dynamic_scale is not None:
             # if is_fin == False the gradients contain Inf/NaNs and optimizer state and
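
The guard introduced above ships commented out in this release; if enabled, it would detect non-finite gradients and skip the optimizer step for that batch. A runnable sketch of the same idea, written against a generic flax TrainState rather than flaxdiff's trainer internals:

```python
import jax
import jax.numpy as jnp

def safe_apply_gradients(train_state, grads):
    """Apply gradients only when every gradient leaf is finite."""
    has_nan_or_inf = jax.tree_util.tree_reduce(
        lambda acc, g: jnp.logical_or(
            acc, jnp.logical_or(jnp.isnan(g).any(), jnp.isinf(g).any())),
        grads,
        initializer=jnp.asarray(False),
    )
    # Under jit, both branches of lax.cond must return the same pytree
    # structure, which is why the skip branch returns train_state as-is.
    return jax.lax.cond(
        has_nan_or_inf,
        lambda _: train_state,                               # skip bad update
        lambda _: train_state.apply_gradients(grads=grads),  # normal step
        operand=None,
    )
```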
@@ -403,7 +403,6 @@ class SimpleTrainer:
         rng_state
     ):
         global_device_count = jax.device_count()
-        local_device_count = jax.local_device_count()
         process_index = jax.process_index()
         if self.distributed_training:
             global_device_indexes = jnp.arange(global_device_count)
@@ -434,11 +433,16 @@ class SimpleTrainer:
         # loss = jax.experimental.multihost_utils.process_allgather(loss)
         loss = jnp.mean(loss)  # Just to make sure it's a scalar value
 
-        if loss <= 1e-6:
+        if loss <= 1e-8:
             # If the loss is too low, we can assume the model has diverged
             print(colored(f"Loss too low at step {current_step} => {loss}", 'red'))
             # Reset the model to the old state
-            exit(1)
+            if self.best_state is not None:
+                print(colored(f"Resetting model to best state", 'red'))
+                train_state = self.best_state
+                loss = self.best_loss
+            else:
+                exit(1)
 
         epoch_loss += loss
         current_step += 1
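
This last trainer change replaces the hard `exit(1)` with a rollback: a mean loss at or below 1e-8 is treated as a divergence symptom (e.g. a collapsed model), and the trainer restores its tracked best checkpoint, aborting only when none exists. A standalone sketch of that recovery logic, where best_state and best_loss mirror the attributes SimpleTrainer is assumed to keep:

```python
def recover_from_divergence(train_state, loss, best_state, best_loss,
                            threshold=1e-8):
    """Roll back to the best-known state instead of aborting the run."""
    if loss <= threshold:  # suspiciously low loss => likely divergence
        if best_state is None:
            raise SystemExit(1)  # nothing to fall back to; stop the run
        return best_state, best_loss  # restore last good weights and loss
    return train_state, loss
```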
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: flaxdiff
-Version: 0.1.37.3
+Version: 0.1.37.4
 Summary: A versatile and easy to understand Diffusion library
 Author-email: Ashish Kumar Singh <ashishkmr472@gmail.com>
 License-Expression: MIT
@@ -20,7 +20,7 @@ flaxdiff/models/autoencoder/__init__.py,sha256=qY-7MldZpsfkF-_T2LqlRK7VHbqfmosz0
 flaxdiff/models/autoencoder/autoencoder.py,sha256=27_hYl0yXAdH9Mx4Xu9J79mSNo-FEKr9SxhVaS3ffn4,591
 flaxdiff/models/autoencoder/diffusers.py,sha256=JHeFLCxiHhu-QHwhKiCuKsQJn4AZumquiuxgZkiYGQ0,3643
 flaxdiff/models/autoencoder/simple_autoenc.py,sha256=UXHPgDmwGTnv3Uts6Zj3p9R9nJXnEiEXbllgarwDfXM,805
-flaxdiff/predictors/__init__.py,sha256=SKkYYRF9Wfgk2zhtZw4vCXOdOeRlrm2Mk6cvuaEvAzc,4403
+flaxdiff/predictors/__init__.py,sha256=S0R8_x-KST_cwaFKgBvaG4pwiMtrmgWjZCseyYfBPc4,4465
 flaxdiff/samplers/__init__.py,sha256=EY9v1pgwEoR64Kiz9K8fAR-4_ir9c03mYeY3hrpUNhE,308
 flaxdiff/samplers/common.py,sha256=7gKNY4mWVnLjtcioGLFD_Vwmxg9zJovUb8EcYWlc_GE,8833
 flaxdiff/samplers/ddim.py,sha256=hTjDm0SmIj-Tkc80QRATMcN_sKVhHbqZQboRQCAn4mY,569
@@ -40,10 +40,10 @@ flaxdiff/schedulers/linear.py,sha256=6003F5ISq1Wc0h6UAzY95MJgsDIKGMhBzbiVALpea0k
 flaxdiff/schedulers/sqrt.py,sha256=1F84ZgQPuoNMhe6yxGTR2G0h7dPOZtm4UDQOakbSsEU,445
 flaxdiff/trainer/__init__.py,sha256=T-vUVq4zHcMK6kpCsG4Gu8vn71q6lZD-lg-Ul7yKfEk,128
 flaxdiff/trainer/autoencoder_trainer.py,sha256=hxihkRL9WCIQVGOP-pc1jjjIUaRXDLcNo3_erTKsuWM,7049
-flaxdiff/trainer/diffusion_trainer.py,sha256=KVeXJ9ZQKcvD-O_hCJnxro0dQRuQe5ZVGGMEL4Lgm9k,12814
-flaxdiff/trainer/simple_trainer.py,sha256=lmRo8N0bMupIyS3ejPvPtxoskY_3GLC8iyJE6u4TIWc,21990
+flaxdiff/trainer/diffusion_trainer.py,sha256=oIvDco8nOZT0HSz2ZX5b8u3Y1_UhMlOECqt0vBLPn1Q,13567
+flaxdiff/trainer/simple_trainer.py,sha256=LScHQZCy5ksSC7n0GC0tjOXK-zptxpMJsC6Udf-nz18,22178
 flaxdiff/trainer/video_diffusion_trainer.py,sha256=gMkKpnKNTo8QhTx5ptEEkc7W5-7rzXIr9queU53hXyQ,2197
-flaxdiff-0.1.37.3.dist-info/METADATA,sha256=7U7SINGO_ZzsUeuTPi2LTMqxrj93Cvglyh1Q7D39zRM,23985
-flaxdiff-0.1.37.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-flaxdiff-0.1.37.3.dist-info/top_level.txt,sha256=-2-nXnfkJgSfkki1tjm5Faw6Dso7vhtdn2szwCdX5CQ,9
-flaxdiff-0.1.37.3.dist-info/RECORD,,
+flaxdiff-0.1.37.4.dist-info/METADATA,sha256=rdptpmTYbuDZlIcENjmgdYlHc7NA8FUDM_NRlfhAeWU,23985
+flaxdiff-0.1.37.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+flaxdiff-0.1.37.4.dist-info/top_level.txt,sha256=-2-nXnfkJgSfkki1tjm5Faw6Dso7vhtdn2szwCdX5CQ,9
+flaxdiff-0.1.37.4.dist-info/RECORD,,