PyPI - flaxdiff - Versions diffs - 0.2.7__py3-none-any.whl → 0.2.8__py3-none-any.whl - Mend

flaxdiff 0.2.7__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

flaxdiff/data/dataloaders.py +36 -24
flaxdiff/data/dataset_map.py +2 -2
flaxdiff/data/sources/base.py +12 -0
flaxdiff/data/sources/images.py +68 -11
flaxdiff/data/sources/videos.py +5 -0
flaxdiff/models/common.py +1 -70
flaxdiff/models/hilbert.py +617 -0
flaxdiff/models/simple_dit.py +476 -0
flaxdiff/models/simple_mmdit.py +861 -0
flaxdiff/models/simple_vit.py +278 -117
flaxdiff/trainer/general_diffusion_trainer.py +29 -10
flaxdiff/trainer/simple_trainer.py +113 -19
{flaxdiff-0.2.7.dist-info → flaxdiff-0.2.8.dist-info}/METADATA +1 -1
{flaxdiff-0.2.7.dist-info → flaxdiff-0.2.8.dist-info}/RECORD +16 -14
{flaxdiff-0.2.7.dist-info → flaxdiff-0.2.8.dist-info}/WHEEL +1 -1
flaxdiff/models/better_uvit.py +0 -380
{flaxdiff-0.2.7.dist-info → flaxdiff-0.2.8.dist-info}/top_level.txt +0 -0

flaxdiff/trainer/simple_trainer.py CHANGED Viewed

@@ -26,6 +26,7 @@ from flax.training.dynamic_scale import DynamicScale
 from flaxdiff.utils import RandomMarkovState
 from flax.training import dynamic_scale as dynamic_scale_lib
 from dataclasses import dataclass
+import shutil
 import gc
 PROCESS_COLOR_MAP = {
@@ -73,6 +74,76 @@ class SimpleTrainState(train_state.TrainState):
     metrics: Metrics
     dynamic_scale: dynamic_scale_lib.DynamicScale
+def move_contents_to_subdir(target_dir, new_subdir_name):
+    # --- 1. Validate Target Directory ---
+    if not os.path.isdir(target_dir):
+        print(f"Error: Target directory '{target_dir}' not found or is not a directory.")
+        return
+    # --- 2. Define Paths ---
+    # Construct the full path for the new subdirectory
+    new_subdir_path = os.path.join(target_dir, new_subdir_name)
+    # --- 3. Create New Subdirectory ---
+    try:
+        # Create the subdirectory.
+        # exist_ok=True prevents an error if the directory already exists.
+        os.makedirs(new_subdir_path, exist_ok=True)
+        print(f"Subdirectory '{new_subdir_path}' created or already exists.")
+    except OSError as e:
+        print(f"Error creating subdirectory '{new_subdir_path}': {e}")
+        return # Stop execution if subdirectory creation fails
+    # --- 4. List Contents of Target Directory ---
+    try:
+        items_to_move = os.listdir(target_dir)
+    except OSError as e:
+        print(f"Error listing contents of '{target_dir}': {e}")
+        return # Stop if we can't list directory contents
+    # --- 5. Move Items ---
+    print(f"Moving items from '{target_dir}' to '{new_subdir_path}'...")
+    moved_count = 0
+    error_count = 0
+    for item_name in items_to_move:
+        # Construct the full path of the item in the target directory
+        source_path = os.path.join(target_dir, item_name)
+        # IMPORTANT: Skip the newly created subdirectory itself!
+        if source_path == new_subdir_path:
+            continue
+        # Construct the destination path inside the new subdirectory
+        destination_path = os.path.join(new_subdir_path, item_name)
+        # Move the item
+        try:
+            shutil.move(source_path, destination_path)
+            # print(f"  Moved: '{item_name}'") # Uncomment for verbose output
+            moved_count += 1
+        except Exception as e:
+            print(f"  Error moving '{item_name}': {e}")
+            error_count += 1
+    print(f"\nOperation complete.")
+    print(f"  Successfully moved: {moved_count} item(s).")
+    if error_count > 0:
+        print(f"  Errors encountered: {error_count} item(s).")
+def load_from_checkpoint(
+    checkpoint_dir: str,
+):
+    try:
+        checkpointer = orbax.checkpoint.PyTreeCheckpointer()
+        options = orbax.checkpoint.CheckpointManagerOptions(create=False)
+        # Convert checkpoint_dir to absolute path
+        checkpoint_dir = os.path.abspath(checkpoint_dir)
+        manager = orbax.checkpoint.CheckpointManager(checkpoint_dir, checkpointer, options)
+        ckpt = manager.restore(checkpoint_dir)
+        # Extract as above
+        state, best_state = None, None
+        if 'state' in ckpt:
+            state = ckpt['state']
+        if 'best_state' in ckpt:
+            best_state = ckpt['best_state']
+        print(f"Loaded checkpoint from local dir {checkpoint_dir}")
+        return state, best_state
+    except Exception as e:
+        print(f"Warning: Failed to load checkpoint from local dir: {e}")
+        return None, None
 @dataclass
 class SimpleTrainer:
     state: SimpleTrainState
@@ -97,6 +168,7 @@ class SimpleTrainer:
                  checkpoint_step: int = None,
                  use_dynamic_scale: bool = False,
                  max_checkpoints_to_keep: int = 2,
+                 train_start_step_override: int = None,
                  ):
         if distributed_training is None or distributed_training is True:
             # Auto-detect if we are running on multiple devices
@@ -112,11 +184,32 @@ class SimpleTrainer:
         self.input_shapes = input_shapes
         self.checkpoint_base_path = checkpoint_base_path
+        load_directly_from_dir = False
         if wandb_config is not None and jax.process_index() == 0:
             import wandb
             run = wandb.init(resume='allow', **wandb_config)
             self.wandb = run
+            if 'id' in wandb_config:
+                # If resuming from a previous run, and train_start_step_override is not set,
+                # set the start step to the last step of the previous run
+                if train_start_step_override is None:
+                    train_start_step_override = run.summary['train/step'] + 1
+                print(f"Resuming from previous run {wandb_config['id']} with start step {train_start_step_override}")
+                # If load_from_checkpoint is not set, and an artifact is found, load the artifact
+                if load_from_checkpoint is None:
+                    api_run = wandb.Api().run(f"{wandb_config['entity']}/{wandb_config['project']}/{wandb_config['id']}")
+                    model_artifacts = [i for i in api_run.logged_artifacts() if i.type == 'model']
+                    if model_artifacts:
+                        artifact = model_artifacts[0]
+                        artifact_dir = artifact.download()
+                        print(f"Loading model from artifact {artifact.name} at {artifact_dir}")
+                        # Move the artifact's contents
+                        load_from_checkpoint = artifact_dir
+                        load_directly_from_dir = True
             # define our custom x axis metric
             self.wandb.define_metric("train/step")
             self.wandb.define_metric("train/epoch")
@@ -142,12 +235,16 @@ class SimpleTrainer:
             self.checkpoint_path(), async_checkpointer, options)
         if load_from_checkpoint is not None:
-            latest_epoch, latest_step, old_state, old_best_state, rngstate = self.load(load_from_checkpoint, checkpoint_step)
+            latest_step, old_state, old_best_state, rngstate = self.load(load_from_checkpoint, checkpoint_step, load_directly_from_dir)
         else:
-            latest_epoch, latest_step, old_state, old_best_state, rngstate = 0, 0, None, None, None
+            latest_step, old_state, old_best_state, rngstate = 0, None, None, None
         self.latest_step = latest_step
+        if train_start_step_override is not None:
+            self.latest_step = train_start_step_override
+            print(f"Overriding start step to {self.latest_step}")
         if rngstate:
             self.rngstate = RandomMarkovState(**rngstate)
         else:
@@ -239,15 +336,12 @@ class SimpleTrainer:
             os.makedirs(path)
         return path
-    def load(self, checkpoint_path=None, checkpoint_step=None):
-        if checkpoint_path is None:
-            checkpointer = self.checkpointer
-        else:
-            checkpointer = orbax.checkpoint.PyTreeCheckpointer()
-            options = orbax.checkpoint.CheckpointManagerOptions(
-                max_to_keep=4, create=False)
-            checkpointer = orbax.checkpoint.CheckpointManager(
-                checkpoint_path, checkpointer, options)
+    def load(self, checkpoint_path, checkpoint_step=None, load_directly_from_dir=False):
+        checkpointer = orbax.checkpoint.PyTreeCheckpointer()
+        options = orbax.checkpoint.CheckpointManagerOptions(
+            max_to_keep=4, create=False)
+        checkpointer = orbax.checkpoint.CheckpointManager(
+            checkpoint_path, checkpointer, options)
         if checkpoint_step is None:
             step = checkpointer.latest_step()
@@ -259,7 +353,8 @@ class SimpleTrainer:
             checkpoint_path if checkpoint_path else self.checkpoint_path(),
             f"{step}")
         self.loaded_checkpoint_path = loaded_checkpoint_path
-        ckpt = checkpointer.restore(step)
+        ckpt = checkpointer.restore(step) if not load_directly_from_dir else checkpointer.restore(checkpoint_path)
         state = ckpt['state']
         best_state = ckpt['best_state']
         rngstate = ckpt['rngs']
@@ -268,10 +363,8 @@ class SimpleTrainer:
         if self.best_loss == 0:
             # It cant be zero as that must have been some problem
             self.best_loss = 1e9
-        current_epoch = ckpt.get('epoch', step) # Must be a checkpoint from an older version which used epochs instead of steps
-        print(
-            f"Loaded model from checkpoint at epoch {current_epoch} step {step}", ckpt['best_loss'])
-        return current_epoch, step, state, best_state, rngstate
+        print(f"Loaded model from checkpoint at step {step}", ckpt['best_loss'])
+        return step, state, best_state, rngstate
     def save(self, epoch=0, step=0, state=None, rngstate=None):
         print(f"Saving model at epoch {epoch} step {step}")
@@ -507,6 +600,7 @@ class SimpleTrainer:
     def fit(self, data, train_steps_per_epoch, epochs, train_step_args={}, val_steps_per_epoch=5, validation_step_args={}):
         train_ds = iter(data['train']())
+        val_ds = data.get('val', data.get('test', None))()
         train_step = self._define_train_step(**train_step_args)
         val_step = self._define_validation_step(**validation_step_args)
         train_state = self.state
@@ -520,7 +614,7 @@ class SimpleTrainer:
             self.validation_loop(
                 train_state,
                 val_step,
-                data.get('val', data.get('test', None)),
+                val_ds,
                 val_steps_per_epoch,
                 self.latest_step,
             )
@@ -571,11 +665,11 @@ class SimpleTrainer:
                 self.validation_loop(
                     train_state,
                     val_step,
-                    data.get('val', data.get('test', None)),
+                    val_ds,
                     val_steps_per_epoch,
                     current_step,
                 )
                 print(colored(f"Validation done on process index {process_index}", PROCESS_COLOR_MAP[process_index]))
-        self.save(epochs)
+        self.save(epochs)#
         return self.state

{flaxdiff-0.2.7.dist-info → flaxdiff-0.2.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: flaxdiff
-Version: 0.2.7
+Version: 0.2.8
 Summary: A versatile and easy to understand Diffusion library
 Author-email: Ashish Kumar Singh <ashishkmr472@gmail.com>
 License-Expression: MIT

{flaxdiff-0.2.7.dist-info → flaxdiff-0.2.8.dist-info}/RECORD RENAMED Viewed

@@ -2,16 +2,16 @@ flaxdiff/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 flaxdiff/utils.py,sha256=DmlWUY1FGz4ESxIHaPQJf92CHjsdMjyDd651wFUtyNg,8838
 flaxdiff/data/__init__.py,sha256=8W5y7NyAOWtpLi8WRawk4VYeE3DMDnM3B_jKPD8BoFQ,143
 flaxdiff/data/benchmark_decord.py,sha256=x56Db1VPmziv_9KJvWdfS0O7cffsYkF5tt5WvldOKc0,13720
-flaxdiff/data/dataloaders.py,sha256=LV8ugqoB86yihfYeOJZHHdRZJNmZ63A2NQkdILMR9QA,23564
-flaxdiff/data/dataset_map.py,sha256=_6SYnmrYO-URDd8vPAmALTV6r0eMGWWmwUtsdjKGXnA,5072
+flaxdiff/data/dataloaders.py,sha256=HQR0rsLNYXRPBmdOBKFCc3UfWsmSbSO_-dOQHCbu_VA,23966
+flaxdiff/data/dataset_map.py,sha256=Dz_suGz23Cy7RfWt0FDRX7Q3NTB5SAw2UNHO_-p0qiM,5098
 flaxdiff/data/online_loader.py,sha256=t1jEhdB6gWTlwx68ehj1ol_PrImbwXYiRlrJPCmNgCM,35701
 flaxdiff/data/sources/audio_utils.py,sha256=X27gG1yQt_abVOYgMtruYmZD7-8_uQCRhhTSpn4clkI,4514
 flaxdiff/data/sources/av_example.py,sha256=RIcbVKqckFqbfnV65NQotzIBxjdDuM67kD1nY8fqw5Q,3826
 flaxdiff/data/sources/av_utils.py,sha256=LCr9MJNurOaoxY-sjzkLqJS_MlX0x3gRSlKAVIglAU0,24045
-flaxdiff/data/sources/base.py,sha256=uhF0odJSYRy0SLw1xnI9Q_q_xiVht2DmEYcX1j9AWT4,4246
-flaxdiff/data/sources/images.py,sha256=RFLtKW1xzw6ZPVXtCMmnTg1MPb8dc7rP77rZWbK7qpo,11796
+flaxdiff/data/sources/base.py,sha256=4Rm9pCtXxzoB8FO0lkDHsrX3ULoU_PNNcid978e6ir0,4610
+flaxdiff/data/sources/images.py,sha256=71TzTVbPzV-Md3-1Lk4eWfb11w6aaO01OClwK_SiCSM,14708
 flaxdiff/data/sources/utils.py,sha256=kFzM4_kPoThbAu54ulABmEDAR33tR50NgzXIpC0Dzjk,7316
-flaxdiff/data/sources/videos.py,sha256=CVpOH6A4P2D8iv3gZIhd2GB5ATUD8Vsm_wVYbbugWD4,9359
+flaxdiff/data/sources/videos.py,sha256=NkxwEruNpAwDCM53q4WurQ802gSjQMOqjNLxYOqjoNE,9545
 flaxdiff/data/sources/voxceleb2.py,sha256=BoKfat_hsw6ObDyyaiQmPbBzuFiqgCGlgAZmf-t5Iz8,18621
 flaxdiff/inference/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 flaxdiff/inference/pipeline.py,sha256=8S30FAlXEjvrDd87H-qdD6biySQZ3cJUflU8gdmPxig,9223
@@ -27,12 +27,14 @@ flaxdiff/metrics/ssim.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 flaxdiff/metrics/utils.py,sha256=YuuOfqvqgIjsceupwNeJ59vQ2TnGeNMIyKdkIqOmoNg,1702
 flaxdiff/models/__init__.py,sha256=amtDF07DfiAdnZsvWX4eaW79nwNEU1s8Zb4PB3ewtg4,118
 flaxdiff/models/attention.py,sha256=YkED3_MRTjI9aTMTTQdsuReHhG8MK0Z4OVuU2j8ZAHs,13524
-flaxdiff/models/better_uvit.py,sha256=wPxvYBjuWQH6-OqW79VedzN6_WRY1f2mysPxaciWLww,15598
-flaxdiff/models/common.py,sha256=0j9AAjGPgkBLHo2DlYj0R6OsUNw2QaoDjaXSKq2mqkA,12647
+flaxdiff/models/common.py,sha256=QpciwuJldvLUwyAyWBQqiPPGVI-c9qLR7h7C1YoRX7w,10510
 flaxdiff/models/favor_fastattn.py,sha256=79Ew1nqarsNLPzZaBSd1ILORzJr74CupYeqGiCQK5E4,27689
 flaxdiff/models/general.py,sha256=7xMME6KVKQY8sScyHYH4f-Kek4j1pRfplKShFXwVZd4,587
+flaxdiff/models/hilbert.py,sha256=AjlAv49dL6UAYWslMJfCMLiFqY4kTgpiUWr2nc1mk34,24823
+flaxdiff/models/simple_dit.py,sha256=Hc2jLOZCYSDm6x88m3bGYu-OKge1TukiQPSdlaO68rE,19667
+flaxdiff/models/simple_mmdit.py,sha256=RmOq6LbfDBUUEib6MSAURujxn9iHgdh77a6ntNsWI2w,36210
 flaxdiff/models/simple_unet.py,sha256=pjeixszG_6gEY5PNFbQ7KbOyg4z5bfn4RUbINCJexOM,10758
-flaxdiff/models/simple_vit.py,sha256=no0o3os8dEKGU5I4PMBJlXq6XKjhUex8S8uZ9BDPZS4,7971
+flaxdiff/models/simple_vit.py,sha256=QEHPyaQIYhqSYrD6eb65X70jQL-y09nRT8Yc4b5Jq6Q,15181
 flaxdiff/models/unet_3d.py,sha256=LF0PMxBKGU-_lAMtO_Coxy1yRE02yKKdgb7i6YZxI_4,20163
 flaxdiff/models/unet_3d_blocks.py,sha256=lRYDc9X1VEu54Kg7xEEphXYiQ09tabPXKi-hEcKFYug,19687
 flaxdiff/models/autoencoder/__init__.py,sha256=qY-7MldZpsfkF-_T2LqlRK7VHbqfmosz0NmvzDlBkOk,78
@@ -60,9 +62,9 @@ flaxdiff/schedulers/sqrt.py,sha256=mCd_szmOqF6vqQKiAiEOqV_3eBIPGYrW3VxK0o4rBuo,4
 flaxdiff/trainer/__init__.py,sha256=xSoierfi26gxfgxlNnwvyyPmuPAJ--5i3mEHxt3S-AE,215
 flaxdiff/trainer/autoencoder_trainer.py,sha256=2FP2P-k9c0n_k3eT0trkq73dQrHRdBj9ObK1idcyhSw,6996
 flaxdiff/trainer/diffusion_trainer.py,sha256=reQEVWKTqKAeyCMQ-curPOfSRmBKxKooK8EVtUuorcM,14599
-flaxdiff/trainer/general_diffusion_trainer.py,sha256=BeDpJzgR8bUClJI4epQXlAul27MwiSfRW0lIBZSiPWk,28342
-flaxdiff/trainer/simple_trainer.py,sha256=raLS1shwpjJBT_bYXLAB2E4kA9MbwasDTzDTUqfCCUc,24312
-flaxdiff-0.2.7.dist-info/METADATA,sha256=nwglJYeF2lH_MNq5PeFLR8TSPU-I9tzJUcBbTaLYxRM,24057
-flaxdiff-0.2.7.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
-flaxdiff-0.2.7.dist-info/top_level.txt,sha256=-2-nXnfkJgSfkki1tjm5Faw6Dso7vhtdn2szwCdX5CQ,9
-flaxdiff-0.2.7.dist-info/RECORD,,
+flaxdiff/trainer/general_diffusion_trainer.py,sha256=FUvc--3ibRAjrYiKbA-FyLqKhusakxeNOa6UJZaK4SU,29307
+flaxdiff/trainer/simple_trainer.py,sha256=Hdltuo3lgF61N04Lxc7L3z6NLveW4_h1ff7_5mu3Wbg,28730
+flaxdiff-0.2.8.dist-info/METADATA,sha256=y2jLjsEkR-GKvLWuGzlyBrk1SNM6tCPT0Oc7vRZC7_I,24057
+flaxdiff-0.2.8.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
+flaxdiff-0.2.8.dist-info/top_level.txt,sha256=-2-nXnfkJgSfkki1tjm5Faw6Dso7vhtdn2szwCdX5CQ,9
+flaxdiff-0.2.8.dist-info/RECORD,,

{flaxdiff-0.2.7.dist-info → flaxdiff-0.2.8.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.1.0)
+Generator: setuptools (80.3.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

flaxdiff/models/better_uvit.py DELETED Viewed

@@ -1,380 +0,0 @@
-# flaxdiff/models/better_uvit.py
-import jax
-import jax.numpy as jnp
-from flax import linen as nn
-from typing import Callable, Any, Optional, Tuple, Sequence, Union
-import einops
-from functools import partial
-# Re-use existing components if they are suitable
-from .common import kernel_init, FourierEmbedding, TimeProjection, hilbert_indices, inverse_permutation
-from .attention import NormalAttention # Using NormalAttention for RoPE integration
-from flax.typing import Dtype, PrecisionLike
-# --- Rotary Positional Embedding (RoPE) ---
-# Adapted from https://github.com/google-deepmind/ring_attention/blob/main/ring_attention/layers/rotary.py
-def _rotate_half(x: jax.Array) -> jax.Array:
-  """Rotates half the hidden dims of the input."""
-  x1 = x[..., : x.shape[-1] // 2]
-  x2 = x[..., x.shape[-1] // 2 :]
-  return jnp.concatenate((-x2, x1), axis=-1)
-def apply_rotary_embedding(
-    x: jax.Array, freqs_cis: jax.Array
-) -> jax.Array:
-  """Applies rotary embedding to the input tensor using rotate_half method."""
-  # x shape: [..., Sequence, Dimension] e.g. [B, H, S, D] or [B, S, D]
-  # freqs_cis shape: complex [Sequence, Dimension / 2]
-  # Extract cos and sin from the complex freqs_cis
-  cos_freqs = jnp.real(freqs_cis) # Shape [S, D/2]
-  sin_freqs = jnp.imag(freqs_cis) # Shape [S, D/2]
-  # Expand dims for broadcasting: [1, 1, S, D/2] or [1, S, D/2]
-  if x.ndim == 4: # [B, H, S, D]
-      cos_freqs = jnp.expand_dims(cos_freqs, axis=(0, 1))
-      sin_freqs = jnp.expand_dims(sin_freqs, axis=(0, 1))
-  elif x.ndim == 3: # [B, S, D]
-      cos_freqs = jnp.expand_dims(cos_freqs, axis=0)
-      sin_freqs = jnp.expand_dims(sin_freqs, axis=0)
-  # Duplicate cos and sin for the full dimension D
-  # Shape becomes [..., S, D]
-  cos_freqs = jnp.concatenate([cos_freqs, cos_freqs], axis=-1)
-  sin_freqs = jnp.concatenate([sin_freqs, sin_freqs], axis=-1)
-  # Apply rotation: x * cos + rotate_half(x) * sin
-  x_rotated = x * cos_freqs + _rotate_half(x) * sin_freqs
-  return x_rotated.astype(x.dtype)
-class RotaryEmbedding(nn.Module):
-  dim: int # Dimension of the head
-  max_seq_len: int = 2048
-  base: int = 10000
-  dtype: Dtype = jnp.float32
-  def setup(self):
-    inv_freq = 1.0 / (
-        self.base ** (jnp.arange(0, self.dim, 2, dtype=jnp.float32) / self.dim)
-    )
-    t = jnp.arange(self.max_seq_len, dtype=jnp.float32)
-    freqs = jnp.outer(t, inv_freq) # Shape: [max_seq_len, dim / 2]
-    # Precompute the complex form: cos(theta) + i * sin(theta)
-    self.freqs_cis_complex = jnp.cos(freqs) + 1j * jnp.sin(freqs)
-    # Shape: [max_seq_len, dim / 2]
-  def __call__(self, seq_len: int):
-    if seq_len > self.max_seq_len:
-        raise ValueError(f"Sequence length {seq_len} exceeds max_seq_len {self.max_seq_len}")
-    # Return complex shape [seq_len, dim / 2]
-    return self.freqs_cis_complex[:seq_len, :]
-# --- Attention with RoPE ---
-class RoPEAttention(NormalAttention):
-    rope_emb: RotaryEmbedding
-    @nn.compact
-    def __call__(self, x, context=None, freqs_cis=None):
-        # x has shape [B, H, W, C] or [B, S, C]
-        orig_x_shape = x.shape
-        is_4d = len(orig_x_shape) == 4
-        if is_4d:
-            B, H, W, C = x.shape
-            seq_len = H * W
-            x = x.reshape((B, seq_len, C))
-        else:
-            B, seq_len, C = x.shape
-        context = x if context is None else context
-        if len(context.shape) == 4:
-            _B, _H, _W, _C = context.shape
-            context_seq_len = _H * _W
-            context = context.reshape((B, context_seq_len, _C))
-        else:
-            _B, context_seq_len, _C = context.shape
-        query = self.query(x) # [B, S, H, D]
-        key = self.key(context) # [B, S_ctx, H, D]
-        value = self.value(context) # [B, S_ctx, H, D]
-        # Apply RoPE to query and key
-        if freqs_cis is not None:
-            # Permute to [B, H, S, D] for RoPE application if needed by apply_rotary_embedding
-            query = einops.rearrange(query, 'b s h d -> b h s d')
-            key = einops.rearrange(key, 'b s h d -> b h s d')
-            query = apply_rotary_embedding(query, freqs_cis)
-            key = apply_rotary_embedding(key, freqs_cis) # Apply to key as well
-            # Permute back to [B, S, H, D] for dot_product_attention
-            query = einops.rearrange(query, 'b h s d -> b s h d')
-            key = einops.rearrange(key, 'b h s d -> b s h d')
-        hidden_states = nn.dot_product_attention(
-            query, key, value, dtype=self.dtype, broadcast_dropout=False,
-            dropout_rng=None, precision=self.precision, force_fp32_for_softmax=self.force_fp32_for_softmax,
-            deterministic=True
-        ) # Output shape [B, S, H, D]
-        proj = self.proj_attn(hidden_states) # Output shape [B, S, C]
-        if is_4d:
-            proj = proj.reshape(orig_x_shape) # Reshape back if input was 4D
-        return proj
-# --- adaLN-Zero ---
-class AdaLNZero(nn.Module):
-    features: int
-    dtype: Optional[Dtype] = None
-    precision: PrecisionLike = None
-    norm_epsilon: float = 1e-5 # Standard LayerNorm epsilon
-    @nn.compact
-    def __call__(self, x, conditioning):
-        # Project conditioning signal to get scale and shift parameters
-        # Conditioning shape: [B, D_cond] -> [B, 1, ..., 1, 6 * features] for broadcasting
-        # Or [B, 1, 6*features] if x is [B, S, F]
-        # Ensure conditioning has seq dim if x does
-        if x.ndim == 3 and conditioning.ndim == 2: # x=[B,S,F], cond=[B,D_cond]
-            conditioning = jnp.expand_dims(conditioning, axis=1) # cond=[B,1,D_cond]
-        # Project conditioning to get 6 params per feature (scale_mlp, shift_mlp, gate_mlp, scale_attn, shift_attn, gate_attn)
-        # Using nn.DenseGeneral for flexibility if needed, but nn.Dense is fine if cond is [B, D_cond] or [B, 1, D_cond]
-        ada_params = nn.Dense(
-            features=6 * self.features,
-            dtype=self.dtype,
-            precision=self.precision,
-            kernel_init=nn.initializers.zeros, # Initialize projection to zero (Zero init)
-            name="ada_proj"
-        )(conditioning)
-        # Split into scale, shift, gate for MLP and Attention
-        scale_mlp, shift_mlp, gate_mlp, scale_attn, shift_attn, gate_attn = jnp.split(ada_params, 6, axis=-1)
-        # Apply Layer Normalization
-        norm = nn.LayerNorm(epsilon=self.norm_epsilon, use_scale=False, use_bias=False, dtype=self.dtype)
-        # norm = nn.RMSNorm(epsilon=self.norm_epsilon, dtype=self.dtype) # Alternative: RMSNorm
-        norm_x = norm(x)
-        # Modulate for Attention path
-        x_attn = norm_x * (1 + scale_attn) + shift_attn
-        # Modulate for MLP path
-        x_mlp = norm_x * (1 + scale_mlp) + shift_mlp
-        # Return modulated outputs and gates
-        return x_attn, gate_attn, x_mlp, gate_mlp
-# --- DiT Block ---
-class DiTBlock(nn.Module):
-    features: int
-    num_heads: int
-    mlp_ratio: int = 4
-    dropout_rate: float = 0.0 # Typically dropout is not used in diffusion models
-    dtype: Optional[Dtype] = None
-    precision: PrecisionLike = None
-    use_flash_attention: bool = False # Keep option, but RoPEAttention uses NormalAttention base
-    force_fp32_for_softmax: bool = True
-    norm_epsilon: float = 1e-5
-    rope_emb: RotaryEmbedding # Pass RoPE module
-    def setup(self):
-        hidden_features = int(self.features * self.mlp_ratio)
-        self.ada_ln_zero = AdaLNZero(self.features, dtype=self.dtype, precision=self.precision, norm_epsilon=self.norm_epsilon)
-        # Use RoPEAttention
-        self.attention = RoPEAttention(
-            query_dim=self.features,
-            heads=self.num_heads,
-            dim_head=self.features // self.num_heads,
-            dtype=self.dtype,
-            precision=self.precision,
-            use_bias=True, # Bias is common in DiT attention proj
-            force_fp32_for_softmax=self.force_fp32_for_softmax,
-            rope_emb=self.rope_emb # Pass RoPE module instance
-        )
-        # Standard MLP block
-        self.mlp = nn.Sequential([
-            nn.Dense(features=hidden_features, dtype=self.dtype, precision=self.precision),
-            nn.gelu,
-            nn.Dense(features=self.features, dtype=self.dtype, precision=self.precision)
-        ])
-    @nn.compact
-    def __call__(self, x, conditioning, freqs_cis):
-        # x shape: [B, S, F]
-        # conditioning shape: [B, D_cond]
-        residual = x
-        # Apply adaLN-Zero to get modulated inputs and gates
-        x_attn, gate_attn, x_mlp, gate_mlp = self.ada_ln_zero(x, conditioning)
-        # Attention block
-        attn_output = self.attention(x_attn, context=None, freqs_cis=freqs_cis) # Self-attention only
-        x = residual + gate_attn * attn_output
-        # MLP block
-        mlp_output = self.mlp(x_mlp)
-        x = x + gate_mlp * mlp_output
-        return x
-# --- Patch Embedding (reuse or define if needed) ---
-# Assuming PatchEmbedding exists in simple_vit.py and is suitable
-from .simple_vit import PatchEmbedding, unpatchify
-# --- Better UViT (DiT Style) ---
-class BetterUViT(nn.Module):
-    output_channels: int = 3
-    patch_size: int = 16
-    emb_features: int = 768
-    num_layers: int = 12
-    num_heads: int = 12
-    mlp_ratio: int = 4
-    dropout_rate: float = 0.0 # Typically 0 for diffusion
-    dtype: Optional[Dtype] = None
-    precision: PrecisionLike = None
-    use_flash_attention: bool = False # Passed down, but RoPEAttention uses NormalAttention
-    force_fp32_for_softmax: bool = True
-    norm_epsilon: float = 1e-5
-    learn_sigma: bool = False # Option to predict sigma like in DiT paper
-    use_hilbert: bool = False  # Toggle Hilbert patch reorder
-    def setup(self):
-        self.patch_embed = PatchEmbedding(
-            patch_size=self.patch_size,
-            embedding_dim=self.emb_features,
-            dtype=self.dtype,
-            precision=self.precision
-        )
-        # Time embedding projection
-        self.time_embed = nn.Sequential([
-            FourierEmbedding(features=self.emb_features),
-            TimeProjection(features=self.emb_features * self.mlp_ratio), # Project to MLP dim
-            nn.Dense(features=self.emb_features, dtype=self.dtype, precision=self.precision) # Final projection
-        ])
-        # Text context projection (if used)
-        # Assuming textcontext is already projected to some dimension, project it to match emb_features
-        # This might need adjustment based on how text context is provided
-        self.text_proj = nn.Dense(features=self.emb_features, dtype=self.dtype, precision=self.precision, name="text_context_proj")
-        # Rotary Positional Embedding
-        # Max length needs to be estimated or set large enough.
-        # For images, seq len = (H/P) * (W/P). Example: 256/16 * 256/16 = 16*16 = 256
-        # Add 1 if a class token is used, or more for text tokens if concatenated.
-        # Let's assume max seq len accommodates patches + time + text tokens if needed, or just patches.
-        # If only patches use RoPE, max_len = max_image_tokens
-        # If time/text are concatenated *before* blocks, max_len needs to include them.
-        # DiT typically applies PE only to patch tokens. Let's follow that.
-        # max_len should be max number of patches.
-        # Example: max image size 512x512, patch 16 -> (512/16)^2 = 32^2 = 1024 patches
-        self.rope = RotaryEmbedding(dim=self.emb_features // self.num_heads, max_seq_len=4096, dtype=self.dtype) # Dim per head
-        # Transformer Blocks
-        self.blocks = [
-            DiTBlock(
-                features=self.emb_features,
-                num_heads=self.num_heads,
-                mlp_ratio=self.mlp_ratio,
-                dropout_rate=self.dropout_rate,
-                dtype=self.dtype,
-                precision=self.precision,
-                use_flash_attention=self.use_flash_attention,
-                force_fp32_for_softmax=self.force_fp32_for_softmax,
-                norm_epsilon=self.norm_epsilon,
-                rope_emb=self.rope, # Pass RoPE instance
-                name=f"dit_block_{i}"
-            ) for i in range(self.num_layers)
-        ]
-        # Final Layer (Normalization + Linear Projection)
-        self.final_norm = nn.LayerNorm(epsilon=self.norm_epsilon, dtype=self.dtype, name="final_norm")
-        # self.final_norm = nn.RMSNorm(epsilon=self.norm_epsilon, dtype=self.dtype, name="final_norm")
-        # Predict patch pixels + potentially sigma
-        output_dim = self.patch_size * self.patch_size * self.output_channels
-        if self.learn_sigma:
-            output_dim *= 2 # Predict both mean and variance (or log_variance)
-        self.final_proj = nn.Dense(
-            features=output_dim,
-            dtype=self.dtype,
-            precision=self.precision,
-            kernel_init=nn.initializers.zeros, # Initialize final layer to zero
-            name="final_proj"
-        )
-    @nn.compact
-    def __call__(self, x, temb, textcontext=None):
-        B, H, W, C = x.shape
-        assert H % self.patch_size == 0 and W % self.patch_size == 0, "Image dimensions must be divisible by patch size"
-        # 1. Patch Embedding
-        patches = self.patch_embed(x) # Shape: [B, num_patches, emb_features]
-        num_patches = patches.shape[1]
-        # Optional Hilbert reorder
-        if self.use_hilbert:
-            idx = hilbert_indices(H // self.patch_size, W // self.patch_size)
-            inv_idx = inverse_permutation(idx)
-            patches = patches[:, idx, :]
-        # replace x with patches
-        x_seq = patches
-        # 2. Prepare Conditioning Signal (Time + Text Context)
-        t_emb = self.time_embed(temb) # Shape: [B, emb_features]
-        cond_emb = t_emb
-        if textcontext is not None:
-            text_emb = self.text_proj(textcontext) # Shape: [B, num_text_tokens, emb_features]
-            # Pool or select text embedding (e.g., mean pool or use CLS token)
-            # Assuming mean pooling for simplicity
-            text_emb_pooled = jnp.mean(text_emb, axis=1) # Shape: [B, emb_features]
-            cond_emb = cond_emb + text_emb_pooled # Combine time and text embeddings
-        # 3. Apply RoPE
-        # Get RoPE frequencies for the sequence length (number of patches)
-        freqs_cis = self.rope(seq_len=num_patches) # Shape [num_patches, D_head/2]
-        # 4. Apply Transformer Blocks with adaLN-Zero conditioning
-        for block in self.blocks:
-            x_seq = block(x_seq, conditioning=cond_emb, freqs_cis=freqs_cis)
-        # 5. Final Layer
-        x_out = self.final_norm(x_seq)
-        x_out = self.final_proj(x_out) # Shape: [B, num_patches, patch_pixels (*2 if learn_sigma)]
-        # Optional Hilbert inverse reorder
-        if self.use_hilbert:
-            x_out = x_out[:, inv_idx, :]
-        # 6. Unpatchify
-        if self.learn_sigma:
-            # Split into mean and variance predictions
-            x_mean, x_logvar = jnp.split(x_out, 2, axis=-1)
-            x = unpatchify(x_mean, channels=self.output_channels)
-            # Return both mean and logvar if needed by the loss function
-            # For now, just returning the mean prediction like standard diffusion models
-            # logvar = unpatchify(x_logvar, channels=self.output_channels)
-            # return x, logvar
-            return x
-        else:
-            x = unpatchify(x_out, channels=self.output_channels) # Shape: [B, H, W, C]
-            return x

{flaxdiff-0.2.7.dist-info → flaxdiff-0.2.8.dist-info}/top_level.txt RENAMED Viewed

File without changes

flaxdiff 0.2.7__py3-none-any.whl → 0.2.8__py3-none-any.whl

flaxdiff 0.2.7__py3-none-any.whl → 0.2.8__py3-none-any.whl