PyPI - flaxdiff - Versions diffs - 0.2.7__py3-none-any.whl → 0.2.8__py3-none-any.whl - Mend

flaxdiff 0.2.7__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

flaxdiff/data/dataloaders.py +36 -24
flaxdiff/data/dataset_map.py +2 -2
flaxdiff/data/sources/base.py +12 -0
flaxdiff/data/sources/images.py +68 -11
flaxdiff/data/sources/videos.py +5 -0
flaxdiff/models/common.py +1 -70
flaxdiff/models/hilbert.py +617 -0
flaxdiff/models/simple_dit.py +476 -0
flaxdiff/models/simple_mmdit.py +861 -0
flaxdiff/models/simple_vit.py +278 -117
flaxdiff/trainer/general_diffusion_trainer.py +29 -10
flaxdiff/trainer/simple_trainer.py +113 -19
{flaxdiff-0.2.7.dist-info → flaxdiff-0.2.8.dist-info}/METADATA +1 -1
{flaxdiff-0.2.7.dist-info → flaxdiff-0.2.8.dist-info}/RECORD +16 -14
{flaxdiff-0.2.7.dist-info → flaxdiff-0.2.8.dist-info}/WHEEL +1 -1
flaxdiff/models/better_uvit.py +0 -380
{flaxdiff-0.2.7.dist-info → flaxdiff-0.2.8.dist-info}/top_level.txt +0 -0

flaxdiff/models/simple_vit.py CHANGED Viewed

@@ -10,15 +10,19 @@ from flaxdiff.models.simple_unet import FourierEmbedding, TimeProjection, ConvLa
 import einops
 from flax.typing import Dtype, PrecisionLike
 from functools import partial
-from .common import hilbert_indices, inverse_permutation
+from .hilbert import hilbert_indices, inverse_permutation, hilbert_patchify, hilbert_unpatchify
 def unpatchify(x, channels=3):
     patch_size = int((x.shape[2] // channels) ** 0.5)
     h = w = int(x.shape[1] ** .5)
-    assert h * w == x.shape[1] and patch_size ** 2 * channels == x.shape[2], f"Invalid shape: {x.shape}, should be {h*w}, {patch_size**2*channels}"
-    x = einops.rearrange(x, 'B (h w) (p1 p2 C) -> B (h p1) (w p2) C', h=h, p1=patch_size, p2=patch_size)
+    assert h * w == x.shape[1] and patch_size ** 2 * \
+        channels == x.shape[2], f"Invalid shape: {x.shape}, should be {h*w}, {patch_size**2*channels}"
+    x = einops.rearrange(
+        x, 'B (h w) (p1 p2 C) -> B (h p1) (w p2) C', h=h, p1=patch_size, p2=patch_size)
     return x
 class PatchEmbedding(nn.Module):
     patch_size: int
     embedding_dim: int
@@ -29,15 +33,16 @@ class PatchEmbedding(nn.Module):
     def __call__(self, x):
         batch, height, width, channels = x.shape
         assert height % self.patch_size == 0 and width % self.patch_size == 0, "Image dimensions must be divisible by patch size"
-        x = nn.Conv(features=self.embedding_dim,
-                    kernel_size=(self.patch_size, self.patch_size),
+        x = nn.Conv(features=self.embedding_dim,
+                    kernel_size=(self.patch_size, self.patch_size),
                     strides=(self.patch_size, self.patch_size),
                     dtype=self.dtype,
                     precision=self.precision)(x)
         x = jnp.reshape(x, (batch, -1, self.embedding_dim))
         return x
 class PositionalEncoding(nn.Module):
     max_len: int
     embedding_dim: int
@@ -49,138 +54,294 @@ class PositionalEncoding(nn.Module):
                         (1, self.max_len, self.embedding_dim))
         return x + pe[:, :x.shape[1], :]
 class UViT(nn.Module):
-    output_channels:int=3
+    output_channels: int = 3
     patch_size: int = 16
-    emb_features:int=768
-    num_layers: int = 12
+    emb_features: int = 768
+    num_layers: int = 12  # Should be even for U-Net structure
     num_heads: int = 12
-    dropout_rate: float = 0.1
-    use_projection: bool = False
-    use_flash_attention: bool = False
+    dropout_rate: float = 0.1  # Dropout is often 0 in diffusion models
+    use_projection: bool = False  # In TransformerBlock MLP
+    use_flash_attention: bool = False  # Passed to TransformerBlock
+    # Passed to TransformerBlock (likely False for UViT)
     use_self_and_cross: bool = False
-    force_fp32_for_softmax: bool = True
-    activation:Callable = jax.nn.swish
-    norm_groups:int=8
-    dtype: Optional[Dtype] = None
+    force_fp32_for_softmax: bool = True  # Passed to TransformerBlock
+    # Used in final convs if add_residualblock_output
+    activation: Callable = jax.nn.swish
+    norm_groups: int = 8
+    dtype: Optional[Dtype] = None  # e.g., jnp.float32 or jnp.bfloat16
     precision: PrecisionLike = None
     add_residualblock_output: bool = False
-    norm_inputs: bool = False
-    explicitly_add_residual: bool = True
-    norm_epsilon: float = 1e-4 # Added epsilon parameter, increased default
-    use_hilbert: bool = False # Toggle Hilbert patch reorder
+    norm_inputs: bool = False  # Passed to TransformerBlock
+    explicitly_add_residual: bool = True  # Passed to TransformerBlock
+    norm_epsilon: float = 1e-5  # Adjusted default
+    use_hilbert: bool = False  # Toggle Hilbert patch reorder
+    use_remat: bool = False  # Add flag to use remat
     def setup(self):
+        assert self.num_layers % 2 == 0, "num_layers must be even for U-Net structure"
+        half_layers = self.num_layers // 2
+        # --- Norm Layer ---
         if self.norm_groups > 0:
-            self.norm = partial(nn.GroupNorm, self.norm_groups, epsilon=self.norm_epsilon)
+            # GroupNorm needs features arg, which varies. Define partial here, apply in __call__?
+            # Or maybe use LayerNorm/RMSNorm consistently? Let's use LayerNorm for simplicity here.
+            # If GroupNorm is essential, it needs careful handling with changing feature sizes.
+            # self.norm_factory = partial(nn.GroupNorm, self.norm_groups, epsilon=self.norm_epsilon, dtype=self.dtype)
+            print(f"Warning: norm_groups > 0 not fully supported with standard LayerNorm fallback in UViT setup. Using LayerNorm.")
+            self.norm_factory = partial(
+                nn.LayerNorm, epsilon=self.norm_epsilon, dtype=self.dtype)
+        else:
+            # Use LayerNorm or RMSNorm for sequence normalization
+            # self.norm_factory = partial(nn.RMSNorm, epsilon=self.norm_epsilon, dtype=self.dtype)
+            self.norm_factory = partial(
+                nn.LayerNorm, epsilon=self.norm_epsilon, dtype=self.dtype)
+        # --- Input Path ---
+        self.patch_embed = PatchEmbedding(
+            patch_size=self.patch_size,
+            embedding_dim=self.emb_features,
+            dtype=self.dtype,
+            precision=self.precision,
+            name="patch_embed"
+        )
+        if self.use_hilbert:
+            # Projection layer needed after raw Hilbert patches
+            self.hilbert_proj = nn.Dense(
+                features=self.emb_features,
+                dtype=self.dtype,
+                precision=self.precision,
+                name="hilbert_projection"
+            )
+        # Positional encoding (learned) - applied only to patch tokens
+        # Max length needs to accommodate max possible patches
+        # Example: 512x512 image, patch 16 -> (512/16)^2 = 1024 patches
+        # Estimate max patches, adjust if needed
+        max_patches = (512 // self.patch_size)**2
+        self.pos_encoding = self.param('pos_encoding',
+                                       # Standard init for ViT pos embeds
+                                       jax.nn.initializers.normal(stddev=0.02),
+                                       (1, max_patches, self.emb_features))
+        # --- Conditioning ---
+        self.time_embed = nn.Sequential([
+            FourierEmbedding(features=self.emb_features),
+            TimeProjection(features=self.emb_features)
+        ], name="time_embed")
+        # Text projection
+        self.text_proj = nn.DenseGeneral(
+            features=self.emb_features,
+            dtype=self.dtype,
+            precision=self.precision,
+            name="text_proj"
+        )
+        # --- Transformer Blocks ---
+        BlockClass = TransformerBlock
+        self.down_blocks = [
+            BlockClass(
+                heads=self.num_heads,
+                dim_head=self.emb_features // self.num_heads,
+                dtype=self.dtype, precision=self.precision, use_projection=self.use_projection,
+                use_flash_attention=self.use_flash_attention, use_self_and_cross=self.use_self_and_cross,
+                force_fp32_for_softmax=self.force_fp32_for_softmax,
+                only_pure_attention=False, norm_inputs=self.norm_inputs,
+                explicitly_add_residual=self.explicitly_add_residual,
+                norm_epsilon=self.norm_epsilon,
+                name=f"down_block_{i}"
+            ) for i in range(half_layers)
+        ]
+        self.mid_block = BlockClass(
+            heads=self.num_heads,
+            dim_head=self.emb_features // self.num_heads,
+            dtype=self.dtype, precision=self.precision, use_projection=self.use_projection,
+            use_flash_attention=self.use_flash_attention, use_self_and_cross=self.use_self_and_cross,
+            force_fp32_for_softmax=self.force_fp32_for_softmax,
+            only_pure_attention=False, norm_inputs=self.norm_inputs,
+            explicitly_add_residual=self.explicitly_add_residual,
+            norm_epsilon=self.norm_epsilon,
+            name="mid_block"
+        )
+        self.up_dense = [
+            nn.DenseGeneral(  # Project concatenated skip + up_path features back to emb_features
+                features=self.emb_features,
+                dtype=self.dtype,
+                precision=self.precision,
+                name=f"up_dense_{i}"
+            ) for i in range(half_layers)
+        ]
+        self.up_blocks = [
+            BlockClass(
+                heads=self.num_heads,
+                dim_head=self.emb_features // self.num_heads,
+                dtype=self.dtype, precision=self.precision, use_projection=self.use_projection,
+                use_flash_attention=self.use_flash_attention, use_self_and_cross=self.use_self_and_cross,
+                force_fp32_for_softmax=self.force_fp32_for_softmax,
+                only_pure_attention=False, norm_inputs=self.norm_inputs,
+                explicitly_add_residual=self.explicitly_add_residual,
+                norm_epsilon=self.norm_epsilon,
+                name=f"up_block_{i}"
+            ) for i in range(half_layers)
+        ]
+        # --- Output Path ---
+        self.final_norm = self.norm_factory(name="final_norm")  # Use factory
+        patch_dim = self.patch_size ** 2 * self.output_channels
+        self.final_proj = nn.Dense(
+            features=patch_dim,
+            dtype=self.dtype,  # Keep model dtype for projection
+            precision=self.precision,
+            kernel_init=nn.initializers.zeros,  # Zero init final layer
+            name="final_proj"
+        )
+        if self.add_residualblock_output:
+            # Define these layers only if needed
+            self.final_conv1 = ConvLayer(
+                "conv",
+                features=64, kernel_size=(3, 3), strides=(1, 1),
+                dtype=self.dtype, precision=self.precision, name="final_conv1"
+            )
+            self.final_norm_conv = self.norm_factory(
+                name="final_norm_conv")  # Use factory
+            self.final_conv2 = ConvLayer(
+                "conv",
+                features=self.output_channels, kernel_size=(3, 3), strides=(1, 1),
+                dtype=jnp.float32,  # Often good to have final conv output float32
+                precision=self.precision, name="final_conv2"
+            )
         else:
-            self.norm = partial(nn.RMSNorm, epsilon=self.norm_epsilon)
+            # Final conv to map features to output channels directly after unpatchify
+            self.final_conv_direct = ConvLayer(
+                "conv",
+                # Use 1x1 conv
+                features=self.output_channels, kernel_size=(1, 1), strides=(1, 1),
+                dtype=jnp.float32,  # Output float32
+                precision=self.precision, name="final_conv_direct"
+            )
     @nn.compact
     def __call__(self, x, temb, textcontext=None):
-        # Time embedding
-        temb = FourierEmbedding(features=self.emb_features)(temb)
-        temb = TimeProjection(features=self.emb_features)(temb)
-        original_img = x
+        original_img = x  # Keep original for potential residual connection
         B, H, W, C = original_img.shape
         H_P = H // self.patch_size
         W_P = W // self.patch_size
+        num_patches = H_P * W_P
+        assert H % self.patch_size == 0 and W % self.patch_size == 0, "Image dimensions must be divisible by patch size"
-        # Patch embedding
-        x = PatchEmbedding(patch_size=self.patch_size, embedding_dim=self.emb_features,
-                           dtype=self.dtype, precision=self.precision)(x)
-        num_patches = x.shape[1]
-        # Optional Hilbert reorder
+        # --- Patch Embedding ---
+        hilbert_inv_idx = None
         if self.use_hilbert:
+            # Use hilbert_patchify to get raw patches and inverse index
+            patches_raw, hilbert_inv_idx_calc = hilbert_patchify(
+                x, self.patch_size)  # Shape [B, S, P*P*C]
+            # Project raw patches
+            # Shape [B, S, emb_features]
+            x_patches = self.hilbert_proj(patches_raw)
+            # Calculate inverse permutation (needs total_size)
             idx = hilbert_indices(H_P, W_P)
-            inv_idx = inverse_permutation(idx)
-            x = x[:, idx, :]
-        context_emb = nn.DenseGeneral(features=self.emb_features,
-                               dtype=self.dtype, precision=self.precision)(textcontext)
-        num_text_tokens = textcontext.shape[1]
-        # Add time embedding
-        temb = jnp.expand_dims(temb, axis=1)
-        x = jnp.concatenate([x, temb, context_emb], axis=1)
-        # Add positional encoding
-        x = PositionalEncoding(max_len=x.shape[1], embedding_dim=self.emb_features)(x)
+            hilbert_inv_idx = inverse_permutation(
+                idx, total_size=num_patches)  # Corrected call
+            # Apply Hilbert reordering *after* projection
+            x_patches = x_patches[:, idx, :]
+        else:
+            # Standard patch embedding
+            # Shape: [B, num_patches, emb_features]
+            x_patches = self.patch_embed(x)
+        # --- Positional Encoding ---
+        # Add positional encoding only to patch tokens
+        assert num_patches <= self.pos_encoding.shape[
+            1], f"Number of patches {num_patches} exceeds max_len {self.pos_encoding.shape[1]} in positional encoding"
+        x_patches = x_patches + self.pos_encoding[:, :num_patches, :]
+        # --- Conditioning Tokens ---
+        # Time embedding: [B, D] -> [B, 1, D]
+        time_token = self.time_embed(temb.astype(
+            jnp.float32))  # Ensure input is float32
+        time_token = jnp.expand_dims(time_token.astype(
+            self.dtype), axis=1)  # Cast back and add seq dim
+        # Text embedding: [B, S_text, D_in] -> [B, S_text, D]
+        if textcontext is not None:
+            text_tokens = self.text_proj(
+                textcontext.astype(self.dtype))  # Cast context
+            num_text_tokens = text_tokens.shape[1]
+            # Concatenate: [Patches+Pos, Time, Text]
+            x = jnp.concatenate([x_patches, time_token, text_tokens], axis=1)
+        else:
+            # Concatenate: [Patches+Pos, Time]
+            num_text_tokens = 0
+            x = jnp.concatenate([x_patches, time_token], axis=1)
+        # --- U-Net Transformer ---
         skips = []
-        # In blocks
+        # Down blocks (Encoder)
         for i in range(self.num_layers // 2):
-            x = TransformerBlock(heads=self.num_heads, dim_head=self.emb_features // self.num_heads,
-                                 dtype=self.dtype, precision=self.precision, use_projection=self.use_projection,
-                                 use_flash_attention=self.use_flash_attention, use_self_and_cross=False, force_fp32_for_softmax=self.force_fp32_for_softmax,
-                                 only_pure_attention=False,
-                                 norm_inputs=self.norm_inputs,
-                                 explicitly_add_residual=self.explicitly_add_residual,
-                                 norm_epsilon=self.norm_epsilon, # Pass epsilon
-                                 )(x)
-            skips.append(x)
+            x = self.down_blocks[i](x)  # Pass full sequence (patches+cond)
+            skips.append(x)  # Store output for skip connection
         # Middle block
-        x = TransformerBlock(heads=self.num_heads, dim_head=self.emb_features // self.num_heads,
-                             dtype=self.dtype, precision=self.precision, use_projection=self.use_projection,
-                             use_flash_attention=self.use_flash_attention, use_self_and_cross=False, force_fp32_for_softmax=self.force_fp32_for_softmax,
-                             only_pure_attention=False,
-                            norm_inputs=self.norm_inputs,
-                            explicitly_add_residual=self.explicitly_add_residual,
-                            norm_epsilon=self.norm_epsilon, # Pass epsilon
-                            )(x)
-        # Out blocks
+        x = self.mid_block(x)
+        # Up blocks (Decoder)
         for i in range(self.num_layers // 2):
-            x = jnp.concatenate([x, skips.pop()], axis=-1)
-            x = nn.DenseGeneral(features=self.emb_features,
-                                   dtype=self.dtype, precision=self.precision)(x)
-            x = TransformerBlock(heads=self.num_heads, dim_head=self.emb_features // self.num_heads,
-                                 dtype=self.dtype, precision=self.precision, use_projection=self.use_projection,
-                                 use_flash_attention=self.use_flash_attention, use_self_and_cross=self.use_self_and_cross, force_fp32_for_softmax=self.force_fp32_for_softmax,
-                                 only_pure_attention=False,
-                                 norm_inputs=self.norm_inputs,
-                                 explicitly_add_residual=self.explicitly_add_residual,
-                                 norm_epsilon=self.norm_epsilon, # Pass epsilon
-                                 )(x)
-        x = self.norm()(x) # Uses norm_epsilon defined in setup
-        patch_dim = self.patch_size ** 2 * self.output_channels
-        x = nn.Dense(features=patch_dim, dtype=self.dtype, precision=self.precision)(x)
-        # If Hilbert, restore original patch order
-        if self.use_hilbert:
-            x = x[:, inv_idx, :]
+            skip_conn = skips.pop()
+            # Concatenate along feature dimension
+            x = jnp.concatenate([x, skip_conn], axis=-1)
+            # Project back to emb_features
+            x = self.up_dense[i](x)
+            # Apply transformer block
+            x = self.up_blocks[i](x)
+        # --- Output Processing ---
+        # Normalize before final projection
+        x = self.final_norm(x)  # Apply norm factory instance
         # Extract only the image patch tokens (first num_patches tokens)
-        x = x[:, :num_patches, :]
-        x = unpatchify(x, channels=self.output_channels)
+        # Conditioning tokens (time, text) are discarded here
+        x_patches_out = x[:, :num_patches, :]
+        # Project to patch pixel dimensions
+        # Shape: [B, num_patches, patch_dim]
+        x_patches_out = self.final_proj(x_patches_out)
+        # --- Unpatchify ---
+        if self.use_hilbert:
+            # Restore Hilbert order to row-major order and then to image
+            assert hilbert_inv_idx is not None, "Hilbert inverse index missing"
+            x_image = hilbert_unpatchify(
+                x_patches_out, hilbert_inv_idx, self.patch_size, H, W, self.output_channels)
+        else:
+            # Standard unpatchify
+            # Shape: [B, H, W, C_out]
+            x_image = unpatchify(x_patches_out, channels=self.output_channels)
+        # --- Final Convolutions ---
         if self.add_residualblock_output:
-            # Concatenate the original image
-            x = jnp.concatenate([original_img, x], axis=-1)
-            x = ConvLayer(
-                "conv",
-                features=64,
-                kernel_size=(3, 3),
-                strides=(1, 1),
-                # activation=jax.nn.mish
-                dtype=self.dtype,
-                precision=self.precision
-            )(x)
-            x = self.norm()(x)
-            x = self.activation(x)
-        x = ConvLayer(
-            "conv",
-            features=self.output_channels,
-            kernel_size=(3, 3),
-            strides=(1, 1),
-            # activation=jax.nn.mish
-            dtype=self.dtype,
-            precision=self.precision
-        )(x)
-        return x
+            # Concatenate the original image (ensure dtype matches)
+            x_image = jnp.concatenate(
+                [original_img.astype(self.dtype), x_image], axis=-1)
+            x_image = self.final_conv1(x_image)
+            # Apply norm factory instance
+            x_image = self.final_norm_conv(x_image)
+            x_image = self.activation(x_image)
+            x_image = self.final_conv2(x_image)  # Outputs float32
+        else:
+            # Apply a simple 1x1 conv to map features if needed (unpatchify already gives C_out channels)
+            # Or just return x_image if channels match output_channels
+            # If unpatchify output channels == self.output_channels, this might be redundant
+            # Let's assume unpatchify gives correct channels, but ensure float32
+            # x_image = self.final_conv_direct(x_image) # Use 1x1 conv if needed
+            pass  # Assuming unpatchify output is correct
+        # Ensure final output is float32
+        return x_image

flaxdiff/trainer/general_diffusion_trainer.py CHANGED Viewed

@@ -129,6 +129,7 @@ class GeneralDiffusionTrainer(DiffusionTrainer):
                  frames_per_sample: int = None,
                  wandb_config: Dict[str, Any] = None,
                  eval_metrics: List[EvaluationMetric] = None,
+                 best_tracker_metric: str = "train/best_loss",
                  **kwargs
                  ):
         """
@@ -196,6 +197,8 @@ class GeneralDiffusionTrainer(DiffusionTrainer):
             **kwargs
         )
+        self.best_tracker_metric = best_tracker_metric
         # Store video-specific parameters
         self.frames_per_sample = frames_per_sample
@@ -203,6 +206,7 @@ class GeneralDiffusionTrainer(DiffusionTrainer):
         self.conditional_inputs = input_config.conditions
         # Determine if we're working with video or images
         self.is_video = self._is_video_data()
+        self.best_val_metrics = {}
     def _is_video_data(self):
         sample_data_shape = self.input_config.sample_data_shape
@@ -423,7 +427,7 @@ class GeneralDiffusionTrainer(DiffusionTrainer):
         process_index = jax.process_index()
         generate_samples = val_step_fn
-        val_ds = iter(val_ds()) if val_ds else None
+        val_ds = iter(val_ds) if val_ds else None
         # Evaluation step
         try:
             metrics = {metric.name: [] for metric in self.eval_metrics} if self.eval_metrics else {}
@@ -465,11 +469,17 @@ class GeneralDiffusionTrainer(DiffusionTrainer):
                         else:  # [B,H,W,C] - Image data
                             self._log_image_samples(samples, current_step)
-            if getattr(self, 'wandb', None) is not None and self.wandb:
-                # metrics is a dict of metrics
-                if metrics and type(metrics) == dict:
-                    # Flatten the metrics
-                    metrics = {k: np.mean(v) for k, v in metrics.items()}
+            # Flatten the metrics
+            if metrics:
+                metrics = {k: np.mean(v) for k, v in metrics.items()}
+                # Update the best validation metrics
+                for key, value in metrics.items():
+                    if key not in self.best_val_metrics:
+                        self.best_val_metrics[key] = value
+                    else:
+                        self.best_val_metrics[key] = min(self.best_val_metrics[key], value)
+                # Log the best validation metrics
+                if getattr(self, 'wandb', None) is not None and self.wandb:
                     # Log the metrics
                     for key, value in metrics.items():
                         if isinstance(value, jnp.ndarray):
@@ -477,7 +487,10 @@ class GeneralDiffusionTrainer(DiffusionTrainer):
                         self.wandb.log({
                             f"val/{key}": value,
                         }, step=current_step)
+            # Close validation dataset iterator
+            del val_ds
         except StopIteration:
             print(f"Validation dataset exhausted for process index {process_index}")
         except Exception as e:
@@ -602,9 +615,13 @@ class GeneralDiffusionTrainer(DiffusionTrainer):
         """
         if self.wandb is None:
             raise ValueError("Wandb is not initialized. Cannot get best runs.")
+        import wandb
         # Get the sweep runs
-        runs = sorted(self.wandb.runs, key=lambda x: x.summary.get(metric, float('inf')))
+        runs = [i for i in wandb.Api().runs(path=f"{self.wandb.entity}/{self.wandb.project}", filters={"config.dataset.name": self.wandb.config['dataset']['name']})]
+        if not runs:
+            raise ValueError("No runs found in wandb.")
+        print(f"Getting best runs from wandb {self.wandb.id}...")
+        runs = sorted(runs, key=lambda x: x.summary.get(metric, float('inf')))
         best_runs = runs[:top_k]
         lower_bound = best_runs[-1].summary.get(metric, float('inf'))
         upper_bound = best_runs[0].summary.get(metric, float('inf'))
@@ -636,6 +653,8 @@ class GeneralDiffusionTrainer(DiffusionTrainer):
         # Check if current run is one of the best
         if metric == "train/best_loss":
             current_run_metric = self.best_loss
+        elif metric in self.best_val_metrics:
+            current_run_metric = self.best_val_metrics[metric]
         else:
             current_run_metric = self.wandb.summary.get(metric, float('inf') if is_lower_better else float('-inf'))
@@ -653,7 +672,7 @@ class GeneralDiffusionTrainer(DiffusionTrainer):
         if self.wandb is not None:
             checkpoint = get_latest_checkpoint(self.checkpoint_path())
             try:
-                is_good, is_best = self.__compare_run_against_best__(top_k=5, metric="train/best_loss", from_sweeps=hasattr(self, "wandb_sweep"))
+                is_good, is_best = self.__compare_run_against_best__(top_k=5, metric=self.best_tracker_metric, from_sweeps=hasattr(self, "wandb_sweep"))
                 if is_good:
                     # Push to registry with appropriate aliases
                     aliases = []

flaxdiff 0.2.7__py3-none-any.whl → 0.2.8__py3-none-any.whl

flaxdiff 0.2.7__py3-none-any.whl → 0.2.8__py3-none-any.whl