flaxdiff 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -258,7 +258,7 @@ def get_dataset_grain(
     image_scale=256,
     count=None,
     num_epochs=None,
-    method=jax.image.ResizeMethod.LANCZOS3,
+    method=None, #jax.image.ResizeMethod.LANCZOS3,
     worker_count=32,
     read_thread_count=64,
     read_buffer_size=50,
@@ -291,7 +291,7 @@ def get_dataset_grain(
 
     local_batch_size = batch_size // jax.process_count()
 
-    sampler = pygrain.IndexSampler(
+    train_sampler = pygrain.IndexSampler(
         num_records=len(data_source) if count is None else count,
         shuffle=True,
         seed=seed,
@@ -299,6 +299,14 @@ def get_dataset_grain(
         shard_options=pygrain.ShardByJaxProcess(),
     )
 
+    # val_sampler = pygrain.IndexSampler(
+    #     num_records=len(data_source) if count is None else count,
+    #     shuffle=False,
+    #     seed=seed,
+    #     num_epochs=num_epochs,
+    #     shard_options=pygrain.ShardByJaxProcess(),
+    # )
+
     def get_trainset():
         transformations = [
             augmenter(),
@@ -307,7 +315,7 @@ def get_dataset_grain(
 
         loader = pygrain.DataLoader(
             data_source=data_source,
-            sampler=sampler,
+            sampler=train_sampler,
             operations=transformations,
             worker_count=worker_count,
             read_options=pygrain.ReadOptions(
@@ -316,10 +324,31 @@ def get_dataset_grain(
             worker_buffer_size=worker_buffer_size,
         )
         return loader
+
+    # def get_valset():
+    #     transformations = [
+    #         augmenter(),
+    #         pygrain.Batch(local_batch_size, drop_remainder=True),
+    #     ]
+
+    #     loader = pygrain.DataLoader(
+    #         data_source=data_source,
+    #         sampler=val_sampler,
+    #         operations=transformations,
+    #         worker_count=worker_count,
+    #         read_options=pygrain.ReadOptions(
+    #             read_thread_count, read_buffer_size
+    #         ),
+    #         worker_buffer_size=worker_buffer_size,
+    #     )
+    #     return loader
+    get_valset = get_trainset # For now, use the same function for validation
 
     return {
         "train": get_trainset,
         "train_len": len(data_source),
+        "val": get_valset,
+        "val_len": len(data_source),
         "local_batch_size": local_batch_size,
         "global_batch_size": batch_size,
     }
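
The net effect of these dataloader changes is that get_dataset_grain now also exposes a validation split, although for now it simply reuses the training loader. A minimal sketch of how a caller might consume the returned dictionary, using only the keys visible in this hunk; the import path is inferred from the RECORD changes further below, and the elided arguments are not part of this diff:

    from flaxdiff.data.dataloaders import get_dataset_grain  # assumed module path per the RECORD below

    data = get_dataset_grain(...)           # earlier parameters are not shown in this hunk
    train_loader = data["train"]()          # pygrain.DataLoader built by get_trainset
    val_loader = data["val"]()              # new in 0.2.4; currently the same factory as "train"
    print(data["train_len"], data["val_len"], data["local_batch_size"], data["global_batch_size"])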
@@ -21,7 +21,7 @@ datasetMap = {
         "augmenter": gcs_augmenters,
     },
     "laiona_coco": {
-        "source": data_source_gcs('arrayrecord2/laion-aesthetics-12m+mscoco-2017'),
+        "source": data_source_gcs('datasets/laion12m+mscoco'),
         "augmenter": gcs_augmenters,
     },
     "aesthetic_coyo": {
@@ -167,6 +167,16 @@ class ImageTFDSAugmenter(DataAugmenter):
 
         return TFDSTransform
 
+"""
+Batch structure:
+{
+    "image": image_batch,
+    "text": {
+        "input_ids": input_ids_batch,
+        "attention_mask": attention_mask_batch,
+    }
+
+"""
 
 # ----------------------------------------------------------------------------------
 # GCS Image Source
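
The docstring added above documents the nested batch layout produced by the augmenters. Purely as an illustration of that structure (the `loader` iterator and the shape comments are assumptions, not part of the diff):

    batch = next(iter(loader))                        # `loader` stands for a pygrain DataLoader built above
    images = batch["image"]                           # image batch, e.g. [B, H, W, C]
    input_ids = batch["text"]["input_ids"]            # tokenized caption ids
    attention_mask = batch["text"]["attention_mask"]  # matching attention mask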
@@ -248,6 +258,13 @@ class ImageGCSAugmenter(DataAugmenter):
         A callable that returns a pygrain.MapTransform.
         """
         labelizer = self.labelizer
+        if method is None:
+            if image_scale > 256:
+                method = cv2.INTER_CUBIC
+            else:
+                method = cv2.INTER_AREA
+
+        print(f"Using method: {method}")
 
         class GCSTransform(pygrain.MapTransform):
             def __init__(self, *args, **kwargs):
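
The new default picks an OpenCV interpolation mode from the target image_scale: bicubic when the target exceeds 256 px, area averaging otherwise, which follows the usual guidance for enlarging versus shrinking images. A small, self-contained sketch of that choice applied with cv2.resize; the helper name and the resize call are illustrative, not the actual GCSTransform code:

    import cv2
    import numpy as np

    def resize_image(image: np.ndarray, image_scale: int, method=None) -> np.ndarray:
        # Mirror the default chosen above: INTER_CUBIC for larger targets, INTER_AREA otherwise.
        if method is None:
            method = cv2.INTER_CUBIC if image_scale > 256 else cv2.INTER_AREA
        return cv2.resize(image, (image_scale, image_scale), interpolation=method)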
@@ -0,0 +1,11 @@
+from typing import Callable
+from dataclasses import dataclass
+
+@dataclass
+class EvaluationMetric:
+    """
+    Evaluation metrics for the diffusion model.
+    The function is given generated samples batch [B, H, W, C] and the original batch.
+    """
+    function: Callable
+    name: str
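
This new module defines the contract for evaluation metrics: a named callable that receives the generated sample batch [B, H, W, C] and the original batch. A hedged sketch of a metric built on it; the MSE definition and the assumption that batch["image"] shares the samples' value range are illustrative only:

    import jax.numpy as jnp
    from flaxdiff.metrics.common import EvaluationMetric

    def _mse(generated, batch):
        # generated: [B, H, W, C]; batch["image"] is assumed to be in the same value range.
        return jnp.mean((generated - batch["image"]) ** 2)

    mse_metric = EvaluationMetric(function=_mse, name="mse")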
@@ -0,0 +1,59 @@
+from .common import EvaluationMetric
+import jax
+import jax.numpy as jnp
+
+def get_clip_metric(
+    modelname: str = "openai/clip-vit-large-patch14",
+):
+    from transformers import AutoProcessor, FlaxCLIPModel
+    model = FlaxCLIPModel.from_pretrained(modelname, dtype=jnp.float16)
+    processor = AutoProcessor.from_pretrained(modelname, use_fast=True, dtype=jnp.float16)
+
+    @jax.jit
+    def calc(pixel_values, input_ids, attention_mask):
+        # Get the logits
+        generated_out = model(
+            pixel_values=pixel_values,
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+        )
+
+        gen_img_emb = generated_out.image_embeds
+        txt_emb = generated_out.text_embeds
+
+        # 1. Normalize embeddings (essential for cosine similarity/distance)
+        gen_img_emb = gen_img_emb / (jnp.linalg.norm(gen_img_emb, axis=-1, keepdims=True) + 1e-6)
+        txt_emb = txt_emb / (jnp.linalg.norm(txt_emb, axis=-1, keepdims=True) + 1e-6)
+
+        # 2. Calculate cosine similarity
+        # Using einsum for batch dot product: batch (b), embedding_dim (d) -> bd,bd->b
+        # Calculate cosine similarity
+        similarity = jnp.einsum('bd,bd->b', gen_img_emb, txt_emb)
+
+        scaled_distance = (1.0 - similarity)
+        # 4. Average over the batch
+        mean_scaled_distance = jnp.mean(scaled_distance)
+
+        return mean_scaled_distance
+
+    def clip_metric(
+        generated: jnp.ndarray,
+        batch
+    ):
+        original_conditions = batch['text']
+
+        # Convert samples from [-1, 1] to [0, 255] and uint8
+        generated = (((generated + 1.0) / 2.0) * 255).astype(jnp.uint8)
+
+        generated_inputs = processor(images=generated, return_tensors="jax", padding=True,)
+
+        pixel_values = generated_inputs['pixel_values']
+        input_ids = original_conditions['input_ids']
+        attention_mask = original_conditions['attention_mask']
+
+        return calc(pixel_values, input_ids, attention_mask)
+
+    return EvaluationMetric(
+        function=clip_metric,
+        name='clip_similarity'
+    )
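
get_clip_metric wraps a FlaxCLIPModel so the metric reports the mean cosine distance between embeddings of the generated images and the caption embeddings already present in the batch. A hedged usage sketch, assuming the trainer wiring shown in the hunks below; constructor arguments other than eval_metrics are elided because they are not part of this diff:

    from flaxdiff.metrics.images import get_clip_metric
    from flaxdiff.trainer.general_diffusion_trainer import GeneralDiffusionTrainer

    clip_metric = get_clip_metric()   # defaults to openai/clip-vit-large-patch14
    trainer = GeneralDiffusionTrainer(
        ...,                          # model, input_config, etc. as in the existing constructor
        eval_metrics=[clip_metric],   # new keyword added in 0.2.4
    )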
@@ -18,15 +18,17 @@ from ..samplers.ddim import DDIMSampler
 from flaxdiff.utils import RandomMarkovState, serialize_model, get_latest_checkpoint
 from flaxdiff.inputs import ConditioningEncoder, ConditionalInputConfig, DiffusionInputConfig
 
-from .simple_trainer import SimpleTrainer, SimpleTrainState, Metrics
+from .simple_trainer import SimpleTrainer, SimpleTrainState, Metrics, convert_to_global_tree
 
 from flaxdiff.models.autoencoder.autoencoder import AutoEncoder
 from flax.training import dynamic_scale as dynamic_scale_lib
 
 # Reuse the TrainState from the DiffusionTrainer
-from flaxdiff.trainer.diffusion_trainer import TrainState, DiffusionTrainer
+from .diffusion_trainer import TrainState, DiffusionTrainer
 import shutil
 
+from flaxdiff.metrics.common import EvaluationMetric
+
 def generate_modelname(
     dataset_name: str,
     noise_schedule_name: str,
@@ -126,6 +128,7 @@ class GeneralDiffusionTrainer(DiffusionTrainer):
         native_resolution: int = None,
         frames_per_sample: int = None,
         wandb_config: Dict[str, Any] = None,
+        eval_metrics: List[EvaluationMetric] = None,
         **kwargs
     ):
         """
@@ -150,6 +153,7 @@ class GeneralDiffusionTrainer(DiffusionTrainer):
             autoencoder=autoencoder,
         )
         self.input_config = input_config
+        self.eval_metrics = eval_metrics
 
         if wandb_config is not None:
             # If input_config is not in wandb_config, add it
@@ -363,7 +367,6 @@ class GeneralDiffusionTrainer(DiffusionTrainer):
         def generate_samples(
             val_state: TrainState,
             batch,
-            sampler: DiffusionSampler,
             diffusion_steps: int,
         ):
             # Process all conditional inputs
@@ -385,7 +388,7 @@ class GeneralDiffusionTrainer(DiffusionTrainer):
                 model_conditioning_inputs=tuple(model_conditioning_inputs),
             )
 
-        return sampler, generate_samples
+        return generate_samples
 
     def _get_image_size(self):
         """Helper to determine image size from available information."""
@@ -415,32 +418,73 @@ class GeneralDiffusionTrainer(DiffusionTrainer):
         """
         Run validation and log samples for both image and video diffusion.
        """
-        sampler, generate_samples = val_step_fn
-        val_ds = iter(val_ds()) if val_ds else None
+        global_device_count = jax.device_count()
+        local_device_count = jax.local_device_count()
+        process_index = jax.process_index()
+        generate_samples = val_step_fn
 
+        val_ds = iter(val_ds()) if val_ds else None
+        # Evaluation step
         try:
-            # Generate samples
-            samples = generate_samples(
-                val_state,
-                next(val_ds),
-                sampler,
-                diffusion_steps,
-            )
-
-            # Log samples to wandb
-            if getattr(self, 'wandb', None) is not None and self.wandb:
-                import numpy as np
+            metrics = {metric.name: [] for metric in self.eval_metrics} if self.eval_metrics else {}
+            for i in range(val_steps_per_epoch):
+                if val_ds is None:
+                    batch = None
+                else:
+                    batch = next(val_ds)
+                    if self.distributed_training and global_device_count > 1:
+                        batch = convert_to_global_tree(self.mesh, batch)
+                # Generate samples
+                samples = generate_samples(
+                    val_state,
+                    batch,
+                    diffusion_steps,
+                )
 
-            # Process samples differently based on dimensionality
-            if len(samples.shape) == 5: # [B,T,H,W,C] - Video data
-                self._log_video_samples(samples, current_step)
-            else: # [B,H,W,C] - Image data
-                self._log_image_samples(samples, current_step)
+                if self.eval_metrics is not None:
+                    for metric in self.eval_metrics:
+                        try:
+                            # Evaluate metrics
+                            metric_val = metric.function(samples, batch)
+                            metrics[metric.name].append(metric_val)
+                        except Exception as e:
+                            print("Error in evaluation metrics:", e)
+                            import traceback
+                            traceback.print_exc()
+                            pass
+
+                if i == 0:
+                    print(f"Evaluation started for process index {process_index}")
+                # Log samples to wandb
+                if getattr(self, 'wandb', None) is not None and self.wandb:
+                    import numpy as np
+
+                    # Process samples differently based on dimensionality
+                    if len(samples.shape) == 5: # [B,T,H,W,C] - Video data
+                        self._log_video_samples(samples, current_step)
+                    else: # [B,H,W,C] - Image data
+                        self._log_image_samples(samples, current_step)
 
+            if getattr(self, 'wandb', None) is not None and self.wandb:
+                # metrics is a dict of metrics
+                if metrics and type(metrics) == dict:
+                    # Flatten the metrics
+                    metrics = {k: np.mean(v) for k, v in metrics.items()}
+                    # Log the metrics
+                    for key, value in metrics.items():
+                        if isinstance(value, jnp.ndarray):
+                            value = np.array(value)
+                        self.wandb.log({
+                            f"val/{key}": value,
+                        }, step=current_step)
+
+        except StopIteration:
+            print(f"Validation dataset exhausted for process index {process_index}")
         except Exception as e:
-            print("Error in validation loop:", e)
+            print(f"Error during validation for process index {process_index}: {e}")
             import traceback
             traceback.print_exc()
+
 
     def _log_video_samples(self, samples, current_step):
         """Helper to log video samples to wandb."""
@@ -411,7 +411,9 @@ class SimpleTrainer:
         train_ds,
         train_steps_per_epoch,
         current_step,
-        rng_state
+        rng_state,
+        save_every:int=None,
+        val_every=None,
     ):
         global_device_count = jax.device_count()
         process_index = jax.process_index()
@@ -491,8 +493,8 @@ class SimpleTrainer:
                    "train/loss": loss,
                }, step=current_step)
                # Save the model every few steps
-                if i % 10000 == 0 and i > 0:
-                    print(f"Saving model after 10000 step {current_step}")
+                if save_every and i % save_every == 0 and i > 0:
+                    print(f"Saving model after {save_every} step {current_step}")
                    print(f"Devices: {len(jax.devices())}") # To sync the devices
                    self.save(current_epoch, current_step, train_state, rng_state)
                    print(f"Saving done by process index {process_index}")
@@ -518,7 +520,7 @@ class SimpleTrainer:
                self.validation_loop(
                    train_state,
                    val_step,
-                    data.get('test', data.get('val', None)),
+                    data.get('val', data.get('test', None)),
                    val_steps_per_epoch,
                    self.latest_step,
                )
@@ -569,7 +571,7 @@ class SimpleTrainer:
            self.validation_loop(
                train_state,
                val_step,
-                data.get('test', None),
+                data.get('val', data.get('test', None)),
                val_steps_per_epoch,
                current_step,
            )
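
Two small behavioural changes land here: the periodic checkpoint interval becomes an optional save_every argument instead of a hard-coded 10000 steps, and validation now prefers a "val" split over "test", matching the "val" key that get_dataset_grain exposes above. A minimal illustration of both; the dict contents and step values are made up:

    def get_trainset():   # stand-in for the factory returned by get_dataset_grain above
        ...

    data = {"train": get_trainset, "val": get_trainset}
    val_source = data.get('val', data.get('test', None))   # picks "val" first, "test" as fallback

    step, save_every = 20000, None
    should_save = bool(save_every) and step % save_every == 0 and step > 0   # False: periodic saves disabled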
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: flaxdiff
-Version: 0.2.2
+Version: 0.2.4
 Summary: A versatile and easy to understand Diffusion library
 Author-email: Ashish Kumar Singh <ashishkmr472@gmail.com>
 License-Expression: MIT
@@ -22,6 +22,8 @@ Requires-Dist: python-dotenv
 
 # ![](images/logo.jpeg "FlaxDiff")
 
+**This project is being used for the UMD Course project MSML 605: MLOps**
+
 **This project is partially supported by [Google TPU Research Cloud](https://sites.research.google/trc/about/). I would like to thank the Google Cloud TPU team for providing me with the resources to train the bigger text-conditional models in multi-host distributed settings.**
 
 ## A Versatile and simple Diffusion Library
@@ -2,14 +2,14 @@ flaxdiff/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 flaxdiff/utils.py,sha256=DmlWUY1FGz4ESxIHaPQJf92CHjsdMjyDd651wFUtyNg,8838
 flaxdiff/data/__init__.py,sha256=8W5y7NyAOWtpLi8WRawk4VYeE3DMDnM3B_jKPD8BoFQ,143
 flaxdiff/data/benchmark_decord.py,sha256=x56Db1VPmziv_9KJvWdfS0O7cffsYkF5tt5WvldOKc0,13720
-flaxdiff/data/dataloaders.py,sha256=V4goNCK0JD_TthggXAEgJJD4LxJi1pUDew1x_fMCuO4,22576
-flaxdiff/data/dataset_map.py,sha256=NrLG1XtIxy8GcCsZ-e6eascjgsP0Xq5lVA1z3HIIYyI,5093
+flaxdiff/data/dataloaders.py,sha256=LV8ugqoB86yihfYeOJZHHdRZJNmZ63A2NQkdILMR9QA,23564
+flaxdiff/data/dataset_map.py,sha256=_6SYnmrYO-URDd8vPAmALTV6r0eMGWWmwUtsdjKGXnA,5072
 flaxdiff/data/online_loader.py,sha256=t1jEhdB6gWTlwx68ehj1ol_PrImbwXYiRlrJPCmNgCM,35701
 flaxdiff/data/sources/audio_utils.py,sha256=X27gG1yQt_abVOYgMtruYmZD7-8_uQCRhhTSpn4clkI,4514
 flaxdiff/data/sources/av_example.py,sha256=RIcbVKqckFqbfnV65NQotzIBxjdDuM67kD1nY8fqw5Q,3826
 flaxdiff/data/sources/av_utils.py,sha256=LCr9MJNurOaoxY-sjzkLqJS_MlX0x3gRSlKAVIglAU0,24045
 flaxdiff/data/sources/base.py,sha256=uhF0odJSYRy0SLw1xnI9Q_q_xiVht2DmEYcX1j9AWT4,4246
-flaxdiff/data/sources/images.py,sha256=WpH4ywZhNol26peX3m6m5NrmDJ1K2s6fRcYHvOFlOk8,11102
+flaxdiff/data/sources/images.py,sha256=P7Rea7Zu0h9l7Zoc33zEHKdLI1ST6JEqgl1-bRwORM4,11460
 flaxdiff/data/sources/utils.py,sha256=kFzM4_kPoThbAu54ulABmEDAR33tR50NgzXIpC0Dzjk,7316
 flaxdiff/data/sources/videos.py,sha256=CVpOH6A4P2D8iv3gZIhd2GB5ATUD8Vsm_wVYbbugWD4,9359
 flaxdiff/data/sources/voxceleb2.py,sha256=BoKfat_hsw6ObDyyaiQmPbBzuFiqgCGlgAZmf-t5Iz8,18621
@@ -18,6 +18,9 @@ flaxdiff/inference/pipeline.py,sha256=oMBRjvTtlC3Yzl1FqiBHcI4V34HXGAecCg8UvQbKoO
 flaxdiff/inference/utils.py,sha256=SRNYo-YtHzEPRpNv0fD8ZrUvnRIK941Rh4tjlsOGRgM,12278
 flaxdiff/inputs/__init__.py,sha256=ybPjQsFAf5sqRVZG1sRiOl99EnwpI-NQ8HE3y7UbXmU,7197
 flaxdiff/inputs/encoders.py,sha256=pjfbx4Rk7bLoE80MOfThZDm6YtsDncRekmn0Bmg_CwI,2963
+flaxdiff/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+flaxdiff/metrics/common.py,sha256=E0MkL43dicImzNNa-RyQ3sVcrUbpeLlooIQsKTIStvo,285
+flaxdiff/metrics/images.py,sha256=sIuF_Sa2VmPOKrFFoUpzhqOqNa9P7NF0njbrYi93AvE,2128
 flaxdiff/metrics/inception.py,sha256=a5kjMCPMT9gB88c_HCKiek-2vsAyoE35K7nDt4h4pVI,31843
 flaxdiff/metrics/psnr.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 flaxdiff/metrics/ssim.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -56,9 +59,9 @@ flaxdiff/schedulers/sqrt.py,sha256=mCd_szmOqF6vqQKiAiEOqV_3eBIPGYrW3VxK0o4rBuo,4
 flaxdiff/trainer/__init__.py,sha256=xSoierfi26gxfgxlNnwvyyPmuPAJ--5i3mEHxt3S-AE,215
 flaxdiff/trainer/autoencoder_trainer.py,sha256=2FP2P-k9c0n_k3eT0trkq73dQrHRdBj9ObK1idcyhSw,6996
 flaxdiff/trainer/diffusion_trainer.py,sha256=reQEVWKTqKAeyCMQ-curPOfSRmBKxKooK8EVtUuorcM,14599
-flaxdiff/trainer/general_diffusion_trainer.py,sha256=7VAeT3TzCDUyns8wdZbIwXJqDKx_FYSzq8toOkaeQMI,24802
-flaxdiff/trainer/simple_trainer.py,sha256=CF2mMcc6AtBgcR1XiqKevRL0paGS0S9ZJofCns32nRM,24214
-flaxdiff-0.2.2.dist-info/METADATA,sha256=pzYYdy1zK7lbaqSRdpopZHHYx7q3BP0DL11hGTOO7h4,23982
-flaxdiff-0.2.2.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
-flaxdiff-0.2.2.dist-info/top_level.txt,sha256=-2-nXnfkJgSfkki1tjm5Faw6Dso7vhtdn2szwCdX5CQ,9
-flaxdiff-0.2.2.dist-info/RECORD,,
+flaxdiff/trainer/general_diffusion_trainer.py,sha256=9c3Ys5sN4_eTehusLjS6IKW5XPOkxoguik-6G0cyQc4,27082
+flaxdiff/trainer/simple_trainer.py,sha256=raLS1shwpjJBT_bYXLAB2E4kA9MbwasDTzDTUqfCCUc,24312
+flaxdiff-0.2.4.dist-info/METADATA,sha256=mqm2um1TtgjQzrGlvl7x_CCb_09hK376_dsRECe6qLQ,24057
+flaxdiff-0.2.4.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
+flaxdiff-0.2.4.dist-info/top_level.txt,sha256=-2-nXnfkJgSfkki1tjm5Faw6Dso7vhtdn2szwCdX5CQ,9
+flaxdiff-0.2.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (79.0.0)
+Generator: setuptools (80.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 