PyPI - flaxdiff - Versions diffs - 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl - Mend

flaxdiff 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

flaxdiff/data/dataloaders.py CHANGED Viewed

@@ -258,7 +258,7 @@ def get_dataset_grain(
     image_scale=256,
     count=None,
     num_epochs=None,
-    method=jax.image.ResizeMethod.LANCZOS3,
+    method=None, #jax.image.ResizeMethod.LANCZOS3,
     worker_count=32,
     read_thread_count=64,
     read_buffer_size=50,

flaxdiff/metrics/__init__.py ADDED Viewed

File without changes

flaxdiff/metrics/common.py ADDED Viewed

@@ -0,0 +1,11 @@
+from typing import Callable
+from dataclasses import dataclass
+@dataclass
+class EvaluationMetric:
+    """
+    A named evaluation metric for the diffusion model.
+    The function is given the generated samples batch [B, H, W, C] and the original batch.
+    """
+    # Callable invoked with (generated samples, original batch).
+    function: Callable
+    # Label identifying this metric.
+    name: str

flaxdiff/metrics/images.py ADDED Viewed

@@ -0,0 +1,59 @@
+from .common import EvaluationMetric
+import jax
+import jax.numpy as jnp
+def get_clip_metric(
+    modelname: str = "openai/clip-vit-large-patch14",
+):
+    """
+    Build an EvaluationMetric that compares generated images with their text conditions using CLIP.
+    The metric function takes the generated samples and the original batch and returns the
+    batch mean of (1 - cosine similarity) between the CLIP image and text embeddings, so a
+    lower value means the image and text embeddings are closer.
+    Args:
+        modelname: Model identifier passed to `from_pretrained` for both the CLIP model
+            and its processor.
+    Returns:
+        EvaluationMetric named 'clip_similarity'. The name is kept unchanged for existing
+        callers even though the reported value is a distance.
+    """
+    # Imported inside the function, so `transformers` is only needed when this metric is built.
+    from transformers import AutoProcessor, FlaxCLIPModel
+    model = FlaxCLIPModel.from_pretrained(modelname, dtype=jnp.float16)
+    processor = AutoProcessor.from_pretrained(modelname, use_fast=True, dtype=jnp.float16)
+    @jax.jit
+    def calc(pixel_values, input_ids, attention_mask):
+        # Run CLIP once to obtain the image and text embeddings.
+        outputs = model(
+            pixel_values=pixel_values,
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+        )
+        gen_img_emb = outputs.image_embeds
+        txt_emb = outputs.text_embeds
+        # 1. Normalize embeddings (essential for cosine similarity/distance);
+        #    the small epsilon avoids division by zero.
+        gen_img_emb = gen_img_emb / (jnp.linalg.norm(gen_img_emb, axis=-1, keepdims=True) + 1e-6)
+        txt_emb = txt_emb / (jnp.linalg.norm(txt_emb, axis=-1, keepdims=True) + 1e-6)
+        # 2. Cosine similarity per pair, as a batched dot product:
+        #    batch (b), embedding_dim (d) -> bd,bd->b
+        similarity = jnp.einsum('bd,bd->b', gen_img_emb, txt_emb)
+        # 3. Turn similarity into a distance
+        distance = (1.0 - similarity)
+        # 4. Average over the batch
+        return jnp.mean(distance)
+    def clip_metric(
+        generated: jnp.ndarray,
+        batch
+    ):
+        """
+        Compute the mean CLIP cosine distance between `generated` and the batch's text conditions.
+        `batch['text']` must provide 'input_ids' and 'attention_mask'.
+        """
+        original_conditions = batch['text']
+        # Convert samples from (assumed) [-1, 1] to [0, 255]. Clip before the uint8 cast so
+        # values slightly outside the expected range cannot produce out-of-range conversions.
+        generated = jnp.clip(((generated + 1.0) / 2.0) * 255, 0, 255).astype(jnp.uint8)
+        generated_inputs = processor(images=generated, return_tensors="jax", padding=True,)
+        pixel_values = generated_inputs['pixel_values']
+        input_ids = original_conditions['input_ids']
+        attention_mask = original_conditions['attention_mask']
+        return calc(pixel_values, input_ids, attention_mask)
+    return EvaluationMetric(
+        function=clip_metric,
+        name='clip_similarity'
+    )

flaxdiff/trainer/general_diffusion_trainer.py CHANGED Viewed

@@ -27,6 +27,8 @@ from flax.training import dynamic_scale as dynamic_scale_lib
 from .diffusion_trainer import TrainState, DiffusionTrainer
 import shutil
+from flaxdiff.metrics.common import EvaluationMetric
 def generate_modelname(
     dataset_name: str,
     noise_schedule_name: str,
@@ -103,15 +105,6 @@ def generate_modelname(
     # model_name = f"{model_name}-{config_hash}"
     return model_name
-@dataclass
-class EvaluationMetric:
-    """
-    Evaluation metrics for the diffusion model.
-    The function is given generated samples batch [B, H, W, C] and the original batch.
-    """
-    function: Callable
-    name: str
 class GeneralDiffusionTrainer(DiffusionTrainer):
     """
     General trainer for diffusion models supporting both images and videos.

{flaxdiff-0.2.3.dist-info → flaxdiff-0.2.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: flaxdiff
-Version: 0.2.3
+Version: 0.2.4
 Summary: A versatile and easy to understand Diffusion library
 Author-email: Ashish Kumar Singh <ashishkmr472@gmail.com>
 License-Expression: MIT

{flaxdiff-0.2.3.dist-info → flaxdiff-0.2.4.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ flaxdiff/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 flaxdiff/utils.py,sha256=DmlWUY1FGz4ESxIHaPQJf92CHjsdMjyDd651wFUtyNg,8838
 flaxdiff/data/__init__.py,sha256=8W5y7NyAOWtpLi8WRawk4VYeE3DMDnM3B_jKPD8BoFQ,143
 flaxdiff/data/benchmark_decord.py,sha256=x56Db1VPmziv_9KJvWdfS0O7cffsYkF5tt5WvldOKc0,13720
-flaxdiff/data/dataloaders.py,sha256=TgbR5CMxE86L0-1qy5ohZT8zhOPjk3oncd5WPBv08sQ,23557
+flaxdiff/data/dataloaders.py,sha256=LV8ugqoB86yihfYeOJZHHdRZJNmZ63A2NQkdILMR9QA,23564
 flaxdiff/data/dataset_map.py,sha256=_6SYnmrYO-URDd8vPAmALTV6r0eMGWWmwUtsdjKGXnA,5072
 flaxdiff/data/online_loader.py,sha256=t1jEhdB6gWTlwx68ehj1ol_PrImbwXYiRlrJPCmNgCM,35701
 flaxdiff/data/sources/audio_utils.py,sha256=X27gG1yQt_abVOYgMtruYmZD7-8_uQCRhhTSpn4clkI,4514
@@ -18,6 +18,9 @@ flaxdiff/inference/pipeline.py,sha256=oMBRjvTtlC3Yzl1FqiBHcI4V34HXGAecCg8UvQbKoO
 flaxdiff/inference/utils.py,sha256=SRNYo-YtHzEPRpNv0fD8ZrUvnRIK941Rh4tjlsOGRgM,12278
 flaxdiff/inputs/__init__.py,sha256=ybPjQsFAf5sqRVZG1sRiOl99EnwpI-NQ8HE3y7UbXmU,7197
 flaxdiff/inputs/encoders.py,sha256=pjfbx4Rk7bLoE80MOfThZDm6YtsDncRekmn0Bmg_CwI,2963
+flaxdiff/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+flaxdiff/metrics/common.py,sha256=E0MkL43dicImzNNa-RyQ3sVcrUbpeLlooIQsKTIStvo,285
+flaxdiff/metrics/images.py,sha256=sIuF_Sa2VmPOKrFFoUpzhqOqNa9P7NF0njbrYi93AvE,2128
 flaxdiff/metrics/inception.py,sha256=a5kjMCPMT9gB88c_HCKiek-2vsAyoE35K7nDt4h4pVI,31843
 flaxdiff/metrics/psnr.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 flaxdiff/metrics/ssim.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -56,9 +59,9 @@ flaxdiff/schedulers/sqrt.py,sha256=mCd_szmOqF6vqQKiAiEOqV_3eBIPGYrW3VxK0o4rBuo,4
 flaxdiff/trainer/__init__.py,sha256=xSoierfi26gxfgxlNnwvyyPmuPAJ--5i3mEHxt3S-AE,215
 flaxdiff/trainer/autoencoder_trainer.py,sha256=2FP2P-k9c0n_k3eT0trkq73dQrHRdBj9ObK1idcyhSw,6996
 flaxdiff/trainer/diffusion_trainer.py,sha256=reQEVWKTqKAeyCMQ-curPOfSRmBKxKooK8EVtUuorcM,14599
-flaxdiff/trainer/general_diffusion_trainer.py,sha256=1rLU7iooXIlSDIGFZ7bHgpMWmkqMbUzM9fHBu1L0t-U,27252
+flaxdiff/trainer/general_diffusion_trainer.py,sha256=9c3Ys5sN4_eTehusLjS6IKW5XPOkxoguik-6G0cyQc4,27082
 flaxdiff/trainer/simple_trainer.py,sha256=raLS1shwpjJBT_bYXLAB2E4kA9MbwasDTzDTUqfCCUc,24312
-flaxdiff-0.2.3.dist-info/METADATA,sha256=eoCSaBNoDpk90qWz5_NGVkzvuf3Oqt6rSj_ZVTfYn7s,24057
-flaxdiff-0.2.3.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
-flaxdiff-0.2.3.dist-info/top_level.txt,sha256=-2-nXnfkJgSfkki1tjm5Faw6Dso7vhtdn2szwCdX5CQ,9
-flaxdiff-0.2.3.dist-info/RECORD,,
+flaxdiff-0.2.4.dist-info/METADATA,sha256=mqm2um1TtgjQzrGlvl7x_CCb_09hK376_dsRECe6qLQ,24057
+flaxdiff-0.2.4.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
+flaxdiff-0.2.4.dist-info/top_level.txt,sha256=-2-nXnfkJgSfkki1tjm5Faw6Dso7vhtdn2szwCdX5CQ,9
+flaxdiff-0.2.4.dist-info/RECORD,,

{flaxdiff-0.2.3.dist-info → flaxdiff-0.2.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{flaxdiff-0.2.3.dist-info → flaxdiff-0.2.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

flaxdiff 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl

flaxdiff 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl