flaxdiff 0.2.6.1__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- flaxdiff/data/dataloaders.py +36 -24
- flaxdiff/data/dataset_map.py +2 -2
- flaxdiff/data/sources/base.py +12 -0
- flaxdiff/data/sources/images.py +71 -12
- flaxdiff/data/sources/videos.py +5 -0
- flaxdiff/inference/pipeline.py +9 -4
- flaxdiff/inference/utils.py +2 -2
- flaxdiff/models/common.py +1 -70
- flaxdiff/models/hilbert.py +617 -0
- flaxdiff/models/simple_dit.py +476 -0
- flaxdiff/models/simple_mmdit.py +861 -0
- flaxdiff/models/simple_vit.py +278 -117
- flaxdiff/trainer/general_diffusion_trainer.py +29 -10
- flaxdiff/trainer/simple_trainer.py +113 -19
- {flaxdiff-0.2.6.1.dist-info → flaxdiff-0.2.8.dist-info}/METADATA +1 -1
- {flaxdiff-0.2.6.1.dist-info → flaxdiff-0.2.8.dist-info}/RECORD +18 -16
- {flaxdiff-0.2.6.1.dist-info → flaxdiff-0.2.8.dist-info}/WHEEL +1 -1
- flaxdiff/models/better_uvit.py +0 -380
- {flaxdiff-0.2.6.1.dist-info → flaxdiff-0.2.8.dist-info}/top_level.txt +0 -0
flaxdiff/data/dataloaders.py
CHANGED
@@ -251,6 +251,12 @@ def generate_collate_fn(media_type="image"):
     else: # Default to image
         return image_collate
 
+class CaptionDeletionTransform(pygrain.MapTransform):
+    def map(self, element):
+        """Delete the caption from the element."""
+        if "caption" in element:
+            del element["caption"]
+        return element
 
 def get_dataset_grain(
     data_name="cc12m",
@@ -286,13 +292,14 @@ def get_dataset_grain(
         Dictionary with train dataset function and metadata.
     """
     dataset = datasetMap[data_name]
-
+    train_source = dataset["source"](dataset_source, split="train")
+    # val_source = dataset["source"](dataset_source, split="val")
     augmenter = dataset["augmenter"](image_scale, method)
 
     local_batch_size = batch_size // jax.process_count()
 
     train_sampler = pygrain.IndexSampler(
-        num_records=len(
+        num_records=len(train_source) if count is None else count,
         shuffle=True,
         seed=seed,
         num_epochs=num_epochs,
@@ -300,7 +307,7 @@ def get_dataset_grain(
     )
 
     # val_sampler = pygrain.IndexSampler(
-    #     num_records=len(
+    #     num_records=len(val_source) if count is None else count,
     #     shuffle=False,
     #     seed=seed,
     #     num_epochs=num_epochs,
@@ -310,11 +317,17 @@ def get_dataset_grain(
     def get_trainset():
         transformations = [
             augmenter(),
-            pygrain.Batch(local_batch_size, drop_remainder=True),
         ]
+
+        # if filters:
+        #     print("Adding filters to transformations")
+        #     transformations.append(filters())
+
+        # transformations.append(CaptionDeletionTransform())
+        transformations.append(pygrain.Batch(local_batch_size, drop_remainder=True))
 
         loader = pygrain.DataLoader(
-            data_source=
+            data_source=train_source,
             sampler=train_sampler,
             operations=transformations,
             worker_count=worker_count,
@@ -325,30 +338,29 @@ def get_dataset_grain(
         )
         return loader
 
-
-
-
-
-
+    def get_valset():
+        transformations = [
+            augmenter(),
+            pygrain.Batch(local_batch_size, drop_remainder=True),
+        ]
 
-
-
-
-
-
-
-
-
-
-
-
-    get_valset = get_trainset # For now, use the same function for validation
+        loader = pygrain.DataLoader(
+            data_source=train_source,
+            sampler=train_sampler,
+            operations=transformations,
+            worker_count=2,
+            read_options=pygrain.ReadOptions(
+                read_thread_count, read_buffer_size
+            ),
+            worker_buffer_size=2,
+        )
+        return loader
 
     return {
         "train": get_trainset,
-        "train_len": len(
+        "train_len": len(train_source),
         "val": get_valset,
-        "val_len": len(
+        "val_len": len(train_source),
         "local_batch_size": local_batch_size,
        "global_batch_size": batch_size,
    }
flaxdiff/data/dataset_map.py
CHANGED
@@ -8,7 +8,7 @@ from .sources.videos import VideoTFDSSource, VideoLocalSource, AudioVideoAugment
 # ---------------------------------------------------------------------------------
 
 from .sources.images import data_source_tfds, tfds_augmenters, data_source_gcs
-from .sources.images import data_source_combined_gcs, gcs_augmenters
+from .sources.images import data_source_combined_gcs, gcs_augmenters, gcs_filters
 
 # Configure the following for your datasets
 datasetMap = {
@@ -21,7 +21,7 @@ datasetMap = {
         "augmenter": gcs_augmenters,
     },
     "laiona_coco": {
-        "source": data_source_gcs('datasets/laion12m+
+        "source": data_source_gcs('datasets/laion12m+mscoco_filtered-new'),
         "augmenter": gcs_augmenters,
     },
     "aesthetic_coyo": {
flaxdiff/data/sources/base.py
CHANGED
@@ -62,6 +62,18 @@ class DataAugmenter(ABC):
         """
         pass
 
+    @abstractmethod
+    def create_filter(self, **kwargs) -> Callable[[], pygrain.FilterTransform]:
+        """Create a filter function for the data.
+
+        Args:
+            **kwargs: Additional arguments for the filter.
+
+        Returns:
+            A callable that returns a pygrain.FilterTransform instance.
+        """
+        pass
+
     @staticmethod
     def create(augmenter_type: str, **kwargs) -> 'DataAugmenter':
         """Factory method to create a data augmenter of the specified type.
flaxdiff/data/sources/images.py
CHANGED
@@ -82,7 +82,7 @@ def labelizer_oxford_flowers102(path):
 class ImageTFDSSource(DataSource):
     """Data source for TensorFlow Datasets (TFDS) image datasets."""
 
-    def __init__(self, name: str, use_tf: bool = True
+    def __init__(self, name: str, use_tf: bool = True):
         """Initialize a TFDS image data source.
 
         Args:
@@ -92,9 +92,8 @@ class ImageTFDSSource(DataSource):
         """
         self.name = name
         self.use_tf = use_tf
-        self.split = split
 
-    def get_source(self, path_override: str) -> Any:
+    def get_source(self, path_override: str, split: str = "all") -> Any:
         """Get the TFDS data source.
 
         Args:
@@ -105,20 +104,22 @@ class ImageTFDSSource(DataSource):
         """
         import tensorflow_datasets as tfds
         if self.use_tf:
-            return tfds.load(self.name, split=
+            return tfds.load(self.name, split=split, shuffle_files=True)
         else:
-            return tfds.data_source(self.name, split=
+            return tfds.data_source(self.name, split=split, try_gcs=False)
 
 
 class ImageTFDSAugmenter(DataAugmenter):
     """Augmenter for TFDS image datasets."""
 
-    def __init__(self, label_path: str =
+    def __init__(self, label_path: str = None):
         """Initialize a TFDS image augmenter.
 
         Args:
             label_path: Path to the labels file for datasets like Oxford Flowers.
         """
+        if label_path is None:
+            label_path = os.path.join(os.path.expanduser("~"), "tensorflow_datasets/oxford_flowers102/2.1.1/label.labels.txt")
         self.label_path = label_path
 
     def create_transform(self, image_scale: int = 256, method: Any = None) -> Callable[[], pygrain.MapTransform]:
@@ -166,7 +167,11 @@ class ImageTFDSAugmenter(DataAugmenter):
         }
 
         return TFDSTransform
-
+
+    def create_filter(self, image_scale: int = 256):
+        class FilterTransform(pygrain.FilterTransform):
+            def map(self, element) -> bool:
+                return True
     """
     Batch structure:
     {
@@ -193,7 +198,7 @@ class ImageGCSSource(DataSource):
         """
         self.source = source
 
-    def get_source(self, path_override: str = "/home/mrwhite0racle/gcs_mount") -> Any:
+    def get_source(self, path_override: str = "/home/mrwhite0racle/gcs_mount", split: str = "train") -> Any:
         """Get the GCS data source.
 
         Args:
@@ -205,6 +210,8 @@ class ImageGCSSource(DataSource):
         records_path = os.path.join(path_override, self.source)
         records = [os.path.join(records_path, i) for i in os.listdir(
             records_path) if 'array_record' in i]
+        if split == "val":
+            records = records[:1]
         return pygrain.ArrayRecordDataSource(records)
 
 
@@ -219,7 +226,7 @@ class CombinedImageGCSSource(DataSource):
         """
         self.sources = sources
 
-    def get_source(self, path_override: str = "/home/mrwhite0racle/gcs_mount") -> Any:
+    def get_source(self, path_override: str = "/home/mrwhite0racle/gcs_mount", split: str = "train") -> Any:
         """Get the combined GCS data source.
 
         Args:
@@ -233,9 +240,10 @@ class CombinedImageGCSSource(DataSource):
         for records_path in records_paths:
             records += [os.path.join(records_path, i) for i in os.listdir(
                 records_path) if 'array_record' in i]
+        if split == "val":
+            records = records[:1]
         return pygrain.ArrayRecordDataSource(records)
 
-
 class ImageGCSAugmenter(DataAugmenter):
     """Augmenter for GCS image datasets."""
 
@@ -295,6 +303,52 @@ class ImageGCSAugmenter(DataAugmenter):
         }
 
         return GCSTransform
+
+    def create_filter(self, image_scale: int = 256):
+        import torch.nn.functional as F
+        class FilterTransform(pygrain.FilterTransform):
+            """
+            Filter transform for GCS data source.
+            """
+            def __init__(self, model=None, processor=None, method=cv2.INTER_AREA):
+                super().__init__()
+                self.image_scale = image_scale
+                if model is None:
+                    from transformers import AutoProcessor, CLIPVisionModelWithProjection, FlaxCLIPModel, CLIPModel
+                    model_name = "openai/clip-vit-base-patch32"
+                    model = CLIPModel.from_pretrained(model_name)
+                    processor = AutoProcessor.from_pretrained(model_name, use_fast=False)
+                self.method = method
+                self.model = model
+                self.processor = processor
+
+                # def _filter_(pixel_values, input_ids):
+                #     image_embeds = self.model.get_image_features(pixel_values=pixel_values)
+                #     text_embeds = self.model.get_text_features(input_ids=input_ids)
+                #     image_embeds = image_embeds / jnp.linalg.norm(image_embeds, axis=-1, keepdims=True)
+                #     text_embeds = text_embeds / jnp.linalg.norm(text_embeds, axis=-1, keepdims=True)
+                #     similarity = jnp.sum(image_embeds * text_embeds, axis=-1)
+                #     return jnp.all(similarity >= 0.25)
+
+                # self._filter_ = _filter_
+
+            def filter(self, data: Dict[str, Any]) -> bool:
+                images = [data['image']]
+                texts = [data['caption']]
+                inputs = self.processor(text=texts, images=images, return_tensors="pt", padding=True, truncation=True)
+                # result = self._filter_(
+                #     pixel_values=inputs['pixel_values'],
+                #     input_ids=inputs['input_ids']
+                # )
+                # return result
+
+                image_embeds = self.model.get_image_features(pixel_values=inputs['pixel_values'])
+                text_embeds = self.model.get_text_features(input_ids=inputs['input_ids'])
+                similarity = F.cosine_similarity(image_embeds, text_embeds)
+                # Filter out images with similarity less than 0.25
+                return similarity[0] >= 0.25
+
+        return FilterTransform
 
 
 # ----------------------------------------------------------------------------------
@@ -303,9 +357,9 @@ class ImageGCSAugmenter(DataAugmenter):
 
 # These functions maintain backward compatibility with existing code
 
-def data_source_tfds(name, use_tf=True
+def data_source_tfds(name, use_tf=True):
     """Legacy function for TFDS data sources."""
-    source = ImageTFDSSource(name=name, use_tf=use_tf
+    source = ImageTFDSSource(name=name, use_tf=use_tf)
     return source.get_source
 
 
@@ -331,3 +385,8 @@ def gcs_augmenters(image_scale, method):
    """Legacy function for GCS augmenters."""
    augmenter = ImageGCSAugmenter()
    return augmenter.create_transform(image_scale=image_scale, method=method)
+
+def gcs_filters(image_scale):
+    """Legacy function for GCS Filters."""
+    augmenter = ImageGCSAugmenter()
+    return augmenter.create_filter(image_scale=image_scale)
flaxdiff/data/sources/videos.py
CHANGED
@@ -216,6 +216,11 @@ class AudioVideoAugmenter(DataAugmenter):
 
         return AudioVideoTransform
 
+
+    def create_filter(self, image_scale: int = 256):
+        class FilterTransform(pygrain.FilterTransform):
+            def map(self, element) -> bool:
+                return True
 
 # ----------------------------------------------------------------------------------
 # Helper functions for video datasets
flaxdiff/inference/pipeline.py
CHANGED
@@ -25,6 +25,7 @@ from flaxdiff.inference.utils import parse_config, load_from_wandb_run, load_fro
 @dataclass
 class InferencePipeline:
     """Inference pipeline for a general model."""
+    name: str = None
     model: nn.Module = None
     state: SimpleTrainState = None
     best_state: SimpleTrainState = None
@@ -44,6 +45,7 @@ class DiffusionInferencePipeline(InferencePipeline):
     This pipeline handles loading models from wandb and generating samples using the
     DiffusionSampler from FlaxDiff.
     """
+    artifact: Any = None
     state: TrainState = None
     best_state: TrainState = None
     rngstate: Optional[RandomMarkovState] = None
@@ -51,7 +53,6 @@ class DiffusionInferencePipeline(InferencePipeline):
     model_output_transform: DiffusionPredictionTransform = None
     autoencoder: AutoEncoder = None
     input_config: DiffusionInputConfig = None
-    wandb_run = None
     samplers: Dict[Type[DiffusionSampler], Dict[float, DiffusionSampler]] = field(default_factory=dict)
     config: Dict[str, Any] = field(default_factory=dict)
 
@@ -76,7 +77,7 @@ class DiffusionInferencePipeline(InferencePipeline):
         Returns:
             DiffusionInferencePipeline instance
         """
-        states, config, run = load_from_wandb_run(
+        states, config, run, artifact = load_from_wandb_run(
             wandb_run,
             project=project,
             entity=entity,
@@ -95,6 +96,7 @@ class DiffusionInferencePipeline(InferencePipeline):
             best_state=best_state,
             rngstate=RandomMarkovState(jax.random.PRNGKey(42)),
             run=run,
+            artifact=artifact,
         )
         return pipeline
 
@@ -119,7 +121,7 @@ class DiffusionInferencePipeline(InferencePipeline):
         Returns:
             DiffusionInferencePipeline instance
         """
-        states, config, run = load_from_wandb_registry(
+        states, config, run, artifact = load_from_wandb_registry(
             modelname=modelname,
             project=project,
             entity=entity,
@@ -140,6 +142,7 @@ class DiffusionInferencePipeline(InferencePipeline):
             best_state=best_state,
             rngstate=RandomMarkovState(jax.random.PRNGKey(42)),
             run=run,
+            artifact=artifact,
         )
         return pipeline
 
@@ -151,11 +154,14 @@ class DiffusionInferencePipeline(InferencePipeline):
         best_state: Optional[Dict[str, Any]] = None,
         rngstate: Optional[RandomMarkovState] = None,
         run=None,
+        artifact=None,
     ):
         if rngstate is None:
             rngstate = RandomMarkovState(jax.random.PRNGKey(42))
         # Build and return pipeline
         return cls(
+            name=run.name if run else None,
+            artifact=artifact,
             model=config['model'],
             state=state,
             best_state=best_state,
@@ -165,7 +171,6 @@ class DiffusionInferencePipeline(InferencePipeline):
             autoencoder=config['autoencoder'],
             input_config=config['input_config'],
             config=config,
-            wandb_run=run,
         )
 
     def get_sampler(
flaxdiff/inference/utils.py
CHANGED
@@ -292,7 +292,7 @@ def load_from_wandb_run(
         config = run.config
     except Exception as e:
         print(f"Warning: Failed to load model from wandb: {e}")
-    return states, config, run
+    return states, config, run, artifact
 
 def load_from_wandb_registry(
     modelname: str,
@@ -318,4 +318,4 @@ def load_from_wandb_registry(
         config = run.config
     except Exception as e:
         print(f"Warning: Failed to load model from wandb: {e}")
-    return states, config, run
+    return states, config, run, artifact
flaxdiff/models/common.py
CHANGED
@@ -335,73 +335,4 @@ class ResidualBlock(nn.Module):
 
         out = jnp.concatenate([out, extra_features], axis=-1) if extra_features is not None else out
 
-        return out
-
-# Convert Hilbert index d to 2D coordinates (x, y) for an n x n grid
-def _d2xy(n, d):
-    x = 0
-    y = 0
-    t = d
-    s = 1
-    while s < n:
-        rx = (t // 2) & 1
-        ry = (t ^ rx) & 1
-        if ry == 0:
-            if rx == 1:
-                x = n - 1 - x
-                y = n - 1 - y
-            x, y = y, x
-        x += s * rx
-        y += s * ry
-        t //= 4
-        s *= 2
-    return x, y
-
-# Hilbert index mapping for a rectangular grid of patches H_P x W_P
-
-def hilbert_indices(H_P, W_P):
-    size = max(H_P, W_P)
-    order = math.ceil(math.log2(size))
-    n = 1 << order
-    coords = []
-    for d in range(n * n):
-        x, y = _d2xy(n, d)
-        # x is column index, y is row index
-        if x < W_P and y < H_P:
-            coords.append((y, x))  # (row, col)
-        if len(coords) == H_P * W_P:
-            break
-    # Convert (row, col) to linear indices row-major
-    indices = [r * W_P + c for r, c in coords]
-    return jnp.array(indices, dtype=jnp.int32)
-
-# Inverse permutation: given idx where idx[i] = new position of element i, return inv such that inv[idx[i]] = i
-
-def inverse_permutation(idx):
-    inv = jnp.zeros_like(idx)
-    inv = inv.at[idx].set(jnp.arange(idx.shape[0], dtype=idx.dtype))
-    return inv
-
-# Patchify using Hilbert ordering: extract patches and reorder sequence
-
-def hilbert_patchify(x, patch_size):
-    B, H, W, C = x.shape
-    H_P = H // patch_size
-    W_P = W // patch_size
-    # Extract patches in row-major
-    patches = rearrange(x, 'b (h p1) (w p2) c -> b (h w) (p1 p2 c)', p1=patch_size, p2=patch_size)
-    idx = hilbert_indices(H_P, W_P)
-    return patches[:, idx, :]
-
-# Unpatchify from Hilbert ordering: reorder sequence back and reconstruct image
-
-def hilbert_unpatchify(patches, patch_size, H, W, C):
-    B, N, D = patches.shape
-    H_P = H // patch_size
-    W_P = W // patch_size
-    inv = inverse_permutation(hilbert_indices(H_P, W_P))
-    # Reorder back to row-major
-    linear = patches[:, inv, :]
-    # Reconstruct image
-    x = rearrange(linear, 'b (h w) (p1 p2 c) -> b (h p1) (w p2) c', h=H_P, w=W_P, p1=patch_size, p2=patch_size, c=C)
-    return x
+        return out