flaxdiff 0.1.35.5__tar.gz → 0.1.36__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/PKG-INFO +8 -2
  2. flaxdiff-0.1.36/flaxdiff/data/dataset_map.py +71 -0
  3. flaxdiff-0.1.36/flaxdiff/data/datasets.py +169 -0
  4. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/data/online_loader.py +69 -42
  5. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/models/attention.py +1 -0
  6. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/models/simple_unet.py +11 -11
  7. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/models/simple_vit.py +1 -1
  8. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/samplers/common.py +72 -20
  9. flaxdiff-0.1.36/flaxdiff/samplers/ddim.py +10 -0
  10. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/samplers/ddpm.py +5 -11
  11. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/samplers/euler.py +7 -10
  12. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/samplers/heun_sampler.py +3 -4
  13. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/samplers/multistep_dpm.py +2 -3
  14. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/samplers/rk4_sampler.py +9 -9
  15. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/trainer/autoencoder_trainer.py +1 -1
  16. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/trainer/diffusion_trainer.py +124 -32
  17. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/trainer/simple_trainer.py +187 -91
  18. flaxdiff-0.1.36/flaxdiff/trainer/video_diffusion_trainer.py +62 -0
  19. flaxdiff-0.1.36/flaxdiff/utils.py +192 -0
  20. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff.egg-info/PKG-INFO +8 -2
  21. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff.egg-info/SOURCES.txt +4 -1
  22. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/setup.py +1 -1
  23. flaxdiff-0.1.35.5/flaxdiff/samplers/ddim.py +0 -10
  24. flaxdiff-0.1.35.5/flaxdiff/utils.py +0 -89
  25. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/README.md +0 -0
  26. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/__init__.py +0 -0
  27. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/data/__init__.py +0 -0
  28. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/models/__init__.py +0 -0
  29. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/models/autoencoder/__init__.py +0 -0
  30. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/models/autoencoder/autoencoder.py +0 -0
  31. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/models/autoencoder/diffusers.py +0 -0
  32. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/models/autoencoder/simple_autoenc.py +0 -0
  33. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/models/common.py +0 -0
  34. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/models/favor_fastattn.py +0 -0
  35. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/predictors/__init__.py +0 -0
  36. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/samplers/__init__.py +0 -0
  37. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/schedulers/__init__.py +0 -0
  38. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/schedulers/common.py +0 -0
  39. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/schedulers/continuous.py +0 -0
  40. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/schedulers/cosine.py +0 -0
  41. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/schedulers/discrete.py +0 -0
  42. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/schedulers/exp.py +0 -0
  43. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/schedulers/karras.py +0 -0
  44. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/schedulers/linear.py +0 -0
  45. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/schedulers/sqrt.py +0 -0
  46. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/trainer/__init__.py +0 -0
  47. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff.egg-info/dependency_links.txt +0 -0
  48. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff.egg-info/requires.txt +0 -0
  49. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff.egg-info/top_level.txt +0 -0
  50. {flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/setup.cfg +0 -0
{flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/PKG-INFO
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: flaxdiff
- Version: 0.1.35.5
+ Version: 0.1.36
  Summary: A versatile and easy to understand Diffusion library
  Author: Ashish Kumar Singh
  Author-email: ashishkmr472@gmail.com
@@ -10,6 +10,12 @@ Requires-Dist: optax>=0.2.2
  Requires-Dist: jax>=0.4.28
  Requires-Dist: orbax
  Requires-Dist: clu
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: requires-dist
+ Dynamic: summary

  # ![](images/logo.jpeg "FlaxDiff")

flaxdiff-0.1.36/flaxdiff/data/dataset_map.py (new file)
@@ -0,0 +1,71 @@
+ from .sources.tfds import data_source_tfds, tfds_augmenters
+ from .sources.gcs import data_source_gcs, data_source_combined_gcs, gcs_augmenters
+
+ # Configure the following for your datasets
+ datasetMap = {
+     "oxford_flowers102": {
+         "source": data_source_tfds("oxford_flowers102", use_tf=False),
+         "augmenter": tfds_augmenters,
+     },
+     "cc12m": {
+         "source": data_source_gcs('arrayrecord2/cc12m'),
+         "augmenter": gcs_augmenters,
+     },
+     "laiona_coco": {
+         "source": data_source_gcs('arrayrecord2/laion-aesthetics-12m+mscoco-2017'),
+         "augmenter": gcs_augmenters,
+     },
+     "aesthetic_coyo": {
+         "source": data_source_gcs('arrayrecords/aestheticCoyo_0.25clip_6aesthetic'),
+         "augmenter": gcs_augmenters,
+     },
+     "combined_aesthetic": {
+         "source": data_source_combined_gcs([
+             'arrayrecord2/laion-aesthetics-12m+mscoco-2017',
+             'arrayrecords/aestheticCoyo_0.25clip_6aesthetic',
+             'arrayrecord2/cc12m',
+             'arrayrecords/aestheticCoyo_0.25clip_6aesthetic',
+         ]),
+         "augmenter": gcs_augmenters,
+     },
+     "laiona_coco_coyo": {
+         "source": data_source_combined_gcs([
+             'arrayrecords/aestheticCoyo_0.25clip_6aesthetic',
+             'arrayrecord2/laion-aesthetics-12m+mscoco-2017',
+             'arrayrecords/aestheticCoyo_0.25clip_6aesthetic',
+         ]),
+         "augmenter": gcs_augmenters,
+     },
+     "combined_30m": {
+         "source": data_source_combined_gcs([
+             'arrayrecord2/laion-aesthetics-12m+mscoco-2017',
+             'arrayrecord2/cc12m',
+             'arrayrecord2/aestheticCoyo_0.26_clip_5.5aesthetic_256plus',
+             "arrayrecord2/playground+leonardo_x4+cc3m.parquet",
+         ]),
+         "augmenter": gcs_augmenters,
+     }
+ }
+
+ onlineDatasetMap = {
+     "combined_online": {
+         "source": [
+             # "gs://flaxdiff-datasets-regional/datasets/laion-aesthetics-12m+mscoco-2017.parquet"
+             # "ChristophSchuhmann/MS_COCO_2017_URL_TEXT",
+             # "dclure/laion-aesthetics-12m-umap",
+             "gs://flaxdiff-datasets-regional/datasets/laion-aesthetics-12m+mscoco-2017",
+             "gs://flaxdiff-datasets-regional/datasets/coyo700m-aesthetic-5.4_25M",
+             "gs://flaxdiff-datasets-regional/datasets/leonardo-liked-1.8m",
+             "gs://flaxdiff-datasets-regional/datasets/leonardo-liked-1.8m",
+             "gs://flaxdiff-datasets-regional/datasets/leonardo-liked-1.8m",
+             "gs://flaxdiff-datasets-regional/datasets/cc12m",
+             "gs://flaxdiff-datasets-regional/datasets/playground-liked",
+             "gs://flaxdiff-datasets-regional/datasets/leonardo-liked-1.8m",
+             "gs://flaxdiff-datasets-regional/datasets/leonardo-liked-1.8m",
+             "gs://flaxdiff-datasets-regional/datasets/cc3m",
+             "gs://flaxdiff-datasets-regional/datasets/cc3m",
+             "gs://flaxdiff-datasets-regional/datasets/laion2B-en-aesthetic-4.2_37M",
+             # "gs://flaxdiff-datasets-regional/datasets/laiion400m-185M"
+         ]
+     }
+ }
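
Editor's note: each datasetMap entry above pairs a source factory with an augmenter factory. The new get_dataset_grain in datasets.py (next hunk) resolves an entry roughly like the minimal sketch below; the mount path and scale are just that function's defaults, and the local names are illustrative.

    import jax
    entry = datasetMap["cc12m"]
    # build the array_record-backed source, then an augmenter for 256px training
    data_source = entry["source"]("/mnt/gcs_mount/arrayrecord2/cc12m/")
    augmenter = entry["augmenter"](256, jax.image.ResizeMethod.LANCZOS3)
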
flaxdiff-0.1.36/flaxdiff/data/datasets.py (new file)
@@ -0,0 +1,169 @@
+ import jax.numpy as jnp
+ import grain.python as pygrain
+ from typing import Dict
+ import numpy as np
+ import jax
+ from flaxdiff.utils import convert_to_global_tree, AutoTextTokenizer
+ from .dataset_map import datasetMap, onlineDatasetMap
+ import traceback
+ from .online_loader import OnlineStreamingDataLoader
+ import queue
+ from jax.sharding import Mesh
+ import threading
+
+ def batch_mesh_map(mesh):
+     class augmenters(pygrain.MapTransform):
+         def __init__(self, *args, **kwargs):
+             super().__init__(*args, **kwargs)
+
+         def map(self, batch) -> Dict[str, jnp.array]:
+             return convert_to_global_tree(mesh, batch)
+     return augmenters
+
+ def get_dataset_grain(
+     data_name="cc12m",
+     batch_size=64,
+     image_scale=256,
+     count=None,
+     num_epochs=None,
+     method=jax.image.ResizeMethod.LANCZOS3,
+     worker_count=32,
+     read_thread_count=64,
+     read_buffer_size=50,
+     worker_buffer_size=20,
+     seed=0,
+     dataset_source="/mnt/gcs_mount/arrayrecord2/cc12m/",
+ ):
+     dataset = datasetMap[data_name]
+     data_source = dataset["source"](dataset_source)
+     augmenter = dataset["augmenter"](image_scale, method)
+
+     local_batch_size = batch_size // jax.process_count()
+
+     sampler = pygrain.IndexSampler(
+         num_records=len(data_source) if count is None else count,
+         shuffle=True,
+         seed=seed,
+         num_epochs=num_epochs,
+         shard_options=pygrain.ShardByJaxProcess(),
+     )
+
+     def get_trainset():
+         transformations = [
+             augmenter(),
+             pygrain.Batch(local_batch_size, drop_remainder=True),
+         ]
+
+         # if mesh != None:
+         #     transformations += [batch_mesh_map(mesh)]
+
+         loader = pygrain.DataLoader(
+             data_source=data_source,
+             sampler=sampler,
+             operations=transformations,
+             worker_count=worker_count,
+             read_options=pygrain.ReadOptions(
+                 read_thread_count, read_buffer_size
+             ),
+             worker_buffer_size=worker_buffer_size,
+         )
+         return loader
+
+
+     return {
+         "train": get_trainset,
+         "train_len": len(data_source),
+         "local_batch_size": local_batch_size,
+         "global_batch_size": batch_size,
+         # "null_labels": null_labels,
+         # "null_labels_full": null_labels_full,
+         # "model": model,
+         # "tokenizer": tokenizer,
+     }
+
+ def generate_collate_fn():
+     auto_tokenize = AutoTextTokenizer(tensor_type="np")
+     def default_collate(batch):
+         try:
+             # urls = [sample["url"] for sample in batch]
+             captions = [sample["caption"] for sample in batch]
+             results = auto_tokenize(captions)
+             images = np.stack([sample["image"] for sample in batch], axis=0)
+             return {
+                 "image": images,
+                 "input_ids": results['input_ids'],
+                 "attention_mask": results['attention_mask'],
+             }
+         except Exception as e:
+             print("Error in collate function", e, [sample["image"].shape for sample in batch])
+             traceback.print_exc()
+
+     return default_collate
+
+ def get_dataset_online(
+     data_name="combined_online",
+     batch_size=64,
+     image_scale=256,
+     count=None,
+     num_epochs=None,
+     method=jax.image.ResizeMethod.LANCZOS3,
+     worker_count=32,
+     read_thread_count=64,
+     read_buffer_size=50,
+     worker_buffer_size=20,
+     seed=0,
+     dataset_source="/mnt/gcs_mount/arrayrecord2/cc12m/",
+ ):
+     local_batch_size = batch_size // jax.process_count()
+
+     sources = onlineDatasetMap[data_name]["source"]
+     dataloader = OnlineStreamingDataLoader(
+         sources,
+         batch_size=local_batch_size,
+         num_workers=worker_count,
+         num_threads=read_thread_count,
+         image_shape=(image_scale, image_scale),
+         global_process_count=jax.process_count(),
+         global_process_index=jax.process_index(),
+         prefetch=worker_buffer_size,
+         collate_fn=generate_collate_fn(),
+         default_split="train",
+     )
+
+     def get_trainset(mesh: Mesh = None):
+         if mesh != None:
+             class dataLoaderWithMesh:
+                 def __init__(self, dataloader, mesh):
+                     self.dataloader = dataloader
+                     self.mesh = mesh
+                     self.tmp_queue = queue.Queue(worker_buffer_size)
+                     def batch_loader():
+                         for batch in self.dataloader:
+                             try:
+                                 self.tmp_queue.put(convert_to_global_tree(mesh, batch))
+                             except Exception as e:
+                                 print("Error processing batch", e)
+                     self.loader_thread = threading.Thread(target=batch_loader)
+                     self.loader_thread.start()
+
+                 def __iter__(self):
+                     return self
+
+                 def __next__(self):
+                     return self.tmp_queue.get()
+
+             dataloader_with_mesh = dataLoaderWithMesh(dataloader, mesh)
+
+             return dataloader_with_mesh
+         return dataloader
+
+     return {
+         "train": get_trainset,
+         "train_len": len(dataloader) * jax.process_count(),
+         "local_batch_size": local_batch_size,
+         "global_batch_size": batch_size,
+         # "null_labels": null_labels,
+         # "null_labels_full": null_labels_full,
+         # "model": model,
+         # "tokenizer": tokenizer,
+     }
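
Editor's note: a minimal driver for the grain-backed pipeline above, assuming a host with the GCS bucket mounted; the keyword values shown are just the function's own defaults, and the contents of each batch depend on the augmenter.

    data = get_dataset_grain("cc12m", batch_size=64, image_scale=256)
    loader = data["train"]()   # constructs the pygrain.DataLoader lazily
    steps_per_epoch = data["train_len"] // data["global_batch_size"]
    for batch in loader:
        # a dict produced by the augmenter, e.g. images plus tokenized captions
        break
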
{flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/data/online_loader.py
@@ -45,36 +45,43 @@ def fetch_single_image(image_url, timeout=None, retries=0):


  def default_image_processor(
-     image, image_shape,
+     image, image_shape,
      min_image_shape=(128, 128),
      upscale_interpolation=cv2.INTER_CUBIC,
      downscale_interpolation=cv2.INTER_AREA,
  ):
-     image = np.array(image)
-     original_height, original_width = image.shape[:2]
-     # check if the image is too small
-     if min(original_height, original_width) < min(min_image_shape):
-         return None, original_height, original_width
-     # check if wrong aspect ratio
-     if max(original_height, original_width) / min(original_height, original_width) > 2.4:
-         return None, original_height, original_width
-     # check if the variance is too low
-     if np.std(image) < 1e-5:
-         return None, original_height, original_width
-     # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-     downscale = max(original_width, original_height) > max(image_shape)
-     interpolation = downscale_interpolation if downscale else upscale_interpolation
-
-     image = A.longest_max_size(image, max(
-         image_shape), interpolation=interpolation)
-     image = A.pad(
-         image,
-         min_height=image_shape[0],
-         min_width=image_shape[1],
-         border_mode=cv2.BORDER_CONSTANT,
-         value=[255, 255, 255],
-     )
-     return image, original_height, original_width
+     try:
+         image = np.array(image)
+         if len(image.shape) != 3 or image.shape[2] != 3:
+             return None, 0, 0
+         original_height, original_width = image.shape[:2]
+         # check if the image is too small
+         if min(original_height, original_width) < min(min_image_shape):
+             return None, original_height, original_width
+         # check if wrong aspect ratio
+         if max(original_height, original_width) / min(original_height, original_width) > 2.4:
+             return None, original_height, original_width
+         # check if the variance is too low
+         if np.std(image) < 1e-5:
+             return None, original_height, original_width
+         # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+         downscale = max(original_width, original_height) > max(image_shape)
+         interpolation = downscale_interpolation if downscale else upscale_interpolation
+
+         image = A.longest_max_size(image, max(
+             image_shape), interpolation=interpolation)
+         image = A.pad(
+             image,
+             min_height=image_shape[0],
+             min_width=image_shape[1],
+             border_mode=cv2.BORDER_CONSTANT,
+             value=[255, 255, 255],
+         )
+         return image, original_height, original_width
+     except Exception as e:
+         # print("Error processing image", e, image_shape, interpolation)
+         # traceback.print_exc()
+         return None, 0, 0


  def map_sample(
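
Editor's note: the rewritten default_image_processor keeps the old filters (minimum side length, aspect ratio above 2.4, near-zero variance) and adds a 3-channel check plus a blanket try/except, so malformed inputs now yield (None, 0, 0) instead of raising. A rough illustration of the filter behavior, assuming numpy, cv2, and albumentations are available as in the module (none of these inputs reach the resize calls):

    import numpy as np
    small = np.zeros((100, 100, 3), dtype=np.uint8)   # shorter side < min_image_shape
    out, h, w = default_image_processor(small, (256, 256))
    assert out is None and (h, w) == (100, 100)
    wide = np.random.randint(0, 255, (200, 500, 3), dtype=np.uint8)   # ratio 2.5 > 2.4
    assert default_image_processor(wide, (256, 256))[0] is None
    gray = np.zeros((300, 300), dtype=np.uint8)       # not 3-channel -> (None, 0, 0)
    assert default_image_processor(gray, (256, 256)) == (None, 0, 0)
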
@@ -120,14 +127,36 @@ def map_sample(
          # })
          pass

-
  def default_feature_extractor(sample):
+     url = None
+     if "url" in sample:
+         url = sample["url"]
+     elif "URL" in sample:
+         url = sample["URL"]
+     elif "image_url" in sample:
+         url = sample["image_url"]
+     else:
+         print("No url found in sample, skipping", sample.keys())
+
+     caption = None
+     if "caption" in sample:
+         caption = sample["caption"]
+     elif "CAPTION" in sample:
+         caption = sample["CAPTION"]
+     elif "txt" in sample:
+         caption = sample["txt"]
+     elif "TEXT" in sample:
+         caption = sample["TEXT"]
+     elif "text" in sample:
+         caption = sample["text"]
+     else:
+         print("No caption found in sample, skipping", sample.keys())
+
      return {
-         "url": sample["url"],
-         "caption": sample["caption"],
+         "url": url,
+         "caption": caption,
      }
-
-
+
  def map_batch(
      batch, num_threads=256, image_shape=(256, 256),
      min_image_shape=(128, 128),
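
Editor's note: default_feature_extractor now normalizes the key spellings found across LAION/COYO/CC-style dumps instead of assuming lowercase url/caption. For example:

    sample = {"URL": "http://example.com/1.jpg", "TEXT": "a red bicycle"}
    assert default_feature_extractor(sample) == {
        "url": "http://example.com/1.jpg",
        "caption": "a red bicycle",
    }
    # unknown keys fall through with a printed warning; url/caption stay None
    default_feature_extractor({"foo": 1})   # returns {"url": None, "caption": None}
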
@@ -301,15 +330,13 @@ class OnlineStreamingDataLoader():
          self.dataset = dataset.shard(
              num_shards=global_process_count, index=global_process_index)
          print(f"Dataset length: {len(dataset)}")
-         self.iterator = ImageBatchIterator(
-             self.dataset, image_shape=image_shape,
-             min_image_shape=min_image_shape,
-             num_workers=num_workers, batch_size=batch_size, num_threads=num_threads,
-             timeout=timeout, retries=retries, image_processor=image_processor,
-             upscale_interpolation=upscale_interpolation,
-             downscale_interpolation=downscale_interpolation,
-             feature_extractor=feature_extractor
-         )
+         self.iterator = ImageBatchIterator(self.dataset, image_shape=image_shape,
+                                            min_image_shape=min_image_shape,
+                                            num_workers=num_workers, batch_size=batch_size, num_threads=num_threads,
+                                            timeout=timeout, retries=retries, image_processor=image_processor,
+                                            upscale_interpolation=upscale_interpolation,
+                                            downscale_interpolation=downscale_interpolation,
+                                            feature_extractor=feature_extractor)
          self.batch_size = batch_size

          # Launch a thread to load batches in the background
@@ -320,7 +347,7 @@ class OnlineStreamingDataLoader():
              try:
                  self.batch_queue.put(collate_fn(batch))
              except Exception as e:
-                 print("Error processing batch", e)
+                 print("Error collating batch", e)

          self.loader_thread = threading.Thread(target=batch_loader)
          self.loader_thread.start()
@@ -333,4 +360,4 @@ class OnlineStreamingDataLoader():
          # return self.collate_fn(next(self.iterator))

      def __len__(self):
-         return len(self.dataset)
+         return len(self.dataset)
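
Editor's note: taken together, the online_loader changes are error handling, key normalization, formatting, and a collate error message; construction is unchanged. A hedged usage sketch follows: the dataset name is one of the commented-out examples in dataset_map.py above, the batch size and worker counts are placeholders, and the remaining constructor arguments are assumed to keep their defaults. The loader fetches, filters, and collates batches on a background thread, so the first next() blocks until a batch is queued.

    loader = OnlineStreamingDataLoader(
        "ChristophSchuhmann/MS_COCO_2017_URL_TEXT",  # HF dataset name or gs:// path
        batch_size=16,
        num_workers=8,
        num_threads=64,
        image_shape=(256, 256),
        default_split="train",
    )
    batch = next(iter(loader))   # waits on the background batch_loader queue
    print(len(loader))           # samples in this process's shard, per __len__ above
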
{flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/models/attention.py
@@ -11,6 +11,7 @@ import einops
  import functools
  import math
  from .common import kernel_init
+ import jax.experimental.pallas.ops.tpu.flash_attention

  class EfficientAttention(nn.Module):
      """
{flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/models/simple_unet.py
@@ -50,7 +50,7 @@ class Unet(nn.Module):
              features=self.feature_depths[0],
              kernel_size=(3, 3),
              strides=(1, 1),
-             kernel_init=self.kernel_init(1.0),
+             kernel_init=self.kernel_init(scale=1.0),
              dtype=self.dtype,
              precision=self.precision
          )(x)
@@ -65,7 +65,7 @@ class Unet(nn.Module):
                  down_conv_type,
                  name=f"down_{i}_residual_{j}",
                  features=dim_in,
-                 kernel_init=self.kernel_init(1.0),
+                 kernel_init=self.kernel_init(scale=1.0),
                  kernel_size=(3, 3),
                  strides=(1, 1),
                  activation=self.activation,
@@ -85,7 +85,7 @@ class Unet(nn.Module):
                      force_fp32_for_softmax=attention_config.get("force_fp32_for_softmax", False),
                      norm_inputs=attention_config.get("norm_inputs", True),
                      explicitly_add_residual=attention_config.get("explicitly_add_residual", True),
-                     kernel_init=self.kernel_init(1.0),
+                     kernel_init=self.kernel_init(scale=1.0),
                      name=f"down_{i}_attention_{j}")(x, textcontext)
              # print("down residual for feature level", i, "is of shape", x.shape, "features", dim_in)
              downs.append(x)
@@ -108,7 +108,7 @@ class Unet(nn.Module):
              middle_conv_type,
              name=f"middle_res1_{j}",
              features=middle_dim_out,
-             kernel_init=self.kernel_init(1.0),
+             kernel_init=self.kernel_init(scale=1.0),
              kernel_size=(3, 3),
              strides=(1, 1),
              activation=self.activation,
@@ -129,13 +129,13 @@ class Unet(nn.Module):
                  force_fp32_for_softmax=middle_attention.get("force_fp32_for_softmax", False),
                  norm_inputs=middle_attention.get("norm_inputs", True),
                  explicitly_add_residual=middle_attention.get("explicitly_add_residual", True),
-                 kernel_init=self.kernel_init(1.0),
+                 kernel_init=self.kernel_init(scale=1.0),
                  name=f"middle_attention_{j}")(x, textcontext)
          x = ResidualBlock(
              middle_conv_type,
              name=f"middle_res2_{j}",
              features=middle_dim_out,
-             kernel_init=self.kernel_init(1.0),
+             kernel_init=self.kernel_init(scale=1.0),
              kernel_size=(3, 3),
              strides=(1, 1),
              activation=self.activation,
@@ -157,7 +157,7 @@ class Unet(nn.Module):
                  up_conv_type,# if j == 0 else "separable",
                  name=f"up_{i}_residual_{j}",
                  features=dim_out,
-                 kernel_init=self.kernel_init(1.0),
+                 kernel_init=self.kernel_init(scale=1.0),
                  kernel_size=kernel_size,
                  strides=(1, 1),
                  activation=self.activation,
@@ -177,7 +177,7 @@ class Unet(nn.Module):
                      force_fp32_for_softmax=middle_attention.get("force_fp32_for_softmax", False),
                      norm_inputs=attention_config.get("norm_inputs", True),
                      explicitly_add_residual=attention_config.get("explicitly_add_residual", True),
-                     kernel_init=self.kernel_init(1.0),
+                     kernel_init=self.kernel_init(scale=1.0),
                      name=f"up_{i}_attention_{j}")(x, textcontext)
              # print("Upscaling ", i, x.shape)
              if i != len(feature_depths) - 1:
@@ -196,7 +196,7 @@ class Unet(nn.Module):
              features=self.feature_depths[0],
              kernel_size=(3, 3),
              strides=(1, 1),
-             kernel_init=self.kernel_init(1.0),
+             kernel_init=self.kernel_init(scale=1.0),
              dtype=self.dtype,
              precision=self.precision
          )(x)
@@ -207,7 +207,7 @@ class Unet(nn.Module):
              conv_type,
              name="final_residual",
              features=self.feature_depths[0],
-             kernel_init=self.kernel_init(1.0),
+             kernel_init=self.kernel_init(scale=1.0),
              kernel_size=(3,3),
              strides=(1, 1),
              activation=self.activation,
@@ -226,7 +226,7 @@ class Unet(nn.Module):
              kernel_size=(3, 3),
              strides=(1, 1),
              # activation=jax.nn.mish
-             kernel_init=self.kernel_init(0.0),
+             kernel_init=self.kernel_init(scale=0.0),
              dtype=self.dtype,
              precision=self.precision
          )(x)
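
Editor's note: all eleven simple_unet.py edits are the same mechanical change, passing scale to self.kernel_init by keyword. This matters if the field is bound the way UViT's is in the next hunk, as partial(kernel_init, scale=1.0): a positional call would then collide with the keyword already bound in the partial. A stand-in sketch of the failure mode (this kernel_init is illustrative, not the library's exact definition):

    from functools import partial

    def kernel_init(scale, dtype=None):   # stand-in for flaxdiff.models.common.kernel_init
        return lambda key, shape, dtype=dtype: scale   # illustrative initializer body

    init = partial(kernel_init, scale=1.0)
    init(scale=0.0)   # fine: a call-time keyword overrides the partial's binding
    # init(0.0)       # TypeError: got multiple values for argument 'scale'
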
{flaxdiff-0.1.35.5 → flaxdiff-0.1.36}/flaxdiff/models/simple_vit.py
@@ -70,7 +70,7 @@ class UViT(nn.Module):
      kernel_init: Callable = partial(kernel_init, scale=1.0)
      add_residualblock_output: bool = False
      norm_inputs: bool = False
-     explicitly_add_residual: bool = False
+     explicitly_add_residual: bool = True

      def setup(self):
          if self.norm_groups > 0: