PyPI - flaxdiff - Versions diffs - 0.1.36.1__py3-none-any.whl → 0.1.36.3__py3-none-any.whl - Mend

flaxdiff-0.1.36.1-py3-none-any.whl → flaxdiff-0.1.36.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

flaxdiff/data/__init__.py +1 -0
flaxdiff/data/dataset_map.py +71 -0
flaxdiff/data/datasets.py +169 -0
flaxdiff/data/online_loader.py +363 -0
flaxdiff/data/sources/gcs.py +81 -0
flaxdiff/data/sources/tfds.py +67 -0
flaxdiff/metrics/inception.py +658 -0
flaxdiff/metrics/utils.py +49 -0
flaxdiff/models/__init__.py +1 -0
flaxdiff/models/attention.py +368 -0
flaxdiff/models/autoencoder/__init__.py +2 -0
flaxdiff/models/autoencoder/autoencoder.py +19 -0
flaxdiff/models/autoencoder/diffusers.py +91 -0
flaxdiff/models/autoencoder/simple_autoenc.py +26 -0
flaxdiff/models/common.py +346 -0
flaxdiff/models/favor_fastattn.py +723 -0
flaxdiff/models/simple_unet.py +233 -0
flaxdiff/models/simple_vit.py +180 -0
flaxdiff/predictors/__init__.py +96 -0
flaxdiff/samplers/__init__.py +7 -0
flaxdiff/samplers/common.py +165 -0
flaxdiff/samplers/ddim.py +10 -0
flaxdiff/samplers/ddpm.py +37 -0
flaxdiff/samplers/euler.py +56 -0
flaxdiff/samplers/heun_sampler.py +27 -0
flaxdiff/samplers/multistep_dpm.py +59 -0
flaxdiff/samplers/rk4_sampler.py +34 -0
flaxdiff/schedulers/__init__.py +6 -0
flaxdiff/schedulers/common.py +98 -0
flaxdiff/schedulers/continuous.py +12 -0
flaxdiff/schedulers/cosine.py +40 -0
flaxdiff/schedulers/discrete.py +74 -0
flaxdiff/schedulers/exp.py +13 -0
flaxdiff/schedulers/karras.py +69 -0
flaxdiff/schedulers/linear.py +14 -0
flaxdiff/schedulers/sqrt.py +10 -0
flaxdiff/trainer/__init__.py +2 -0
flaxdiff/trainer/autoencoder_trainer.py +182 -0
flaxdiff/trainer/diffusion_trainer.py +326 -0
flaxdiff/trainer/simple_trainer.py +540 -0
flaxdiff/trainer/video_diffusion_trainer.py +62 -0
{flaxdiff-0.1.36.1.dist-info → flaxdiff-0.1.36.3.dist-info}/METADATA +1 -1
flaxdiff-0.1.36.3.dist-info/RECORD +47 -0
flaxdiff-0.1.36.1.dist-info/RECORD +0 -6
{flaxdiff-0.1.36.1.dist-info → flaxdiff-0.1.36.3.dist-info}/WHEEL +0 -0
{flaxdiff-0.1.36.1.dist-info → flaxdiff-0.1.36.3.dist-info}/top_level.txt +0 -0

flaxdiff/trainer/autoencoder_trainer.py ADDED Viewed

@@ -0,0 +1,182 @@
+from flax import linen as nn
+import jax
+from typing import Callable
+from dataclasses import field
+import jax.numpy as jnp
+import optax
+from jax.sharding import Mesh, PartitionSpec as P
+from jax.experimental.shard_map import shard_map
+from typing import Dict, Callable, Sequence, Any, Union, Tuple
+from ..schedulers import NoiseScheduler
+from ..predictors import DiffusionPredictionTransform, EpsilonPredictionTransform
+from flaxdiff.utils import RandomMarkovState
+from .simple_trainer import SimpleTrainer, SimpleTrainState, Metrics
+from .diffusion_trainer import TrainState
+from flaxdiff.models.autoencoder.autoencoder import AutoEncoder
class AutoEncoderTrainer(SimpleTrainer):
    """Trainer scaffold for autoencoder models, built on ``SimpleTrainer``.

    NOTE(review): ``_define_train_step`` reads ``self.noise_schedule``,
    ``self.model_output_transform``, ``self.unconditional_prob``,
    ``self.autoencoder`` and ``self.ema_decay``. None of these is assigned in
    this class's ``__init__``; unless ``SimpleTrainer`` or the caller provides
    them, building the train step raises ``AttributeError`` -- confirm.
    """

    def __init__(self,
                 model: nn.Module,
                 input_shape: Tuple[int, int, int],
                 latent_dim: int,
                 spatial_scale: int,
                 optimizer: optax.GradientTransformation,
                 rngs: jax.random.PRNGKey,
                 name: str = "Autoencoder",
                 **kwargs
                 ):
        """Set up the base trainer and record the autoencoder geometry.

        Args:
            model: Module to train.
            input_shape: Shape registered with the base trainer under the
                ``"image"`` key.
            latent_dim: Stored on the instance as ``self.latent_dim``.
            spatial_scale: Stored on the instance as ``self.spatial_scale``.
            optimizer: Optax gradient transformation.
            rngs: PRNG key forwarded to the base trainer.
            name: Trainer name forwarded to the base trainer.
            **kwargs: Forwarded unchanged to ``SimpleTrainer.__init__``.
        """
        super().__init__(
            model=model,
            input_shapes={"image": input_shape},
            optimizer=optimizer,
            rngs=rngs,
            name=name,
            **kwargs
        )
        self.latent_dim = latent_dim
        self.spatial_scale = spatial_scale

    def generate_states(
        self,
        optimizer: optax.GradientTransformation,
        rngs: jax.random.PRNGKey,
        existing_state: dict = None,
        existing_best_state: dict = None,
        model: nn.Module = None,
        param_transforms: Callable = None
    ) -> Tuple[TrainState, TrainState]:
        """Create the current and best ``TrainState`` objects.

        Args:
            optimizer: Optax gradient transformation used as ``tx``.
            rngs: PRNG key; split once, one half initialises the model and the
                other is stored on the state.
            existing_state: Optional mapping with ``'params'`` and
                ``'ema_params'``; when given, the model is not re-initialised.
            existing_best_state: Optional mapping with ``'params'`` and
                ``'ema_params'`` used for the returned best state.
            model: Module providing ``init`` and ``apply``.
            param_transforms: Optional callable applied to the parameters
                before the state is created.

        Returns:
            ``(state, best_state)``; both are the same object when
            ``existing_best_state`` is ``None``.
        """
        print("Generating states for AutoEncoderTrainer")
        rngs, subkey = jax.random.split(rngs)

        if existing_state is None:
            input_vars = self.get_input_ones()
            params = model.init(subkey, **input_vars)
            ema_params = params
        else:
            params = existing_state['params']
            ema_params = existing_state['ema_params']

        if param_transforms is not None:
            # The transformed tree must be the one handed to the train state.
            # Previously the result was discarded, and `params` was undefined
            # whenever `existing_state` was supplied. The EMA copy is left
            # untouched.
            params = param_transforms(params)

        state = TrainState.create(
            apply_fn=model.apply,
            params=params,
            ema_params=ema_params,
            tx=optimizer,
            rngs=rngs,
            metrics=Metrics.empty()
        )

        if existing_best_state is not None:
            best_state = state.replace(
                params=existing_best_state['params'],
                ema_params=existing_best_state['ema_params'])
        else:
            best_state = state

        return state, best_state

    def _define_train_step(self, batch_size, null_labels_seq, text_embedder):
        """Build the per-step training function.

        Args:
            batch_size: Per-call batch size; used to size the null-label
                block and the number of unconditional samples.
            null_labels_seq: 2-D array of null conditioning, broadcast to
                ``(batch_size, nS, nC)``.
            text_embedder: Callable invoked with ``input_ids`` and
                ``attention_mask``; its ``last_hidden_state`` is used as the
                conditioning sequence.

        Returns:
            ``train_step(train_state, rng_state, batch, local_device_index)``
            returning ``(train_state, loss, rng_state)``. It is wrapped in
            ``shard_map`` and ``jax.jit`` only when
            ``self.distributed_training`` is truthy.
        """
        noise_schedule: NoiseScheduler = self.noise_schedule
        model = self.model
        model_output_transform = self.model_output_transform
        loss_fn = self.loss_fn
        unconditional_prob = self.unconditional_prob

        # Determine the number of unconditional samples
        num_unconditional = int(batch_size * unconditional_prob)

        nS, nC = null_labels_seq.shape
        null_labels_seq = jnp.broadcast_to(
            null_labels_seq, (batch_size, nS, nC))

        distributed_training = self.distributed_training
        autoencoder = self.autoencoder

        def train_step(train_state: TrainState, rng_state: RandomMarkovState, batch, local_device_index):
            """Train for a single step."""
            rng_state, subkey = rng_state.get_random_key()
            # Fold the device index into the key so each shard gets its own
            # random stream.
            subkey = jax.random.fold_in(subkey, local_device_index.reshape())
            local_rng_state = RandomMarkovState(subkey)

            images = batch['image']
            if autoencoder is not None:
                # Convert the images to latent space
                local_rng_state, rngs = local_rng_state.get_random_key()
                images = autoencoder.encode(images, rngs)
            else:
                # normalize image
                images = (images - 127.5) / 127.5

            output = text_embedder(
                input_ids=batch['input_ids'], attention_mask=batch['attention_mask'])
            label_seq = output.last_hidden_state

            # The first `num_unconditional` rows of the batch are given the
            # null conditioning; the remaining rows keep their own labels.
            label_seq = jnp.concat(
                [null_labels_seq[:num_unconditional], label_seq[num_unconditional:]], axis=0)

            noise_level, local_rng_state = noise_schedule.generate_timesteps(images.shape[0], local_rng_state)

            local_rng_state, rngs = local_rng_state.get_random_key()
            noise: jax.Array = jax.random.normal(rngs, shape=images.shape)

            rates = noise_schedule.get_rates(noise_level)
            noisy_images, c_in, expected_output = model_output_transform.forward_diffusion(
                images, noise, rates)

            def model_loss(params):
                preds = model.apply(params, *noise_schedule.transform_inputs(noisy_images*c_in, noise_level), label_seq)
                preds = model_output_transform.pred_transform(
                    noisy_images, preds, rates)
                nloss = loss_fn(preds, expected_output)
                # Weight the loss by the schedule's weights for these noise
                # levels before averaging.
                nloss *= noise_schedule.get_weights(noise_level)
                nloss = jnp.mean(nloss)
                loss = nloss
                return loss

            loss, grads = jax.value_and_grad(model_loss)(train_state.params)
            if distributed_training:
                # Average across the "data" mesh axis.
                grads = jax.lax.pmean(grads, "data")
                loss = jax.lax.pmean(loss, "data")
            train_state = train_state.apply_gradients(grads=grads)
            train_state = train_state.apply_ema(self.ema_decay)
            return train_state, loss, rng_state

        if distributed_training:
            train_step = shard_map(train_step, mesh=self.mesh, in_specs=(P(), P(), P('data'), P('data')),
                                   out_specs=(P(), P(), P()))
            train_step = jax.jit(train_step)

        return train_step

    def _define_compute_metrics(self):
        """Return a jitted function that merges an MSE loss into the state's metrics."""
        @jax.jit
        def compute_metrics(state: TrainState, expected, pred):
            loss = jnp.mean(jnp.square(pred - expected))
            metric_updates = state.metrics.single_from_model_output(loss=loss)
            metrics = state.metrics.merge(metric_updates)
            state = state.replace(metrics=metrics)
            return state
        return compute_metrics

    def fit(self, data, steps_per_epoch, epochs):
        """Run training via ``SimpleTrainer.fit``.

        ``data`` must provide ``'null_labels_full'``, ``'local_batch_size'``
        and ``'model'`` (the text embedder); these are forwarded as the
        arguments of ``_define_train_step``.
        """
        null_labels_full = data['null_labels_full']
        local_batch_size = data['local_batch_size']
        text_embedder = data['model']
        super().fit(data, steps_per_epoch, epochs, {
            "batch_size": local_batch_size, "null_labels_seq": null_labels_full, "text_embedder": text_embedder})
def boolean_string(s):
    """Convert a flag value to a bool.

    A ``bool`` is returned unchanged. Any other value yields ``True`` only
    when it compares equal to the exact string ``'True'``.
    """
    if isinstance(s, bool):
        return s
    return s == 'True'

flaxdiff/trainer/diffusion_trainer.py ADDED Viewed

@@ -0,0 +1,326 @@
+import flax
+from flax import linen as nn
+import jax
+from typing import Callable
+from dataclasses import field
+import jax.numpy as jnp
+import traceback
+import optax
+import functools
+from jax.sharding import Mesh, PartitionSpec as P
+from jax.experimental.shard_map import shard_map
+from typing import Dict, Callable, Sequence, Any, Union, Tuple, Type
+from ..schedulers import NoiseScheduler
+from ..predictors import DiffusionPredictionTransform, EpsilonPredictionTransform
+from ..samplers.common import DiffusionSampler
+from flaxdiff.utils import RandomMarkovState
+from .simple_trainer import SimpleTrainer, SimpleTrainState, Metrics
+from flaxdiff.models.autoencoder.autoencoder import AutoEncoder
+from flax.training import dynamic_scale as dynamic_scale_lib
+from flaxdiff.utils import TextEncoder, ConditioningEncoder
class TrainState(SimpleTrainState):
    """Train state that also carries a PRNG key and an EMA copy of the parameters."""
    rngs: jax.random.PRNGKey
    ema_params: dict

    def apply_ema(self, decay: float = 0.999):
        """Return a copy of this state with ``ema_params`` moved towards ``params``.

        Each EMA leaf becomes ``decay * ema + (1 - decay) * param``.
        """
        def blend(ema_leaf, param_leaf):
            return decay * ema_leaf + (1 - decay) * param_leaf

        updated_ema = jax.tree_util.tree_map(blend, self.ema_params, self.params)
        return self.replace(ema_params=updated_ema)
+from flaxdiff.models.autoencoder.autoencoder import AutoEncoder
class DiffusionTrainer(SimpleTrainer):
    """Trainer for conditional diffusion models, built on ``SimpleTrainer``.

    The train step noises a batch of images (optionally encoded to latents by
    an autoencoder), conditions the model on encoder outputs with a fixed
    portion of each batch replaced by null conditioning, and maintains an EMA
    copy of the parameters that is used for validation sampling.
    """
    noise_schedule: NoiseScheduler
    model_output_transform: DiffusionPredictionTransform
    ema_decay: float = 0.999

    def __init__(self,
                 model: nn.Module,
                 input_shapes: Dict[str, Tuple[int]],
                 optimizer: optax.GradientTransformation,
                 noise_schedule: NoiseScheduler,
                 rngs: jax.random.PRNGKey,
                 unconditional_prob: float = 0.12,
                 name: str = "Diffusion",
                 model_output_transform: DiffusionPredictionTransform = EpsilonPredictionTransform(),
                 autoencoder: AutoEncoder = None,
                 encoder: ConditioningEncoder = None,
                 **kwargs
                 ):
        """Set up the base trainer and store the diffusion-specific components.

        Args:
            model: Module to train.
            input_shapes: Input shapes forwarded to the base trainer;
                ``input_shapes['x'][0]`` is later used as the sampler's image
                size.
            optimizer: Optax gradient transformation.
            noise_schedule: Scheduler providing timesteps, rates and weights.
            rngs: PRNG key forwarded to the base trainer.
            unconditional_prob: Fraction of each batch trained with null
                conditioning.
            name: Trainer name forwarded to the base trainer.
            model_output_transform: Prediction transform. The default
                instance is created once at definition time and shared by
                every trainer that does not pass its own.
            autoencoder: Optional autoencoder; when set, images are encoded
                before noising.
            encoder: Conditioning encoder; called with ``[""]`` to obtain the
                null conditioning and with the batch via
                ``encode_from_tokens``.
            **kwargs: Forwarded unchanged to ``SimpleTrainer.__init__``.
        """
        super().__init__(
            model=model,
            input_shapes=input_shapes,
            optimizer=optimizer,
            rngs=rngs,
            name=name,
            **kwargs
        )
        self.noise_schedule = noise_schedule
        self.model_output_transform = model_output_transform
        self.unconditional_prob = unconditional_prob

        self.autoencoder = autoencoder
        self.encoder = encoder

    def generate_states(
        self,
        optimizer: optax.GradientTransformation,
        rngs: jax.random.PRNGKey,
        existing_state: dict = None,
        existing_best_state: dict = None,
        model: nn.Module = None,
        param_transforms: Callable = None,
        use_dynamic_scale: bool = False
    ) -> Tuple[TrainState, TrainState]:
        """Create the current and best ``TrainState`` objects.

        Args:
            optimizer: Optax gradient transformation used as ``tx``.
            rngs: PRNG key; split once, one half initialises the model and the
                other is stored on the state.
            existing_state: Optional mapping with ``'params'`` and
                ``'ema_params'``; when given, the model is not re-initialised.
            existing_best_state: Optional mapping with ``'params'`` and
                ``'ema_params'`` used for the returned best state.
            model: Module providing ``init`` and ``apply``.
            param_transforms: Optional callable applied to the parameters
                before the state is created.
            use_dynamic_scale: When true, the state carries a
                ``DynamicScale`` instance; otherwise ``None``.

        Returns:
            ``(state, best_state)``; both are the same object when
            ``existing_best_state`` is ``None``.
        """
        print("Generating states for DiffusionTrainer")
        rngs, subkey = jax.random.split(rngs)

        if existing_state is None:
            input_vars = self.get_input_ones()
            params = model.init(subkey, **input_vars)
            ema_params = params
        else:
            params = existing_state['params']
            ema_params = existing_state['ema_params']

        if param_transforms is not None:
            # The transformed tree must be the one handed to the train state.
            # Previously the result was discarded, and `params` was undefined
            # whenever `existing_state` was supplied. The EMA copy is left
            # untouched.
            params = param_transforms(params)

        state = TrainState.create(
            apply_fn=model.apply,
            params=params,
            ema_params=ema_params,
            tx=optimizer,
            rngs=rngs,
            metrics=Metrics.empty(),
            dynamic_scale=dynamic_scale_lib.DynamicScale() if use_dynamic_scale else None
        )

        if existing_best_state is not None:
            best_state = state.replace(
                params=existing_best_state['params'],
                ema_params=existing_best_state['ema_params'])
        else:
            best_state = state

        return state, best_state

    def _define_train_step(self, batch_size):
        """Build the per-step training function.

        Args:
            batch_size: Per-call batch size; used to size the null-label
                block and the number of unconditional samples.

        Returns:
            ``train_step(train_state, rng_state, batch, local_device_index)``
            returning ``(train_state, loss, rng_state)``. It is wrapped in
            ``shard_map`` and ``jax.jit`` only when
            ``self.distributed_training`` is truthy.
        """
        noise_schedule: NoiseScheduler = self.noise_schedule
        model = self.model
        model_output_transform = self.model_output_transform
        loss_fn = self.loss_fn
        unconditional_prob = self.unconditional_prob

        # Determine the number of unconditional samples
        num_unconditional = int(batch_size * unconditional_prob)

        # Encode the empty prompt once; its first entry is the null
        # conditioning sequence.
        null_labels_full = self.encoder([""])
        null_labels_seq = jnp.array(null_labels_full[0], dtype=jnp.float16)

        conditioning_encoder = self.encoder

        nS, nC = null_labels_seq.shape
        null_labels_seq = jnp.broadcast_to(
            null_labels_seq, (batch_size, nS, nC))

        distributed_training = self.distributed_training
        autoencoder = self.autoencoder

        # The "data" axis only exists inside the shard_map wrapper below, so
        # cross-replica averaging is requested only for distributed training.
        grad_axis_name = "data" if distributed_training else None

        def train_step(train_state: TrainState, rng_state: RandomMarkovState, batch, local_device_index):
            """Train for a single step."""
            rng_state, subkey = rng_state.get_random_key()
            # Fold the device index into the key so each shard gets its own
            # random stream.
            subkey = jax.random.fold_in(subkey, local_device_index.reshape())
            local_rng_state = RandomMarkovState(subkey)

            images = batch['image']
            images = jnp.array(images, dtype=jnp.float32)
            # normalize image
            images = (images - 127.5) / 127.5

            if autoencoder is not None:
                # Convert the images to latent space
                local_rng_state, rngs = local_rng_state.get_random_key()
                images = autoencoder.encode(images, rngs)

            label_seq = conditioning_encoder.encode_from_tokens(batch)

            # The first `num_unconditional` rows of the batch are given the
            # null conditioning; the remaining rows keep their own labels.
            label_seq = jnp.concat(
                [null_labels_seq[:num_unconditional], label_seq[num_unconditional:]], axis=0)

            noise_level, local_rng_state = noise_schedule.generate_timesteps(images.shape[0], local_rng_state)

            local_rng_state, rngs = local_rng_state.get_random_key()
            noise: jax.Array = jax.random.normal(rngs, shape=images.shape)

            rates = noise_schedule.get_rates(noise_level)
            noisy_images, c_in, expected_output = model_output_transform.forward_diffusion(
                images, noise, rates)

            def model_loss(params):
                preds = model.apply(params, *noise_schedule.transform_inputs(noisy_images*c_in, noise_level), label_seq)
                preds = model_output_transform.pred_transform(
                    noisy_images, preds, rates)
                nloss = loss_fn(preds, expected_output)
                # Weight the loss by the schedule's weights for these noise
                # levels before averaging.
                nloss *= noise_schedule.get_weights(noise_level)
                nloss = jnp.mean(nloss)
                loss = nloss
                return loss

            if train_state.dynamic_scale is not None:
                # dynamic scale takes care of averaging gradients across replicas
                grad_fn = train_state.dynamic_scale.value_and_grad(
                    model_loss, axis_name=grad_axis_name
                )
                dynamic_scale, is_fin, loss, grads = grad_fn(train_state.params)
                train_state = train_state.replace(dynamic_scale=dynamic_scale)
            else:
                grad_fn = jax.value_and_grad(model_loss)
                loss, grads = grad_fn(train_state.params)
                if distributed_training:
                    grads = jax.lax.pmean(grads, "data")

            new_state = train_state.apply_gradients(grads=grads)

            if train_state.dynamic_scale is not None:
                # if is_fin == False the gradients contain Inf/NaNs and optimizer state and
                # params should be restored (= skip this step).
                select_fn = functools.partial(jnp.where, is_fin)
                new_state = new_state.replace(
                    opt_state=jax.tree_util.tree_map(
                        select_fn, new_state.opt_state, train_state.opt_state
                    ),
                    params=jax.tree_util.tree_map(
                        select_fn, new_state.params, train_state.params
                    ),
                )

            train_state = new_state.apply_ema(self.ema_decay)

            if distributed_training:
                loss = jax.lax.pmean(loss, "data")
            return train_state, loss, rng_state

        if distributed_training:
            train_step = shard_map(train_step, mesh=self.mesh, in_specs=(P(), P(), P('data'), P('data')),
                                   out_specs=(P(), P(), P()))
            train_step = jax.jit(train_step)

        return train_step

    def _define_vaidation_step(self, sampler_class: Type[DiffusionSampler]):
        """Build the sampler factory and sample generator used for validation.

        NOTE(review): the method name is spelled "vaidation"; it is kept
        as-is because the base class presumably looks it up by this name --
        confirm before renaming.

        Args:
            sampler_class: ``DiffusionSampler`` subclass to instantiate.

        Returns:
            ``(generate_sampler, generate_samples)``.
            ``generate_sampler(state)`` builds a sampler from the state's EMA
            parameters. ``generate_samples(batch, sampler, diffusion_steps)``
            returns images conditioned on the batch's encoded labels.
        """
        model = self.model
        encoder = self.encoder
        autoencoder = self.autoencoder

        null_labels_full = encoder([""])
        null_labels_full = null_labels_full.astype(jnp.float16)

        def generate_sampler(state: TrainState):
            sampler = sampler_class(
                model=model,
                params=state.ema_params,
                noise_schedule=self.noise_schedule,
                model_output_transform=self.model_output_transform,
                image_size=self.input_shapes['x'][0],
                null_labels_seq=null_labels_full,
                autoencoder=autoencoder,
            )
            return sampler

        def generate_samples(
            batch,
            sampler: DiffusionSampler,
            diffusion_steps: int,
        ):
            labels_seq = encoder.encode_from_tokens(batch)
            labels_seq = jnp.array(labels_seq, dtype=jnp.float16)
            samples = sampler.generate_images(
                num_images=len(labels_seq),
                diffusion_steps=diffusion_steps,
                start_step=1000,
                end_step=0,
                priors=None,
                model_conditioning_inputs=(labels_seq,),
            )
            return samples

        return generate_sampler, generate_samples

    def validation_loop(
        self,
        val_state: SimpleTrainState,
        val_step_fn: Callable,
        val_ds,
        val_steps_per_epoch,
        current_step,
        diffusion_steps=200,
    ):
        """Generate samples from one validation batch and log them to wandb.

        Failures during generation or logging are printed with a traceback
        and do not propagate, so a validation problem does not stop training.

        Args:
            val_state: State whose EMA parameters are used for sampling.
            val_step_fn: The ``(generate_sampler, generate_samples)`` pair
                returned by ``_define_vaidation_step``.
            val_ds: Zero-argument callable returning an iterable of batches;
                when falsy, validation is skipped.
            val_steps_per_epoch: Unused here; only one batch is sampled.
            current_step: Training step used as the wandb logging step.
            diffusion_steps: Number of sampler steps.
        """
        generate_sampler, generate_samples = val_step_fn

        sampler = generate_sampler(val_state)

        if not val_ds:
            # Without a dataset there is nothing to condition on; say so
            # explicitly instead of failing inside `next(None)`.
            print("No validation dataset provided, skipping sample generation")
            return

        val_iter = iter(val_ds())
        # Evaluation step
        try:
            samples = generate_samples(
                next(val_iter),
                sampler,
                diffusion_steps,
            )

            # Put each sample on wandb
            if self.wandb:
                import numpy as np
                from wandb import Image as wandbImage
                for i in range(samples.shape[0]):
                    # convert the sample to numpy
                    sample = np.array(samples[i])
                    # denormalize the image
                    sample = (sample + 1) * 127.5
                    sample = np.clip(sample, 0, 255).astype(np.uint8)
                    # log the image to wandb
                    self.wandb.log({
                        f"sample_{i}": wandbImage(sample, caption=f"Sample {i} at step {current_step}")
                    }, step=current_step)
        except Exception as e:
            # Best effort: validation must not interrupt training.
            print("Error logging images to wandb", e)
            traceback.print_exc()

    def fit(self, data, training_steps_per_epoch, epochs, val_steps_per_epoch=8, sampler_class=None):
        """Run training via ``SimpleTrainer.fit``.

        ``data['local_batch_size']`` is passed as the train step's
        ``batch_size``; ``sampler_class`` is passed to the validation-step
        builder.
        """
        local_batch_size = data['local_batch_size']
        validation_step_args = {
            "sampler_class": sampler_class,
        }
        super().fit(
            data,
            train_steps_per_epoch=training_steps_per_epoch,
            epochs=epochs,
            train_step_args={"batch_size": local_batch_size},
            val_steps_per_epoch=val_steps_per_epoch,
            validation_step_args=validation_step_args,
        )

flaxdiff 0.1.36.1__py3-none-any.whl → 0.1.36.3__py3-none-any.whl

flaxdiff-0.1.36.1-py3-none-any.whl → flaxdiff-0.1.36.3-py3-none-any.whl