flaxdiff 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/PKG-INFO +1 -1
  2. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/models/attention.py +57 -115
  3. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/models/common.py +2 -2
  4. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/models/simple_unet.py +10 -14
  5. flaxdiff-0.1.3/flaxdiff/models/simple_vit.py +123 -0
  6. flaxdiff-0.1.3/flaxdiff/trainer/__init__.py +201 -0
  7. flaxdiff-0.1.3/flaxdiff/trainer/simple_trainer.py +323 -0
  8. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff.egg-info/PKG-INFO +1 -1
  9. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff.egg-info/SOURCES.txt +3 -1
  10. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/setup.py +1 -1
  11. flaxdiff-0.1.1/flaxdiff/trainer/__init__.py +0 -216
  12. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/README.md +0 -0
  13. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/__init__.py +0 -0
  14. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/models/__init__.py +0 -0
  15. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/models/favor_fastattn.py +0 -0
  16. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/predictors/__init__.py +0 -0
  17. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/samplers/__init__.py +0 -0
  18. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/samplers/common.py +0 -0
  19. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/samplers/ddim.py +0 -0
  20. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/samplers/ddpm.py +0 -0
  21. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/samplers/euler.py +0 -0
  22. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/samplers/heun_sampler.py +0 -0
  23. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/samplers/multistep_dpm.py +0 -0
  24. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/samplers/rk4_sampler.py +0 -0
  25. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/schedulers/__init__.py +0 -0
  26. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/schedulers/common.py +0 -0
  27. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/schedulers/continuous.py +0 -0
  28. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/schedulers/cosine.py +0 -0
  29. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/schedulers/discrete.py +0 -0
  30. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/schedulers/exp.py +0 -0
  31. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/schedulers/karras.py +0 -0
  32. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/schedulers/linear.py +0 -0
  33. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/schedulers/sqrt.py +0 -0
  34. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff/utils.py +0 -0
  35. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff.egg-info/dependency_links.txt +0 -0
  36. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff.egg-info/requires.txt +0 -0
  37. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/flaxdiff.egg-info/top_level.txt +0 -0
  38. {flaxdiff-0.1.1 → flaxdiff-0.1.3}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: flaxdiff
-Version: 0.1.1
+Version: 0.1.3
 Summary: A versatile and easy to understand Diffusion library
 Author: Ashish Kumar Singh
 Author-email: ashishkmr472@gmail.com
@@ -11,105 +11,6 @@ import functools
 import math
 from .common import kernel_init
 
-def _query_chunk_attention(query, key, value, precision, key_chunk_size: int = 4096):
-    """Multi-head dot product attention with a limited number of queries."""
-    num_kv, num_heads, k_features = key.shape[-3:]
-    v_features = value.shape[-1]
-    key_chunk_size = min(key_chunk_size, num_kv)
-    query = query / jnp.sqrt(k_features)
-
-    @functools.partial(jax.checkpoint, prevent_cse=False)
-    def summarize_chunk(query, key, value):
-        attn_weights = jnp.einsum("...qhd,...khd->...qhk", query, key, precision=precision)
-
-        max_score = jnp.max(attn_weights, axis=-1, keepdims=True)
-        max_score = jax.lax.stop_gradient(max_score)
-        exp_weights = jnp.exp(attn_weights - max_score)
-
-        exp_values = jnp.einsum("...vhf,...qhv->...qhf", value, exp_weights, precision=precision)
-        max_score = jnp.einsum("...qhk->...qh", max_score)
-
-        return (exp_values, exp_weights.sum(axis=-1), max_score)
-
-    def chunk_scanner(chunk_idx):
-        # julienne key array
-        key_chunk = jax.lax.dynamic_slice(
-            operand=key,
-            start_indices=[0] * (key.ndim - 3) + [chunk_idx, 0, 0],  # [...,k,h,d]
-            slice_sizes=list(key.shape[:-3]) + [key_chunk_size, num_heads, k_features],  # [...,k,h,d]
-        )
-
-        # julienne value array
-        value_chunk = jax.lax.dynamic_slice(
-            operand=value,
-            start_indices=[0] * (value.ndim - 3) + [chunk_idx, 0, 0],  # [...,v,h,d]
-            slice_sizes=list(value.shape[:-3]) + [key_chunk_size, num_heads, v_features],  # [...,v,h,d]
-        )
-
-        return summarize_chunk(query, key_chunk, value_chunk)
-
-    chunk_values, chunk_weights, chunk_max = jax.lax.map(f=chunk_scanner, xs=jnp.arange(0, num_kv, key_chunk_size))
-
-    global_max = jnp.max(chunk_max, axis=0, keepdims=True)
-    max_diffs = jnp.exp(chunk_max - global_max)
-
-    chunk_values *= jnp.expand_dims(max_diffs, axis=-1)
-    chunk_weights *= max_diffs
-
-    all_values = chunk_values.sum(axis=0)
-    all_weights = jnp.expand_dims(chunk_weights, -1).sum(axis=0)
-
-    return all_values / all_weights
-
-
-def jax_memory_efficient_attention(
-    query, key, value, precision=jax.lax.Precision.HIGHEST, query_chunk_size: int = 1024, key_chunk_size: int = 4096
-):
-    r"""
-    Flax memory-efficient multi-head dot product attention. https://arxiv.org/abs/2112.05682v2
-    https://github.com/AminRezaei0x443/memory-efficient-attention
-
-    Args:
-        query (`jnp.ndarray`): (batch..., query_length, head, query_key_depth_per_head)
-        key (`jnp.ndarray`): (batch..., key_value_length, head, query_key_depth_per_head)
-        value (`jnp.ndarray`): (batch..., key_value_length, head, value_depth_per_head)
-        precision (`jax.lax.Precision`, *optional*, defaults to `jax.lax.Precision.HIGHEST`):
-            numerical precision for computation
-        query_chunk_size (`int`, *optional*, defaults to 1024):
-            chunk size to divide the query array; must divide query_length equally without remainder
-        key_chunk_size (`int`, *optional*, defaults to 4096):
-            chunk size to divide the key and value arrays; must divide key_value_length equally without remainder
-
-    Returns:
-        (`jnp.ndarray`) with shape of (batch..., query_length, head, value_depth_per_head)
-    """
-    num_q, num_heads, q_features = query.shape[-3:]
-
-    def chunk_scanner(chunk_idx, _):
-        # julienne query array
-        query_chunk = jax.lax.dynamic_slice(
-            operand=query,
-            start_indices=([0] * (query.ndim - 3)) + [chunk_idx, 0, 0],  # [...,q,h,d]
-            slice_sizes=list(query.shape[:-3]) + [min(query_chunk_size, num_q), num_heads, q_features],  # [...,q,h,d]
-        )
-
-        return (
-            chunk_idx + query_chunk_size,  # unused; ignore it
-            _query_chunk_attention(
-                query=query_chunk, key=key, value=value, precision=precision, key_chunk_size=key_chunk_size
-            ),
-        )
-
-    _, res = jax.lax.scan(
-        f=chunk_scanner,
-        init=0,  # start counter
-        xs=None,
-        length=math.ceil(num_q / query_chunk_size),  # stop counter
-    )
-
-    return jnp.concatenate(res, axis=-3)  # fuse the chunked result back
-
-
 class EfficientAttention(nn.Module):
     """
     Based on the pallas attention implementation.
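The block removed above is the chunked (memory-efficient) attention of Rabe & Staats (arXiv:2112.05682), dropped in 0.1.3 in favor of the Pallas flash-attention kernel. A minimal standalone sketch of the same online-softmax idea, with toy shapes and a hypothetical `key_chunk` size, checked against plain attention (this sketch materializes the full score matrix, so it demonstrates correctness only, not the memory savings):

```python
# Sketch only: chunked softmax attention matching ordinary attention.
import jax
import jax.numpy as jnp

def full_attention(q, k, v):
    # q, k: [q_len, d], v: [kv_len, d]; plain softmax attention for reference
    w = jax.nn.softmax(q @ k.T / jnp.sqrt(q.shape[-1]), axis=-1)
    return w @ v

def chunked_attention(q, k, v, key_chunk=4):
    # Accumulate unnormalized values and weights per key chunk, then
    # renormalize with a running max (the online-softmax trick).
    q = q / jnp.sqrt(q.shape[-1])
    scores = q @ k.T                                      # [q_len, kv_len]
    chunks = jnp.split(scores, scores.shape[-1] // key_chunk, axis=-1)
    v_chunks = jnp.split(v, v.shape[0] // key_chunk, axis=0)
    maxes = jnp.stack([c.max(axis=-1) for c in chunks])   # [n_chunks, q_len]
    global_max = maxes.max(axis=0)                        # [q_len]
    vals = sum(jnp.exp(c - m[:, None]) @ vc * jnp.exp(m - global_max)[:, None]
               for c, m, vc in zip(chunks, maxes, v_chunks))
    weights = sum(jnp.exp(c - m[:, None]).sum(-1) * jnp.exp(m - global_max)
                  for c, m in zip(chunks, maxes))
    return vals / weights[:, None]

q, k, v = (jax.random.normal(jax.random.PRNGKey(i), (8, 16)) for i in range(3))
assert jnp.allclose(full_attention(q, k, v), chunked_attention(q, k, v), atol=1e-5)
```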
@@ -125,41 +26,77 @@ class EfficientAttention(nn.Module):
     def setup(self):
         inner_dim = self.dim_head * self.heads
         # Weights were exported with old names {to_q, to_k, to_v, to_out}
-        self.query = nn.DenseGeneral(inner_dim, use_bias=False, precision=self.precision,
-                                     kernel_init=self.kernel_init(), dtype=self.dtype, name="to_q")
-        self.key = nn.DenseGeneral(inner_dim, use_bias=False, precision=self.precision,
-                                   kernel_init=self.kernel_init(), dtype=self.dtype, name="to_k")
-        self.value = nn.DenseGeneral(inner_dim, use_bias=False, precision=self.precision,
-                                     kernel_init=self.kernel_init(), dtype=self.dtype, name="to_v")
+        dense = functools.partial(
+            nn.Dense,
+            self.heads * self.dim_head,
+            precision=self.precision,
+            use_bias=self.use_bias,
+            kernel_init=self.kernel_init(),
+            dtype=self.dtype
+        )
+        self.query = dense(name="to_q")
+        self.key = dense(name="to_k")
+        self.value = dense(name="to_v")
+
         self.proj_attn = nn.DenseGeneral(self.query_dim, use_bias=False, precision=self.precision,
                                          kernel_init=self.kernel_init(), dtype=self.dtype, name="to_out_0")
         # self.attnfn = make_fast_generalized_attention(qkv_dim=inner_dim, lax_scan_unroll=16)
+
+    def _reshape_tensor_to_head_dim(self, tensor):
+        batch_size, _, seq_len, dim = tensor.shape
+        head_size = self.heads
+        tensor = tensor.reshape(batch_size, seq_len, head_size, dim // head_size)
+        tensor = jnp.transpose(tensor, (0, 2, 1, 3))
+        return tensor
+
+    def _reshape_tensor_from_head_dim(self, tensor):
+        batch_size, _, seq_len, dim = tensor.shape
+        head_size = self.heads
+        tensor = jnp.transpose(tensor, (0, 2, 1, 3))
+        tensor = tensor.reshape(batch_size, 1, seq_len, dim * head_size)
+        return tensor
 
     @nn.compact
     def __call__(self, x:jax.Array, context=None):
+        # print(x.shape)
         # x has shape [B, H * W, C]
         context = x if context is None else context
+
+        B, H, W, C = x.shape
+        x = x.reshape((B, 1, H * W, C))
+
+        if len(context.shape) == 4:
+            B, _H, _W, _C = context.shape
+            context = context.reshape((B, 1, _H * _W, _C))
+        else:
+            B, SEQ, _C = context.shape
+            context = context.reshape((B, 1, SEQ, _C))
+
         query = self.query(x)
         key = self.key(context)
         value = self.value(context)
 
-        # print(query.shape, key.shape, value.shape)
+        query = self._reshape_tensor_to_head_dim(query)
+        key = self._reshape_tensor_to_head_dim(key)
+        value = self._reshape_tensor_to_head_dim(value)
 
-        # hidden_states = jax.experimental.pallas.ops.tpu.flash_attention.mha_reference(
-        #     query, key, value, None
-        # )
-
-        hidden_states = nn.dot_product_attention(
-            query, key, value, dtype=self.dtype, broadcast_dropout=False, dropout_rng=None, precision=self.precision
+        hidden_states = jax.experimental.pallas.ops.tpu.flash_attention.flash_attention(
+            query, key, value, None
         )
-        # hidden_states = self.attnfn(
-        #     query, key, value, None
+
+        hidden_states = self._reshape_tensor_from_head_dim(hidden_states)
+
+
+        # hidden_states = nn.dot_product_attention(
+        #     query, key, value, dtype=self.dtype, broadcast_dropout=False, dropout_rng=None, precision=self.precision
         # )
 
         proj = self.proj_attn(hidden_states)
+
+        proj = proj.reshape((B, H, W, C))
+
         return proj
 
-
 class NormalAttention(nn.Module):
     """
     Simple implementation of the normal attention.
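The new `_reshape_tensor_to_head_dim` / `_reshape_tensor_from_head_dim` helpers round-trip activations between the `[B, 1, S, H*D]` layout used inside the module and the `[B, H, S, D]` layout the Pallas flash-attention kernel consumes. A standalone shape check of the same two reshapes (toy sizes, not flaxdiff defaults):

```python
# Standalone sketch of the reshape round-trip around the flash-attention call.
import jax.numpy as jnp

B, S, heads, dim_head = 2, 16, 4, 8
x = jnp.arange(B * S * heads * dim_head, dtype=jnp.float32).reshape(
    (B, 1, S, heads * dim_head))                     # [B, 1, S, H*D]

def to_head_dim(t, heads):
    b, _, s, d = t.shape
    t = t.reshape(b, s, heads, d // heads)           # split channels into heads
    return jnp.transpose(t, (0, 2, 1, 3))            # [B, H, S, D]

def from_head_dim(t):
    b, h, s, d = t.shape
    t = jnp.transpose(t, (0, 2, 1, 3))               # [B, S, H, D]
    return t.reshape(b, 1, s, h * d)                 # back to [B, 1, S, H*D]

y = to_head_dim(x, heads)
assert y.shape == (B, heads, S, dim_head)
assert jnp.array_equal(from_head_dim(y), x)          # exact round trip
```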
@@ -201,7 +138,11 @@ class NormalAttention(nn.Module):
     @nn.compact
     def __call__(self, x, context=None):
         # x has shape [B, H, W, C]
+        B, H, W, C = x.shape
+        x = x.reshape((B, H*W, C))
         context = x if context is None else context
+        if len(context.shape) == 4:
+            context = context.reshape((B, H*W, C))
         query = self.query(x)
         key = self.key(context)
         value = self.value(context)
@@ -210,6 +151,7 @@ class NormalAttention(nn.Module):
             query, key, value, dtype=self.dtype, broadcast_dropout=False, dropout_rng=None, precision=self.precision
         )
         proj = self.proj_attn(hidden_states)
+        proj = proj.reshape((B, H, W, C))
         return proj
 
 class AttentionBlock(nn.Module):
@@ -2,6 +2,6 @@ import jax.numpy as jnp
 from flax import linen as nn
 
 # Kernel initializer to use
-def kernel_init(scale):
+def kernel_init(scale, dtype=jnp.float32):
     scale = max(scale, 1e-10)
-    return nn.initializers.variance_scaling(scale=scale, mode="fan_in", distribution="truncated_normal")
+    return nn.initializers.variance_scaling(scale=scale, mode="fan_avg", distribution="truncated_normal", dtype=dtype)
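The initializer change switches `variance_scaling` from `fan_in` to `fan_avg` mode and threads a `dtype` through. A small usage sketch of the updated factory (the Dense kernel shape below is illustrative):

```python
# Sketch: kernel_init returns a standard Flax initializer; toy shape below.
import jax
import jax.numpy as jnp
from flax import linen as nn

def kernel_init(scale, dtype=jnp.float32):
    scale = max(scale, 1e-10)  # keep the variance strictly positive, as in flaxdiff
    return nn.initializers.variance_scaling(
        scale=scale, mode="fan_avg", distribution="truncated_normal", dtype=dtype)

init = kernel_init(1.0)
w = init(jax.random.PRNGKey(0), (64, 128))  # (fan_in, fan_out) of a Dense kernel
# With fan_avg, Var(w) ≈ scale / ((64 + 128) / 2); fan_in would use scale / 64.
print(w.shape, w.std())
```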
@@ -5,6 +5,7 @@ from typing import Dict, Callable, Sequence, Any, Union
 import einops
 from .common import kernel_init
 from .attention import TransformerBlock
+
 class WeightStandardizedConv(nn.Module):
     """
     apply weight standardization https://arxiv.org/abs/1903.10520
@@ -243,6 +244,7 @@ def l2norm(t, axis=1, eps=1e-12):
     denom = jnp.clip(jnp.linalg.norm(t, ord=2, axis=axis, keepdims=True), eps)
     out = t/denom
     return (out)
+
 class ResidualBlock(nn.Module):
     conv_type:str
     features:int
@@ -327,7 +329,7 @@ class Unet(nn.Module):
     precision: Any = jax.lax.Precision.HIGH
 
     @nn.compact
-    def __call__(self, x, temb, textcontext=None):
+    def __call__(self, x, temb, textcontext):
         # print("embedding features", self.emb_features)
         temb = FourierEmbedding(features=self.emb_features)(temb)
         temb = TimeProjection(features=self.emb_features)(temb)
@@ -341,6 +343,8 @@ class Unet(nn.Module):
         conv_type = up_conv_type = down_conv_type = middle_conv_type = "conv"
         # middle_conv_type = "separable"
 
+        print(f"input shape: {x.shape}")
+
         x = ConvLayer(
             conv_type,
             features=self.feature_depths[0],
@@ -351,6 +355,8 @@ class Unet(nn.Module):
             precision=self.precision
         )(x)
         downs = [x]
+
+        print(f"x shape: {x.shape}")
 
         # Downscaling blocks
         for i, (dim_out, attention_config) in enumerate(zip(feature_depths, attention_configs)):
@@ -370,18 +376,13 @@ class Unet(nn.Module):
                     precision=self.precision
                 )(x, temb)
                 if attention_config is not None and j == self.num_res_blocks - 1:  # Apply attention only on the last block
-                    B, H, W, _ = x.shape
-                    if H > TS:
-                        padded_context = jnp.pad(textcontext, ((0, 0), (0, H - TS), (0, 0)), mode='constant', constant_values=0).reshape((B, 1, H, TC))
-                    else:
-                        padded_context = None
                     x = TransformerBlock(heads=attention_config['heads'], dtype=attention_config.get('dtype', jnp.float32),
                                          dim_head=dim_in // attention_config['heads'],
                                          use_flash_attention=attention_config.get("flash_attention", True),
                                          use_projection=attention_config.get("use_projection", False),
                                          use_self_and_cross=attention_config.get("use_self_and_cross", True),
                                          precision=attention_config.get("precision", self.precision),
-                                         name=f"down_{i}_attention_{j}")(x, padded_context)
+                                         name=f"down_{i}_attention_{j}")(x, textcontext)
                 # print("down residual for feature level", i, "is of shape", x.shape, "features", dim_in)
                 downs.append(x)
             if i != len(feature_depths) - 1:
@@ -419,7 +420,7 @@ class Unet(nn.Module):
                                  use_projection=middle_attention.get("use_projection", False),
                                  use_self_and_cross=False,
                                  precision=attention_config.get("precision", self.precision),
-                                 name=f"middle_attention_{j}")(x)
+                                 name=f"middle_attention_{j}")(x, textcontext)
             x = ResidualBlock(
                 middle_conv_type,
                 name=f"middle_res2_{j}",
@@ -454,18 +455,13 @@ class Unet(nn.Module):
                     precision=self.precision
                 )(x, temb)
                 if attention_config is not None and j == self.num_res_blocks - 1:  # Apply attention only on the last block
-                    B, H, W, _ = x.shape
-                    if H > TS:
-                        padded_context = jnp.pad(textcontext, ((0, 0), (0, H - TS), (0, 0)), mode='constant', constant_values=0).reshape((B, 1, H, TC))
-                    else:
-                        padded_context = None
                     x = TransformerBlock(heads=attention_config['heads'], dtype=attention_config.get('dtype', jnp.float32),
                                          dim_head=dim_out // attention_config['heads'],
                                          use_flash_attention=attention_config.get("flash_attention", True),
                                          use_projection=attention_config.get("use_projection", False),
                                          use_self_and_cross=attention_config.get("use_self_and_cross", True),
                                          precision=attention_config.get("precision", self.precision),
-                                         name=f"up_{i}_attention_{j}")(x, padded_context)
+                                         name=f"up_{i}_attention_{j}")(x, textcontext)
             # print("Upscaling ", i, x.shape)
             if i != len(feature_depths) - 1:
                 x = Upsample(
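The `attention_config` entries consumed above are plain dicts read via `.get(...)` with defaults. A hypothetical `attention_configs` list for a four-level Unet, with key names mirroring the reads in the diff (the values are made up for illustration; `None` disables attention at that level):

```python
# Hypothetical attention_configs; key names mirror the .get() calls above.
import jax
import jax.numpy as jnp

attention_configs = [
    None,                                    # no attention at the highest resolution
    None,
    {"heads": 8, "flash_attention": True,    # defaults spelled out explicitly
     "use_projection": False, "use_self_and_cross": True,
     "dtype": jnp.float32, "precision": jax.lax.Precision.HIGH},
    {"heads": 8, "flash_attention": False,
     "use_projection": True, "use_self_and_cross": True,
     "dtype": jnp.bfloat16, "precision": jax.lax.Precision.HIGH},
]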
@@ -0,0 +1,123 @@
+# simple_vit.py
+
+import jax
+import jax.numpy as jnp
+from flax import linen as nn
+from typing import Callable, Any
+from .simple_unet import FourierEmbedding, TimeProjection, ConvLayer, kernel_init
+from .attention import TransformerBlock
+
+class PatchEmbedding(nn.Module):
+    patch_size: int
+    embedding_dim: int
+    dtype: Any = jnp.float32
+    precision: Any = jax.lax.Precision.HIGH
+
+    @nn.compact
+    def __call__(self, x):
+        batch, height, width, channels = x.shape
+        assert height % self.patch_size == 0 and width % self.patch_size == 0, "Image dimensions must be divisible by patch size"
+
+        x = nn.Conv(features=self.embedding_dim,
+                    kernel_size=(self.patch_size, self.patch_size),
+                    strides=(self.patch_size, self.patch_size),
+                    dtype=self.dtype,
+                    precision=self.precision)(x)
+        x = jnp.reshape(x, (batch, -1, self.embedding_dim))
+        return x
+
+class PositionalEncoding(nn.Module):
+    max_len: int
+    embedding_dim: int
+
+    @nn.compact
+    def __call__(self, x):
+        pe = self.param('pos_encoding',
+                        jax.nn.initializers.zeros,
+                        (1, self.max_len, self.embedding_dim))
+        return x + pe[:, :x.shape[1], :]
+
+class TransformerEncoder(nn.Module):
+    num_layers: int
+    num_heads: int
+    mlp_dim: int
+    dropout_rate: float = 0.1
+    dtype: Any = jnp.float32
+    precision: Any = jax.lax.Precision.HIGH
+
+    @nn.compact
+    def __call__(self, x, training=True):
+        for _ in range(self.num_layers):
+            x = TransformerBlock(
+                heads=self.num_heads,
+                dim_head=x.shape[-1] // self.num_heads,
+                mlp_dim=self.mlp_dim,
+                dropout_rate=self.dropout_rate,
+                dtype=self.dtype,
+                precision=self.precision
+            )(x)
+        return x
+
+class VisionTransformer(nn.Module):
+    patch_size: int = 16
+    embedding_dim: int = 768
+    num_layers: int = 12
+    num_heads: int = 12
+    mlp_dim: int = 3072
+    emb_features: int = 256
+    dropout_rate: float = 0.1
+    dtype: Any = jnp.float32
+    precision: Any = jax.lax.Precision.HIGH
+
+    @nn.compact
+    def __call__(self, x, temb, textcontext=None):
+        # Time embedding
+        temb = FourierEmbedding(features=self.emb_features)(temb)
+        temb = TimeProjection(features=self.emb_features)(temb)
+
+        # Patch embedding
+        x = PatchEmbedding(patch_size=self.patch_size, embedding_dim=self.embedding_dim,
+                           dtype=self.dtype, precision=self.precision)(x)
+
+        # Add positional encoding
+        x = PositionalEncoding(max_len=x.shape[1], embedding_dim=self.embedding_dim)(x)
+
+        # Add time embedding
+        temb = jnp.expand_dims(temb, axis=1)
+        x = jnp.concatenate([x, temb], axis=1)
+
+        # Add text context
+        if textcontext is not None:
+            x = jnp.concatenate([x, textcontext], axis=1)
+
+        # Transformer encoder
+        x = TransformerEncoder(
+            num_layers=self.num_layers,
+            num_heads=self.num_heads,
+            mlp_dim=self.mlp_dim,
+            dropout_rate=self.dropout_rate,
+            dtype=self.dtype,
+            precision=self.precision
+        )(x)
+
+        # Extract the image tokens (exclude time and text embeddings)
+        num_patches = (x.shape[1] - 1 - (0 if textcontext is None else textcontext.shape[1]))
+        x = x[:, :num_patches, :]
+
+        # Reshape to image dimensions
+        batch, _, _ = x.shape
+        height = width = int((num_patches) ** 0.5)
+        x = jnp.reshape(x, (batch, height, width, self.embedding_dim))
+
+        # Final convolution to get the desired output channels
+        x = ConvLayer(
+            conv_type="conv",
+            features=3,
+            kernel_size=(3, 3),
+            strides=(1, 1),
+            kernel_init=kernel_init(0.0),
+            dtype=self.dtype,
+            precision=self.precision
+        )(x)
+
+        return x
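The token bookkeeping in `VisionTransformer.__call__` appends one time token plus optional text tokens, then strips them before reshaping back to a patch grid. A sketch of the sequence-length arithmetic with hypothetical sizes:

```python
# Sketch of the VisionTransformer token arithmetic; sizes are hypothetical.
image_size, patch_size, text_len = 64, 16, 77

num_patches = (image_size // patch_size) ** 2   # 16 patch tokens
seq_len = num_patches + 1 + text_len            # + time token + text tokens
assert num_patches == seq_len - 1 - text_len    # what the slice recovers
side = int(num_patches ** 0.5)                  # 4x4 grid for the final reshape
print(num_patches, seq_len, side)
```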
@@ -0,0 +1,201 @@
+import orbax.checkpoint
+import tqdm
+from flax import linen as nn
+import jax
+from typing import Callable, Dict, Tuple
+from dataclasses import field
+import jax.numpy as jnp
+from clu import metrics
+from flax.training import train_state  # Useful dataclass to keep train state
+import optax
+from flax import struct  # Flax dataclasses
+import time
+import os
+import orbax
+from flax.training import orbax_utils
+
+from ..schedulers import NoiseScheduler
+from ..predictors import DiffusionPredictionTransform, EpsilonPredictionTransform
+
+from .simple_trainer import SimpleTrainer, SimpleTrainState, Metrics
+
+class TrainState(SimpleTrainState):
+    rngs: jax.random.PRNGKey
+    ema_params: dict
+
+    def get_random_key(self):
+        rngs, subkey = jax.random.split(self.rngs)
+        return self.replace(rngs=rngs), subkey
+
+    def apply_ema(self, decay: float = 0.999):
+        new_ema_params = jax.tree_util.tree_map(
+            lambda ema, param: decay * ema + (1 - decay) * param,
+            self.ema_params,
+            self.params,
+        )
+        return self.replace(ema_params=new_ema_params)
+
+class DiffusionTrainer(SimpleTrainer):
+    noise_schedule: NoiseScheduler
+    model_output_transform: DiffusionPredictionTransform
+    ema_decay: float = 0.999
+
+    def __init__(self,
+                 model: nn.Module,
+                 input_shapes: Dict[str, Tuple[int]],
+                 optimizer: optax.GradientTransformation,
+                 noise_schedule: NoiseScheduler,
+                 rngs: jax.random.PRNGKey,
+                 unconditional_prob: float = 0.2,
+                 name: str = "Diffusion",
+                 model_output_transform: DiffusionPredictionTransform = EpsilonPredictionTransform(),
+                 **kwargs
+                 ):
+        super().__init__(
+            model=model,
+            input_shapes=input_shapes,
+            optimizer=optimizer,
+            rngs=rngs,
+            name=name,
+            **kwargs
+        )
+        self.noise_schedule = noise_schedule
+        self.model_output_transform = model_output_transform
+        self.unconditional_prob = unconditional_prob
+
+    def __init_fn(
+        self,
+        optimizer: optax.GradientTransformation,
+        rngs: jax.random.PRNGKey,
+        existing_state: dict = None,
+        existing_best_state: dict = None,
+        model: nn.Module = None,
+        param_transforms: Callable = None
+    ) -> Tuple[TrainState, TrainState]:
+        rngs, subkey = jax.random.split(rngs)
+
+        if existing_state == None:
+            input_vars = self.get_input_ones()
+            params = model.init(subkey, **input_vars)
+            new_state = {"params": params, "ema_params": params}
+        else:
+            new_state = existing_state
+
+        if param_transforms is not None:
+            params = param_transforms(params)
+
+        state = TrainState.create(
+            apply_fn=model.apply,
+            params=new_state['params'],
+            ema_params=new_state['ema_params'],
+            tx=optimizer,
+            rngs=rngs,
+            metrics=Metrics.empty()
+        )
+
+        if existing_best_state is not None:
+            best_state = state.replace(
+                params=existing_best_state['params'], ema_params=existing_best_state['ema_params'])
+        else:
+            best_state = state
+
+        return state, best_state
+
+    def _define_train_step(self, batch_size, null_labels_seq, text_embedder):
+        noise_schedule = self.noise_schedule
+        model = self.model
+        model_output_transform = self.model_output_transform
+        loss_fn = self.loss_fn
+        unconditional_prob = self.unconditional_prob
+
+        # Determine the number of unconditional samples
+        num_unconditional = int(batch_size * unconditional_prob)
+
+        nS, nC = null_labels_seq.shape
+        null_labels_seq = jnp.broadcast_to(
+            null_labels_seq, (batch_size, nS, nC))
+
+        distributed_training = self.distributed_training
+
+        def train_step(state: TrainState, batch):
+            """Train for a single step."""
+            images = batch['image']
+            # normalize image
+            images = (images - 127.5) / 127.5
+
+            output = text_embedder(
+                input_ids=batch['input_ids'], attention_mask=batch['attention_mask'])
+            # output = infer(input_ids=batch['input_ids'], attention_mask=batch['attention_mask'])
+
+            label_seq = output.last_hidden_state
+
+            # Generate random probabilities to decide how much of this batch will be unconditional
+
+            label_seq = jnp.concat(
+                [null_labels_seq[:num_unconditional], label_seq[num_unconditional:]], axis=0)
+
+            noise_level, state = noise_schedule.generate_timesteps(
+                images.shape[0], state)
+            state, rngs = state.get_random_key()
+            noise: jax.Array = jax.random.normal(rngs, shape=images.shape)
+            rates = noise_schedule.get_rates(noise_level)
+            noisy_images, c_in, expected_output = model_output_transform.forward_diffusion(
+                images, noise, rates)
+
+            def model_loss(params):
+                preds = model.apply(
+                    params, *noise_schedule.transform_inputs(noisy_images*c_in, noise_level), label_seq)
+                preds = model_output_transform.pred_transform(
+                    noisy_images, preds, rates)
+                nloss = loss_fn(preds, expected_output)
+                # nloss = jnp.mean(nloss, axis=1)
+                nloss *= noise_schedule.get_weights(noise_level)
+                nloss = jnp.mean(nloss)
+                loss = nloss
+                return loss
+
+            loss, grads = jax.value_and_grad(model_loss)(state.params)
+            if distributed_training:
+                grads = jax.lax.pmean(grads, "device")
+            state = state.apply_gradients(grads=grads)
+            state = state.apply_ema(self.ema_decay)
+            return state, loss
+
+        if distributed_training:
+            train_step = jax.pmap(axis_name="device")(train_step)
+        else:
+            train_step = jax.jit(train_step)
+
+        return train_step
+
+    def _define_compute_metrics(self):
+        @jax.jit
+        def compute_metrics(state: TrainState, expected, pred):
+            loss = jnp.mean(jnp.square(pred - expected))
+            metric_updates = state.metrics.single_from_model_output(loss=loss)
+            metrics = state.metrics.merge(metric_updates)
+            state = state.replace(metrics=metrics)
+            return state
+        return compute_metrics
+
+    def fit(self, data, steps_per_epoch, epochs):
+        null_labels_full = data['null_labels_full']
+        local_batch_size = data['local_batch_size']
+        text_embedder = data['model']
+        super().fit(data, steps_per_epoch, epochs, {
+            "batch_size": local_batch_size, "null_labels_seq": null_labels_full, "text_embedder": text_embedder})
+
+
+                    pbar.set_postfix(loss=f'{loss:.4f}')
+                    pbar.update(100)
+            end_time = time.time()
+            self.state = state
+            total_time = end_time - start_time
+            avg_time_per_step = total_time / steps_per_epoch
+            avg_loss = epoch_loss / steps_per_epoch
+            if avg_loss < self.best_loss:
+                self.best_loss = avg_loss
+                self.best_state = state
+                self.save(epoch, best=True)
+            print(f"\n\tEpoch {epoch+1} completed. Avg Loss: {avg_loss}, Time: {total_time:.2f}s, Best Loss: {self.best_loss}")
+        return self.state
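`TrainState.apply_ema` maintains an exponential moving average of the parameters next to the live ones. A minimal standalone sketch of the same leaf-wise update on a toy pytree (0.999 mirrors the default decay):

```python
# Standalone sketch of the EMA update in TrainState.apply_ema; toy pytree.
import jax
import jax.numpy as jnp

def apply_ema(ema_params, params, decay=0.999):
    # ema <- decay * ema + (1 - decay) * params, applied leaf by leaf
    return jax.tree_util.tree_map(
        lambda e, p: decay * e + (1 - decay) * p, ema_params, params)

params = {"w": jnp.ones(3), "b": jnp.zeros(3)}
ema = {"w": jnp.zeros(3), "b": jnp.zeros(3)}
ema = apply_ema(ema, params)
print(ema["w"])  # [0.001 0.001 0.001]
```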
@@ -0,0 +1,323 @@
+import flax
+import orbax.checkpoint
+import tqdm
+from flax import linen as nn
+import jax
+from typing import Callable, Dict, Any, Tuple
+from dataclasses import field
+import jax.numpy as jnp
+from clu import metrics
+from flax.training import train_state  # Useful dataclass to keep train state
+from flax.metrics import tensorboard
+import optax
+from flax import struct  # Flax dataclasses
+import wandb
+import time
+import os
+import orbax
+from flax.training import orbax_utils
+
+@struct.dataclass
+class Metrics(metrics.Collection):
+    accuracy: metrics.Accuracy
+    loss: metrics.Average.from_output('loss')
+
+# Define the TrainState
+class SimpleTrainState(train_state.TrainState):
+    rngs: jax.random.PRNGKey
+    metrics: Metrics
+
+    def get_random_key(self):
+        rngs, subkey = jax.random.split(self.rngs)
+        return self.replace(rngs=rngs), subkey
+
+class SimpleTrainer:
+    state: SimpleTrainState
+    best_state: SimpleTrainState
+    best_loss: float
+    model: nn.Module
+    ema_decay: float = 0.999
+
+    def __init__(self,
+                 model: nn.Module,
+                 input_shapes: Dict[str, Tuple[int]],
+                 optimizer: optax.GradientTransformation,
+                 rngs: jax.random.PRNGKey,
+                 train_state: SimpleTrainState = None,
+                 name: str = "Simple",
+                 load_from_checkpoint: bool = False,
+                 checkpoint_suffix: str = "",
+                 loss_fn=optax.l2_loss,
+                 param_transforms: Callable = None,
+                 wandb_config: Dict[str, Any] = None,
+                 distributed_training: bool = None,
+                 ):
+        if distributed_training is None or distributed_training is True:
+            # Auto-detect if we are running on multiple devices
+            distributed_training = jax.device_count() > 1
+
+        self.distributed_training = distributed_training
+        self.model = model
+        self.name = name
+        self.loss_fn = loss_fn
+        self.input_shapes = input_shapes
+
+        if wandb_config is not None:
+            run = wandb.init(**wandb_config)
+            self.wandb = run
+        else:
+            self.wandb = None
+
+        checkpointer = orbax.checkpoint.PyTreeCheckpointer()
+        options = orbax.checkpoint.CheckpointManagerOptions(
+            max_to_keep=4, create=True)
+        self.checkpointer = orbax.checkpoint.CheckpointManager(
+            self.checkpoint_path() + checkpoint_suffix, checkpointer, options)
+
+        if load_from_checkpoint:
+            latest_epoch, old_state, old_best_state = self.load()
+        else:
+            latest_epoch, old_state, old_best_state = 0, None, None
+
+        self.latest_epoch = latest_epoch
+
+        if train_state == None:
+            self.init_state(optimizer, rngs, existing_state=old_state,
+                            existing_best_state=old_best_state, model=model, param_transforms=param_transforms)
+        else:
+            self.state = train_state
+            self.best_state = train_state
+            self.best_loss = 1e9
+
+    def get_input_ones(self):
+        return {k: jnp.ones((1, *v)) for k, v in self.input_shapes.items()}
+
+    def __init_fn(
+        self,
+        optimizer: optax.GradientTransformation,
+        rngs: jax.random.PRNGKey,
+        existing_state: dict = None,
+        existing_best_state: dict = None,
+        model: nn.Module = None,
+        param_transforms: Callable = None
+    ) -> Tuple[SimpleTrainState, SimpleTrainState]:
+        rngs, subkey = jax.random.split(rngs)
+
+        if existing_state == None:
+            input_vars = self.get_input_ones()
+            params = model.init(subkey, **input_vars)
+
+        state = SimpleTrainState.create(
+            apply_fn=model.apply,
+            params=params,
+            tx=optimizer,
+            rngs=rngs,
+            metrics=Metrics.empty()
+        )
+        if existing_best_state is not None:
+            best_state = state.replace(
+                params=existing_best_state['params'])
+        else:
+            best_state = state
+
+        return state, best_state
+
+    def init_state(
+        self,
+        optimizer: optax.GradientTransformation,
+        rngs: jax.random.PRNGKey,
+        existing_state: dict = None,
+        existing_best_state: dict = None,
+        model: nn.Module = None,
+        param_transforms: Callable = None
+    ):
+
+        state, best_state = self.__init_fn(
+            optimizer, rngs, existing_state, existing_best_state, model, param_transforms
+        )
+        self.best_loss = 1e9
+
+        if self.distributed_training:
+            devices = jax.local_devices()
+            if len(devices) > 1:
+                print("Replicating state across devices ", devices)
+                state = flax.jax_utils.replicate(state, devices)
+                best_state = flax.jax_utils.replicate(best_state, devices)
+            else:
+                print("Not replicating any state, only a single device is connected to the process")
+
+        self.state = state
+        self.best_state = best_state
+
+    def get_state(self):
+        return flax.jax_utils.unreplicate(self.state)
+
+    def get_best_state(self):
+        return flax.jax_utils.unreplicate(self.best_state)
+
+    def checkpoint_path(self):
+        experiment_name = self.name
+        path = os.path.join(os.path.abspath('./checkpoints'), experiment_name)
+        if not os.path.exists(path):
+            os.makedirs(path)
+        return path
+
+    def tensorboard_path(self):
+        experiment_name = self.name
+        path = os.path.join(os.path.abspath('./tensorboard'), experiment_name)
+        if not os.path.exists(path):
+            os.makedirs(path)
+        return path
+
+    def load(self):
+        epoch = self.checkpointer.latest_step()
+        print("Loading model from checkpoint", epoch)
+        ckpt = self.checkpointer.restore(epoch)
+        state = ckpt['state']
+        best_state = ckpt['best_state']
+        # Convert the state to a TrainState
+        self.best_loss = ckpt['best_loss']
+        print(
+            f"Loaded model from checkpoint at epoch {epoch}", ckpt['best_loss'])
+        return epoch, state, best_state
+
+    def save(self, epoch=0):
+        print(f"Saving model at epoch {epoch}")
+        ckpt = {
+            # 'model': self.model,
+            'state': self.get_state(),
+            'best_state': self.get_best_state(),
+            'best_loss': self.best_loss
+        }
+        try:
+            save_args = orbax_utils.save_args_from_target(ckpt)
+            self.checkpointer.save(epoch, ckpt, save_kwargs={
+                'save_args': save_args}, force=True)
+            pass
+        except Exception as e:
+            print("Error saving checkpoint", e)
+
+    def _define_train_step(self, **kwargs):
+        model = self.model
+        loss_fn = self.loss_fn
+        distributed_training = self.distributed_training
+
+        def train_step(state: SimpleTrainState, batch):
+            """Train for a single step."""
+            images = batch['image']
+            labels = batch['label']
+
+            def model_loss(params):
+                preds = model.apply(params, images)
+                expected_output = labels
+                nloss = loss_fn(preds, expected_output)
+                loss = jnp.mean(nloss)
+                return loss
+            loss, grads = jax.value_and_grad(model_loss)(state.params)
+            if distributed_training:
+                grads = jax.lax.pmean(grads, "device")
+            state = state.apply_gradients(grads=grads)
+            return state, loss
+
+        if distributed_training:
+            train_step = jax.pmap(axis_name="device")(train_step)
+        else:
+            train_step = jax.jit(train_step)
+
+        return train_step
+
+    def _define_compute_metrics(self):
+        model = self.model
+        loss_fn = self.loss_fn
+
+        @jax.jit
+        def compute_metrics(state: SimpleTrainState, batch):
+            preds = model.apply(state.params, batch['image'])
+            expected_output = batch['label']
+            loss = jnp.mean(loss_fn(preds, expected_output))
+            metric_updates = state.metrics.single_from_model_output(
+                loss=loss, logits=preds, labels=expected_output)
+            metrics = state.metrics.merge(metric_updates)
+            state = state.replace(metrics=metrics)
+            return state
+        return compute_metrics
+
+    def summary(self):
+        input_vars = self.get_input_ones()
+        print(self.model.tabulate(jax.random.key(0), **input_vars,
+                                  console_kwargs={"width": 200, "force_jupyter": True, }))
+
+    def config(self):
+        return {
+            "model": self.model,
+            "state": self.state,
+            "name": self.name,
+            "input_shapes": self.input_shapes
+        }
+
+    def init_tensorboard(self, batch_size, steps_per_epoch, epochs):
+        summary_writer = tensorboard.SummaryWriter(self.tensorboard_path())
+        summary_writer.hparams({
+            **self.config(),
+            "steps_per_epoch": steps_per_epoch,
+            "epochs": epochs,
+            "batch_size": batch_size
+        })
+        return summary_writer
+
+    def fit(self, data, steps_per_epoch, epochs, train_step_args={}):
+        train_ds = iter(data['train']())
+        if 'test' in data:
+            test_ds = data['test']
+        else:
+            test_ds = None
+        train_step = self._define_train_step(**train_step_args)
+        compute_metrics = self._define_compute_metrics()
+        state = self.state
+        device_count = jax.local_device_count()
+        # train_ds = flax.jax_utils.prefetch_to_device(train_ds, jax.devices())
+
+        summary_writer = self.init_tensorboard(
+            data['global_batch_size'], steps_per_epoch, epochs)
+
+        while self.latest_epoch <= epochs:
+            self.latest_epoch += 1
+            current_epoch = self.latest_epoch
+            print(f"\nEpoch {current_epoch}/{epochs}")
+            start_time = time.time()
+            epoch_loss = 0
+
+            with tqdm.tqdm(total=steps_per_epoch, desc=f'\t\tEpoch {current_epoch}', ncols=100, unit='step') as pbar:
+                for i in range(steps_per_epoch):
+                    batch = next(train_ds)
+                    if self.distributed_training and device_count > 1:
+                        batch = jax.tree.map(lambda x: x.reshape(
+                            (device_count, -1, *x.shape[1:])), batch)
+
+                    state, loss = train_step(state, batch)
+                    loss = jnp.mean(loss)
+
+                    epoch_loss += loss
+                    if i % 100 == 0:
+                        pbar.set_postfix(loss=f'{loss:.4f}')
+                        pbar.update(100)
+                        current_step = current_epoch*steps_per_epoch + i
+                        summary_writer.scalar(
+                            'Train Loss', loss, step=current_step)
+                        if self.wandb is not None:
+                            self.wandb.log({"train/loss": loss})
+
+            print(f"\n\tEpoch done")
+            end_time = time.time()
+            self.state = state
+            total_time = end_time - start_time
+            avg_time_per_step = total_time / steps_per_epoch
+            avg_loss = epoch_loss / steps_per_epoch
+            if avg_loss < self.best_loss:
+                self.best_loss = avg_loss
+                self.best_state = state
+                self.save(current_epoch)
+
+            # Compute Metrics
+            metrics_str = ''
+
+            print(
+                f"\n\tEpoch {current_epoch} completed. Avg Loss: {avg_loss}, Time: {total_time:.2f}s, Best Loss: {self.best_loss} {metrics_str}")
+
+        self.save(epochs)
+        return self.state
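When `distributed_training` is enabled, `fit` adds a leading device axis to each batch before calling the `pmap`-ed train step, and gradients are averaged with `jax.lax.pmean` over `axis_name="device"`. A standalone sketch of just the batch split (hypothetical batch dict):

```python
# Sketch of the per-device batch split used in fit(); hypothetical batch.
import jax
import jax.numpy as jnp

device_count = jax.local_device_count()
global_batch = 8 * device_count
batch = {
    "image": jnp.ones((global_batch, 64, 64, 3)),
    "label": jnp.ones((global_batch, 10)),
}
# [global_batch, ...] -> [device_count, per_device_batch, ...]
sharded = jax.tree.map(
    lambda x: x.reshape((device_count, -1, *x.shape[1:])), batch)
print(sharded["image"].shape)  # (device_count, 8, 64, 64, 3)
```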
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: flaxdiff
-Version: 0.1.1
+Version: 0.1.3
 Summary: A versatile and easy to understand Diffusion library
 Author: Ashish Kumar Singh
 Author-email: ashishkmr472@gmail.com
@@ -12,6 +12,7 @@ flaxdiff/models/attention.py
 flaxdiff/models/common.py
 flaxdiff/models/favor_fastattn.py
 flaxdiff/models/simple_unet.py
+flaxdiff/models/simple_vit.py
 flaxdiff/predictors/__init__.py
 flaxdiff/samplers/__init__.py
 flaxdiff/samplers/common.py
@@ -30,4 +31,5 @@ flaxdiff/schedulers/exp.py
 flaxdiff/schedulers/karras.py
 flaxdiff/schedulers/linear.py
 flaxdiff/schedulers/sqrt.py
-flaxdiff/trainer/__init__.py
+flaxdiff/trainer/__init__.py
+flaxdiff/trainer/simple_trainer.py
@@ -11,7 +11,7 @@ required_packages=[
 setup(
     name='flaxdiff',
     packages=find_packages(),
-    version='0.1.1',
+    version='0.1.3',
     description='A versatile and easy to understand Diffusion library',
     long_description=open('README.md').read(),
     long_description_content_type='text/markdown',
@@ -1,216 +0,0 @@
-import orbax.checkpoint
-import tqdm
-from flax import linen as nn
-import jax
-from typing import Callable
-from dataclasses import field
-import jax.numpy as jnp
-from clu import metrics
-from flax.training import train_state  # Useful dataclass to keep train state
-import optax
-from flax import struct  # Flax dataclasses
-import time
-import os
-import orbax
-from flax.training import orbax_utils
-
-from ..schedulers import NoiseScheduler
-from ..predictors import DiffusionPredictionTransform, EpsilonPredictionTransform
-
-@struct.dataclass
-class Metrics(metrics.Collection):
-    loss: metrics.Average.from_output('loss')  # type: ignore
-
-class ModelState():
-    model: nn.Module
-    params: dict
-    noise_schedule: NoiseScheduler
-    model_output_transform: DiffusionPredictionTransform
-
-# Define the TrainState with EMA parameters
-class TrainState(train_state.TrainState):
-    rngs: jax.random.PRNGKey
-    ema_params: dict
-
-    def get_random_key(self):
-        rngs, subkey = jax.random.split(self.rngs)
-        return self.replace(rngs=rngs), subkey
-
-    def apply_ema(self, decay: float = 0.999):
-        new_ema_params = jax.tree_util.tree_map(
-            lambda ema, param: decay * ema + (1 - decay) * param,
-            self.ema_params,
-            self.params,
-        )
-        return self.replace(ema_params=new_ema_params)
-
-class DiffusionTrainer:
-    state: TrainState
-    best_state: TrainState
-    best_loss: float
-    model: nn.Module
-    noise_schedule: NoiseScheduler
-    model_output_transform: DiffusionPredictionTransform
-    ema_decay: float = 0.999
-
-    def __init__(self,
-                 model: nn.Module,
-                 optimizer: optax.GradientTransformation,
-                 noise_schedule: NoiseScheduler,
-                 rngs: jax.random.PRNGKey,
-                 train_state: TrainState = None,
-                 name: str = "Diffusion",
-                 load_from_checkpoint: bool = False,
-                 param_transforms: Callable = None,
-                 model_output_transform: DiffusionPredictionTransform = EpsilonPredictionTransform(),
-                 loss_fn=optax.l2_loss,
-                 ):
-        self.model = model
-        self.noise_schedule = noise_schedule
-        self.name = name
-        self.model_output_transform = model_output_transform
-        self.loss_fn = loss_fn
-
-        checkpointer = orbax.checkpoint.PyTreeCheckpointer()
-        options = orbax.checkpoint.CheckpointManagerOptions(max_to_keep=4, create=True)
-        self.checkpointer = orbax.checkpoint.CheckpointManager(self.checkpoint_path(), checkpointer, options)
-
-        if load_from_checkpoint:
-            params = self.load()
-        else:
-            params = None
-
-        if train_state == None:
-            self.init_state(optimizer, rngs, params=params, model=model, param_transforms=param_transforms)
-        else:
-            self.state = train_state
-            self.best_state = train_state
-            self.best_loss = 1e9
-
-    def init_state(self,
-                   optimizer: optax.GradientTransformation,
-                   rngs: jax.random.PRNGKey,
-                   params: dict = None,
-                   model: nn.Module = None,
-                   param_transforms: Callable = None,
-                   batch_size=16,
-                   image_size=64
-                   ):
-        inp = jnp.ones((batch_size, image_size, image_size, 3))
-        temb = jnp.ones((batch_size,))
-        rngs, subkey = jax.random.split(rngs)
-        if params == None:
-            params = model.init(subkey, inp, temb)
-        if param_transforms is not None:
-            params = param_transforms(params)
-        self.best_loss = 1e9
-        self.state = TrainState.create(
-            apply_fn=model.apply,
-            params=params,
-            ema_params=params,
-            tx=optimizer,
-            rngs=rngs,
-        )
-        self.best_state = self.state
-
-    def checkpoint_path(self):
-        experiment_name = self.name
-        path = os.path.join(os.path.abspath('./models'), experiment_name)
-        if not os.path.exists(path):
-            os.makedirs(path)
-        return path
-
-    def load(self):
-        step = self.checkpointer.latest_step()
-        print("Loading model from checkpoint", step)
-        ckpt = self.checkpointer.restore(step)
-        state = ckpt['state']
-        # Convert the state to a TrainState
-        self.best_loss = ckpt['best_loss']
-        print(f"Loaded model from checkpoint at step {step}", ckpt['best_loss'])
-        return state.get('params', None)  # , ckpt.get('model', None)
-
-    def save(self, epoch=0, best=False):
-        print(f"Saving model at epoch {epoch}")
-        state = self.best_state if best else self.state
-        # filename = os.path.join(self.checkpoint_path(), f'model_{epoch}' if not best else 'best_model')
-        ckpt = {
-            'model': self.model,
-            'state': state,
-            'best_loss': self.best_loss
-        }
-        save_args = orbax_utils.save_args_from_target(ckpt)
-        self.checkpointer.save(epoch, ckpt, save_kwargs={'save_args': save_args})
-
-    def summary(self, image_size=64):
-        inp = jnp.ones((1, image_size, image_size, 3))
-        temb = jnp.ones((1,))
-        print(self.model.tabulate(jax.random.key(0), inp, temb, console_kwargs={"width": 200, "force_jupyter": True, }))
-
-    def _define_train_step(self):
-        noise_schedule = self.noise_schedule
-        model = self.model
-        model_output_transform = self.model_output_transform
-        loss_fn = self.loss_fn
-
-        @jax.jit
-        def train_step(state: TrainState, batch):
-            """Train for a single step."""
-            images = batch
-            noise_level, state = noise_schedule.generate_timesteps(images.shape[0], state)
-            state, rngs = state.get_random_key()
-            noise: jax.Array = jax.random.normal(rngs, shape=images.shape)
-            rates = noise_schedule.get_rates(noise_level)
-            noisy_images, c_in, expected_output = model_output_transform.forward_diffusion(images, noise, rates)
-
-            def model_loss(params):
-                preds = model.apply(params, *noise_schedule.transform_inputs(noisy_images*c_in, noise_level))
-                preds = model_output_transform.pred_transform(noisy_images, preds, rates)
-                nloss = loss_fn(preds, expected_output)
-                # nloss = jnp.mean(nloss, axis=1)
-                nloss *= noise_schedule.get_weights(noise_level)
-                nloss = jnp.mean(nloss)
-                loss = nloss
-                return loss
-            loss, grads = jax.value_and_grad(model_loss)(state.params)
-            state = state.apply_gradients(grads=grads)
-            state = state.apply_ema(self.ema_decay)
-            return state, loss
-        return train_step
-
-    def _define_compute_metrics(self):
-        @jax.jit
-        def compute_metrics(state: TrainState, expected, pred):
-            loss = jnp.mean(jnp.square(pred - expected))
-            metric_updates = state.metrics.single_from_model_output(loss=loss)
-            metrics = state.metrics.merge(metric_updates)
-            state = state.replace(metrics=metrics)
-            return state
-        return compute_metrics
-
-    def fit(self, data, steps_per_epoch, epochs):
-        data = iter(data)
-        train_step = self._define_train_step()
-        compute_metrics = self._define_compute_metrics()
-        state = self.state
-        for epoch in range(epochs):
-            print(f"\nEpoch {epoch+1}/{epochs}")
-            start_time = time.time()
-            epoch_loss = 0
-            with tqdm.tqdm(total=steps_per_epoch, desc=f'\t\tEpoch {epoch+1}', ncols=100, unit='step') as pbar:
-                for i in range(steps_per_epoch):
-                    batch = next(data)
-                    state, loss = train_step(state, batch)
-                    epoch_loss += loss
-                    if i % 100 == 0:
-                        pbar.set_postfix(loss=f'{loss:.4f}')
-                        pbar.update(100)
-            end_time = time.time()
-            self.state = state
-            total_time = end_time - start_time
-            avg_time_per_step = total_time / steps_per_epoch
-            avg_loss = epoch_loss / steps_per_epoch
-            if avg_loss < self.best_loss:
-                self.best_loss = avg_loss
-                self.best_state = state
-                self.save(epoch, best=True)
-            print(f"\n\tEpoch {epoch+1} completed. Avg Loss: {avg_loss}, Time: {total_time:.2f}s, Best Loss: {self.best_loss}")
-        return self.state