PyPI - flaxdiff - Versions diffs - 0.1.38__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

flaxdiff 0.1.38__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

flaxdiff/data/__init__.py +5 -1
flaxdiff/data/benchmark_decord.py +443 -0
flaxdiff/data/dataloaders.py +608 -0
flaxdiff/data/dataset_map.py +61 -6
flaxdiff/data/online_loader.py +779 -150
flaxdiff/data/sources/audio_utils.py +142 -0
flaxdiff/data/sources/av_example.py +125 -0
flaxdiff/data/sources/av_utils.py +590 -0
flaxdiff/data/sources/base.py +129 -0
flaxdiff/data/sources/images.py +309 -0
flaxdiff/data/sources/utils.py +158 -0
flaxdiff/data/sources/videos.py +250 -0
flaxdiff/data/sources/voxceleb2.py +412 -0
flaxdiff/inference/__init__.py +0 -0
flaxdiff/inference/pipeline.py +260 -0
flaxdiff/inference/utils.py +320 -0
flaxdiff/inputs/__init__.py +173 -0
flaxdiff/inputs/encoders.py +98 -0
flaxdiff/models/__init__.py +2 -1
flaxdiff/models/attention.py +22 -16
flaxdiff/models/autoencoder/autoencoder.py +141 -9
flaxdiff/models/autoencoder/diffusers.py +88 -25
flaxdiff/models/autoencoder/simple_autoenc.py +40 -8
flaxdiff/models/common.py +8 -18
flaxdiff/models/simple_unet.py +6 -17
flaxdiff/models/simple_vit.py +9 -13
flaxdiff/models/unet_3d.py +446 -0
flaxdiff/models/unet_3d_blocks.py +505 -0
flaxdiff/samplers/common.py +358 -96
flaxdiff/samplers/ddim.py +44 -5
flaxdiff/schedulers/karras.py +20 -12
flaxdiff/trainer/__init__.py +2 -1
flaxdiff/trainer/autoencoder_trainer.py +1 -2
flaxdiff/trainer/diffusion_trainer.py +35 -29
flaxdiff/trainer/general_diffusion_trainer.py +583 -0
flaxdiff/trainer/simple_trainer.py +51 -16
flaxdiff/utils.py +128 -57
{flaxdiff-0.1.38.dist-info → flaxdiff-0.2.0.dist-info}/METADATA +1 -1
flaxdiff-0.2.0.dist-info/RECORD +64 -0
{flaxdiff-0.1.38.dist-info → flaxdiff-0.2.0.dist-info}/WHEEL +1 -1
flaxdiff/data/datasets.py +0 -169
flaxdiff/data/sources/gcs.py +0 -81
flaxdiff/data/sources/tfds.py +0 -79
flaxdiff/trainer/video_diffusion_trainer.py +0 -62
flaxdiff-0.1.38.dist-info/RECORD +0 -50
{flaxdiff-0.1.38.dist-info → flaxdiff-0.2.0.dist-info}/top_level.txt +0 -0

flaxdiff/inference/pipeline.py ADDED Viewed

@@ -0,0 +1,260 @@
+import jax
+import flax.linen as nn
+from dataclasses import dataclass, field
+from typing import Optional, Dict, Any, Union, List, Tuple, Type
+from flaxdiff.trainer import (
+    SimpleTrainState,
+    TrainState,
+)
+from flaxdiff.samplers import (
+    DiffusionSampler,
+)
+from flaxdiff.schedulers import (
+    NoiseScheduler,
+)
+from flaxdiff.predictors import (
+    DiffusionPredictionTransform,
+)
+from flaxdiff.models.autoencoder import AutoEncoder
+from flaxdiff.inputs import DiffusionInputConfig
+from flaxdiff.utils import defaultTextEncodeModel, RandomMarkovState
+from flaxdiff.samplers.euler import EulerAncestralSampler
+from .utils import parse_config, load_from_wandb_run, load_from_wandb_registry
+@dataclass
+class InferencePipeline:
+    """Inference pipeline for a general model."""
+    model: nn.Module = None
+    state: SimpleTrainState = None
+    best_state: SimpleTrainState = None
+    def from_wandb(
+        self,
+        wandb_run: str,
+        wandb_project: str,
+        wandb_entity: str,
+    ):
+        raise NotImplementedError("InferencePipeline does not support from_wandb.")
+@dataclass
+class DiffusionInferencePipeline(InferencePipeline):
+    """Inference pipeline for diffusion models.
+    This pipeline handles loading models from wandb and generating samples using the
+    DiffusionSampler from FlaxDiff.
+    """
+    state: TrainState = None
+    best_state: TrainState = None
+    rngstate: Optional[RandomMarkovState] = None
+    noise_schedule: NoiseScheduler = None
+    model_output_transform: DiffusionPredictionTransform = None
+    autoencoder: AutoEncoder = None
+    input_config: DiffusionInputConfig = None
+    samplers: Dict[Type[DiffusionSampler], Dict[float, DiffusionSampler]] = field(default_factory=dict)
+    config: Dict[str, Any] = field(default_factory=dict)
+    @classmethod
+    def from_wandb_run(
+        cls,
+        wandb_run: str,
+        project: str,
+        entity: str,
+    ):
+        """Create an inference pipeline from a wandb run.
+        Args:
+            wandb_run: Run ID or display name
+            project: Wandb project name
+            entity: Wandb entity name
+            wandb_modelname: Model name in wandb registry (if None, loads from checkpoint)
+            checkpoint_step: Specific checkpoint step to load (if None, loads latest)
+            config_overrides: Optional dictionary to override config values
+            checkpoint_base_path: Base path for checkpoint storage
+        Returns:
+            DiffusionInferencePipeline instance
+        """
+        states, config = load_from_wandb_run(
+            wandb_run,
+            project=project,
+            entity=entity,
+        )
+        if states is None:
+            raise ValueError("Failed to load model parameters from wandb.")
+        state, best_state = states
+        parsed_config = parse_config(config)
+        # Create the pipeline
+        pipeline = cls.create(
+            config=parsed_config,
+            state=state,
+            best_state=best_state,
+            rngstate=RandomMarkovState(jax.random.PRNGKey(42)),
+        )
+        return pipeline
+    @classmethod
+    def from_wandb_registry(
+        cls,
+        modelname: str,
+        project: str,
+        entity: str = None,
+        version: str = 'latest',
+        registry: str = 'wandb-registry-model',
+    ):
+        """Create an inference pipeline from a wandb model registry.
+        Args:
+            modelname: Model name in wandb registry
+            project: Wandb project name
+            entity: Wandb entity name
+            version: Version of the model to load (default is 'latest')
+            registry: Registry name (default is 'wandb-registry-model')
+        Returns:
+            DiffusionInferencePipeline instance
+        """
+        states, config = load_from_wandb_registry(
+            modelname=modelname,
+            project=project,
+            entity=entity,
+            version=version,
+            registry=registry,
+        )
+        if states is None:
+            raise ValueError("Failed to load model parameters from wandb.")
+        state, best_state = states
+        parsed_config = parse_config(config)
+        # Create the pipeline
+        pipeline = cls.create(
+            config=parsed_config,
+            state=state,
+            best_state=best_state,
+            rngstate=RandomMarkovState(jax.random.PRNGKey(42)),
+        )
+        return pipeline
+    @classmethod
+    def create(
+        cls,
+        config: Dict[str, Any],
+        state: Dict[str, Any],
+        best_state: Optional[Dict[str, Any]] = None,
+        rngstate: Optional[RandomMarkovState] = None,
+    ):
+        if rngstate is None:
+            rngstate = RandomMarkovState(jax.random.PRNGKey(42))
+        # Build and return pipeline
+        return cls(
+            model=config['model'],
+            state=state,
+            best_state=best_state,
+            rngstate=rngstate,
+            noise_schedule=config['noise_schedule'],
+            model_output_transform=config['prediction_transform'],
+            autoencoder=config['autoencoder'],
+            input_config=config['input_config'],
+            config=config,
+        )
+    def get_sampler(
+        self,
+        guidance_scale: float = 3.0,
+        sampler_class=EulerAncestralSampler,
+    ) -> DiffusionSampler:
+        """Get (or create) a sampler for generating samples.
+        This method caches samplers by their class and guidance scale for reuse.
+        Args:
+            sampler_class: Class for the diffusion sampler
+            guidance_scale: Classifier-free guidance scale (0.0 to disable)
+        Returns:
+            DiffusionSampler instance
+        """
+        # Get or create dictionary for this sampler class
+        if sampler_class not in self.samplers:
+            self.samplers[sampler_class] = {}
+        # Check if we already have a sampler with this guidance scale
+        if guidance_scale not in self.samplers[sampler_class]:
+            # Create unconditional embeddings if using guidance
+            null_embeddings = None
+            if guidance_scale > 0.0:
+                null_text = self.input_config.conditions[0].get_unconditional()
+                null_embeddings = null_text
+                print(f"Created null embeddings for guidance with shape {null_embeddings.shape}")
+            # Create and cache the sampler
+            self.samplers[sampler_class][guidance_scale] = sampler_class(
+                model=self.model,
+                noise_schedule=self.noise_schedule,
+                model_output_transform=self.model_output_transform,
+                guidance_scale=guidance_scale,
+                input_config=self.input_config,
+                autoencoder=self.autoencoder,
+            )
+        return self.samplers[sampler_class][guidance_scale]
+    def generate_samples(
+        self,
+        num_samples: int,
+        resolution: int,
+        conditioning_data: Optional[List[Union[Tuple, Dict]]] = None,  # one list per modality or list of tuples
+        sequence_length: Optional[int] = None,
+        diffusion_steps: int = 50,
+        guidance_scale: float = 1.0,
+        sampler_class=EulerAncestralSampler,
+        timestep_spacing: str = 'linear',
+        seed: Optional[int] = None,
+        start_step: Optional[int] = None,
+        end_step: int = 0,
+        steps_override=None,
+        priors=None,
+        use_best_params: bool = False,
+        use_ema: bool = False,
+    ):
+        # Setup RNG
+        rngstate = self.rngstate or RandomMarkovState(jax.random.PRNGKey(seed or 0))
+        # Get cached or new sampler
+        sampler = self.get_sampler(
+            guidance_scale=guidance_scale,
+            sampler_class=sampler_class,
+        )
+        if hasattr(sampler, 'timestep_spacing'):
+            sampler.timestep_spacing = timestep_spacing
+        print(f"Generating samples: steps={diffusion_steps}, num_samples={num_samples}, guidance={guidance_scale}")
+        if use_best_params:
+            state = self.best_state
+        else:
+            state = self.state
+        if use_ema:
+            params = state['ema_params']
+        else:
+            params = state['params']
+        return sampler.generate_samples(
+            params=params,
+            num_samples=num_samples,
+            resolution=resolution,
+            sequence_length=sequence_length,
+            diffusion_steps=diffusion_steps,
+            start_step=start_step,
+            end_step=end_step,
+            steps_override=steps_override,
+            priors=priors,
+            rngstate=rngstate,
+            conditioning=conditioning_data
+        )

flaxdiff/inference/utils.py ADDED Viewed

@@ -0,0 +1,320 @@
+import jax
+import jax.numpy as jnp
+import json
+from flaxdiff.schedulers import (
+    CosineNoiseScheduler,
+    KarrasVENoiseScheduler,
+)
+from flaxdiff.predictors import (
+    VPredictionTransform,
+    KarrasPredictionTransform,
+)
+from flaxdiff.models.common import kernel_init
+from flaxdiff.models.simple_unet import Unet
+from flaxdiff.models.simple_vit import UViT
+from flaxdiff.models.general import BCHWModelWrapper
+from flaxdiff.models.autoencoder.diffusers import StableDiffusionVAE
+from flaxdiff.inputs import DiffusionInputConfig, ConditionalInputConfig
+from flaxdiff.utils import defaultTextEncodeModel
+from diffusers import FlaxUNet2DConditionModel
+import wandb
+from flaxdiff.models.simple_unet import Unet
+from flaxdiff.models.simple_vit import UViT
+from flaxdiff.models.general import BCHWModelWrapper
+from flaxdiff.models.autoencoder.diffusers import StableDiffusionVAE
+from flaxdiff.inputs import DiffusionInputConfig, ConditionalInputConfig
+from flaxdiff.utils import defaultTextEncodeModel
+from orbax.checkpoint import CheckpointManager, CheckpointManagerOptions, PyTreeCheckpointer
+import os
+import warnings
+def get_wandb_run(wandb_run: str, project, entity):
+    """
+    Try to get the wandb run for the given experiment name and project.
+    Return None if not found.
+    """
+    import wandb
+    wandb_api = wandb.Api()
+    # First try to get the run by treating wandb_run as a run ID
+    try:
+        run = wandb_api.run(f"{entity}/{project}/{wandb_run}")
+        print(f"Found run: {run.name} ({run.id})")
+        return run
+    except wandb.Error as e:
+        print(f"Run not found by ID: {e}")
+        # If that fails, try to get the run by treating wandb_run as a display name
+        # This is a bit of a hack, but it works for now.
+        # Note: this will return all runs with the same display name, so be careful.
+        print(f"Trying to get run by display name: {wandb_run}")
+    runs = wandb_api.runs(path=f"{entity}/{project}", filters={"displayName": wandb_run})
+    for run in runs:
+        print(f"Found run: {run.name} ({run.id})")
+        return run
+    return None
+def parse_config(config, overrides=None):
+    """Parse configuration for inference pipeline.
+    Args:
+        config: Configuration dictionary from wandb run
+        overrides: Optional dictionary of overrides for config parameters
+    Returns:
+        Dictionary containing model, sampler, scheduler, and other required components
+        including DiffusionInputConfig for the general diffusion framework
+    """
+    warnings.filterwarnings("ignore")
+    # Merge config with overrides if provided
+    if overrides is not None:
+        # Create a deep copy of config to avoid modifying the original
+        merged_config = dict(config)
+        # Update arguments with overrides
+        if 'arguments' in merged_config:
+            merged_config['arguments'] = {**merged_config['arguments'], **overrides}
+            # Also update top-level config for key parameters
+            for key in overrides:
+                if key in merged_config:
+                    merged_config[key] = overrides[key]
+    else:
+        merged_config = config
+    # Parse configuration from config dict
+    conf = merged_config
+    # Setup mappings for dtype, precision, and activation
+    DTYPE_MAP = {
+        'bfloat16': jnp.bfloat16,
+        'float32': jnp.float32,
+        'jax.numpy.float32': jnp.float32,
+        'jax.numpy.bfloat16': jnp.bfloat16,
+        'None': None,
+        None: None,
+    }
+    PRECISION_MAP = {
+        'high': jax.lax.Precision.HIGH,
+        'HIGH': jax.lax.Precision.HIGH,
+        'default': jax.lax.Precision.DEFAULT,
+        'DEFAULT': jax.lax.Precision.DEFAULT,
+        'highest': jax.lax.Precision.HIGHEST,
+        'HIGHEST': jax.lax.Precision.HIGHEST,
+        'None': None,
+        None: None,
+    }
+    ACTIVATION_MAP = {
+        'swish': jax.nn.swish,
+        'silu': jax.nn.silu,
+        'jax._src.nn.functions.silu': jax.nn.silu,
+        'mish': jax.nn.mish,
+    }
+    # Get model class based on architecture
+    MODEL_CLASSES = {
+        'unet': Unet,
+        'uvit': UViT,
+        'diffusers_unet_simple': FlaxUNet2DConditionModel
+    }
+    # Map all the leaves of the model config, converting strings to appropriate types
+    def map_nested_config(config):
+        new_config = {}
+        for key, value in config.items():
+            if isinstance(value, dict):
+                new_config[key] = map_nested_config(value)
+            elif isinstance(value, list):
+                new_config[key] = [map_nested_config(item) if isinstance(item, dict) else item for item in value]
+            elif isinstance(value, str):
+                if value in DTYPE_MAP:
+                    new_config[key] = DTYPE_MAP[value]
+                elif value in PRECISION_MAP:
+                    new_config[key] = PRECISION_MAP[value]
+                elif value in ACTIVATION_MAP:
+                    new_config[key] = ACTIVATION_MAP[value]
+                elif value == 'None':
+                    new_config[key] = None
+                elif '.'in value:
+                    # Ignore any other string that contains a dot
+                    print(f"Ignoring key {key} with value {value} as it contains a dot.")
+                else:
+                    new_config[key] = value
+            else:
+                new_config[key] = value
+        return new_config
+    # Parse architecture and model config
+    model_config = conf['model']
+    # Get architecture type
+    architecture = conf.get('architecture', conf.get('arguments', {}).get('architecture', 'unet'))
+    # Handle autoencoder
+    autoencoder_name = conf.get('autoencoder', conf.get('arguments', {}).get('autoencoder'))
+    autoencoder_opts_str = conf.get('autoencoder_opts', conf.get('arguments', {}).get('autoencoder_opts', '{}'))
+    autoencoder = None
+    autoencoder_opts = None
+    if autoencoder_name:
+        print(f"Using autoencoder: {autoencoder_name}")
+        if isinstance(autoencoder_opts_str, str):
+            autoencoder_opts = json.loads(autoencoder_opts_str)
+        else:
+            autoencoder_opts = autoencoder_opts_str
+        if autoencoder_name == 'stable_diffusion':
+            print("Using Stable Diffusion Autoencoder for Latent Diffusion Modeling")
+            autoencoder_opts = map_nested_config(autoencoder_opts)
+            autoencoder = StableDiffusionVAE(**autoencoder_opts)
+    input_config = conf.get('input_config', None)
+    # If not provided, create one based on the older format (backward compatibility)
+    if input_config is None:
+        # Warn if input_config is not provided
+        print("No input_config provided, creating a default one.")
+        image_size = conf['arguments'].get('image_size', 128)
+        image_channels = 3  # Default number of channels
+        # Create text encoder
+        text_encoder = defaultTextEncodeModel()
+        # Create a conditional input config for text conditioning
+        text_conditional_config = ConditionalInputConfig(
+            encoder=text_encoder,
+            conditioning_data_key='text',
+            pretokenized=True,
+            unconditional_input="",
+            model_key_override="textcontext"
+        )
+        # Create the main input config
+        input_config = DiffusionInputConfig(
+            sample_data_key='image',
+            sample_data_shape=(image_size, image_size, image_channels),
+            conditions=[text_conditional_config]
+        )
+    else:
+        # Deserialize the input config if it's a string
+        input_config = DiffusionInputConfig.deserialize(input_config)
+    model_kwargs = map_nested_config(model_config)
+    print(f"Model kwargs after mapping: {model_kwargs}")
+    model_class = MODEL_CLASSES.get(architecture)
+    if not model_class:
+        raise ValueError(f"Unknown architecture: {architecture}. Supported architectures: {', '.join(MODEL_CLASSES.keys())}")
+    # Instantiate the model
+    model = model_class(**model_kwargs)
+    # If using diffusers UNet, wrap it for consistent interface
+    if 'diffusers' in architecture:
+        model = BCHWModelWrapper(model)
+    # Create noise scheduler based on configuration
+    noise_schedule_type = conf.get('noise_schedule', conf.get('arguments', {}).get('noise_schedule', 'edm'))
+    if noise_schedule_type in ['edm', 'karras']:
+        # For both EDM and karras, we use the karras scheduler for inference
+        noise_schedule = KarrasVENoiseScheduler(1, sigma_max=80, rho=7, sigma_data=0.5)
+        prediction_transform = KarrasPredictionTransform(sigma_data=noise_schedule.sigma_data)
+    elif noise_schedule_type == 'cosine':
+        noise_schedule = CosineNoiseScheduler(1000, beta_end=1)
+        prediction_transform = VPredictionTransform()
+    else:
+        raise ValueError(f"Unknown noise schedule: {noise_schedule_type}")
+    # Prepare return dictionary with all components
+    result = {
+        'model': model,
+        'model_config': model_kwargs,
+        'architecture': architecture,
+        'autoencoder': autoencoder,
+        'noise_schedule': noise_schedule,
+        'prediction_transform': prediction_transform,
+        'input_config': input_config,
+        'raw_config': conf,
+    }
+    return result
+def load_from_checkpoint(
+    checkpoint_dir: str,
+):
+    try:
+        checkpointer = PyTreeCheckpointer()
+        options = CheckpointManagerOptions(create=False)
+        # Convert checkpoint_dir to absolute path
+        checkpoint_dir = os.path.abspath(checkpoint_dir)
+        manager = CheckpointManager(checkpoint_dir, checkpointer, options)
+        ckpt = manager.restore(checkpoint_dir)
+        # Extract as above
+        state, best_state = None, None
+        if 'state' in ckpt:
+            state = ckpt['state']
+        if 'best_state' in ckpt:
+            best_state = ckpt['best_state']
+        print(f"Loaded checkpoint from local dir {checkpoint_dir}")
+        return state, best_state
+    except Exception as e:
+        print(f"Warning: Failed to load checkpoint from local dir: {e}")
+        return None, None
+def load_from_wandb_run(
+    run,
+    project: str,
+    entity: str = None,
+):
+    """
+    Loads model from wandb model registry.
+    """
+    # Get the model version from wandb
+    states = None
+    config = None
+    try:
+        if isinstance(run, str):
+            run = get_wandb_run(run, project, entity)
+        # Search for model artifact
+        models = [i for i in run.logged_artifacts() if i.type == 'model']
+        if len(models) == 0:
+            raise ValueError(f"No model artifacts found in run {run.id}")
+        # Pick out any model artifact
+        highest_version = max([{'version':int(i.version[1:]), 'name': i.qualified_name} for i in models], key=lambda x: x['version'])
+        wandb_modelname = highest_version['name']
+        print(f"Loading model from wandb: {wandb_modelname} out of versions {[i.version for i in models]}")
+        artifact = run.use_artifact(wandb.Api().artifact(wandb_modelname))
+        ckpt_dir = artifact.download()
+        print(f"Loaded model from wandb: {wandb_modelname} at path {ckpt_dir}")
+        # Load the model from the checkpoint directory
+        states = load_from_checkpoint(ckpt_dir)
+        config = run.config
+    except Exception as e:
+        print(f"Warning: Failed to load model from wandb: {e}")
+    return states, config
+def load_from_wandb_registry(
+    modelname: str,
+    project: str,
+    entity: str = None,
+    version: str = 'latest',
+    registry: str = 'wandb-registry-model',
+):
+    """
+    Loads model from wandb model registry.
+    """
+    # Get the model version from wandb
+    states = None
+    config = None
+    try:
+        artifact = wandb.Api().artifact(f"{registry}/{modelname}:{version}")
+        ckpt_dir = artifact.download()
+        print(f"Loaded model from wandb registry: {modelname} at path {ckpt_dir}")
+        # Load the model from the checkpoint directory
+        states = load_from_checkpoint(ckpt_dir)
+        run = artifact.logged_by()
+        config = run.config
+    except Exception as e:
+        print(f"Warning: Failed to load model from wandb: {e}")
+    return states, config

flaxdiff 0.1.38__py3-none-any.whl → 0.2.0__py3-none-any.whl

flaxdiff 0.1.38__py3-none-any.whl → 0.2.0__py3-none-any.whl