diffusers 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (389)
  1. diffusers/__init__.py +186 -3
  2. diffusers/configuration_utils.py +40 -12
  3. diffusers/dependency_versions_table.py +9 -2
  4. diffusers/hooks/__init__.py +9 -0
  5. diffusers/hooks/faster_cache.py +653 -0
  6. diffusers/hooks/group_offloading.py +793 -0
  7. diffusers/hooks/hooks.py +236 -0
  8. diffusers/hooks/layerwise_casting.py +245 -0
  9. diffusers/hooks/pyramid_attention_broadcast.py +311 -0
  10. diffusers/loaders/__init__.py +6 -0
  11. diffusers/loaders/ip_adapter.py +38 -30
  12. diffusers/loaders/lora_base.py +198 -28
  13. diffusers/loaders/lora_conversion_utils.py +679 -44
  14. diffusers/loaders/lora_pipeline.py +1963 -801
  15. diffusers/loaders/peft.py +169 -84
  16. diffusers/loaders/single_file.py +17 -2
  17. diffusers/loaders/single_file_model.py +53 -5
  18. diffusers/loaders/single_file_utils.py +653 -75
  19. diffusers/loaders/textual_inversion.py +9 -9
  20. diffusers/loaders/transformer_flux.py +8 -9
  21. diffusers/loaders/transformer_sd3.py +120 -39
  22. diffusers/loaders/unet.py +22 -32
  23. diffusers/models/__init__.py +22 -0
  24. diffusers/models/activations.py +9 -9
  25. diffusers/models/attention.py +0 -1
  26. diffusers/models/attention_processor.py +163 -25
  27. diffusers/models/auto_model.py +169 -0
  28. diffusers/models/autoencoders/__init__.py +2 -0
  29. diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
  30. diffusers/models/autoencoders/autoencoder_dc.py +106 -4
  31. diffusers/models/autoencoders/autoencoder_kl.py +0 -4
  32. diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
  33. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
  34. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
  35. diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
  36. diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
  37. diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
  38. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
  39. diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
  40. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
  41. diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
  42. diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
  43. diffusers/models/autoencoders/vae.py +31 -141
  44. diffusers/models/autoencoders/vq_model.py +3 -0
  45. diffusers/models/cache_utils.py +108 -0
  46. diffusers/models/controlnets/__init__.py +1 -0
  47. diffusers/models/controlnets/controlnet.py +3 -8
  48. diffusers/models/controlnets/controlnet_flux.py +14 -42
  49. diffusers/models/controlnets/controlnet_sd3.py +58 -34
  50. diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
  51. diffusers/models/controlnets/controlnet_union.py +27 -18
  52. diffusers/models/controlnets/controlnet_xs.py +7 -46
  53. diffusers/models/controlnets/multicontrolnet_union.py +196 -0
  54. diffusers/models/embeddings.py +18 -7
  55. diffusers/models/model_loading_utils.py +122 -80
  56. diffusers/models/modeling_flax_pytorch_utils.py +1 -1
  57. diffusers/models/modeling_flax_utils.py +1 -1
  58. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  59. diffusers/models/modeling_utils.py +617 -272
  60. diffusers/models/normalization.py +67 -14
  61. diffusers/models/resnet.py +1 -1
  62. diffusers/models/transformers/__init__.py +6 -0
  63. diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
  64. diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
  65. diffusers/models/transformers/consisid_transformer_3d.py +789 -0
  66. diffusers/models/transformers/dit_transformer_2d.py +5 -19
  67. diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
  68. diffusers/models/transformers/latte_transformer_3d.py +20 -15
  69. diffusers/models/transformers/lumina_nextdit2d.py +3 -1
  70. diffusers/models/transformers/pixart_transformer_2d.py +4 -19
  71. diffusers/models/transformers/prior_transformer.py +5 -1
  72. diffusers/models/transformers/sana_transformer.py +144 -40
  73. diffusers/models/transformers/stable_audio_transformer.py +5 -20
  74. diffusers/models/transformers/transformer_2d.py +7 -22
  75. diffusers/models/transformers/transformer_allegro.py +9 -17
  76. diffusers/models/transformers/transformer_cogview3plus.py +6 -17
  77. diffusers/models/transformers/transformer_cogview4.py +462 -0
  78. diffusers/models/transformers/transformer_easyanimate.py +527 -0
  79. diffusers/models/transformers/transformer_flux.py +68 -110
  80. diffusers/models/transformers/transformer_hunyuan_video.py +409 -49
  81. diffusers/models/transformers/transformer_ltx.py +53 -35
  82. diffusers/models/transformers/transformer_lumina2.py +548 -0
  83. diffusers/models/transformers/transformer_mochi.py +6 -17
  84. diffusers/models/transformers/transformer_omnigen.py +469 -0
  85. diffusers/models/transformers/transformer_sd3.py +56 -86
  86. diffusers/models/transformers/transformer_temporal.py +5 -11
  87. diffusers/models/transformers/transformer_wan.py +469 -0
  88. diffusers/models/unets/unet_1d.py +3 -1
  89. diffusers/models/unets/unet_2d.py +21 -20
  90. diffusers/models/unets/unet_2d_blocks.py +19 -243
  91. diffusers/models/unets/unet_2d_condition.py +4 -6
  92. diffusers/models/unets/unet_3d_blocks.py +14 -127
  93. diffusers/models/unets/unet_3d_condition.py +8 -12
  94. diffusers/models/unets/unet_i2vgen_xl.py +5 -13
  95. diffusers/models/unets/unet_kandinsky3.py +0 -4
  96. diffusers/models/unets/unet_motion_model.py +20 -114
  97. diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
  98. diffusers/models/unets/unet_stable_cascade.py +8 -35
  99. diffusers/models/unets/uvit_2d.py +1 -4
  100. diffusers/optimization.py +2 -2
  101. diffusers/pipelines/__init__.py +57 -8
  102. diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
  103. diffusers/pipelines/amused/pipeline_amused.py +15 -2
  104. diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
  105. diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
  106. diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
  107. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
  108. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
  109. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
  110. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
  111. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
  112. diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
  113. diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
  114. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
  115. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
  116. diffusers/pipelines/auto_pipeline.py +35 -14
  117. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  118. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
  119. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
  120. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
  121. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
  122. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
  123. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
  124. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
  125. diffusers/pipelines/cogview4/__init__.py +49 -0
  126. diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
  127. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
  128. diffusers/pipelines/cogview4/pipeline_output.py +21 -0
  129. diffusers/pipelines/consisid/__init__.py +49 -0
  130. diffusers/pipelines/consisid/consisid_utils.py +357 -0
  131. diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
  132. diffusers/pipelines/consisid/pipeline_output.py +20 -0
  133. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
  134. diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
  135. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
  136. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
  137. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
  138. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
  139. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
  140. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
  141. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
  142. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
  143. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
  144. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
  145. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
  146. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
  147. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
  148. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
  149. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
  150. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
  151. diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
  152. diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
  153. diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
  154. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
  155. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
  156. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
  157. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
  158. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
  159. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
  160. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
  161. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
  162. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
  163. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
  164. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
  165. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
  166. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
  167. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
  168. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
  169. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
  170. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
  171. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
  172. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
  173. diffusers/pipelines/dit/pipeline_dit.py +15 -2
  174. diffusers/pipelines/easyanimate/__init__.py +52 -0
  175. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
  176. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
  177. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
  178. diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
  179. diffusers/pipelines/flux/pipeline_flux.py +53 -21
  180. diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
  181. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
  182. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
  183. diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
  184. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
  185. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
  186. diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
  187. diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
  188. diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
  189. diffusers/pipelines/free_noise_utils.py +3 -3
  190. diffusers/pipelines/hunyuan_video/__init__.py +4 -0
  191. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
  192. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
  193. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
  194. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
  195. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
  196. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
  197. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
  198. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
  199. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
  200. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
  201. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
  202. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
  203. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
  204. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
  205. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
  206. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
  207. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
  208. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
  209. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
  210. diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
  211. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
  212. diffusers/pipelines/kolors/text_encoder.py +7 -34
  213. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
  214. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
  215. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
  216. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
  217. diffusers/pipelines/latte/pipeline_latte.py +36 -7
  218. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
  219. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
  220. diffusers/pipelines/ltx/__init__.py +2 -0
  221. diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
  222. diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
  223. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
  224. diffusers/pipelines/lumina/__init__.py +2 -2
  225. diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
  226. diffusers/pipelines/lumina2/__init__.py +48 -0
  227. diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
  228. diffusers/pipelines/marigold/__init__.py +2 -0
  229. diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
  230. diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
  231. diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
  232. diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
  233. diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
  234. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
  235. diffusers/pipelines/omnigen/__init__.py +50 -0
  236. diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
  237. diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
  238. diffusers/pipelines/onnx_utils.py +5 -3
  239. diffusers/pipelines/pag/pag_utils.py +1 -1
  240. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
  241. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
  242. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
  243. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
  244. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
  245. diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
  246. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
  247. diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
  248. diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
  249. diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
  250. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
  251. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
  252. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
  253. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
  254. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
  255. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
  256. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
  257. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
  258. diffusers/pipelines/pia/pipeline_pia.py +13 -1
  259. diffusers/pipelines/pipeline_flax_utils.py +7 -7
  260. diffusers/pipelines/pipeline_loading_utils.py +193 -83
  261. diffusers/pipelines/pipeline_utils.py +221 -106
  262. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
  263. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
  264. diffusers/pipelines/sana/__init__.py +2 -0
  265. diffusers/pipelines/sana/pipeline_sana.py +183 -58
  266. diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
  267. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
  268. diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
  269. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
  270. diffusers/pipelines/shap_e/renderer.py +6 -6
  271. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
  272. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
  273. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
  274. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
  275. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
  276. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
  277. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
  278. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
  279. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  280. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
  281. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
  282. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
  283. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
  284. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
  285. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
  286. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
  287. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
  288. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
  289. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
  290. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
  291. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
  292. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
  293. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
  294. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
  295. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
  296. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
  297. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
  298. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
  299. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
  300. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
  301. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
  302. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
  303. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
  304. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
  305. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
  306. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  307. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
  308. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
  309. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
  310. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
  311. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
  312. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
  313. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
  314. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
  315. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
  316. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
  317. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
  318. diffusers/pipelines/transformers_loading_utils.py +121 -0
  319. diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
  320. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
  321. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
  322. diffusers/pipelines/wan/__init__.py +51 -0
  323. diffusers/pipelines/wan/pipeline_output.py +20 -0
  324. diffusers/pipelines/wan/pipeline_wan.py +593 -0
  325. diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
  326. diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
  327. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
  328. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
  329. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
  330. diffusers/quantizers/auto.py +5 -1
  331. diffusers/quantizers/base.py +5 -9
  332. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
  333. diffusers/quantizers/bitsandbytes/utils.py +30 -20
  334. diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
  335. diffusers/quantizers/gguf/utils.py +4 -2
  336. diffusers/quantizers/quantization_config.py +59 -4
  337. diffusers/quantizers/quanto/__init__.py +1 -0
  338. diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
  339. diffusers/quantizers/quanto/utils.py +60 -0
  340. diffusers/quantizers/torchao/__init__.py +1 -1
  341. diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
  342. diffusers/schedulers/__init__.py +2 -1
  343. diffusers/schedulers/scheduling_consistency_models.py +1 -2
  344. diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
  345. diffusers/schedulers/scheduling_ddpm.py +2 -3
  346. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
  347. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
  348. diffusers/schedulers/scheduling_edm_euler.py +45 -10
  349. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
  350. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
  351. diffusers/schedulers/scheduling_heun_discrete.py +1 -1
  352. diffusers/schedulers/scheduling_lcm.py +1 -2
  353. diffusers/schedulers/scheduling_lms_discrete.py +1 -1
  354. diffusers/schedulers/scheduling_repaint.py +5 -1
  355. diffusers/schedulers/scheduling_scm.py +265 -0
  356. diffusers/schedulers/scheduling_tcd.py +1 -2
  357. diffusers/schedulers/scheduling_utils.py +2 -1
  358. diffusers/training_utils.py +14 -7
  359. diffusers/utils/__init__.py +10 -2
  360. diffusers/utils/constants.py +13 -1
  361. diffusers/utils/deprecation_utils.py +1 -1
  362. diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
  363. diffusers/utils/dummy_gguf_objects.py +17 -0
  364. diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
  365. diffusers/utils/dummy_pt_objects.py +233 -0
  366. diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
  367. diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
  368. diffusers/utils/dummy_torchao_objects.py +17 -0
  369. diffusers/utils/dynamic_modules_utils.py +1 -1
  370. diffusers/utils/export_utils.py +28 -3
  371. diffusers/utils/hub_utils.py +52 -102
  372. diffusers/utils/import_utils.py +121 -221
  373. diffusers/utils/loading_utils.py +14 -1
  374. diffusers/utils/logging.py +1 -2
  375. diffusers/utils/peft_utils.py +6 -14
  376. diffusers/utils/remote_utils.py +425 -0
  377. diffusers/utils/source_code_parsing_utils.py +52 -0
  378. diffusers/utils/state_dict_utils.py +15 -1
  379. diffusers/utils/testing_utils.py +243 -13
  380. diffusers/utils/torch_utils.py +10 -0
  381. diffusers/utils/typing_utils.py +91 -0
  382. diffusers/video_processor.py +1 -1
  383. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
  384. diffusers-0.33.0.dist-info/RECORD +608 -0
  385. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
  386. diffusers-0.32.1.dist-info/RECORD +0 -550
  387. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
  388. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
  389. {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,177 @@
1
+ from typing import TYPE_CHECKING, Any, Dict, List, Union
2
+
3
+ from diffusers.utils.import_utils import is_optimum_quanto_version
4
+
5
+ from ...utils import (
6
+ get_module_from_name,
7
+ is_accelerate_available,
8
+ is_accelerate_version,
9
+ is_optimum_quanto_available,
10
+ is_torch_available,
11
+ logging,
12
+ )
13
+ from ..base import DiffusersQuantizer
14
+
15
+
16
+ if TYPE_CHECKING:
17
+ from ...models.modeling_utils import ModelMixin
18
+
19
+
20
+ if is_torch_available():
21
+ import torch
22
+
23
+ if is_accelerate_available():
24
+ from accelerate.utils import CustomDtype, set_module_tensor_to_device
25
+
26
+ if is_optimum_quanto_available():
27
+ from .utils import _replace_with_quanto_layers
28
+
29
+ logger = logging.get_logger(__name__)
30
+
31
+
32
class QuantoQuantizer(DiffusersQuantizer):
    r"""
    Diffusers Quantizer for Optimum Quanto.

    Replaces eligible `nn.Linear` layers with Quanto quantized modules before weight
    loading, and freezes the quantized weights after they are materialized.
    """

    # Modules listed in `_keep_in_fp32_modules` are excluded from quantization.
    use_keep_in_fp32_modules = True
    requires_calibration = False
    required_packages = ["quanto", "accelerate"]

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)

    def validate_environment(self, *args, **kwargs):
        """Check that required libraries are installed and the requested `device_map` is supported.

        Raises:
            ImportError: if `optimum-quanto` (>= 0.2.6) or `accelerate` is not installed.
            ValueError: if a multi-device `device_map` is requested (multi-GPU / CPU-disk
                offload is not supported with the Quanto backend).
        """
        if not is_optimum_quanto_available():
            raise ImportError(
                "Loading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)"
            )
        if not is_optimum_quanto_version(">=", "0.2.6"):
            raise ImportError(
                "Loading an optimum-quanto quantized model requires `optimum-quanto>=0.2.6`. "
                "Please upgrade your installation with `pip install --upgrade optimum-quanto`"
            )

        if not is_accelerate_available():
            raise ImportError(
                "Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`)"
            )

        device_map = kwargs.get("device_map", None)
        if isinstance(device_map, dict) and len(device_map.keys()) > 1:
            raise ValueError(
                "`device_map` for multi-GPU inference or CPU/disk offload is currently not supported with Diffusers and the Quanto backend"
            )

    def check_if_quantized_param(
        self,
        model: "ModelMixin",
        param_value: "torch.Tensor",
        param_name: str,
        state_dict: Dict[str, Any],
        **kwargs,
    ) -> bool:
        """Return whether `param_name` should be handled by `create_quantized_param`.

        A parameter is considered quantized when it belongs to a Quanto module that is
        either pre-quantized or not yet frozen.
        """
        # Quanto imports diffusers internally. This is here to prevent circular imports
        from optimum.quanto import QModuleMixin, QTensor
        from optimum.quanto.tensor.packed import PackedTensor

        module, tensor_name = get_module_from_name(model, param_name)
        if self.pre_quantized and any(isinstance(module, t) for t in [QTensor, PackedTensor]):
            return True
        elif isinstance(module, QModuleMixin) and "weight" in tensor_name:
            # Unfrozen Quanto modules still need their weight quantized/frozen.
            return not module.frozen

        return False

    def create_quantized_param(
        self,
        model: "ModelMixin",
        param_value: "torch.Tensor",
        param_name: str,
        target_device: "torch.device",
        *args,
        **kwargs,
    ):
        """
        Create the quantized parameter by calling .freeze() after setting it to the module.
        """

        dtype = kwargs.get("dtype", torch.float32)
        module, tensor_name = get_module_from_name(model, param_name)
        if self.pre_quantized:
            # Pre-quantized checkpoints already contain quantized tensors; just attach them.
            setattr(module, tensor_name, param_value)
        else:
            set_module_tensor_to_device(model, param_name, target_device, param_value, dtype)
            module.freeze()
            module.weight.requires_grad = False

    def adjust_max_memory(self, max_memory: Dict[str, Union[int, str]]) -> Dict[str, Union[int, str]]:
        """Reserve a 10% headroom on every device budget for quantization overhead."""
        # NOTE(review): assumes numeric values; a str budget (e.g. "10GiB") would fail here — confirm callers.
        max_memory = {key: val * 0.90 for key, val in max_memory.items()}
        return max_memory

    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
        """Map the configured Quanto weights dtype to the dtype accelerate uses for memory planning."""
        if is_accelerate_version(">=", "0.27.0"):
            mapping = {
                "int8": torch.int8,
                "float8": CustomDtype.FP8,
                "int4": CustomDtype.INT4,
                "int2": CustomDtype.INT2,
            }
            target_dtype = mapping[self.quantization_config.weights_dtype]

        return target_dtype

    def update_torch_dtype(self, torch_dtype: "torch.dtype" = None) -> "torch.dtype":
        """Default `torch_dtype` to `torch.float32` when the caller did not specify one."""
        if torch_dtype is None:
            logger.info("You did not specify `torch_dtype` in `from_pretrained`. Setting it to `torch.float32`.")
            torch_dtype = torch.float32
        return torch_dtype

    def update_missing_keys(self, model, missing_keys: List[str], prefix: str) -> List[str]:
        """Drop Quanto bookkeeping tensors (non weight/bias keys of quantized modules) from `missing_keys`."""
        # Quanto imports diffusers internally. This is here to prevent circular imports
        from optimum.quanto import QModuleMixin

        not_missing_keys = []
        for name, module in model.named_modules():
            if isinstance(module, QModuleMixin):
                for missing in missing_keys:
                    if (
                        (name in missing or name in f"{prefix}.{missing}")
                        and not missing.endswith(".weight")
                        and not missing.endswith(".bias")
                    ):
                        not_missing_keys.append(missing)
        return [k for k in missing_keys if k not in not_missing_keys]

    def _process_model_before_weight_loading(
        self,
        model: "ModelMixin",
        device_map,
        keep_in_fp32_modules: List[str] = [],
        **kwargs,
    ):
        """Swap eligible `nn.Linear` layers for Quanto layers before the state dict is loaded."""
        # Normalize to a *fresh* list: a `None` config value must not become `[None]`,
        # and we must never mutate `quantization_config.modules_to_not_convert` in place
        # (the previous code aliased the config's own list and `extend`ed it).
        configured = self.quantization_config.modules_to_not_convert
        if configured is None:
            self.modules_to_not_convert = []
        elif isinstance(configured, list):
            self.modules_to_not_convert = list(configured)
        else:
            self.modules_to_not_convert = [configured]

        self.modules_to_not_convert.extend(keep_in_fp32_modules)

        model = _replace_with_quanto_layers(
            model,
            modules_to_not_convert=self.modules_to_not_convert,
            quantization_config=self.quantization_config,
            pre_quantized=self.pre_quantized,
        )
        model.config.quantization_config = self.quantization_config

    def _process_model_after_weight_loading(self, model, **kwargs):
        """No post-loading processing is needed for Quanto; return the model unchanged."""
        return model

    @property
    def is_trainable(self):
        # Quanto-quantized models support further (e.g. LoRA) training.
        return True

    @property
    def is_serializable(self):
        return True
@@ -0,0 +1,60 @@
1
+ import torch.nn as nn
2
+
3
+ from ...utils import is_accelerate_available, logging
4
+
5
+
6
+ logger = logging.get_logger(__name__)
7
+
8
+ if is_accelerate_available():
9
+ from accelerate import init_empty_weights
10
+
11
+
12
+ def _replace_with_quanto_layers(model, quantization_config, modules_to_not_convert: list, pre_quantized=False):
13
+ # Quanto imports diffusers internally. These are placed here to avoid circular imports
14
+ from optimum.quanto import QLinear, freeze, qfloat8, qint2, qint4, qint8
15
+
16
+ def _get_weight_type(dtype: str):
17
+ return {"float8": qfloat8, "int8": qint8, "int4": qint4, "int2": qint2}[dtype]
18
+
19
+ def _replace_layers(model, quantization_config, modules_to_not_convert):
20
+ has_children = list(model.children())
21
+ if not has_children:
22
+ return model
23
+
24
+ for name, module in model.named_children():
25
+ _replace_layers(module, quantization_config, modules_to_not_convert)
26
+
27
+ if name in modules_to_not_convert:
28
+ continue
29
+
30
+ if isinstance(module, nn.Linear):
31
+ with init_empty_weights():
32
+ qlinear = QLinear(
33
+ in_features=module.in_features,
34
+ out_features=module.out_features,
35
+ bias=module.bias is not None,
36
+ dtype=module.weight.dtype,
37
+ weights=_get_weight_type(quantization_config.weights_dtype),
38
+ )
39
+ model._modules[name] = qlinear
40
+ model._modules[name].source_cls = type(module)
41
+ model._modules[name].requires_grad_(False)
42
+
43
+ return model
44
+
45
+ model = _replace_layers(model, quantization_config, modules_to_not_convert)
46
+ has_been_replaced = any(isinstance(replaced_module, QLinear) for _, replaced_module in model.named_modules())
47
+
48
+ if not has_been_replaced:
49
+ logger.warning(
50
+ f"{model.__class__.__name__} does not appear to have any `nn.Linear` modules. Quantization will not be applied."
51
+ " Please check your model architecture, or submit an issue on Github if you think this is a bug."
52
+ " https://github.com/huggingface/diffusers/issues/new"
53
+ )
54
+
55
+ # We need to freeze the pre_quantized model in order for the loaded state_dict and model state dict
56
+ # to match when trying to load weights with load_model_dict_into_meta
57
+ if pre_quantized:
58
+ freeze(model)
59
+
60
+ return model
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -23,7 +23,14 @@ from typing import TYPE_CHECKING, Any, Dict, List, Union
23
23
 
24
24
  from packaging import version
25
25
 
26
- from ...utils import get_module_from_name, is_torch_available, is_torch_version, is_torchao_available, logging
26
+ from ...utils import (
27
+ get_module_from_name,
28
+ is_torch_available,
29
+ is_torch_version,
30
+ is_torchao_available,
31
+ is_torchao_version,
32
+ logging,
33
+ )
27
34
  from ..base import DiffusersQuantizer
28
35
 
29
36
 
@@ -62,6 +69,43 @@ if is_torchao_available():
62
69
  from torchao.quantization import quantize_
63
70
 
64
71
 
72
+ def _update_torch_safe_globals():
73
+ safe_globals = [
74
+ (torch.uint1, "torch.uint1"),
75
+ (torch.uint2, "torch.uint2"),
76
+ (torch.uint3, "torch.uint3"),
77
+ (torch.uint4, "torch.uint4"),
78
+ (torch.uint5, "torch.uint5"),
79
+ (torch.uint6, "torch.uint6"),
80
+ (torch.uint7, "torch.uint7"),
81
+ ]
82
+ try:
83
+ from torchao.dtypes import NF4Tensor
84
+ from torchao.dtypes.floatx.float8_layout import Float8AQTTensorImpl
85
+ from torchao.dtypes.uintx.uint4_layout import UInt4Tensor
86
+ from torchao.dtypes.uintx.uintx_layout import UintxAQTTensorImpl, UintxTensor
87
+
88
+ safe_globals.extend([UintxTensor, UInt4Tensor, UintxAQTTensorImpl, Float8AQTTensorImpl, NF4Tensor])
89
+
90
+ except (ImportError, ModuleNotFoundError) as e:
91
+ logger.warning(
92
+ "Unable to import `torchao` Tensor objects. This may affect loading checkpoints serialized with `torchao`"
93
+ )
94
+ logger.debug(e)
95
+
96
+ finally:
97
+ torch.serialization.add_safe_globals(safe_globals=safe_globals)
98
+
99
+
100
+ if (
101
+ is_torch_available()
102
+ and is_torch_version(">=", "2.6.0")
103
+ and is_torchao_available()
104
+ and is_torchao_version(">=", "0.7.0")
105
+ ):
106
+ _update_torch_safe_globals()
107
+
108
+
65
109
  logger = logging.get_logger(__name__)
66
110
 
67
111
 
@@ -215,6 +259,7 @@ class TorchAoHfQuantizer(DiffusersQuantizer):
215
259
  target_device: "torch.device",
216
260
  state_dict: Dict[str, Any],
217
261
  unexpected_keys: List[str],
262
+ **kwargs,
218
263
  ):
219
264
  r"""
220
265
  Each nn.Linear layer that needs to be quantized is processsed here. First, we set the value the weight tensor,
@@ -68,6 +68,7 @@ else:
68
68
  _import_structure["scheduling_pndm"] = ["PNDMScheduler"]
69
69
  _import_structure["scheduling_repaint"] = ["RePaintScheduler"]
70
70
  _import_structure["scheduling_sasolver"] = ["SASolverScheduler"]
71
+ _import_structure["scheduling_scm"] = ["SCMScheduler"]
71
72
  _import_structure["scheduling_sde_ve"] = ["ScoreSdeVeScheduler"]
72
73
  _import_structure["scheduling_tcd"] = ["TCDScheduler"]
73
74
  _import_structure["scheduling_unclip"] = ["UnCLIPScheduler"]
@@ -168,13 +169,13 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
168
169
  from .scheduling_pndm import PNDMScheduler
169
170
  from .scheduling_repaint import RePaintScheduler
170
171
  from .scheduling_sasolver import SASolverScheduler
172
+ from .scheduling_scm import SCMScheduler
171
173
  from .scheduling_sde_ve import ScoreSdeVeScheduler
172
174
  from .scheduling_tcd import TCDScheduler
173
175
  from .scheduling_unclip import UnCLIPScheduler
174
176
  from .scheduling_unipc_multistep import UniPCMultistepScheduler
175
177
  from .scheduling_utils import AysSchedules, KarrasDiffusionSchedulers, SchedulerMixin
176
178
  from .scheduling_vq_diffusion import VQDiffusionScheduler
177
-
178
179
  try:
179
180
  if not is_flax_available():
180
181
  raise OptionalDependencyNotAvailable()
@@ -203,8 +203,7 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
203
203
 
204
204
  if timesteps[0] >= self.config.num_train_timesteps:
205
205
  raise ValueError(
206
- f"`timesteps` must start before `self.config.train_timesteps`:"
207
- f" {self.config.num_train_timesteps}."
206
+ f"`timesteps` must start before `self.config.train_timesteps`: {self.config.num_train_timesteps}."
208
207
  )
209
208
 
210
209
  timesteps = np.array(timesteps, dtype=np.int64)
@@ -266,7 +266,7 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
266
266
 
267
267
  self.num_inference_steps = num_inference_steps
268
268
 
269
- # "leading" and "trailing" corresponds to annotation of Table 1. of https://arxiv.org/abs/2305.08891
269
+ # "leading" and "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
270
270
  if self.config.timestep_spacing == "leading":
271
271
  step_ratio = self.config.num_train_timesteps // self.num_inference_steps
272
272
  # creates integer timesteps by multiplying by ratio
@@ -142,7 +142,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
142
142
  The final `beta` value.
143
143
  beta_schedule (`str`, defaults to `"linear"`):
144
144
  The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
145
- `linear`, `scaled_linear`, or `squaredcos_cap_v2`.
145
+ `linear`, `scaled_linear`, `squaredcos_cap_v2`, or `sigmoid`.
146
146
  trained_betas (`np.ndarray`, *optional*):
147
147
  An array of betas to pass directly to the constructor without using `beta_start` and `beta_end`.
148
148
  variance_type (`str`, defaults to `"fixed_small"`):
@@ -279,8 +279,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
279
279
 
280
280
  if timesteps[0] >= self.config.num_train_timesteps:
281
281
  raise ValueError(
282
- f"`timesteps` must start before `self.config.train_timesteps`:"
283
- f" {self.config.num_train_timesteps}."
282
+ f"`timesteps` must start before `self.config.train_timesteps`: {self.config.num_train_timesteps}."
284
283
  )
285
284
 
286
285
  timesteps = np.array(timesteps, dtype=np.int64)
@@ -289,8 +289,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
289
289
 
290
290
  if timesteps[0] >= self.config.num_train_timesteps:
291
291
  raise ValueError(
292
- f"`timesteps` must start before `self.config.train_timesteps`:"
293
- f" {self.config.num_train_timesteps}."
292
+ f"`timesteps` must start before `self.config.train_timesteps`: {self.config.num_train_timesteps}."
294
293
  )
295
294
 
296
295
  timesteps = np.array(timesteps, dtype=np.int64)
@@ -136,8 +136,8 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
136
136
  sampling, and `solver_order=3` for unconditional sampling.
137
137
  prediction_type (`str`, defaults to `epsilon`, *optional*):
138
138
  Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
139
- `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
140
- Video](https://imagen.research.google/video/paper.pdf) paper).
139
+ `sample` (directly predicts the noisy sample), `v_prediction` (see section 2.4 of [Imagen
140
+ Video](https://imagen.research.google/video/paper.pdf) paper), or `flow_prediction`.
141
141
  thresholding (`bool`, defaults to `False`):
142
142
  Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
143
143
  as Stable Diffusion.
@@ -174,6 +174,10 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
174
174
  Whether to use the uniform-logSNR for step sizes proposed by Lu's DPM-Solver in the noise schedule during
175
175
  the sampling process. If `True`, the sigmas and time steps are determined according to a sequence of
176
176
  `lambda(t)`.
177
+ use_flow_sigmas (`bool`, *optional*, defaults to `False`):
178
+ Whether to use flow sigmas for step sizes in the noise schedule during the sampling process.
179
+ flow_shift (`float`, *optional*, defaults to 1.0):
180
+ The shift value for the timestep schedule for flow matching.
177
181
  final_sigmas_type (`str`, defaults to `"zero"`):
178
182
  The final `sigma` value for the noise schedule during the sampling process. If `"sigma_min"`, the final
179
183
  sigma is the same as the last sigma in the training schedule. If `zero`, the final sigma is set to 0.
@@ -395,12 +399,16 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
395
399
  if self.config.use_karras_sigmas:
396
400
  sigmas = np.flip(sigmas).copy()
397
401
  sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
398
- timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
402
+ timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
403
+ if self.config.beta_schedule != "squaredcos_cap_v2":
404
+ timesteps = timesteps.round()
399
405
  elif self.config.use_lu_lambdas:
400
406
  lambdas = np.flip(log_sigmas.copy())
401
407
  lambdas = self._convert_to_lu(in_lambdas=lambdas, num_inference_steps=num_inference_steps)
402
408
  sigmas = np.exp(lambdas)
403
- timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
409
+ timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
410
+ if self.config.beta_schedule != "squaredcos_cap_v2":
411
+ timesteps = timesteps.round()
404
412
  elif self.config.use_exponential_sigmas:
405
413
  sigmas = np.flip(sigmas).copy()
406
414
  sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
@@ -14,7 +14,7 @@
14
14
 
15
15
  import math
16
16
  from dataclasses import dataclass
17
- from typing import Optional, Tuple, Union
17
+ from typing import List, Optional, Tuple, Union
18
18
 
19
19
  import torch
20
20
 
@@ -77,6 +77,9 @@ class EDMEulerScheduler(SchedulerMixin, ConfigMixin):
77
77
  Video](https://imagen.research.google/video/paper.pdf) paper).
78
78
  rho (`float`, *optional*, defaults to 7.0):
79
79
  The rho parameter used for calculating the Karras sigma schedule, which is set to 7.0 in the EDM paper [1].
80
+ final_sigmas_type (`str`, defaults to `"zero"`):
81
+ The final `sigma` value for the noise schedule during the sampling process. If `"sigma_min"`, the final
82
+ sigma is the same as the last sigma in the training schedule. If `zero`, the final sigma is set to 0.
80
83
  """
81
84
 
82
85
  _compatibles = []
@@ -92,6 +95,7 @@ class EDMEulerScheduler(SchedulerMixin, ConfigMixin):
92
95
  num_train_timesteps: int = 1000,
93
96
  prediction_type: str = "epsilon",
94
97
  rho: float = 7.0,
98
+ final_sigmas_type: str = "zero", # can be "zero" or "sigma_min"
95
99
  ):
96
100
  if sigma_schedule not in ["karras", "exponential"]:
97
101
  raise ValueError(f"Wrong value for provided for `{sigma_schedule=}`.`")
@@ -99,15 +103,24 @@ class EDMEulerScheduler(SchedulerMixin, ConfigMixin):
99
103
  # setable values
100
104
  self.num_inference_steps = None
101
105
 
102
- ramp = torch.linspace(0, 1, num_train_timesteps)
106
+ sigmas = torch.arange(num_train_timesteps + 1) / num_train_timesteps
103
107
  if sigma_schedule == "karras":
104
- sigmas = self._compute_karras_sigmas(ramp)
108
+ sigmas = self._compute_karras_sigmas(sigmas)
105
109
  elif sigma_schedule == "exponential":
106
- sigmas = self._compute_exponential_sigmas(ramp)
110
+ sigmas = self._compute_exponential_sigmas(sigmas)
107
111
 
108
112
  self.timesteps = self.precondition_noise(sigmas)
109
113
 
110
- self.sigmas = torch.cat([sigmas, torch.zeros(1, device=sigmas.device)])
114
+ if self.config.final_sigmas_type == "sigma_min":
115
+ sigma_last = sigmas[-1]
116
+ elif self.config.final_sigmas_type == "zero":
117
+ sigma_last = 0
118
+ else:
119
+ raise ValueError(
120
+ f"`final_sigmas_type` must be one of 'zero', or 'sigma_min', but got {self.config.final_sigmas_type}"
121
+ )
122
+
123
+ self.sigmas = torch.cat([sigmas, torch.full((1,), fill_value=sigma_last, device=sigmas.device)])
111
124
 
112
125
  self.is_scale_input_called = False
113
126
 
@@ -197,7 +210,12 @@ class EDMEulerScheduler(SchedulerMixin, ConfigMixin):
197
210
  self.is_scale_input_called = True
198
211
  return sample
199
212
 
200
- def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
213
+ def set_timesteps(
214
+ self,
215
+ num_inference_steps: int = None,
216
+ device: Union[str, torch.device] = None,
217
+ sigmas: Optional[Union[torch.Tensor, List[float]]] = None,
218
+ ):
201
219
  """
202
220
  Sets the discrete timesteps used for the diffusion chain (to be run before inference).
203
221
 
@@ -206,19 +224,36 @@ class EDMEulerScheduler(SchedulerMixin, ConfigMixin):
206
224
  The number of diffusion steps used when generating samples with a pre-trained model.
207
225
  device (`str` or `torch.device`, *optional*):
208
226
  The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
227
+ sigmas (`Union[torch.Tensor, List[float]]`, *optional*):
228
+ Custom sigmas to use for the denoising process. If not defined, the default behavior when
229
+ `num_inference_steps` is passed will be used.
209
230
  """
210
231
  self.num_inference_steps = num_inference_steps
211
232
 
212
- ramp = torch.linspace(0, 1, self.num_inference_steps)
233
+ if sigmas is None:
234
+ sigmas = torch.linspace(0, 1, self.num_inference_steps)
235
+ elif isinstance(sigmas, float):
236
+ sigmas = torch.tensor(sigmas, dtype=torch.float32)
237
+ else:
238
+ sigmas = sigmas
213
239
  if self.config.sigma_schedule == "karras":
214
- sigmas = self._compute_karras_sigmas(ramp)
240
+ sigmas = self._compute_karras_sigmas(sigmas)
215
241
  elif self.config.sigma_schedule == "exponential":
216
- sigmas = self._compute_exponential_sigmas(ramp)
242
+ sigmas = self._compute_exponential_sigmas(sigmas)
217
243
 
218
244
  sigmas = sigmas.to(dtype=torch.float32, device=device)
219
245
  self.timesteps = self.precondition_noise(sigmas)
220
246
 
221
- self.sigmas = torch.cat([sigmas, torch.zeros(1, device=sigmas.device)])
247
+ if self.config.final_sigmas_type == "sigma_min":
248
+ sigma_last = sigmas[-1]
249
+ elif self.config.final_sigmas_type == "zero":
250
+ sigma_last = 0
251
+ else:
252
+ raise ValueError(
253
+ f"`final_sigmas_type` must be one of 'zero', or 'sigma_min', but got {self.config.final_sigmas_type}"
254
+ )
255
+
256
+ self.sigmas = torch.cat([sigmas, torch.full((1,), fill_value=sigma_last, device=sigmas.device)])
222
257
  self._step_index = None
223
258
  self._begin_index = None
224
259
  self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication