diffusers 0.34.0__py3-none-any.whl → 0.35.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- diffusers/__init__.py +98 -1
- diffusers/callbacks.py +35 -0
- diffusers/commands/custom_blocks.py +134 -0
- diffusers/commands/diffusers_cli.py +2 -0
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/configuration_utils.py +11 -2
- diffusers/dependency_versions_table.py +3 -3
- diffusers/guiders/__init__.py +41 -0
- diffusers/guiders/adaptive_projected_guidance.py +188 -0
- diffusers/guiders/auto_guidance.py +190 -0
- diffusers/guiders/classifier_free_guidance.py +141 -0
- diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
- diffusers/guiders/frequency_decoupled_guidance.py +327 -0
- diffusers/guiders/guider_utils.py +309 -0
- diffusers/guiders/perturbed_attention_guidance.py +271 -0
- diffusers/guiders/skip_layer_guidance.py +262 -0
- diffusers/guiders/smoothed_energy_guidance.py +251 -0
- diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
- diffusers/hooks/__init__.py +17 -0
- diffusers/hooks/_common.py +56 -0
- diffusers/hooks/_helpers.py +293 -0
- diffusers/hooks/faster_cache.py +7 -6
- diffusers/hooks/first_block_cache.py +259 -0
- diffusers/hooks/group_offloading.py +292 -286
- diffusers/hooks/hooks.py +56 -1
- diffusers/hooks/layer_skip.py +263 -0
- diffusers/hooks/layerwise_casting.py +2 -7
- diffusers/hooks/pyramid_attention_broadcast.py +14 -11
- diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
- diffusers/hooks/utils.py +43 -0
- diffusers/loaders/__init__.py +6 -0
- diffusers/loaders/ip_adapter.py +255 -4
- diffusers/loaders/lora_base.py +63 -30
- diffusers/loaders/lora_conversion_utils.py +434 -53
- diffusers/loaders/lora_pipeline.py +834 -37
- diffusers/loaders/peft.py +28 -5
- diffusers/loaders/single_file_model.py +44 -11
- diffusers/loaders/single_file_utils.py +170 -2
- diffusers/loaders/transformer_flux.py +9 -10
- diffusers/loaders/transformer_sd3.py +6 -1
- diffusers/loaders/unet.py +22 -5
- diffusers/loaders/unet_loader_utils.py +5 -2
- diffusers/models/__init__.py +8 -0
- diffusers/models/attention.py +484 -3
- diffusers/models/attention_dispatch.py +1218 -0
- diffusers/models/attention_processor.py +105 -663
- diffusers/models/auto_model.py +2 -2
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_dc.py +14 -1
- diffusers/models/autoencoders/autoencoder_kl.py +1 -1
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +3 -1
- diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
- diffusers/models/autoencoders/autoencoder_kl_wan.py +370 -40
- diffusers/models/cache_utils.py +31 -9
- diffusers/models/controlnets/controlnet_flux.py +5 -5
- diffusers/models/controlnets/controlnet_union.py +4 -4
- diffusers/models/embeddings.py +26 -34
- diffusers/models/model_loading_utils.py +233 -1
- diffusers/models/modeling_flax_utils.py +1 -2
- diffusers/models/modeling_utils.py +159 -94
- diffusers/models/transformers/__init__.py +2 -0
- diffusers/models/transformers/transformer_chroma.py +16 -117
- diffusers/models/transformers/transformer_cogview4.py +36 -2
- diffusers/models/transformers/transformer_cosmos.py +11 -4
- diffusers/models/transformers/transformer_flux.py +372 -132
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -0
- diffusers/models/transformers/transformer_ltx.py +104 -23
- diffusers/models/transformers/transformer_qwenimage.py +645 -0
- diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
- diffusers/models/transformers/transformer_wan.py +298 -85
- diffusers/models/transformers/transformer_wan_vace.py +15 -21
- diffusers/models/unets/unet_2d_condition.py +2 -1
- diffusers/modular_pipelines/__init__.py +83 -0
- diffusers/modular_pipelines/components_manager.py +1068 -0
- diffusers/modular_pipelines/flux/__init__.py +66 -0
- diffusers/modular_pipelines/flux/before_denoise.py +689 -0
- diffusers/modular_pipelines/flux/decoders.py +109 -0
- diffusers/modular_pipelines/flux/denoise.py +227 -0
- diffusers/modular_pipelines/flux/encoders.py +412 -0
- diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
- diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
- diffusers/modular_pipelines/modular_pipeline.py +2446 -0
- diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
- diffusers/modular_pipelines/node_utils.py +665 -0
- diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
- diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
- diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
- diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
- diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
- diffusers/modular_pipelines/wan/__init__.py +66 -0
- diffusers/modular_pipelines/wan/before_denoise.py +365 -0
- diffusers/modular_pipelines/wan/decoders.py +105 -0
- diffusers/modular_pipelines/wan/denoise.py +261 -0
- diffusers/modular_pipelines/wan/encoders.py +242 -0
- diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
- diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
- diffusers/pipelines/__init__.py +31 -0
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +2 -3
- diffusers/pipelines/auto_pipeline.py +17 -13
- diffusers/pipelines/chroma/pipeline_chroma.py +5 -5
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +5 -5
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +9 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +9 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +10 -9
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +9 -8
- diffusers/pipelines/cogview4/pipeline_cogview4.py +16 -15
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +3 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +212 -93
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +7 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +194 -92
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +3 -1
- diffusers/pipelines/flux/__init__.py +4 -0
- diffusers/pipelines/flux/pipeline_flux.py +34 -26
- diffusers/pipelines/flux/pipeline_flux_control.py +8 -8
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_img2img.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
- diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/flux/pipeline_output.py +6 -4
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +5 -5
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +25 -24
- diffusers/pipelines/ltx/pipeline_ltx.py +13 -12
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +10 -9
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +13 -12
- diffusers/pipelines/mochi/pipeline_mochi.py +9 -8
- diffusers/pipelines/pipeline_flax_utils.py +2 -2
- diffusers/pipelines/pipeline_loading_utils.py +24 -2
- diffusers/pipelines/pipeline_utils.py +22 -15
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +3 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +20 -0
- diffusers/pipelines/qwenimage/__init__.py +55 -0
- diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +5 -5
- diffusers/pipelines/skyreels_v2/__init__.py +59 -0
- diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +2 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +6 -5
- diffusers/pipelines/wan/pipeline_wan.py +78 -20
- diffusers/pipelines/wan/pipeline_wan_i2v.py +112 -32
- diffusers/pipelines/wan/pipeline_wan_vace.py +1 -2
- diffusers/quantizers/__init__.py +1 -177
- diffusers/quantizers/base.py +11 -0
- diffusers/quantizers/gguf/utils.py +92 -3
- diffusers/quantizers/pipe_quant_config.py +202 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +26 -0
- diffusers/schedulers/scheduling_deis_multistep.py +8 -1
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +6 -0
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +6 -0
- diffusers/schedulers/scheduling_scm.py +0 -1
- diffusers/schedulers/scheduling_unipc_multistep.py +10 -1
- diffusers/schedulers/scheduling_utils.py +2 -2
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/training_utils.py +78 -0
- diffusers/utils/__init__.py +10 -0
- diffusers/utils/constants.py +4 -0
- diffusers/utils/dummy_pt_objects.py +312 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +255 -0
- diffusers/utils/dynamic_modules_utils.py +84 -25
- diffusers/utils/hub_utils.py +33 -17
- diffusers/utils/import_utils.py +70 -0
- diffusers/utils/peft_utils.py +11 -8
- diffusers/utils/testing_utils.py +136 -10
- diffusers/utils/torch_utils.py +18 -0
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/METADATA +6 -6
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/RECORD +191 -127
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/WHEEL +0 -0
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
diffusers/quantizers/__init__.py
CHANGED
@@ -12,183 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import inspect
-from typing import Dict, List, Optional, Union
 
-from ..utils import is_transformers_available, logging
 from .auto import DiffusersAutoQuantizer
 from .base import DiffusersQuantizer
-from .quantization_config import QuantizationConfigMixin as DiffQuantConfigMixin
-
-
-try:
-    from transformers.utils.quantization_config import QuantizationConfigMixin as TransformersQuantConfigMixin
-except ImportError:
-
-    class TransformersQuantConfigMixin:
-        pass
-
-
-logger = logging.get_logger(__name__)
-
-
-class PipelineQuantizationConfig:
-    """
-    Configuration class to be used when applying quantization on-the-fly to [`~DiffusionPipeline.from_pretrained`].
-
-    Args:
-        quant_backend (`str`): Quantization backend to be used. When using this option, we assume that the backend
-            is available to both `diffusers` and `transformers`.
-        quant_kwargs (`dict`): Params to initialize the quantization backend class.
-        components_to_quantize (`list`): Components of a pipeline to be quantized.
-        quant_mapping (`dict`): Mapping defining the quantization specs to be used for the pipeline
-            components. When using this argument, users are not expected to provide `quant_backend`, `quant_kwargs`,
-            and `components_to_quantize`.
-    """
-
-    def __init__(
-        self,
-        quant_backend: str = None,
-        quant_kwargs: Dict[str, Union[str, float, int, dict]] = None,
-        components_to_quantize: Optional[List[str]] = None,
-        quant_mapping: Dict[str, Union[DiffQuantConfigMixin, "TransformersQuantConfigMixin"]] = None,
-    ):
-        self.quant_backend = quant_backend
-        # Initialize kwargs to be {} to set to the defaults.
-        self.quant_kwargs = quant_kwargs or {}
-        self.components_to_quantize = components_to_quantize
-        self.quant_mapping = quant_mapping
-
-        self.post_init()
-
-    def post_init(self):
-        quant_mapping = self.quant_mapping
-        self.is_granular = True if quant_mapping is not None else False
-
-        self._validate_init_args()
-
-    def _validate_init_args(self):
-        if self.quant_backend and self.quant_mapping:
-            raise ValueError("Both `quant_backend` and `quant_mapping` cannot be specified at the same time.")
-
-        if not self.quant_mapping and not self.quant_backend:
-            raise ValueError("Must provide a `quant_backend` when not providing a `quant_mapping`.")
-
-        if not self.quant_kwargs and not self.quant_mapping:
-            raise ValueError("Both `quant_kwargs` and `quant_mapping` cannot be None.")
-
-        if self.quant_backend is not None:
-            self._validate_init_kwargs_in_backends()
-
-        if self.quant_mapping is not None:
-            self._validate_quant_mapping_args()
-
-    def _validate_init_kwargs_in_backends(self):
-        quant_backend = self.quant_backend
-
-        self._check_backend_availability(quant_backend)
-
-        quant_config_mapping_transformers, quant_config_mapping_diffusers = self._get_quant_config_list()
-
-        if quant_config_mapping_transformers is not None:
-            init_kwargs_transformers = inspect.signature(quant_config_mapping_transformers[quant_backend].__init__)
-            init_kwargs_transformers = {name for name in init_kwargs_transformers.parameters if name != "self"}
-        else:
-            init_kwargs_transformers = None
-
-        init_kwargs_diffusers = inspect.signature(quant_config_mapping_diffusers[quant_backend].__init__)
-        init_kwargs_diffusers = {name for name in init_kwargs_diffusers.parameters if name != "self"}
-
-        if init_kwargs_transformers != init_kwargs_diffusers:
-            raise ValueError(
-                "The signatures of the __init__ methods of the quantization config classes in `diffusers` and `transformers` don't match. "
-                f"Please provide a `quant_mapping` instead, in the {self.__class__.__name__} class. Refer to [the docs](https://huggingface.co/docs/diffusers/main/en/quantization/overview#pipeline-level-quantization) to learn more about how "
-                "this mapping would look like."
-            )
-
-    def _validate_quant_mapping_args(self):
-        quant_mapping = self.quant_mapping
-        transformers_map, diffusers_map = self._get_quant_config_list()
-
-        available_transformers = list(transformers_map.values()) if transformers_map else None
-        available_diffusers = list(diffusers_map.values())
-
-        for module_name, config in quant_mapping.items():
-            if any(isinstance(config, cfg) for cfg in available_diffusers):
-                continue
-
-            if available_transformers and any(isinstance(config, cfg) for cfg in available_transformers):
-                continue
-
-            if available_transformers:
-                raise ValueError(
-                    f"Provided config for module_name={module_name} could not be found. "
-                    f"Available diffusers configs: {available_diffusers}; "
-                    f"Available transformers configs: {available_transformers}."
-                )
-            else:
-                raise ValueError(
-                    f"Provided config for module_name={module_name} could not be found. "
-                    f"Available diffusers configs: {available_diffusers}."
-                )
-
-    def _check_backend_availability(self, quant_backend: str):
-        quant_config_mapping_transformers, quant_config_mapping_diffusers = self._get_quant_config_list()
-
-        available_backends_transformers = (
-            list(quant_config_mapping_transformers.keys()) if quant_config_mapping_transformers else None
-        )
-        available_backends_diffusers = list(quant_config_mapping_diffusers.keys())
-
-        if (
-            available_backends_transformers and quant_backend not in available_backends_transformers
-        ) or quant_backend not in quant_config_mapping_diffusers:
-            error_message = f"Provided quant_backend={quant_backend} was not found."
-            if available_backends_transformers:
-                error_message += f"\nAvailable ones (transformers): {available_backends_transformers}."
-            error_message += f"\nAvailable ones (diffusers): {available_backends_diffusers}."
-            raise ValueError(error_message)
-
-    def _resolve_quant_config(self, is_diffusers: bool = True, module_name: str = None):
-        quant_config_mapping_transformers, quant_config_mapping_diffusers = self._get_quant_config_list()
-
-        quant_mapping = self.quant_mapping
-        components_to_quantize = self.components_to_quantize
-
-        # Granular case
-        if self.is_granular and module_name in quant_mapping:
-            logger.debug(f"Initializing quantization config class for {module_name}.")
-            config = quant_mapping[module_name]
-            return config
-
-        # Global config case
-        else:
-            should_quantize = False
-            # Only quantize the modules requested for.
-            if components_to_quantize and module_name in components_to_quantize:
-                should_quantize = True
-            # No specification for `components_to_quantize` means all modules should be quantized.
-            elif not self.is_granular and not components_to_quantize:
-                should_quantize = True
-
-            if should_quantize:
-                logger.debug(f"Initializing quantization config class for {module_name}.")
-                mapping_to_use = quant_config_mapping_diffusers if is_diffusers else quant_config_mapping_transformers
-                quant_config_cls = mapping_to_use[self.quant_backend]
-                quant_kwargs = self.quant_kwargs
-                return quant_config_cls(**quant_kwargs)
-
-        # Fallback: no applicable configuration found.
-        return None
-
-    def _get_quant_config_list(self):
-        if is_transformers_available():
-            from transformers.quantizers.auto import (
-                AUTO_QUANTIZATION_CONFIG_MAPPING as quant_config_mapping_transformers,
-            )
-        else:
-            quant_config_mapping_transformers = None
-
-        from ..quantizers.auto import AUTO_QUANTIZATION_CONFIG_MAPPING as quant_config_mapping_diffusers
-
-        return quant_config_mapping_transformers, quant_config_mapping_diffusers
+from .pipe_quant_config import PipelineQuantizationConfig
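The public entry point is unchanged: `PipelineQuantizationConfig` is still importable from `diffusers.quantizers`, it just lives in the new `pipe_quant_config` module. A minimal usage sketch (not part of the diff; the model id, backend name, and component names are illustrative, and the bitsandbytes backend must be installed):

import torch
from diffusers import DiffusionPipeline
from diffusers.quantizers import PipelineQuantizationConfig

# Global case: one backend config applied to every listed pipeline component.
pipeline_quant_config = PipelineQuantizationConfig(
    quant_backend="bitsandbytes_4bit",
    quant_kwargs={"load_in_4bit": True, "bnb_4bit_compute_dtype": torch.bfloat16},
    components_to_quantize=["transformer", "text_encoder_2"],
)
pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    quantization_config=pipeline_quant_config,
    torch_dtype=torch.bfloat16,
)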
diffusers/quantizers/base.py
CHANGED
@@ -209,6 +209,17 @@ class DiffusersQuantizer(ABC):
 
         return model
 
+    def get_cuda_warm_up_factor(self):
+        """
+        The factor to be used in `caching_allocator_warmup` to get the number of bytes to pre-allocate to warm up cuda.
+        A factor of 2 means we allocate all bytes in the empty model (since we allocate in fp16), a factor of 4 means
+        we allocate half the memory of the weights residing in the empty model, etc...
+        """
+        # By default we return 4, i.e. half the model size (this corresponds to the case where the model is not
+        # really pre-processed, i.e. we do not have the info that weights are going to be 8 bits before actual
+        # weight loading)
+        return 4
+
     def _dequantize(self, model):
         raise NotImplementedError(
             f"{self.quantization_config.quant_method} has no implementation of `dequantize`, please raise an issue on GitHub."
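Quantizer subclasses that know the target bit-width before weights load can override the hook to warm up less memory. A sketch with a hypothetical 4-bit quantizer (not from the diff; only the override is shown):

from diffusers.quantizers.base import DiffusersQuantizer

class HypotheticalFourBitQuantizer(DiffusersQuantizer):
    def get_cuda_warm_up_factor(self):
        # 4-bit weights occupy a quarter of the fp16 footprint, so pre-allocate
        # a quarter of what factor 2 (the full fp16 footprint) would.
        return 8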
diffusers/quantizers/gguf/utils.py
CHANGED
@@ -12,15 +12,15 @@
 # # See the License for the specific language governing permissions and
 # # limitations under the License.
 
-
 import inspect
+import os
 from contextlib import nullcontext
 
 import gguf
 import torch
 import torch.nn as nn
 
-from ...utils import is_accelerate_available
+from ...utils import is_accelerate_available, is_kernels_available
 
 
 if is_accelerate_available():
@@ -29,6 +29,82 @@ if is_accelerate_available():
     from accelerate.hooks import add_hook_to_module, remove_hook_from_module
 
 
+can_use_cuda_kernels = (
+    os.getenv("DIFFUSERS_GGUF_CUDA_KERNELS", "false").lower() in ["1", "true", "yes"]
+    and torch.cuda.is_available()
+    and torch.cuda.get_device_capability()[0] >= 7
+)
+if can_use_cuda_kernels and is_kernels_available():
+    from kernels import get_kernel
+
+    ops = get_kernel("Isotr0py/ggml")
+else:
+    ops = None
+
+UNQUANTIZED_TYPES = {gguf.GGMLQuantizationType.F32, gguf.GGMLQuantizationType.F16, gguf.GGMLQuantizationType.BF16}
+STANDARD_QUANT_TYPES = {
+    gguf.GGMLQuantizationType.Q4_0,
+    gguf.GGMLQuantizationType.Q4_1,
+    gguf.GGMLQuantizationType.Q5_0,
+    gguf.GGMLQuantizationType.Q5_1,
+    gguf.GGMLQuantizationType.Q8_0,
+    gguf.GGMLQuantizationType.Q8_1,
+}
+KQUANT_TYPES = {
+    gguf.GGMLQuantizationType.Q2_K,
+    gguf.GGMLQuantizationType.Q3_K,
+    gguf.GGMLQuantizationType.Q4_K,
+    gguf.GGMLQuantizationType.Q5_K,
+    gguf.GGMLQuantizationType.Q6_K,
+}
+IMATRIX_QUANT_TYPES = {
+    gguf.GGMLQuantizationType.IQ1_M,
+    gguf.GGMLQuantizationType.IQ1_S,
+    gguf.GGMLQuantizationType.IQ2_XXS,
+    gguf.GGMLQuantizationType.IQ2_XS,
+    gguf.GGMLQuantizationType.IQ2_S,
+    gguf.GGMLQuantizationType.IQ3_XXS,
+    gguf.GGMLQuantizationType.IQ3_S,
+    gguf.GGMLQuantizationType.IQ4_XS,
+    gguf.GGMLQuantizationType.IQ4_NL,
+}
+# TODO(Isotr0py): Currently, we don't have MMQ kernel for I-Matrix quantization.
+# Consolidate DEQUANT_TYPES, MMVQ_QUANT_TYPES and MMQ_QUANT_TYPES after we add
+# MMQ kernel for I-Matrix quantization.
+DEQUANT_TYPES = STANDARD_QUANT_TYPES | KQUANT_TYPES | IMATRIX_QUANT_TYPES
+MMVQ_QUANT_TYPES = STANDARD_QUANT_TYPES | KQUANT_TYPES | IMATRIX_QUANT_TYPES
+MMQ_QUANT_TYPES = STANDARD_QUANT_TYPES | KQUANT_TYPES
+
+
+def _fused_mul_mat_gguf(x: torch.Tensor, qweight: torch.Tensor, qweight_type: int) -> torch.Tensor:
+    # there is no need to call any kernel for fp16/bf16
+    if qweight_type in UNQUANTIZED_TYPES:
+        return x @ qweight.T
+
+    # TODO(Isotr0py): GGUF's MMQ and MMVQ implementation are designed for
+    # contiguous batching and inefficient with diffusers' batching,
+    # so we disabled it now.
+
+    # elif qweight_type in MMVQ_QUANT_TYPES:
+    #     y = ops.ggml_mul_mat_vec_a8(qweight, x, qweight_type, qweight.shape[0])
+    # elif qweight_type in MMQ_QUANT_TYPES:
+    #     y = ops.ggml_mul_mat_a8(qweight, x, qweight_type, qweight.shape[0])
+
+    # If there is no available MMQ kernel, fallback to dequantize
+    if qweight_type in DEQUANT_TYPES:
+        block_size, type_size = gguf.GGML_QUANT_SIZES[qweight_type]
+        shape = (qweight.shape[0], qweight.shape[1] // type_size * block_size)
+        weight = ops.ggml_dequantize(qweight, qweight_type, *shape)
+        y = x @ weight.to(x.dtype).T
+    else:
+        # Raise an error if the quantization type is not supported.
+        # Might be useful if llama.cpp adds a new quantization type.
+        # Wrap to GGMLQuantizationType IntEnum to make sure it's a valid type.
+        qweight_type = gguf.GGMLQuantizationType(qweight_type)
+        raise NotImplementedError(f"Unsupported GGUF quantization type: {qweight_type}")
+    return y.as_tensor()
+
+
 # Copied from diffusers.quantizers.bitsandbytes.utils._create_accelerate_new_hook
 def _create_accelerate_new_hook(old_hook):
     r"""
@@ -451,11 +527,24 @@ class GGUFLinear(nn.Linear):
     ) -> None:
        super().__init__(in_features, out_features, bias, device)
        self.compute_dtype = compute_dtype
+        self.device = device
+
+    def forward(self, inputs: torch.Tensor):
+        if ops is not None and self.weight.is_cuda and inputs.is_cuda:
+            return self.forward_cuda(inputs)
+        return self.forward_native(inputs)
 
-    def forward(self, inputs):
+    def forward_native(self, inputs: torch.Tensor):
         weight = dequantize_gguf_tensor(self.weight)
         weight = weight.to(self.compute_dtype)
         bias = self.bias.to(self.compute_dtype) if self.bias is not None else None
 
         output = torch.nn.functional.linear(inputs, weight, bias)
         return output
+
+    def forward_cuda(self, inputs: torch.Tensor):
+        quant_type = self.weight.quant_type
+        output = _fused_mul_mat_gguf(inputs.to(self.compute_dtype), self.weight, quant_type)
+        if self.bias is not None:
+            output += self.bias.to(self.compute_dtype)
+        return output
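The fused path is opt-in: it requires the DIFFUSERS_GGUF_CUDA_KERNELS environment variable, the `kernels` package, and a GPU with compute capability >= 7.0, and otherwise `forward` falls back to `forward_native`. A usage sketch (not part of the diff; the checkpoint URL is illustrative, and the variable must be set before the module is imported because `can_use_cuda_kernels` is evaluated at import time):

import os

os.environ["DIFFUSERS_GGUF_CUDA_KERNELS"] = "1"  # must precede the diffusers import

import torch
from diffusers import FluxTransformer2DModel, GGUFQuantizationConfig

# Any dequant-supported type (Q4_K, Q8_0, ...) should hit the fused CUDA path.
transformer = FluxTransformer2DModel.from_single_file(
    "https://huggingface.co/city96/FLUX.1-dev-gguf/blob/main/flux1-dev-Q4_K_S.gguf",
    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
)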
diffusers/quantizers/pipe_quant_config.py
ADDED
@@ -0,0 +1,202 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import inspect
+from typing import Dict, List, Optional, Union
+
+from ..utils import is_transformers_available, logging
+from .quantization_config import QuantizationConfigMixin as DiffQuantConfigMixin
+
+
+try:
+    from transformers.utils.quantization_config import QuantizationConfigMixin as TransformersQuantConfigMixin
+except ImportError:
+
+    class TransformersQuantConfigMixin:
+        pass
+
+
+logger = logging.get_logger(__name__)
+
+
+class PipelineQuantizationConfig:
+    """
+    Configuration class to be used when applying quantization on-the-fly to [`~DiffusionPipeline.from_pretrained`].
+
+    Args:
+        quant_backend (`str`): Quantization backend to be used. When using this option, we assume that the backend
+            is available to both `diffusers` and `transformers`.
+        quant_kwargs (`dict`): Params to initialize the quantization backend class.
+        components_to_quantize (`list`): Components of a pipeline to be quantized.
+        quant_mapping (`dict`): Mapping defining the quantization specs to be used for the pipeline
+            components. When using this argument, users are not expected to provide `quant_backend`, `quant_kwargs`,
+            and `components_to_quantize`.
+    """
+
+    def __init__(
+        self,
+        quant_backend: str = None,
+        quant_kwargs: Dict[str, Union[str, float, int, dict]] = None,
+        components_to_quantize: Optional[List[str]] = None,
+        quant_mapping: Dict[str, Union[DiffQuantConfigMixin, "TransformersQuantConfigMixin"]] = None,
+    ):
+        self.quant_backend = quant_backend
+        # Initialize kwargs to be {} to set to the defaults.
+        self.quant_kwargs = quant_kwargs or {}
+        self.components_to_quantize = components_to_quantize
+        self.quant_mapping = quant_mapping
+        self.config_mapping = {}  # book-keeping Example: `{module_name: quant_config}`
+        self.post_init()
+
+    def post_init(self):
+        quant_mapping = self.quant_mapping
+        self.is_granular = True if quant_mapping is not None else False
+
+        self._validate_init_args()
+
+    def _validate_init_args(self):
+        if self.quant_backend and self.quant_mapping:
+            raise ValueError("Both `quant_backend` and `quant_mapping` cannot be specified at the same time.")
+
+        if not self.quant_mapping and not self.quant_backend:
+            raise ValueError("Must provide a `quant_backend` when not providing a `quant_mapping`.")
+
+        if not self.quant_kwargs and not self.quant_mapping:
+            raise ValueError("Both `quant_kwargs` and `quant_mapping` cannot be None.")
+
+        if self.quant_backend is not None:
+            self._validate_init_kwargs_in_backends()
+
+        if self.quant_mapping is not None:
+            self._validate_quant_mapping_args()
+
+    def _validate_init_kwargs_in_backends(self):
+        quant_backend = self.quant_backend
+
+        self._check_backend_availability(quant_backend)
+
+        quant_config_mapping_transformers, quant_config_mapping_diffusers = self._get_quant_config_list()
+
+        if quant_config_mapping_transformers is not None:
+            init_kwargs_transformers = inspect.signature(quant_config_mapping_transformers[quant_backend].__init__)
+            init_kwargs_transformers = {name for name in init_kwargs_transformers.parameters if name != "self"}
+        else:
+            init_kwargs_transformers = None
+
+        init_kwargs_diffusers = inspect.signature(quant_config_mapping_diffusers[quant_backend].__init__)
+        init_kwargs_diffusers = {name for name in init_kwargs_diffusers.parameters if name != "self"}
+
+        if init_kwargs_transformers != init_kwargs_diffusers:
+            raise ValueError(
+                "The signatures of the __init__ methods of the quantization config classes in `diffusers` and `transformers` don't match. "
+                f"Please provide a `quant_mapping` instead, in the {self.__class__.__name__} class. Refer to [the docs](https://huggingface.co/docs/diffusers/main/en/quantization/overview#pipeline-level-quantization) to learn more about how "
+                "this mapping would look like."
+            )
+
+    def _validate_quant_mapping_args(self):
+        quant_mapping = self.quant_mapping
+        transformers_map, diffusers_map = self._get_quant_config_list()
+
+        available_transformers = list(transformers_map.values()) if transformers_map else None
+        available_diffusers = list(diffusers_map.values())
+
+        for module_name, config in quant_mapping.items():
+            if any(isinstance(config, cfg) for cfg in available_diffusers):
+                continue
+
+            if available_transformers and any(isinstance(config, cfg) for cfg in available_transformers):
+                continue
+
+            if available_transformers:
+                raise ValueError(
+                    f"Provided config for module_name={module_name} could not be found. "
+                    f"Available diffusers configs: {available_diffusers}; "
+                    f"Available transformers configs: {available_transformers}."
+                )
+            else:
+                raise ValueError(
+                    f"Provided config for module_name={module_name} could not be found. "
+                    f"Available diffusers configs: {available_diffusers}."
+                )
+
+    def _check_backend_availability(self, quant_backend: str):
+        quant_config_mapping_transformers, quant_config_mapping_diffusers = self._get_quant_config_list()
+
+        available_backends_transformers = (
+            list(quant_config_mapping_transformers.keys()) if quant_config_mapping_transformers else None
+        )
+        available_backends_diffusers = list(quant_config_mapping_diffusers.keys())
+
+        if (
+            available_backends_transformers and quant_backend not in available_backends_transformers
+        ) or quant_backend not in quant_config_mapping_diffusers:
+            error_message = f"Provided quant_backend={quant_backend} was not found."
+            if available_backends_transformers:
+                error_message += f"\nAvailable ones (transformers): {available_backends_transformers}."
+            error_message += f"\nAvailable ones (diffusers): {available_backends_diffusers}."
+            raise ValueError(error_message)
+
+    def _resolve_quant_config(self, is_diffusers: bool = True, module_name: str = None):
+        quant_config_mapping_transformers, quant_config_mapping_diffusers = self._get_quant_config_list()
+
+        quant_mapping = self.quant_mapping
+        components_to_quantize = self.components_to_quantize
+
+        # Granular case
+        if self.is_granular and module_name in quant_mapping:
+            logger.debug(f"Initializing quantization config class for {module_name}.")
+            config = quant_mapping[module_name]
+            self.config_mapping.update({module_name: config})
+            return config
+
+        # Global config case
+        else:
+            should_quantize = False
+            # Only quantize the modules requested for.
+            if components_to_quantize and module_name in components_to_quantize:
+                should_quantize = True
+            # No specification for `components_to_quantize` means all modules should be quantized.
+            elif not self.is_granular and not components_to_quantize:
+                should_quantize = True
+
+            if should_quantize:
+                logger.debug(f"Initializing quantization config class for {module_name}.")
+                mapping_to_use = quant_config_mapping_diffusers if is_diffusers else quant_config_mapping_transformers
+                quant_config_cls = mapping_to_use[self.quant_backend]
+                quant_kwargs = self.quant_kwargs
+                quant_obj = quant_config_cls(**quant_kwargs)
+                self.config_mapping.update({module_name: quant_obj})
+                return quant_obj
+
+        # Fallback: no applicable configuration found.
+        return None
+
+    def _get_quant_config_list(self):
+        if is_transformers_available():
+            from transformers.quantizers.auto import (
+                AUTO_QUANTIZATION_CONFIG_MAPPING as quant_config_mapping_transformers,
+            )
+        else:
+            quant_config_mapping_transformers = None
+
+        from ..quantizers.auto import AUTO_QUANTIZATION_CONFIG_MAPPING as quant_config_mapping_diffusers
+
+        return quant_config_mapping_transformers, quant_config_mapping_diffusers
+
+    def __repr__(self):
+        out = ""
+        config_mapping = dict(sorted(self.config_mapping.copy().items()))
+        for module_name, config in config_mapping.items():
+            out += f"{module_name} {config}"
+        return out
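Relative to the copy removed from `__init__.py`, the class gains the `config_mapping` book-keeping in `_resolve_quant_config` and a `__repr__` that reports it. A granular `quant_mapping` sketch (not part of the diff; the component names are illustrative):

import torch
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
from diffusers.quantizers import PipelineQuantizationConfig
from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig

# Per-component configs; `quant_backend`/`quant_kwargs` must be omitted in this mode.
pipeline_quant_config = PipelineQuantizationConfig(
    quant_mapping={
        "transformer": DiffusersBitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16),
        "text_encoder_2": TransformersBitsAndBytesConfig(load_in_8bit=True),
    }
)
# Once a pipeline resolves its components during loading, config_mapping records
# which config was applied where, and printing the object shows it via __repr__.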
diffusers/quantizers/torchao/torchao_quantizer.py
CHANGED
@@ -19,6 +19,7 @@ https://github.com/huggingface/transformers/blob/3a8eb74668e9c2cc563b2f5c62fac17
 
 import importlib
 import types
+from fnmatch import fnmatch
 from typing import TYPE_CHECKING, Any, Dict, List, Union
 
 from packaging import version
@@ -278,6 +279,31 @@ class TorchAoHfQuantizer(DiffusersQuantizer):
             module._parameters[tensor_name] = torch.nn.Parameter(param_value).to(device=target_device)
             quantize_(module, self.quantization_config.get_apply_tensor_subclass())
 
+    def get_cuda_warm_up_factor(self):
+        """
+        This factor is used in caching_allocator_warmup to determine how many bytes to pre-allocate for CUDA warmup.
+        - A factor of 2 means we pre-allocate the full memory footprint of the model.
+        - A factor of 4 means we pre-allocate half of that, and so on
+
+        However, when using TorchAO, calculating memory usage with param.numel() * param.element_size() doesn't give
+        the correct size for quantized weights (like int4 or int8). That's because TorchAO internally represents
+        quantized tensors using subtensors and metadata, and the reported element_size() still corresponds to the
+        torch_dtype not the actual bit-width of the quantized data.
+
+        To correct for this:
+        - Use a division factor of 8 for int4 weights
+        - Use a division factor of 4 for int8 weights
+        """
+        # Original mapping for non-AOBaseConfig types
+        # For the uint types, this is a best guess. Once these types become more used
+        # we can look into their nuances.
+        map_to_target_dtype = {"int4_*": 8, "int8_*": 4, "uint*": 8, "float8*": 4}
+        quant_type = self.quantization_config.quant_type
+        for pattern, target_dtype in map_to_target_dtype.items():
+            if fnmatch(quant_type, pattern):
+                return target_dtype
+        raise ValueError(f"Unsupported quant_type: {quant_type!r}")
+
     def _process_model_before_weight_loading(
         self,
         model: "ModelMixin",
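A standalone sketch of how the pattern table resolves (the same `fnmatch` logic as above, pulled out of the class; the quant_type strings follow the usual TorchAO config naming):

from fnmatch import fnmatch

map_to_target_dtype = {"int4_*": 8, "int8_*": 4, "uint*": 8, "float8*": 4}

def warm_up_factor(quant_type: str) -> int:
    # First matching glob pattern wins; unknown types are rejected loudly.
    for pattern, factor in map_to_target_dtype.items():
        if fnmatch(quant_type, pattern):
            return factor
    raise ValueError(f"Unsupported quant_type: {quant_type!r}")

assert warm_up_factor("int4_weight_only") == 8   # 4-bit: warm up a quarter of the fp16 footprint
assert warm_up_factor("int8_weight_only") == 4   # 8-bit: warm up half of it
assert warm_up_factor("float8_weight_only") == 4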
diffusers/schedulers/scheduling_deis_multistep.py
CHANGED
@@ -153,6 +153,8 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
         flow_shift: Optional[float] = 1.0,
         timestep_spacing: str = "linspace",
         steps_offset: int = 0,
+        use_dynamic_shifting: bool = False,
+        time_shift_type: str = "exponential",
     ):
         if self.config.use_beta_sigmas and not is_scipy_available():
             raise ImportError("Make sure to install scipy if you want to use beta sigmas.")
@@ -232,7 +234,9 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
         """
         self._begin_index = begin_index
 
-    def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
+    def set_timesteps(
+        self, num_inference_steps: int, device: Union[str, torch.device] = None, mu: Optional[float] = None
+    ):
         """
         Sets the discrete timesteps used for the diffusion chain (to be run before inference).
 
@@ -242,6 +246,9 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
             device (`str` or `torch.device`, *optional*):
                 The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
         """
+        if mu is not None:
+            assert self.config.use_dynamic_shifting and self.config.time_shift_type == "exponential"
+            self.config.flow_shift = np.exp(mu)
         # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
         if self.config.timestep_spacing == "linspace":
             timesteps = (
diffusers/schedulers/scheduling_dpmsolver_multistep.py
CHANGED
@@ -230,6 +230,8 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
         timestep_spacing: str = "linspace",
         steps_offset: int = 0,
         rescale_betas_zero_snr: bool = False,
+        use_dynamic_shifting: bool = False,
+        time_shift_type: str = "exponential",
     ):
         if self.config.use_beta_sigmas and not is_scipy_available():
             raise ImportError("Make sure to install scipy if you want to use beta sigmas.")
@@ -330,6 +332,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
         self,
         num_inference_steps: int = None,
         device: Union[str, torch.device] = None,
+        mu: Optional[float] = None,
         timesteps: Optional[List[int]] = None,
     ):
         """
@@ -345,6 +348,9 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
                 based on the `timestep_spacing` attribute. If `timesteps` is passed, `num_inference_steps` and `sigmas`
                 must be `None`, and `timestep_spacing` attribute will be ignored.
         """
+        if mu is not None:
+            assert self.config.use_dynamic_shifting and self.config.time_shift_type == "exponential"
+            self.config.flow_shift = np.exp(mu)
         if num_inference_steps is None and timesteps is None:
             raise ValueError("Must pass exactly one of `num_inference_steps` or `timesteps`.")
         if num_inference_steps is not None and timesteps is not None:
diffusers/schedulers/scheduling_dpmsolver_singlestep.py
CHANGED
@@ -169,6 +169,8 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
         final_sigmas_type: Optional[str] = "zero",  # "zero", "sigma_min"
         lambda_min_clipped: float = -float("inf"),
         variance_type: Optional[str] = None,
+        use_dynamic_shifting: bool = False,
+        time_shift_type: str = "exponential",
     ):
         if self.config.use_beta_sigmas and not is_scipy_available():
             raise ImportError("Make sure to install scipy if you want to use beta sigmas.")
@@ -301,6 +303,7 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
         self,
         num_inference_steps: int = None,
         device: Union[str, torch.device] = None,
+        mu: Optional[float] = None,
         timesteps: Optional[List[int]] = None,
     ):
         """
@@ -316,6 +319,9 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
                 timestep spacing strategy of equal spacing between timesteps schedule is used. If `timesteps` is
                 passed, `num_inference_steps` must be `None`.
         """
+        if mu is not None:
+            assert self.config.use_dynamic_shifting and self.config.time_shift_type == "exponential"
+            self.config.flow_shift = np.exp(mu)
         if num_inference_steps is None and timesteps is None:
             raise ValueError("Must pass exactly one of `num_inference_steps` or `timesteps`.")
         if num_inference_steps is not None and timesteps is not None:
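DEIS, DPM-Solver multistep, and DPM-Solver singlestep all gain the same pathway: when `set_timesteps` receives `mu`, the config must have dynamic shifting enabled with the exponential shift type, and `flow_shift` is overwritten with exp(mu). A sketch (not part of the diff; the `mu` value is a placeholder that flow-matching pipelines normally derive from the image resolution, and flow sigmas are assumed since `flow_shift` only applies there):

import numpy as np
from diffusers import DPMSolverMultistepScheduler

scheduler = DPMSolverMultistepScheduler(
    use_flow_sigmas=True,          # flow_shift takes effect with flow sigmas
    use_dynamic_shifting=True,     # new config arg from this diff
    time_shift_type="exponential", # new config arg from this diff
)
# flow_shift becomes np.exp(0.5), roughly 1.65, before the schedule is built.
scheduler.set_timesteps(num_inference_steps=28, mu=0.5)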
diffusers/schedulers/scheduling_scm.py
CHANGED
@@ -168,7 +168,6 @@ class SCMScheduler(SchedulerMixin, ConfigMixin):
         else:
             # max_timesteps=arctan(80/0.5)=1.56454 is the default from sCM paper, we choose a different value here
             self.timesteps = torch.linspace(max_timesteps, 0, num_inference_steps + 1, device=device).float()
-            print(f"Set timesteps: {self.timesteps}")
 
         self._step_index = None
         self._begin_index = None