diffusers 0.31.0__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (214)
  1. diffusers/__init__.py +66 -5
  2. diffusers/callbacks.py +56 -3
  3. diffusers/configuration_utils.py +1 -1
  4. diffusers/dependency_versions_table.py +1 -1
  5. diffusers/image_processor.py +25 -17
  6. diffusers/loaders/__init__.py +22 -3
  7. diffusers/loaders/ip_adapter.py +538 -15
  8. diffusers/loaders/lora_base.py +124 -118
  9. diffusers/loaders/lora_conversion_utils.py +318 -3
  10. diffusers/loaders/lora_pipeline.py +1688 -368
  11. diffusers/loaders/peft.py +379 -0
  12. diffusers/loaders/single_file_model.py +71 -4
  13. diffusers/loaders/single_file_utils.py +519 -9
  14. diffusers/loaders/textual_inversion.py +3 -3
  15. diffusers/loaders/transformer_flux.py +181 -0
  16. diffusers/loaders/transformer_sd3.py +89 -0
  17. diffusers/loaders/unet.py +17 -4
  18. diffusers/models/__init__.py +47 -14
  19. diffusers/models/activations.py +22 -9
  20. diffusers/models/attention.py +13 -4
  21. diffusers/models/attention_flax.py +1 -1
  22. diffusers/models/attention_processor.py +2059 -281
  23. diffusers/models/autoencoders/__init__.py +5 -0
  24. diffusers/models/autoencoders/autoencoder_dc.py +620 -0
  25. diffusers/models/autoencoders/autoencoder_kl.py +2 -1
  26. diffusers/models/autoencoders/autoencoder_kl_allegro.py +1149 -0
  27. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +36 -27
  28. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +1176 -0
  29. diffusers/models/autoencoders/autoencoder_kl_ltx.py +1338 -0
  30. diffusers/models/autoencoders/autoencoder_kl_mochi.py +1166 -0
  31. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +3 -10
  32. diffusers/models/autoencoders/autoencoder_tiny.py +4 -2
  33. diffusers/models/autoencoders/vae.py +18 -5
  34. diffusers/models/controlnet.py +47 -802
  35. diffusers/models/controlnet_flux.py +29 -495
  36. diffusers/models/controlnet_sd3.py +25 -379
  37. diffusers/models/controlnet_sparsectrl.py +46 -718
  38. diffusers/models/controlnets/__init__.py +23 -0
  39. diffusers/models/controlnets/controlnet.py +872 -0
  40. diffusers/models/{controlnet_flax.py → controlnets/controlnet_flax.py} +5 -5
  41. diffusers/models/controlnets/controlnet_flux.py +536 -0
  42. diffusers/models/{controlnet_hunyuan.py → controlnets/controlnet_hunyuan.py} +7 -7
  43. diffusers/models/controlnets/controlnet_sd3.py +489 -0
  44. diffusers/models/controlnets/controlnet_sparsectrl.py +788 -0
  45. diffusers/models/controlnets/controlnet_union.py +832 -0
  46. diffusers/models/{controlnet_xs.py → controlnets/controlnet_xs.py} +14 -13
  47. diffusers/models/controlnets/multicontrolnet.py +183 -0
  48. diffusers/models/embeddings.py +838 -43
  49. diffusers/models/model_loading_utils.py +88 -6
  50. diffusers/models/modeling_flax_utils.py +1 -1
  51. diffusers/models/modeling_utils.py +74 -28
  52. diffusers/models/normalization.py +78 -13
  53. diffusers/models/transformers/__init__.py +5 -0
  54. diffusers/models/transformers/auraflow_transformer_2d.py +2 -2
  55. diffusers/models/transformers/cogvideox_transformer_3d.py +46 -11
  56. diffusers/models/transformers/dit_transformer_2d.py +1 -1
  57. diffusers/models/transformers/latte_transformer_3d.py +4 -4
  58. diffusers/models/transformers/pixart_transformer_2d.py +1 -1
  59. diffusers/models/transformers/sana_transformer.py +488 -0
  60. diffusers/models/transformers/stable_audio_transformer.py +1 -1
  61. diffusers/models/transformers/transformer_2d.py +1 -1
  62. diffusers/models/transformers/transformer_allegro.py +422 -0
  63. diffusers/models/transformers/transformer_cogview3plus.py +1 -1
  64. diffusers/models/transformers/transformer_flux.py +30 -9
  65. diffusers/models/transformers/transformer_hunyuan_video.py +789 -0
  66. diffusers/models/transformers/transformer_ltx.py +469 -0
  67. diffusers/models/transformers/transformer_mochi.py +499 -0
  68. diffusers/models/transformers/transformer_sd3.py +105 -17
  69. diffusers/models/transformers/transformer_temporal.py +1 -1
  70. diffusers/models/unets/unet_1d_blocks.py +1 -1
  71. diffusers/models/unets/unet_2d.py +8 -1
  72. diffusers/models/unets/unet_2d_blocks.py +88 -21
  73. diffusers/models/unets/unet_2d_condition.py +1 -1
  74. diffusers/models/unets/unet_3d_blocks.py +9 -7
  75. diffusers/models/unets/unet_motion_model.py +5 -5
  76. diffusers/models/unets/unet_spatio_temporal_condition.py +23 -0
  77. diffusers/models/unets/unet_stable_cascade.py +2 -2
  78. diffusers/models/unets/uvit_2d.py +1 -1
  79. diffusers/models/upsampling.py +8 -0
  80. diffusers/pipelines/__init__.py +34 -0
  81. diffusers/pipelines/allegro/__init__.py +48 -0
  82. diffusers/pipelines/allegro/pipeline_allegro.py +938 -0
  83. diffusers/pipelines/allegro/pipeline_output.py +23 -0
  84. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +8 -2
  85. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +1 -1
  86. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +0 -6
  87. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +8 -8
  88. diffusers/pipelines/audioldm2/modeling_audioldm2.py +3 -3
  89. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +1 -8
  90. diffusers/pipelines/auto_pipeline.py +53 -6
  91. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  92. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +50 -22
  93. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +51 -20
  94. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +69 -21
  95. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +47 -21
  96. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +1 -1
  97. diffusers/pipelines/controlnet/__init__.py +86 -80
  98. diffusers/pipelines/controlnet/multicontrolnet.py +7 -178
  99. diffusers/pipelines/controlnet/pipeline_controlnet.py +11 -2
  100. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +1 -2
  101. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +1 -2
  102. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +1 -2
  103. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +3 -3
  104. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +1 -3
  105. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +1790 -0
  106. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +1501 -0
  107. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +1627 -0
  108. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +5 -1
  109. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +53 -19
  110. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  111. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +31 -8
  112. diffusers/pipelines/flux/__init__.py +13 -1
  113. diffusers/pipelines/flux/modeling_flux.py +47 -0
  114. diffusers/pipelines/flux/pipeline_flux.py +204 -29
  115. diffusers/pipelines/flux/pipeline_flux_control.py +889 -0
  116. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +945 -0
  117. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1141 -0
  118. diffusers/pipelines/flux/pipeline_flux_controlnet.py +49 -27
  119. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +40 -30
  120. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +78 -56
  121. diffusers/pipelines/flux/pipeline_flux_fill.py +969 -0
  122. diffusers/pipelines/flux/pipeline_flux_img2img.py +33 -27
  123. diffusers/pipelines/flux/pipeline_flux_inpaint.py +36 -29
  124. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +492 -0
  125. diffusers/pipelines/flux/pipeline_output.py +16 -0
  126. diffusers/pipelines/hunyuan_video/__init__.py +48 -0
  127. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +687 -0
  128. diffusers/pipelines/hunyuan_video/pipeline_output.py +20 -0
  129. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +5 -1
  130. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +9 -9
  131. diffusers/pipelines/kolors/text_encoder.py +2 -2
  132. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  133. diffusers/pipelines/ltx/__init__.py +50 -0
  134. diffusers/pipelines/ltx/pipeline_ltx.py +789 -0
  135. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +885 -0
  136. diffusers/pipelines/ltx/pipeline_output.py +20 -0
  137. diffusers/pipelines/lumina/pipeline_lumina.py +1 -8
  138. diffusers/pipelines/mochi/__init__.py +48 -0
  139. diffusers/pipelines/mochi/pipeline_mochi.py +748 -0
  140. diffusers/pipelines/mochi/pipeline_output.py +20 -0
  141. diffusers/pipelines/pag/__init__.py +7 -0
  142. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1 -2
  143. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1 -2
  144. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +1 -3
  145. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1 -3
  146. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +5 -1
  147. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +6 -13
  148. diffusers/pipelines/pag/pipeline_pag_sana.py +886 -0
  149. diffusers/pipelines/pag/pipeline_pag_sd_3.py +6 -6
  150. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +1058 -0
  151. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +3 -0
  152. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +1356 -0
  153. diffusers/pipelines/pipeline_flax_utils.py +1 -1
  154. diffusers/pipelines/pipeline_loading_utils.py +25 -4
  155. diffusers/pipelines/pipeline_utils.py +35 -6
  156. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +6 -13
  157. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +6 -13
  158. diffusers/pipelines/sana/__init__.py +47 -0
  159. diffusers/pipelines/sana/pipeline_output.py +21 -0
  160. diffusers/pipelines/sana/pipeline_sana.py +884 -0
  161. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +12 -1
  162. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -3
  163. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +216 -20
  164. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +62 -9
  165. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +57 -8
  166. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -1
  167. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +0 -8
  168. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +0 -8
  169. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +0 -8
  170. diffusers/pipelines/unidiffuser/modeling_uvit.py +2 -2
  171. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  172. diffusers/quantizers/auto.py +14 -1
  173. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -1
  174. diffusers/quantizers/gguf/__init__.py +1 -0
  175. diffusers/quantizers/gguf/gguf_quantizer.py +159 -0
  176. diffusers/quantizers/gguf/utils.py +456 -0
  177. diffusers/quantizers/quantization_config.py +280 -2
  178. diffusers/quantizers/torchao/__init__.py +15 -0
  179. diffusers/quantizers/torchao/torchao_quantizer.py +285 -0
  180. diffusers/schedulers/scheduling_ddpm.py +2 -6
  181. diffusers/schedulers/scheduling_ddpm_parallel.py +2 -6
  182. diffusers/schedulers/scheduling_deis_multistep.py +28 -9
  183. diffusers/schedulers/scheduling_dpmsolver_multistep.py +35 -9
  184. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +35 -8
  185. diffusers/schedulers/scheduling_dpmsolver_sde.py +4 -4
  186. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +48 -10
  187. diffusers/schedulers/scheduling_euler_discrete.py +4 -4
  188. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +153 -6
  189. diffusers/schedulers/scheduling_heun_discrete.py +4 -4
  190. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +4 -4
  191. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +4 -4
  192. diffusers/schedulers/scheduling_lcm.py +2 -6
  193. diffusers/schedulers/scheduling_lms_discrete.py +4 -4
  194. diffusers/schedulers/scheduling_repaint.py +1 -1
  195. diffusers/schedulers/scheduling_sasolver.py +28 -9
  196. diffusers/schedulers/scheduling_tcd.py +2 -6
  197. diffusers/schedulers/scheduling_unipc_multistep.py +53 -8
  198. diffusers/training_utils.py +16 -2
  199. diffusers/utils/__init__.py +5 -0
  200. diffusers/utils/constants.py +1 -0
  201. diffusers/utils/dummy_pt_objects.py +180 -0
  202. diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
  203. diffusers/utils/dynamic_modules_utils.py +3 -3
  204. diffusers/utils/hub_utils.py +31 -39
  205. diffusers/utils/import_utils.py +67 -0
  206. diffusers/utils/peft_utils.py +3 -0
  207. diffusers/utils/testing_utils.py +56 -1
  208. diffusers/utils/torch_utils.py +3 -0
  209. {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/METADATA +69 -69
  210. {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/RECORD +214 -162
  211. {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/WHEEL +1 -1
  212. {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/LICENSE +0 -0
  213. {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/entry_points.txt +0 -0
  214. {diffusers-0.31.0.dist-info → diffusers-0.32.0.dist-info}/top_level.txt +0 -0
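
The headline additions in 0.32.0 are new video and image pipelines (Allegro, HunyuanVideo, LTX, Mochi, Sana, Flux Control/Fill/Redux, ControlNet Union) and two new quantization backends (GGUF and torchao). The file reproduced below is the new `diffusers/quantizers/gguf/utils.py` (entry 176 above), which holds the GGUF dequantization kernels. A minimal loading sketch, assuming the GGUF API surfaced by this release (`GGUFQuantizationConfig` exported from the top-level `diffusers` namespace and accepted by `from_single_file`); the checkpoint path is a placeholder:

import torch
from diffusers import FluxTransformer2DModel, GGUFQuantizationConfig

# Placeholder path to a GGUF-quantized Flux transformer checkpoint.
ckpt_path = "path/to/flux1-dev-Q4_K_S.gguf"

# Weights stay packed in their GGUF blocks; GGUFLinear (defined in the file
# below) dequantizes them on the fly to `compute_dtype` at forward time.
transformer = FluxTransformer2DModel.from_single_file(
    ckpt_path,
    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
    torch_dtype=torch.bfloat16,
)
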
--- /dev/null
+++ diffusers/quantizers/gguf/utils.py
@@ -0,0 +1,456 @@
+ # Copyright 2024 The HuggingFace Team and City96. All rights reserved.
+ # #
+ # # Licensed under the Apache License, Version 2.0 (the "License");
+ # # you may not use this file except in compliance with the License.
+ # # You may obtain a copy of the License at
+ # #
+ # #     http://www.apache.org/licenses/LICENSE-2.0
+ # #
+ # # Unless required by applicable law or agreed to in writing, software
+ # # distributed under the License is distributed on an "AS IS" BASIS,
+ # # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # # See the License for the specific language governing permissions and
+ # # limitations under the License.
+
+
+ import inspect
+ from contextlib import nullcontext
+
+ import gguf
+ import torch
+ import torch.nn as nn
+
+ from ...utils import is_accelerate_available
+
+
+ if is_accelerate_available():
+     import accelerate
+     from accelerate import init_empty_weights
+     from accelerate.hooks import add_hook_to_module, remove_hook_from_module
+
+
+ # Copied from diffusers.quantizers.bitsandbytes.utils._create_accelerate_new_hook
+ def _create_accelerate_new_hook(old_hook):
+     r"""
+     Creates a new hook based on the old hook. Use it only if you know what you are doing ! This method is a copy of:
+     https://github.com/huggingface/peft/blob/748f7968f3a31ec06a1c2b0328993319ad9a150a/src/peft/utils/other.py#L245 with
+     some changes
+     """
+     old_hook_cls = getattr(accelerate.hooks, old_hook.__class__.__name__)
+     old_hook_attr = old_hook.__dict__
+     filtered_old_hook_attr = {}
+     old_hook_init_signature = inspect.signature(old_hook_cls.__init__)
+     for k in old_hook_attr.keys():
+         if k in old_hook_init_signature.parameters:
+             filtered_old_hook_attr[k] = old_hook_attr[k]
+     new_hook = old_hook_cls(**filtered_old_hook_attr)
+     return new_hook
+
+
+ def _replace_with_gguf_linear(model, compute_dtype, state_dict, prefix="", modules_to_not_convert=[]):
+     def _should_convert_to_gguf(state_dict, prefix):
+         weight_key = prefix + "weight"
+         return weight_key in state_dict and isinstance(state_dict[weight_key], GGUFParameter)
+
+     has_children = list(model.children())
+     if not has_children:
+         return
+
+     for name, module in model.named_children():
+         module_prefix = prefix + name + "."
+         _replace_with_gguf_linear(module, compute_dtype, state_dict, module_prefix, modules_to_not_convert)
+
+         if (
+             isinstance(module, nn.Linear)
+             and _should_convert_to_gguf(state_dict, module_prefix)
+             and name not in modules_to_not_convert
+         ):
+             ctx = init_empty_weights if is_accelerate_available() else nullcontext
+             with ctx():
+                 model._modules[name] = GGUFLinear(
+                     module.in_features,
+                     module.out_features,
+                     module.bias is not None,
+                     compute_dtype=compute_dtype,
+                 )
+             model._modules[name].source_cls = type(module)
+             # Force requires_grad to False to avoid unexpected errors
+             model._modules[name].requires_grad_(False)
+
+     return model
+
+
+ def _dequantize_gguf_and_restore_linear(model, modules_to_not_convert=[]):
+     for name, module in model.named_children():
+         if isinstance(module, GGUFLinear) and name not in modules_to_not_convert:
+             device = module.weight.device
+             bias = getattr(module, "bias", None)
+
+             ctx = init_empty_weights if is_accelerate_available() else nullcontext
+             with ctx():
+                 new_module = nn.Linear(
+                     module.in_features,
+                     module.out_features,
+                     module.bias is not None,
+                     device=device,
+                 )
+             new_module.weight = nn.Parameter(dequantize_gguf_tensor(module.weight))
+             if bias is not None:
+                 new_module.bias = bias
+
+             # Create a new hook and attach it in case we use accelerate
+             if hasattr(module, "_hf_hook"):
+                 old_hook = module._hf_hook
+                 new_hook = _create_accelerate_new_hook(old_hook)
+
+                 remove_hook_from_module(module)
+                 add_hook_to_module(new_module, new_hook)
+
+             new_module.to(device)
+             model._modules[name] = new_module
+
+         has_children = list(module.children())
+         if has_children:
+             _dequantize_gguf_and_restore_linear(module, modules_to_not_convert)
+
+     return model
+
+
+ # dequantize operations based on torch ports of GGUF dequantize_functions
+ # from City96
+ # more info: https://github.com/city96/ComfyUI-GGUF/blob/main/dequant.py
+
+
+ QK_K = 256
+ K_SCALE_SIZE = 12
+
+
+ def to_uint32(x):
+     x = x.view(torch.uint8).to(torch.int32)
+     return (x[:, 0] | x[:, 1] << 8 | x[:, 2] << 16 | x[:, 3] << 24).unsqueeze(1)
+
+
+ def split_block_dims(blocks, *args):
+     n_max = blocks.shape[1]
+     dims = list(args) + [n_max - sum(args)]
+     return torch.split(blocks, dims, dim=1)
+
+
+ def get_scale_min(scales):
+     n_blocks = scales.shape[0]
+     scales = scales.view(torch.uint8)
+     scales = scales.reshape((n_blocks, 3, 4))
+
+     d, m, m_d = torch.split(scales, scales.shape[-2] // 3, dim=-2)
+
+     sc = torch.cat([d & 0x3F, (m_d & 0x0F) | ((d >> 2) & 0x30)], dim=-1)
+     min = torch.cat([m & 0x3F, (m_d >> 4) | ((m >> 2) & 0x30)], dim=-1)
+
+     return (sc.reshape((n_blocks, 8)), min.reshape((n_blocks, 8)))
+
+
+ def dequantize_blocks_Q8_0(blocks, block_size, type_size, dtype=None):
+     d, x = split_block_dims(blocks, 2)
+     d = d.view(torch.float16).to(dtype)
+     x = x.view(torch.int8)
+     return d * x
+
+
+ def dequantize_blocks_Q5_1(blocks, block_size, type_size, dtype=None):
+     n_blocks = blocks.shape[0]
+
+     d, m, qh, qs = split_block_dims(blocks, 2, 2, 4)
+     d = d.view(torch.float16).to(dtype)
+     m = m.view(torch.float16).to(dtype)
+     qh = to_uint32(qh)
+
+     qh = qh.reshape((n_blocks, 1)) >> torch.arange(32, device=d.device, dtype=torch.int32).reshape(1, 32)
+     ql = qs.reshape((n_blocks, -1, 1, block_size // 2)) >> torch.tensor(
+         [0, 4], device=d.device, dtype=torch.uint8
+     ).reshape(1, 1, 2, 1)
+     qh = (qh & 1).to(torch.uint8)
+     ql = (ql & 0x0F).reshape((n_blocks, -1))
+
+     qs = ql | (qh << 4)
+     return (d * qs) + m
+
+
+ def dequantize_blocks_Q5_0(blocks, block_size, type_size, dtype=None):
+     n_blocks = blocks.shape[0]
+
+     d, qh, qs = split_block_dims(blocks, 2, 4)
+     d = d.view(torch.float16).to(dtype)
+     qh = to_uint32(qh)
+
+     qh = qh.reshape(n_blocks, 1) >> torch.arange(32, device=d.device, dtype=torch.int32).reshape(1, 32)
+     ql = qs.reshape(n_blocks, -1, 1, block_size // 2) >> torch.tensor(
+         [0, 4], device=d.device, dtype=torch.uint8
+     ).reshape(1, 1, 2, 1)
+
+     qh = (qh & 1).to(torch.uint8)
+     ql = (ql & 0x0F).reshape(n_blocks, -1)
+
+     qs = (ql | (qh << 4)).to(torch.int8) - 16
+     return d * qs
+
+
+ def dequantize_blocks_Q4_1(blocks, block_size, type_size, dtype=None):
+     n_blocks = blocks.shape[0]
+
+     d, m, qs = split_block_dims(blocks, 2, 2)
+     d = d.view(torch.float16).to(dtype)
+     m = m.view(torch.float16).to(dtype)
+
+     qs = qs.reshape((n_blocks, -1, 1, block_size // 2)) >> torch.tensor(
+         [0, 4], device=d.device, dtype=torch.uint8
+     ).reshape(1, 1, 2, 1)
+     qs = (qs & 0x0F).reshape(n_blocks, -1)
+
+     return (d * qs) + m
+
+
+ def dequantize_blocks_Q4_0(blocks, block_size, type_size, dtype=None):
+     n_blocks = blocks.shape[0]
+
+     d, qs = split_block_dims(blocks, 2)
+     d = d.view(torch.float16).to(dtype)
+
+     qs = qs.reshape((n_blocks, -1, 1, block_size // 2)) >> torch.tensor(
+         [0, 4], device=d.device, dtype=torch.uint8
+     ).reshape((1, 1, 2, 1))
+     qs = (qs & 0x0F).reshape((n_blocks, -1)).to(torch.int8) - 8
+     return d * qs
+
+
+ def dequantize_blocks_Q6_K(blocks, block_size, type_size, dtype=None):
+     n_blocks = blocks.shape[0]
+
+     (
+         ql,
+         qh,
+         scales,
+         d,
+     ) = split_block_dims(blocks, QK_K // 2, QK_K // 4, QK_K // 16)
+
+     scales = scales.view(torch.int8).to(dtype)
+     d = d.view(torch.float16).to(dtype)
+     d = (d * scales).reshape((n_blocks, QK_K // 16, 1))
+
+     ql = ql.reshape((n_blocks, -1, 1, 64)) >> torch.tensor([0, 4], device=d.device, dtype=torch.uint8).reshape(
+         (1, 1, 2, 1)
+     )
+     ql = (ql & 0x0F).reshape((n_blocks, -1, 32))
+     qh = qh.reshape((n_blocks, -1, 1, 32)) >> torch.tensor([0, 2, 4, 6], device=d.device, dtype=torch.uint8).reshape(
+         (1, 1, 4, 1)
+     )
+     qh = (qh & 0x03).reshape((n_blocks, -1, 32))
+     q = (ql | (qh << 4)).to(torch.int8) - 32
+     q = q.reshape((n_blocks, QK_K // 16, -1))
+
+     return (d * q).reshape((n_blocks, QK_K))
+
+
+ def dequantize_blocks_Q5_K(blocks, block_size, type_size, dtype=None):
+     n_blocks = blocks.shape[0]
+
+     d, dmin, scales, qh, qs = split_block_dims(blocks, 2, 2, K_SCALE_SIZE, QK_K // 8)
+
+     d = d.view(torch.float16).to(dtype)
+     dmin = dmin.view(torch.float16).to(dtype)
+
+     sc, m = get_scale_min(scales)
+
+     d = (d * sc).reshape((n_blocks, -1, 1))
+     dm = (dmin * m).reshape((n_blocks, -1, 1))
+
+     ql = qs.reshape((n_blocks, -1, 1, 32)) >> torch.tensor([0, 4], device=d.device, dtype=torch.uint8).reshape(
+         (1, 1, 2, 1)
+     )
+     qh = qh.reshape((n_blocks, -1, 1, 32)) >> torch.arange(0, 8, device=d.device, dtype=torch.uint8).reshape(
+         (1, 1, 8, 1)
+     )
+     ql = (ql & 0x0F).reshape((n_blocks, -1, 32))
+     qh = (qh & 0x01).reshape((n_blocks, -1, 32))
+     q = ql | (qh << 4)
+
+     return (d * q - dm).reshape((n_blocks, QK_K))
+
+
+ def dequantize_blocks_Q4_K(blocks, block_size, type_size, dtype=None):
+     n_blocks = blocks.shape[0]
+
+     d, dmin, scales, qs = split_block_dims(blocks, 2, 2, K_SCALE_SIZE)
+     d = d.view(torch.float16).to(dtype)
+     dmin = dmin.view(torch.float16).to(dtype)
+
+     sc, m = get_scale_min(scales)
+
+     d = (d * sc).reshape((n_blocks, -1, 1))
+     dm = (dmin * m).reshape((n_blocks, -1, 1))
+
+     qs = qs.reshape((n_blocks, -1, 1, 32)) >> torch.tensor([0, 4], device=d.device, dtype=torch.uint8).reshape(
+         (1, 1, 2, 1)
+     )
+     qs = (qs & 0x0F).reshape((n_blocks, -1, 32))
+
+     return (d * qs - dm).reshape((n_blocks, QK_K))
+
+
+ def dequantize_blocks_Q3_K(blocks, block_size, type_size, dtype=None):
+     n_blocks = blocks.shape[0]
+
+     hmask, qs, scales, d = split_block_dims(blocks, QK_K // 8, QK_K // 4, 12)
+     d = d.view(torch.float16).to(dtype)
+
+     lscales, hscales = scales[:, :8], scales[:, 8:]
+     lscales = lscales.reshape((n_blocks, 1, 8)) >> torch.tensor([0, 4], device=d.device, dtype=torch.uint8).reshape(
+         (1, 2, 1)
+     )
+     lscales = lscales.reshape((n_blocks, 16))
+     hscales = hscales.reshape((n_blocks, 1, 4)) >> torch.tensor(
+         [0, 2, 4, 6], device=d.device, dtype=torch.uint8
+     ).reshape((1, 4, 1))
+     hscales = hscales.reshape((n_blocks, 16))
+     scales = (lscales & 0x0F) | ((hscales & 0x03) << 4)
+     scales = scales.to(torch.int8) - 32
+
+     dl = (d * scales).reshape((n_blocks, 16, 1))
+
+     ql = qs.reshape((n_blocks, -1, 1, 32)) >> torch.tensor([0, 2, 4, 6], device=d.device, dtype=torch.uint8).reshape(
+         (1, 1, 4, 1)
+     )
+     qh = hmask.reshape(n_blocks, -1, 1, 32) >> torch.arange(0, 8, device=d.device, dtype=torch.uint8).reshape(
+         (1, 1, 8, 1)
+     )
+     ql = ql.reshape((n_blocks, 16, QK_K // 16)) & 3
+     qh = (qh.reshape((n_blocks, 16, QK_K // 16)) & 1) ^ 1
+     q = ql.to(torch.int8) - (qh << 2).to(torch.int8)
+
+     return (dl * q).reshape((n_blocks, QK_K))
+
+
+ def dequantize_blocks_Q2_K(blocks, block_size, type_size, dtype=None):
+     n_blocks = blocks.shape[0]
+
+     scales, qs, d, dmin = split_block_dims(blocks, QK_K // 16, QK_K // 4, 2)
+     d = d.view(torch.float16).to(dtype)
+     dmin = dmin.view(torch.float16).to(dtype)
+
+     # (n_blocks, 16, 1)
+     dl = (d * (scales & 0xF)).reshape((n_blocks, QK_K // 16, 1))
+     ml = (dmin * (scales >> 4)).reshape((n_blocks, QK_K // 16, 1))
+
+     shift = torch.tensor([0, 2, 4, 6], device=d.device, dtype=torch.uint8).reshape((1, 1, 4, 1))
+
+     qs = (qs.reshape((n_blocks, -1, 1, 32)) >> shift) & 3
+     qs = qs.reshape((n_blocks, QK_K // 16, 16))
+     qs = dl * qs - ml
+
+     return qs.reshape((n_blocks, -1))
+
+
+ def dequantize_blocks_BF16(blocks, block_size, type_size, dtype=None):
+     return (blocks.view(torch.int16).to(torch.int32) << 16).view(torch.float32)
+
+
+ GGML_QUANT_SIZES = gguf.GGML_QUANT_SIZES
+ dequantize_functions = {
+     gguf.GGMLQuantizationType.BF16: dequantize_blocks_BF16,
+     gguf.GGMLQuantizationType.Q8_0: dequantize_blocks_Q8_0,
+     gguf.GGMLQuantizationType.Q5_1: dequantize_blocks_Q5_1,
+     gguf.GGMLQuantizationType.Q5_0: dequantize_blocks_Q5_0,
+     gguf.GGMLQuantizationType.Q4_1: dequantize_blocks_Q4_1,
+     gguf.GGMLQuantizationType.Q4_0: dequantize_blocks_Q4_0,
+     gguf.GGMLQuantizationType.Q6_K: dequantize_blocks_Q6_K,
+     gguf.GGMLQuantizationType.Q5_K: dequantize_blocks_Q5_K,
+     gguf.GGMLQuantizationType.Q4_K: dequantize_blocks_Q4_K,
+     gguf.GGMLQuantizationType.Q3_K: dequantize_blocks_Q3_K,
+     gguf.GGMLQuantizationType.Q2_K: dequantize_blocks_Q2_K,
+ }
+ SUPPORTED_GGUF_QUANT_TYPES = list(dequantize_functions.keys())
+
+
+ def _quant_shape_from_byte_shape(shape, type_size, block_size):
+     return (*shape[:-1], shape[-1] // type_size * block_size)
+
+
+ def dequantize_gguf_tensor(tensor):
+     if not hasattr(tensor, "quant_type"):
+         return tensor
+
+     quant_type = tensor.quant_type
+     dequant_fn = dequantize_functions[quant_type]
+
+     block_size, type_size = GGML_QUANT_SIZES[quant_type]
+
+     tensor = tensor.view(torch.uint8)
+     shape = _quant_shape_from_byte_shape(tensor.shape, type_size, block_size)
+
+     n_blocks = tensor.numel() // type_size
+     blocks = tensor.reshape((n_blocks, type_size))
+
+     dequant = dequant_fn(blocks, block_size, type_size)
+     dequant = dequant.reshape(shape)
+
+     return dequant.as_tensor()
+
+
+ class GGUFParameter(torch.nn.Parameter):
+     def __new__(cls, data, requires_grad=False, quant_type=None):
+         data = data if data is not None else torch.empty(0)
+         self = torch.Tensor._make_subclass(cls, data, requires_grad)
+         self.quant_type = quant_type
+
+         return self
+
+     def as_tensor(self):
+         return torch.Tensor._make_subclass(torch.Tensor, self, self.requires_grad)
+
+     @classmethod
+     def __torch_function__(cls, func, types, args=(), kwargs=None):
+         if kwargs is None:
+             kwargs = {}
+
+         result = super().__torch_function__(func, types, args, kwargs)
+
+         # When converting from original format checkpoints we often use splits, cats etc on tensors
+         # this method ensures that the returned tensor type from those operations remains GGUFParameter
+         # so that we preserve quant_type information
+         quant_type = None
+         for arg in args:
+             if isinstance(arg, list) and isinstance(arg[0], GGUFParameter):
+                 quant_type = arg[0].quant_type
+                 break
+             if isinstance(arg, GGUFParameter):
+                 quant_type = arg.quant_type
+                 break
+         if isinstance(result, torch.Tensor):
+             return cls(result, quant_type=quant_type)
+         # Handle tuples and lists
+         elif isinstance(result, (tuple, list)):
+             # Preserve the original type (tuple or list)
+             wrapped = [cls(x, quant_type=quant_type) if isinstance(x, torch.Tensor) else x for x in result]
+             return type(result)(wrapped)
+         else:
+             return result
+
+
+ class GGUFLinear(nn.Linear):
+     def __init__(
+         self,
+         in_features,
+         out_features,
+         bias=False,
+         compute_dtype=None,
+         device=None,
+     ) -> None:
+         super().__init__(in_features, out_features, bias, device)
+         self.compute_dtype = compute_dtype
+
+     def forward(self, inputs):
+         weight = dequantize_gguf_tensor(self.weight)
+         weight = weight.to(self.compute_dtype)
+         bias = self.bias.to(self.compute_dtype) if self.bias is not None else None
+
+         output = torch.nn.functional.linear(inputs, weight, bias)
+         return output
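
To make the block format these kernels decode concrete, here is a small self-contained sketch (illustrative only; it assumes the optional `gguf` dependency is installed so that `diffusers.quantizers.gguf.utils` imports, and it builds a single Q8_0 block by hand instead of reading a real checkpoint):

import torch
from diffusers.quantizers.gguf.utils import dequantize_blocks_Q8_0

# A Q8_0 block is 34 bytes: one float16 scale followed by 32 int8 quants.
scale = torch.tensor([0.5], dtype=torch.float16).view(torch.uint8)   # 2 bytes
quants = torch.arange(-16, 16, dtype=torch.int8).view(torch.uint8)   # 32 bytes
blocks = torch.cat([scale, quants]).reshape(1, 34)

out = dequantize_blocks_Q8_0(blocks, block_size=32, type_size=34, dtype=torch.float32)
# Each quant is rescaled as d * q, so the block dequantizes to 0.5 * [-16, ..., 15].
assert torch.equal(out, 0.5 * torch.arange(-16, 16, dtype=torch.float32).reshape(1, 32))

`dequantize_gguf_tensor` applies the matching `dequantize_blocks_*` kernel to every block of a `GGUFParameter` and reshapes the result back to the tensor's logical shape, which is what `GGUFLinear.forward` and `_dequantize_gguf_and_restore_linear` rely on.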