diffusers 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl
- diffusers/__init__.py +26 -2
- diffusers/commands/fp16_safetensors.py +10 -11
- diffusers/configuration_utils.py +13 -8
- diffusers/dependency_versions_check.py +0 -1
- diffusers/dependency_versions_table.py +5 -5
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/image_processor.py +463 -51
- diffusers/loaders/__init__.py +82 -0
- diffusers/loaders/ip_adapter.py +159 -0
- diffusers/loaders/lora.py +1553 -0
- diffusers/loaders/lora_conversion_utils.py +284 -0
- diffusers/loaders/single_file.py +637 -0
- diffusers/loaders/textual_inversion.py +455 -0
- diffusers/loaders/unet.py +828 -0
- diffusers/loaders/utils.py +59 -0
- diffusers/models/__init__.py +26 -9
- diffusers/models/activations.py +9 -6
- diffusers/models/attention.py +301 -29
- diffusers/models/attention_flax.py +9 -1
- diffusers/models/attention_processor.py +378 -6
- diffusers/models/autoencoders/__init__.py +5 -0
- diffusers/models/{autoencoder_asym_kl.py → autoencoders/autoencoder_asym_kl.py} +17 -12
- diffusers/models/{autoencoder_kl.py → autoencoders/autoencoder_kl.py} +47 -23
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +402 -0
- diffusers/models/{autoencoder_tiny.py → autoencoders/autoencoder_tiny.py} +24 -28
- diffusers/models/{consistency_decoder_vae.py → autoencoders/consistency_decoder_vae.py} +51 -44
- diffusers/models/{vae.py → autoencoders/vae.py} +71 -17
- diffusers/models/controlnet.py +59 -39
- diffusers/models/controlnet_flax.py +19 -18
- diffusers/models/downsampling.py +338 -0
- diffusers/models/embeddings.py +112 -29
- diffusers/models/embeddings_flax.py +2 -0
- diffusers/models/lora.py +131 -1
- diffusers/models/modeling_flax_utils.py +14 -8
- diffusers/models/modeling_outputs.py +17 -0
- diffusers/models/modeling_utils.py +37 -29
- diffusers/models/normalization.py +110 -4
- diffusers/models/resnet.py +299 -652
- diffusers/models/transformer_2d.py +22 -5
- diffusers/models/transformer_temporal.py +183 -1
- diffusers/models/unet_2d_blocks_flax.py +5 -0
- diffusers/models/unet_2d_condition.py +46 -0
- diffusers/models/unet_2d_condition_flax.py +13 -13
- diffusers/models/unet_3d_blocks.py +957 -173
- diffusers/models/unet_3d_condition.py +16 -8
- diffusers/models/unet_kandinsky3.py +535 -0
- diffusers/models/unet_motion_model.py +48 -33
- diffusers/models/unet_spatio_temporal_condition.py +489 -0
- diffusers/models/upsampling.py +454 -0
- diffusers/models/uvit_2d.py +471 -0
- diffusers/models/vae_flax.py +7 -0
- diffusers/models/vq_model.py +12 -3
- diffusers/optimization.py +16 -9
- diffusers/pipelines/__init__.py +137 -76
- diffusers/pipelines/amused/__init__.py +62 -0
- diffusers/pipelines/amused/pipeline_amused.py +328 -0
- diffusers/pipelines/amused/pipeline_amused_img2img.py +347 -0
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +378 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +66 -8
- diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -0
- diffusers/pipelines/auto_pipeline.py +23 -13
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +238 -35
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +148 -37
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +155 -41
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +123 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +216 -39
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +106 -34
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +1 -0
- diffusers/pipelines/ddim/pipeline_ddim.py +1 -0
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +13 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +13 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +13 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +13 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +13 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +13 -1
- diffusers/pipelines/deprecated/__init__.py +153 -0
- diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/__init__.py +3 -3
- diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion.py +177 -34
- diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion_img2img.py +182 -37
- diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_output.py +1 -1
- diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/__init__.py +1 -1
- diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/mel.py +2 -2
- diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/pipeline_audio_diffusion.py +4 -4
- diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/__init__.py +1 -1
- diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/pipeline_latent_diffusion_uncond.py +4 -4
- diffusers/pipelines/{pndm → deprecated/pndm}/__init__.py +1 -1
- diffusers/pipelines/{pndm → deprecated/pndm}/pipeline_pndm.py +4 -4
- diffusers/pipelines/{repaint → deprecated/repaint}/__init__.py +1 -1
- diffusers/pipelines/{repaint → deprecated/repaint}/pipeline_repaint.py +5 -5
- diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/__init__.py +1 -1
- diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/pipeline_score_sde_ve.py +5 -4
- diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/__init__.py +6 -6
- diffusers/pipelines/{spectrogram_diffusion/continous_encoder.py → deprecated/spectrogram_diffusion/continuous_encoder.py} +2 -2
- diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/midi_utils.py +1 -1
- diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/notes_encoder.py +2 -2
- diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/pipeline_spectrogram_diffusion.py +8 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/__init__.py +55 -0
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_cycle_diffusion.py +34 -13
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_onnx_stable_diffusion_inpaint_legacy.py +7 -6
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_inpaint_legacy.py +12 -11
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_model_editing.py +17 -11
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_paradigms.py +11 -10
- diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_pix2pix_zero.py +14 -13
- diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/__init__.py +1 -1
- diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/pipeline_stochastic_karras_ve.py +4 -4
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/__init__.py +3 -3
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/modeling_text_unet.py +83 -51
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion.py +4 -4
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_dual_guided.py +7 -6
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_image_variation.py +7 -6
- diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_text_to_image.py +7 -6
- diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/__init__.py +3 -3
- diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +1 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +1 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +3 -3
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +1 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +1 -1
- diffusers/pipelines/kandinsky3/__init__.py +49 -0
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +98 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +589 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +654 -0
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +111 -11
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +102 -9
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +1 -1
- diffusers/pipelines/onnx_utils.py +8 -5
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +7 -2
- diffusers/pipelines/pipeline_flax_utils.py +11 -8
- diffusers/pipelines/pipeline_utils.py +63 -42
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +247 -38
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +3 -3
- diffusers/pipelines/stable_diffusion/__init__.py +37 -65
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +75 -78
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +2 -4
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +174 -11
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +8 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +1 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +178 -11
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +224 -13
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +74 -20
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +7 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +5 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -0
- diffusers/pipelines/stable_diffusion_attend_and_excite/__init__.py +48 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_attend_and_excite}/pipeline_stable_diffusion_attend_and_excite.py +6 -2
- diffusers/pipelines/stable_diffusion_diffedit/__init__.py +48 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_diffedit}/pipeline_stable_diffusion_diffedit.py +3 -3
- diffusers/pipelines/stable_diffusion_gligen/__init__.py +50 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen.py +3 -2
- diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen_text_image.py +4 -3
- diffusers/pipelines/stable_diffusion_k_diffusion/__init__.py +60 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_k_diffusion}/pipeline_stable_diffusion_k_diffusion.py +7 -1
- diffusers/pipelines/stable_diffusion_ldm3d/__init__.py +48 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_ldm3d}/pipeline_stable_diffusion_ldm3d.py +51 -7
- diffusers/pipelines/stable_diffusion_panorama/__init__.py +48 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_panorama}/pipeline_stable_diffusion_panorama.py +57 -8
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +58 -6
- diffusers/pipelines/stable_diffusion_sag/__init__.py +48 -0
- diffusers/pipelines/{stable_diffusion → stable_diffusion_sag}/pipeline_stable_diffusion_sag.py +68 -10
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +194 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +205 -16
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +206 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +23 -17
- diffusers/pipelines/stable_video_diffusion/__init__.py +58 -0
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +652 -0
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +108 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +115 -14
- diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -0
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +6 -0
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +23 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +334 -10
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +1331 -0
- diffusers/pipelines/unclip/pipeline_unclip.py +2 -1
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +1 -0
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +14 -4
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +9 -5
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +2 -2
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +5 -1
- diffusers/schedulers/__init__.py +4 -4
- diffusers/schedulers/deprecated/__init__.py +50 -0
- diffusers/schedulers/{scheduling_karras_ve.py → deprecated/scheduling_karras_ve.py} +4 -4
- diffusers/schedulers/{scheduling_sde_vp.py → deprecated/scheduling_sde_vp.py} +4 -6
- diffusers/schedulers/scheduling_amused.py +162 -0
- diffusers/schedulers/scheduling_consistency_models.py +2 -0
- diffusers/schedulers/scheduling_ddim.py +1 -3
- diffusers/schedulers/scheduling_ddim_inverse.py +2 -7
- diffusers/schedulers/scheduling_ddim_parallel.py +1 -3
- diffusers/schedulers/scheduling_ddpm.py +47 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +47 -3
- diffusers/schedulers/scheduling_deis_multistep.py +28 -6
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +28 -6
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +28 -6
- diffusers/schedulers/scheduling_dpmsolver_sde.py +3 -3
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +28 -6
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +59 -3
- diffusers/schedulers/scheduling_euler_discrete.py +102 -16
- diffusers/schedulers/scheduling_heun_discrete.py +17 -5
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +17 -5
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +17 -5
- diffusers/schedulers/scheduling_lcm.py +123 -29
- diffusers/schedulers/scheduling_lms_discrete.py +3 -3
- diffusers/schedulers/scheduling_pndm.py +1 -3
- diffusers/schedulers/scheduling_repaint.py +1 -3
- diffusers/schedulers/scheduling_unipc_multistep.py +28 -6
- diffusers/schedulers/scheduling_utils.py +3 -1
- diffusers/schedulers/scheduling_utils_flax.py +3 -1
- diffusers/training_utils.py +1 -1
- diffusers/utils/__init__.py +1 -2
- diffusers/utils/constants.py +10 -12
- diffusers/utils/dummy_pt_objects.py +75 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +105 -0
- diffusers/utils/dynamic_modules_utils.py +18 -22
- diffusers/utils/export_utils.py +8 -3
- diffusers/utils/hub_utils.py +24 -36
- diffusers/utils/logging.py +11 -11
- diffusers/utils/outputs.py +5 -5
- diffusers/utils/peft_utils.py +88 -44
- diffusers/utils/state_dict_utils.py +8 -0
- diffusers/utils/testing_utils.py +199 -1
- diffusers/utils/torch_utils.py +4 -4
- {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/METADATA +86 -69
- diffusers-0.25.0.dist-info/RECORD +360 -0
- {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/WHEEL +1 -1
- {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/entry_points.txt +0 -1
- diffusers/loaders.py +0 -3336
- diffusers-0.23.1.dist-info/RECORD +0 -323
- /diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/modeling_roberta_series.py +0 -0
- {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/LICENSE +0 -0
- {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/top_level.txt +0 -0
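Most of this release is a reorganization rather than new functionality: the monolithic diffusers/loaders.py (removed, -3336 lines) becomes the diffusers/loaders package, legacy pipelines and schedulers move under deprecated/ subpackages, and the VAE models move into diffusers/models/autoencoders. A minimal sketch of what the new layout looks like at import time (paths inferred from the file list above; not an official migration guide):

# Sketch, assuming diffusers 0.25.0 is installed; paths inferred from the file list above.
# Deprecated pipelines now live one package level deeper:
from diffusers.pipelines.deprecated.alt_diffusion import AltDiffusionPipeline

# loaders.py is now a package; its mixins are split across submodules:
from diffusers.loaders.textual_inversion import TextualInversionLoaderMixin
from diffusers.loaders.ip_adapter import IPAdapterMixin  # IP-Adapter support is new in 0.25

The diff below shows the mechanical consequence of the moves: relative imports inside relocated modules gain one level (... becomes ....).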
diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/modeling_text_unet.py
RENAMED
@@ -7,20 +7,20 @@ import torch.nn.functional as F
 
 from diffusers.utils import deprecate
 
-from ...configuration_utils import ConfigMixin, register_to_config
-from ...models import ModelMixin
-from ...models.activations import get_activation
-from ...models.attention import Attention
-from ...models.attention_processor import (
+from ....configuration_utils import ConfigMixin, register_to_config
+from ....models import ModelMixin
+from ....models.activations import get_activation
+from ....models.attention_processor import (
     ADDED_KV_ATTENTION_PROCESSORS,
     CROSS_ATTENTION_PROCESSORS,
+    Attention,
     AttentionProcessor,
     AttnAddedKVProcessor,
     AttnAddedKVProcessor2_0,
     AttnProcessor,
 )
-from ...models.dual_transformer_2d import DualTransformer2DModel
-from ...models.embeddings import (
+from ....models.dual_transformer_2d import DualTransformer2DModel
+from ....models.embeddings import (
     GaussianFourierProjection,
     ImageHintTimeEmbedding,
     ImageProjection,
@@ -31,10 +31,10 @@ from ...models.embeddings import (
     TimestepEmbedding,
     Timesteps,
 )
-from ...models.transformer_2d import Transformer2DModel
-from ...models.unet_2d_condition import UNet2DConditionOutput
-from ...utils import USE_PEFT_BACKEND, is_torch_version, logging, scale_lora_layers, unscale_lora_layers
-from ...utils.torch_utils import apply_freeu
+from ....models.transformer_2d import Transformer2DModel
+from ....models.unet_2d_condition import UNet2DConditionOutput
+from ....utils import USE_PEFT_BACKEND, is_torch_version, logging, scale_lora_layers, unscale_lora_layers
+from ....utils.torch_utils import apply_freeu
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -50,6 +50,9 @@ def get_down_block(
     resnet_eps,
     resnet_act_fn,
     num_attention_heads,
+    transformer_layers_per_block,
+    attention_type,
+    attention_head_dim,
     resnet_groups=None,
     cross_attention_dim=None,
     downsample_padding=None,
@@ -113,6 +116,10 @@ def get_up_block(
     resnet_eps,
     resnet_act_fn,
     num_attention_heads,
+    transformer_layers_per_block,
+    resolution_idx,
+    attention_type,
+    attention_head_dim,
     resnet_groups=None,
     cross_attention_dim=None,
     dual_cross_attention=False,
@@ -425,10 +432,7 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
 
         if num_attention_heads is not None:
             raise ValueError(
-                "At the moment it is not possible to define the number of attention heads via `num_attention_heads`"
-                " because of a naming issue as described in"
-                " https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131. Passing"
-                " `num_attention_heads` will only be supported in diffusers v0.19."
+                "At the moment it is not possible to define the number of attention heads via `num_attention_heads` because of a naming issue as described in https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131. Passing `num_attention_heads` will only be supported in diffusers v0.19."
             )
 
         # If `num_attention_heads` is not defined (which is the case for most models)
@@ -442,44 +446,37 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
         # Check inputs
         if len(down_block_types) != len(up_block_types):
             raise ValueError(
-                "Must provide the same number of `down_block_types` as `up_block_types`. `down_block_types`:"
-                f" {down_block_types}. `up_block_types`: {up_block_types}."
+                f"Must provide the same number of `down_block_types` as `up_block_types`. `down_block_types`: {down_block_types}. `up_block_types`: {up_block_types}."
             )
 
         if len(block_out_channels) != len(down_block_types):
             raise ValueError(
-                "Must provide the same number of `block_out_channels` as `down_block_types`. `block_out_channels`:"
-                f" {block_out_channels}. `down_block_types`: {down_block_types}."
+                f"Must provide the same number of `block_out_channels` as `down_block_types`. `block_out_channels`: {block_out_channels}. `down_block_types`: {down_block_types}."
             )
 
         if not isinstance(only_cross_attention, bool) and len(only_cross_attention) != len(down_block_types):
             raise ValueError(
-                "Must provide the same number of `only_cross_attention` as `down_block_types`."
-                f" `only_cross_attention`: {only_cross_attention}. `down_block_types`: {down_block_types}."
+                f"Must provide the same number of `only_cross_attention` as `down_block_types`. `only_cross_attention`: {only_cross_attention}. `down_block_types`: {down_block_types}."
             )
 
         if not isinstance(num_attention_heads, int) and len(num_attention_heads) != len(down_block_types):
             raise ValueError(
-                "Must provide the same number of `num_attention_heads` as `down_block_types`. `num_attention_heads`:"
-                f" {num_attention_heads}. `down_block_types`: {down_block_types}."
+                f"Must provide the same number of `num_attention_heads` as `down_block_types`. `num_attention_heads`: {num_attention_heads}. `down_block_types`: {down_block_types}."
             )
 
         if not isinstance(attention_head_dim, int) and len(attention_head_dim) != len(down_block_types):
             raise ValueError(
-                "Must provide the same number of `attention_head_dim` as `down_block_types`. `attention_head_dim`:"
-                f" {attention_head_dim}. `down_block_types`: {down_block_types}."
+                f"Must provide the same number of `attention_head_dim` as `down_block_types`. `attention_head_dim`: {attention_head_dim}. `down_block_types`: {down_block_types}."
             )
 
         if isinstance(cross_attention_dim, list) and len(cross_attention_dim) != len(down_block_types):
             raise ValueError(
-                "Must provide the same number of `cross_attention_dim` as `down_block_types`. `cross_attention_dim`:"
-                f" {cross_attention_dim}. `down_block_types`: {down_block_types}."
+                f"Must provide the same number of `cross_attention_dim` as `down_block_types`. `cross_attention_dim`: {cross_attention_dim}. `down_block_types`: {down_block_types}."
             )
 
         if not isinstance(layers_per_block, int) and len(layers_per_block) != len(down_block_types):
             raise ValueError(
-                "Must provide the same number of `layers_per_block` as `down_block_types`. `layers_per_block`:"
-                f" {layers_per_block}. `down_block_types`: {down_block_types}."
+                f"Must provide the same number of `layers_per_block` as `down_block_types`. `layers_per_block`: {layers_per_block}. `down_block_types`: {down_block_types}."
             )
         if isinstance(transformer_layers_per_block, list) and reverse_transformer_layers_per_block is None:
             for layer_number_per_block in transformer_layers_per_block:
@@ -897,8 +894,7 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
             processor = AttnProcessor()
         else:
             raise ValueError(
-                "Cannot call `set_default_attn_processor` when attention processors are of type"
-                f" {next(iter(self.attn_processors.values()))}"
+                f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}"
             )
 
         self.set_attn_processor(processor, _remove_lora=True)
@@ -1004,6 +1000,42 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
                 if hasattr(upsample_block, k) or getattr(upsample_block, k, None) is not None:
                     setattr(upsample_block, k, None)
 
+    def fuse_qkv_projections(self):
+        """
+        Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
+        key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+        """
+        self.original_attn_processors = None
+
+        for _, attn_processor in self.attn_processors.items():
+            if "Added" in str(attn_processor.__class__.__name__):
+                raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.")
+
+        self.original_attn_processors = self.attn_processors
+
+        for module in self.modules():
+            if isinstance(module, Attention):
+                module.fuse_projections(fuse=True)
+
+    def unfuse_qkv_projections(self):
+        """Disables the fused QKV projection if enabled.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+
+        """
+        if self.original_attn_processors is not None:
+            self.set_attn_processor(self.original_attn_processors)
+
     def forward(
         self,
         sample: torch.FloatTensor,
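The fuse/unfuse pair added above mirrors the fused-QKV API this release also adds to the regular UNet and attention processors. A minimal usage sketch (the checkpoint id is illustrative, not part of this diff):

# Sketch: exercising the fused-QKV API added in 0.25 (checkpoint id illustrative).
import torch
from diffusers import UNet2DConditionModel

unet = UNet2DConditionModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="unet", torch_dtype=torch.float16
)
unet.fuse_qkv_projections()    # merge q/k/v linears into one projection per self-attention block
# ... run inference ...
unet.unfuse_qkv_projections()  # restore the original, unfused attention processors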
@@ -1166,8 +1198,7 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
             # Kandinsky 2.1 - style
             if "image_embeds" not in added_cond_kwargs:
                 raise ValueError(
-                    f"{self.__class__} has the config param `addition_embed_type` set to 'text_image' which requires"
-                    " the keyword argument `image_embeds` to be passed in `added_cond_kwargs`"
+                    f"{self.__class__} has the config param `addition_embed_type` set to 'text_image' which requires the keyword argument `image_embeds` to be passed in `added_cond_kwargs`"
                 )
 
             image_embs = added_cond_kwargs.get("image_embeds")
@@ -1177,14 +1208,12 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
             # SDXL - style
             if "text_embeds" not in added_cond_kwargs:
                 raise ValueError(
-                    f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires"
-                    " the keyword argument `text_embeds` to be passed in `added_cond_kwargs`"
+                    f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `text_embeds` to be passed in `added_cond_kwargs`"
                 )
             text_embeds = added_cond_kwargs.get("text_embeds")
             if "time_ids" not in added_cond_kwargs:
                 raise ValueError(
-                    f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires"
-                    " the keyword argument `time_ids` to be passed in `added_cond_kwargs`"
+                    f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`"
                 )
             time_ids = added_cond_kwargs.get("time_ids")
             time_embeds = self.add_time_proj(time_ids.flatten())
@@ -1196,8 +1225,7 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
             # Kandinsky 2.2 - style
             if "image_embeds" not in added_cond_kwargs:
                 raise ValueError(
-                    f"{self.__class__} has the config param `addition_embed_type` set to 'image' which requires the"
-                    " keyword argument `image_embeds` to be passed in `added_cond_kwargs`"
+                    f"{self.__class__} has the config param `addition_embed_type` set to 'image' which requires the keyword argument `image_embeds` to be passed in `added_cond_kwargs`"
                 )
             image_embs = added_cond_kwargs.get("image_embeds")
             aug_emb = self.add_embedding(image_embs)
@@ -1205,8 +1233,7 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
             # Kandinsky 2.2 - style
             if "image_embeds" not in added_cond_kwargs or "hint" not in added_cond_kwargs:
                 raise ValueError(
-                    f"{self.__class__} has the config param `addition_embed_type` set to 'image_hint' which requires"
-                    " the keyword arguments `image_embeds` and `hint` to be passed in `added_cond_kwargs`"
+                    f"{self.__class__} has the config param `addition_embed_type` set to 'image_hint' which requires the keyword arguments `image_embeds` and `hint` to be passed in `added_cond_kwargs`"
                 )
             image_embs = added_cond_kwargs.get("image_embeds")
             hint = added_cond_kwargs.get("hint")
@@ -1224,8 +1251,7 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
             # Kadinsky 2.1 - style
             if "image_embeds" not in added_cond_kwargs:
                 raise ValueError(
-                    f"{self.__class__} has the config param `encoder_hid_dim_type` set to 'text_image_proj' which"
-                    " requires the keyword argument `image_embeds` to be passed in `added_conditions`"
+                    f"{self.__class__} has the config param `encoder_hid_dim_type` set to 'text_image_proj' which requires the keyword argument `image_embeds` to be passed in `added_conditions`"
                 )
 
             image_embeds = added_cond_kwargs.get("image_embeds")
@@ -1234,11 +1260,19 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
             # Kandinsky 2.2 - style
             if "image_embeds" not in added_cond_kwargs:
                 raise ValueError(
-                    f"{self.__class__} has the config param `encoder_hid_dim_type` set to 'image_proj' which requires"
-                    " the keyword argument `image_embeds` to be passed in `added_conditions`"
+                    f"{self.__class__} has the config param `encoder_hid_dim_type` set to 'image_proj' which requires the keyword argument `image_embeds` to be passed in `added_conditions`"
                 )
             image_embeds = added_cond_kwargs.get("image_embeds")
             encoder_hidden_states = self.encoder_hid_proj(image_embeds)
+        elif self.encoder_hid_proj is not None and self.config.encoder_hid_dim_type == "ip_image_proj":
+            if "image_embeds" not in added_cond_kwargs:
+                raise ValueError(
+                    f"{self.__class__} has the config param `encoder_hid_dim_type` set to 'ip_image_proj' which requires the keyword argument `image_embeds` to be passed in `added_conditions`"
+                )
+            image_embeds = added_cond_kwargs.get("image_embeds")
+            image_embeds = self.encoder_hid_proj(image_embeds).to(encoder_hidden_states.dtype)
+            encoder_hidden_states = torch.cat([encoder_hidden_states, image_embeds], dim=1)
+
         # 2. pre-process
         sample = self.conv_in(sample)
 
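The new 'ip_image_proj' branch is the IP-Adapter path: projected image embeddings are concatenated to the text tokens along the sequence dimension, so cross-attention attends to both. A shape sketch (sizes illustrative):

# Shape sketch for the 'ip_image_proj' branch above (sizes illustrative):
# encoder_hidden_states:           (batch, 77, cross_attention_dim)               - text tokens
# encoder_hid_proj(image_embeds):  (batch, num_image_tokens, cross_attention_dim) - image tokens
# after torch.cat([...], dim=1):   (batch, 77 + num_image_tokens, cross_attention_dim)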
@@ -1264,10 +1298,9 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
             deprecate(
                 "T2I should not use down_block_additional_residuals",
                 "1.3.0",
-                "Passing intrablock residual connections with `down_block_additional_residuals` is deprecated"
-                " and will be removed in diffusers 1.3.0. `down_block_additional_residuals` should only be used"
-                " for ControlNet. Please make sure use"
-                " `down_intrablock_additional_residuals` instead. ",
+                "Passing intrablock residual connections with `down_block_additional_residuals` is deprecated \
+                       and will be removed in diffusers 1.3.0. `down_block_additional_residuals` should only be used \
+                       for ControlNet. Please make sure use `down_intrablock_additional_residuals` instead. ",
                 standard_warn=False,
             )
             down_intrablock_additional_residuals = down_block_additional_residuals
@@ -2102,8 +2135,7 @@ class UNetMidBlockFlat(nn.Module):
 
         if attention_head_dim is None:
             logger.warn(
-                "It is not recommend to pass `attention_head_dim=None`. Defaulting `attention_head_dim` to"
-                f" `in_channels`: {in_channels}."
+                f"It is not recommend to pass `attention_head_dim=None`. Defaulting `attention_head_dim` to `in_channels`: {in_channels}."
             )
             attention_head_dim = in_channels
 
diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion.py
RENAMED
@@ -5,10 +5,10 @@ import PIL.Image
 import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModel
 
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import logging
-from ..pipeline_utils import DiffusionPipeline
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import logging
+from ...pipeline_utils import DiffusionPipeline
 from .pipeline_versatile_diffusion_dual_guided import VersatileDiffusionDualGuidedPipeline
 from .pipeline_versatile_diffusion_image_variation import VersatileDiffusionImageVariationPipeline
 from .pipeline_versatile_diffusion_text_to_image import VersatileDiffusionTextToImagePipeline
diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_dual_guided.py
RENAMED
@@ -26,12 +26,12 @@ from transformers import (
     CLIPVisionModelWithProjection,
 )
 
-from ...image_processor import VaeImageProcessor
-from ...models import AutoencoderKL, DualTransformer2DModel, Transformer2DModel, UNet2DConditionModel
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ....image_processor import VaeImageProcessor
+from ....models import AutoencoderKL, DualTransformer2DModel, Transformer2DModel, UNet2DConditionModel
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import deprecate, logging
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from .modeling_text_unet import UNetFlatConditionModel
 
 
@@ -58,6 +58,7 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
             A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
     """
+
     model_cpu_offload_seq = "bert->unet->vqvae"
 
     tokenizer: CLIPTokenizer
diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_image_variation.py
RENAMED
@@ -21,12 +21,12 @@ import torch
 import torch.utils.checkpoint
 from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
 
-from ...image_processor import VaeImageProcessor
-from ...models import AutoencoderKL, UNet2DConditionModel
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ....image_processor import VaeImageProcessor
+from ....models import AutoencoderKL, UNet2DConditionModel
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import deprecate, logging
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -52,6 +52,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
             A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
     """
+
     model_cpu_offload_seq = "bert->unet->vqvae"
 
     image_feature_extractor: CLIPImageProcessor
diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_text_to_image.py
RENAMED
@@ -19,12 +19,12 @@ import torch
 import torch.utils.checkpoint
 from transformers import CLIPImageProcessor, CLIPTextModelWithProjection, CLIPTokenizer
 
-from ...image_processor import VaeImageProcessor
-from ...models import AutoencoderKL, Transformer2DModel, UNet2DConditionModel
-from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging
-from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ....image_processor import VaeImageProcessor
+from ....models import AutoencoderKL, Transformer2DModel, UNet2DConditionModel
+from ....schedulers import KarrasDiffusionSchedulers
+from ....utils import deprecate, logging
+from ....utils.torch_utils import randn_tensor
+from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from .modeling_text_unet import UNetFlatConditionModel
 
 
@@ -51,6 +51,7 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
             A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
     """
+
     model_cpu_offload_seq = "bert->unet->vqvae"
 
     tokenizer: CLIPTokenizer
diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/__init__.py
RENAMED
@@ -1,6 +1,6 @@
 from typing import TYPE_CHECKING
 
-from ...utils import (
+from ....utils import (
     DIFFUSERS_SLOW_IMPORT,
     OptionalDependencyNotAvailable,
     _LazyModule,
@@ -16,7 +16,7 @@ try:
     if not (is_transformers_available() and is_torch_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    from ...utils.dummy_torch_and_transformers_objects import (
+    from ....utils.dummy_torch_and_transformers_objects import (
         LearnedClassifierFreeSamplingEmbeddings,
         VQDiffusionPipeline,
     )
@@ -36,7 +36,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         if not (is_transformers_available() and is_torch_available()):
             raise OptionalDependencyNotAvailable()
     except OptionalDependencyNotAvailable:
-        from ...utils.dummy_torch_and_transformers_objects import (
+        from ....utils.dummy_torch_and_transformers_objects import (
             LearnedClassifierFreeSamplingEmbeddings,
             VQDiffusionPipeline,
         )
diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/pipeline_vq_diffusion.py
RENAMED
@@ -17,11 +17,11 @@ from typing import Callable, List, Optional, Tuple, Union
 import torch
 from transformers import CLIPTextModel, CLIPTokenizer
 
-from ...configuration_utils import ConfigMixin, register_to_config
-from ...models import ModelMixin, Transformer2DModel, VQModel
-from ...schedulers import VQDiffusionScheduler
-from ...utils import logging
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ....configuration_utils import ConfigMixin, register_to_config
+from ....models import ModelMixin, Transformer2DModel, VQModel
+from ....schedulers import VQDiffusionScheduler
+from ....utils import logging
+from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py
CHANGED
@@ -181,7 +181,7 @@ class KandinskyV22Pipeline(DiffusionPipeline):
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your pipeine class.
+                `._callback_tensor_inputs` attribute of your pipeline class.
 
         Examples:
 
diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py
CHANGED
@@ -283,7 +283,7 @@ class KandinskyV22CombinedPipeline(DiffusionPipeline):
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your pipeine class.
+                `._callback_tensor_inputs` attribute of your pipeline class.
 
         Examples:
 
@@ -759,7 +759,7 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
             prior_callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `prior_callback_on_step_end` function. The tensors specified in the
                 list will be passed as `callback_kwargs` argument. You will only be able to include variables listed in
-                the `._callback_tensor_inputs` attribute of your pipeine class.
+                the `._callback_tensor_inputs` attribute of your pipeline class.
             callback_on_step_end (`Callable`, *optional*):
                 A function that calls at the end of each denoising steps during the inference. The function is called
                 with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
@@ -768,7 +768,7 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your pipeine class.
+                `._callback_tensor_inputs` attribute of your pipeline class.
 
 
         Examples:
diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py
CHANGED
@@ -255,7 +255,7 @@ class KandinskyV22Img2ImgPipeline(DiffusionPipeline):
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your pipeine class.
+                `._callback_tensor_inputs` attribute of your pipeline class.
 
         Examples:
 
diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py
CHANGED
@@ -362,7 +362,7 @@ class KandinskyV22InpaintPipeline(DiffusionPipeline):
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your pipeine class.
+                `._callback_tensor_inputs` attribute of your pipeline class.
 
         Examples:
 
diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py
CHANGED
@@ -423,7 +423,7 @@ class KandinskyV22PriorPipeline(DiffusionPipeline):
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your pipeine class.
+                `._callback_tensor_inputs` attribute of your pipeline class.
 
         Examples:
 
diffusers/pipelines/kandinsky3/__init__.py
ADDED
@@ -0,0 +1,49 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_import_structure = {}
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_kandinsky3"] = ["Kandinsky3Pipeline"]
+    _import_structure["pipeline_kandinsky3_img2img"] = ["Kandinsky3Img2ImgPipeline"]
+
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *
+    else:
+        from .pipeline_kandinsky3 import Kandinsky3Pipeline
+        from .pipeline_kandinsky3_img2img import Kandinsky3Img2ImgPipeline
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
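This is the standard lazy-module boilerplate used across diffusers: importing the package only registers _import_structure, and the torch/transformers-backed submodules load on first attribute access. A small sketch (assumes torch and transformers are installed):

# Sketch: _LazyModule defers the real import until a symbol is touched.
import diffusers.pipelines.kandinsky3 as k3  # cheap: no pipeline module imported yet
pipe_cls = k3.Kandinsky3Pipeline             # first access triggers the submodule import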
diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py
ADDED
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+import argparse
+import fnmatch
+
+from safetensors.torch import load_file
+
+from diffusers import Kandinsky3UNet
+
+
+MAPPING = {
+    "to_time_embed.1": "time_embedding.linear_1",
+    "to_time_embed.3": "time_embedding.linear_2",
+    "in_layer": "conv_in",
+    "out_layer.0": "conv_norm_out",
+    "out_layer.2": "conv_out",
+    "down_samples": "down_blocks",
+    "up_samples": "up_blocks",
+    "projection_lin": "encoder_hid_proj.projection_linear",
+    "projection_ln": "encoder_hid_proj.projection_norm",
+    "feature_pooling": "add_time_condition",
+    "to_query": "to_q",
+    "to_key": "to_k",
+    "to_value": "to_v",
+    "output_layer": "to_out.0",
+    "self_attention_block": "attentions.0",
+}
+
+DYNAMIC_MAP = {
+    "resnet_attn_blocks.*.0": "resnets_in.*",
+    "resnet_attn_blocks.*.1": ("attentions.*", 1),
+    "resnet_attn_blocks.*.2": "resnets_out.*",
+}
+# MAPPING = {}
+
+
+def convert_state_dict(unet_state_dict):
+    """
+    Convert the state dict of a U-Net model to match the key format expected by Kandinsky3UNet model.
+    Args:
+        unet_model (torch.nn.Module): The original U-Net model.
+        unet_kandi3_model (torch.nn.Module): The Kandinsky3UNet model to match keys with.
+
+    Returns:
+        OrderedDict: The converted state dictionary.
+    """
+    # Example of renaming logic (this will vary based on your model's architecture)
+    converted_state_dict = {}
+    for key in unet_state_dict:
+        new_key = key
+        for pattern, new_pattern in MAPPING.items():
+            new_key = new_key.replace(pattern, new_pattern)
+
+        for dyn_pattern, dyn_new_pattern in DYNAMIC_MAP.items():
+            has_matched = False
+            if fnmatch.fnmatch(new_key, f"*.{dyn_pattern}.*") and not has_matched:
+                star = int(new_key.split(dyn_pattern.split(".")[0])[-1].split(".")[1])
+
+                if isinstance(dyn_new_pattern, tuple):
+                    new_star = star + dyn_new_pattern[-1]
+                    dyn_new_pattern = dyn_new_pattern[0]
+                else:
+                    new_star = star
+
+                pattern = dyn_pattern.replace("*", str(star))
+                new_pattern = dyn_new_pattern.replace("*", str(new_star))
+
+                new_key = new_key.replace(pattern, new_pattern)
+                has_matched = True
+
+        converted_state_dict[new_key] = unet_state_dict[key]
+
+    return converted_state_dict
+
+
+def main(model_path, output_path):
+    # Load your original U-Net model
+    unet_state_dict = load_file(model_path)
+
+    # Initialize your Kandinsky3UNet model
+    config = {}
+
+    # Convert the state dict
+    converted_state_dict = convert_state_dict(unet_state_dict)
+
+    unet = Kandinsky3UNet(config)
+    unet.load_state_dict(converted_state_dict)
+
+    unet.save_pretrained(output_path)
+    print(f"Converted model saved to {output_path}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Convert U-Net PyTorch model to Kandinsky3UNet format")
+    parser.add_argument("--model_path", type=str, required=True, help="Path to the original U-Net PyTorch model")
+    parser.add_argument("--output_path", type=str, required=True, help="Path to save the converted model")
+
+    args = parser.parse_args()
+    main(args.model_path, args.output_path)