PyPI - diffusers - Versions diffs - 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl - Mend

diffusers 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl

Files changed (238) hide show

diffusers/loaders/__init__.py ADDED Viewed

@@ -0,0 +1,82 @@
+from typing import TYPE_CHECKING
+from ..utils import DIFFUSERS_SLOW_IMPORT, _LazyModule, deprecate
+from ..utils.import_utils import is_torch_available, is_transformers_available
+def text_encoder_lora_state_dict(text_encoder):
+    """Collect the LoRA layer weights of a text encoder's attention projections (deprecated).
+    Args:
+        text_encoder: Model accepted by `text_encoder_attn_modules`. Every attention module it yields must expose
+            `q_proj`, `k_proj`, `v_proj` and `out_proj`, each carrying a `lora_linear_layer`.
+    Returns:
+        dict: Maps `"{module name}.{projection}.lora_linear_layer.{key}"` to the matching entry of that layer's
+            `state_dict()`.
+    """
+    deprecate(
+        "text_encoder_lora_state_dict in `models`",
+        "0.27.0",
+        "`text_encoder_lora_state_dict` is deprecated and will be removed in 0.27.0. Make sure to retrieve the weights using `get_peft_model`. See https://huggingface.co/docs/peft/v0.6.2/en/quicktour#peftmodel for more information.",
+    )
+    # NOTE: `text_encoder_attn_modules` only exists in this module when `transformers` is installed.
+    state_dict = {}
+    for name, module in text_encoder_attn_modules(text_encoder):
+        # Fixed projection order keeps the key order of the returned dict stable.
+        for proj_name in ("q_proj", "k_proj", "v_proj", "out_proj"):
+            lora_layer = getattr(module, proj_name).lora_linear_layer
+            for k, v in lora_layer.state_dict().items():
+                state_dict[f"{name}.{proj_name}.lora_linear_layer.{k}"] = v
+    return state_dict
+if is_transformers_available():
+    def text_encoder_attn_modules(text_encoder):
+        """Return `(name, module)` pairs for the self-attention modules of a CLIP text encoder (deprecated).
+        Args:
+            text_encoder: A `CLIPTextModel` or `CLIPTextModelWithProjection` instance.
+        Returns:
+            list: One `("text_model.encoder.layers.{i}.self_attn", layer.self_attn)` tuple per encoder layer, in
+                layer order.
+        Raises:
+            ValueError: If `text_encoder` is not one of the supported CLIP classes.
+        """
+        deprecate(
+            "text_encoder_attn_modules in `models`",
+            "0.27.0",
+            "`text_encoder_attn_modules` is deprecated and will be removed in 0.27.0. Make sure to retrieve the weights using `get_peft_model`. See https://huggingface.co/docs/peft/v0.6.2/en/quicktour#peftmodel for more information.",
+        )
+        # Imported at call time, so `transformers` is only loaded when this helper is actually used.
+        from transformers import CLIPTextModel, CLIPTextModelWithProjection
+        if not isinstance(text_encoder, (CLIPTextModel, CLIPTextModelWithProjection)):
+            raise ValueError(f"do not know how to get attention modules for: {text_encoder.__class__.__name__}")
+        return [
+            (f"text_model.encoder.layers.{i}.self_attn", layer.self_attn)
+            for i, layer in enumerate(text_encoder.text_model.encoder.layers)
+        ]
+# Maps each submodule name of this package to the list of public names it exports.
+# Entries are only registered for the optional backends that are available.
+_import_structure = {}
+if is_torch_available():
+    _import_structure["single_file"] = ["FromOriginalControlnetMixin", "FromOriginalVAEMixin"]
+    _import_structure["unet"] = ["UNet2DConditionLoadersMixin"]
+    _import_structure["utils"] = ["AttnProcsLayers"]
+    # These loaders additionally need `transformers`.
+    if is_transformers_available():
+        _import_structure["single_file"].extend(["FromSingleFileMixin"])
+        _import_structure["lora"] = ["LoraLoaderMixin", "StableDiffusionXLLoraLoaderMixin"]
+        _import_structure["textual_inversion"] = ["TextualInversionLoaderMixin"]
+        _import_structure["ip_adapter"] = ["IPAdapterMixin"]
+# Under static type checking, or when slow imports are requested, import everything eagerly.
+# The eager imports mirror the names registered in `_import_structure` above.
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    if is_torch_available():
+        from .single_file import FromOriginalControlnetMixin, FromOriginalVAEMixin
+        from .unet import UNet2DConditionLoadersMixin
+        from .utils import AttnProcsLayers
+        if is_transformers_available():
+            from .ip_adapter import IPAdapterMixin
+            from .lora import LoraLoaderMixin, StableDiffusionXLLoraLoaderMixin
+            from .single_file import FromSingleFileMixin
+            from .textual_inversion import TextualInversionLoaderMixin
+else:
+    import sys
+    # Replace this module in `sys.modules` with a `_LazyModule` built from `_import_structure`.
+    # NOTE(review): presumably `_LazyModule` defers each submodule import until first attribute access — confirm
+    # against its implementation in `..utils`.
+    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)

diffusers/loaders/ip_adapter.py ADDED Viewed

@@ -0,0 +1,159 @@
+# Copyright 2023 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from typing import Dict, Union
+import torch
+from huggingface_hub.utils import validate_hf_hub_args
+from safetensors import safe_open
+from ..utils import (
+    _get_model_file,
+    is_transformers_available,
+    logging,
+)
+if is_transformers_available():
+    from transformers import (
+        CLIPImageProcessor,
+        CLIPVisionModelWithProjection,
+    )
+    from ..models.attention_processor import (
+        IPAdapterAttnProcessor,
+        IPAdapterAttnProcessor2_0,
+    )
+logger = logging.get_logger(__name__)
+class IPAdapterMixin:
+    """Mixin for handling IP Adapters."""
+    @validate_hf_hub_args
+    def load_ip_adapter(
+        self,
+        pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
+        subfolder: str,
+        weight_name: str,
+        **kwargs,
+    ):
+        """
+        Load IP Adapter weights into the pipeline's UNet. If the pipeline has an `image_encoder` or
+        `feature_extractor` attribute that is still `None`, it is created here as well.
+        Parameters:
+            pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`):
+                Can be either:
+                    - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on
+                      the Hub.
+                    - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved
+                      with [`ModelMixin.save_pretrained`].
+                    - A [torch state
+                      dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict).
+                      It must contain exactly the keys `image_proj` and `ip_adapter`.
+            subfolder (`str`):
+                The subfolder location of a model file within a larger model repository on the Hub or locally. The
+                image encoder, when it has to be loaded, is looked up in `<subfolder>/image_encoder`.
+            weight_name (`str`):
+                Name of the weight file to load. Files ending in `.safetensors` are read with `safetensors`; any
+                other file is read with `torch.load`.
+            cache_dir (`Union[str, os.PathLike]`, *optional*):
+                Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
+                is not used.
+            force_download (`bool`, *optional*, defaults to `False`):
+                Whether or not to force the (re-)download of the model weights and configuration files, overriding the
+                cached versions if they exist.
+            resume_download (`bool`, *optional*, defaults to `False`):
+                Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
+                incompletely downloaded files are deleted.
+            proxies (`Dict[str, str]`, *optional*):
+                A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
+                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
+            local_files_only (`bool`, *optional*):
+                Whether to only load local model weights and configuration files or not. If set to `True`, the model
+                won't be downloaded from the Hub.
+            token (`str` or *bool*, *optional*):
+                The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
+                `diffusers-cli login` (stored in `~/.huggingface`) is used.
+            revision (`str`, *optional*):
+                The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
+                allowed by Git.
+        Raises:
+            ValueError: If the state dict does not hold exactly the keys `image_proj` and `ip_adapter`, or if the
+                pipeline's `image_encoder` is `None` while the weights were passed as a dict.
+        """
+        # Local import: only needed to build a forward-slash subfolder for the image encoder below.
+        from pathlib import Path
+        # Load the main state dict first.
+        cache_dir = kwargs.pop("cache_dir", None)
+        force_download = kwargs.pop("force_download", False)
+        resume_download = kwargs.pop("resume_download", False)
+        proxies = kwargs.pop("proxies", None)
+        local_files_only = kwargs.pop("local_files_only", None)
+        token = kwargs.pop("token", None)
+        revision = kwargs.pop("revision", None)
+        user_agent = {
+            "file_type": "attn_procs_weights",
+            "framework": "pytorch",
+        }
+        if not isinstance(pretrained_model_name_or_path_or_dict, dict):
+            model_file = _get_model_file(
+                pretrained_model_name_or_path_or_dict,
+                weights_name=weight_name,
+                cache_dir=cache_dir,
+                force_download=force_download,
+                resume_download=resume_download,
+                proxies=proxies,
+                local_files_only=local_files_only,
+                token=token,
+                revision=revision,
+                subfolder=subfolder,
+                user_agent=user_agent,
+            )
+            if weight_name.endswith(".safetensors"):
+                state_dict = {"image_proj": {}, "ip_adapter": {}}
+                with safe_open(model_file, framework="pt", device="cpu") as f:
+                    for key in f.keys():
+                        for group in ("image_proj", "ip_adapter"):
+                            prefix = f"{group}."
+                            if key.startswith(prefix):
+                                # Strip only the leading prefix; `str.replace` would also remove any later
+                                # occurrence of it inside the key.
+                                state_dict[group][key[len(prefix) :]] = f.get_tensor(key)
+                                break
+            else:
+                # NOTE(security): `torch.load` unpickles the file, so only load checkpoints from trusted sources.
+                state_dict = torch.load(model_file, map_location="cpu")
+        else:
+            state_dict = pretrained_model_name_or_path_or_dict
+        # Compare as a set so that a dict holding both keys in either order is accepted.
+        if set(state_dict.keys()) != {"image_proj", "ip_adapter"}:
+            raise ValueError(
+                "The state dict must contain exactly the keys `image_proj` and `ip_adapter`, "
+                f"but found: {list(state_dict.keys())}."
+            )
+        # load CLIP image encoder here if it has not been registered to the pipeline yet
+        if hasattr(self, "image_encoder") and getattr(self, "image_encoder", None) is None:
+            if isinstance(pretrained_model_name_or_path_or_dict, dict):
+                # A raw state dict carries no location to load the encoder from.
+                raise ValueError("`image_encoder` cannot be None when using IP Adapters.")
+            logger.info(f"loading image_encoder from {pretrained_model_name_or_path_or_dict}")
+            image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+                pretrained_model_name_or_path_or_dict,
+                # Always join with "/" so the subfolder does not pick up the OS path separator.
+                subfolder=Path(subfolder, "image_encoder").as_posix(),
+            ).to(self.device, dtype=self.dtype)
+            self.image_encoder = image_encoder
+        # create feature extractor if it has not been registered to the pipeline yet
+        if hasattr(self, "feature_extractor") and getattr(self, "feature_extractor", None) is None:
+            self.feature_extractor = CLIPImageProcessor()
+        # load ip-adapter into unet
+        unet = getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet
+        unet._load_ip_adapter_weights(state_dict)
+    def set_ip_adapter_scale(self, scale):
+        """
+        Set the `scale` attribute of every IP Adapter attention processor in the pipeline's UNet.
+        Parameters:
+            scale: Value assigned to `scale` on each `IPAdapterAttnProcessor` / `IPAdapterAttnProcessor2_0`; other
+                attention processors are left untouched.
+        """
+        unet = getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet
+        for attn_processor in unet.attn_processors.values():
+            if isinstance(attn_processor, (IPAdapterAttnProcessor, IPAdapterAttnProcessor2_0)):
+                attn_processor.scale = scale

diffusers 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl

diffusers 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl