diffusers 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- diffusers/__init__.py +48 -1
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/diffusers_cli.py +1 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/hooks/faster_cache.py +2 -2
- diffusers/hooks/group_offloading.py +128 -29
- diffusers/hooks/hooks.py +2 -2
- diffusers/hooks/layerwise_casting.py +3 -3
- diffusers/hooks/pyramid_attention_broadcast.py +1 -1
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +4 -0
- diffusers/loaders/ip_adapter.py +5 -14
- diffusers/loaders/lora_base.py +212 -111
- diffusers/loaders/lora_conversion_utils.py +275 -34
- diffusers/loaders/lora_pipeline.py +1554 -819
- diffusers/loaders/peft.py +52 -109
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +20 -4
- diffusers/loaders/single_file_utils.py +225 -5
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +1 -1
- diffusers/loaders/transformer_sd3.py +2 -2
- diffusers/loaders/unet.py +2 -16
- diffusers/loaders/unet_loader_utils.py +1 -1
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +15 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +4 -4
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +14 -10
- diffusers/models/auto_model.py +47 -10
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +1 -1
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +16 -15
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +1 -1
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +10 -12
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/modeling_utils.py +44 -14
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +5 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +742 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +317 -25
- diffusers/models/transformers/transformer_cosmos.py +579 -0
- diffusers/models/transformers/transformer_flux.py +9 -11
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +2 -2
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +24 -8
- diffusers/models/transformers/transformer_wan_vace.py +393 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +2 -2
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/pipelines/__init__.py +37 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +6 -7
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +1 -1
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +10 -17
- diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +3 -4
- diffusers/pipelines/pipeline_loading_utils.py +89 -13
- diffusers/pipelines/pipeline_utils.py +105 -33
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +17 -12
- diffusers/pipelines/wan/pipeline_wan_i2v.py +42 -20
- diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +18 -18
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +179 -1
- diffusers/quantizers/base.py +6 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +16 -13
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +8 -8
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -1
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
- diffusers/schedulers/scheduling_utils.py +1 -1
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +13 -5
- diffusers/utils/__init__.py +5 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +120 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
- diffusers/utils/dynamic_modules_utils.py +21 -3
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/import_utils.py +81 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +91 -8
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +59 -7
- diffusers/utils/torch_utils.py +25 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/METADATA +3 -3
- diffusers-0.34.0.dist-info/RECORD +639 -0
- diffusers-0.33.0.dist-info/RECORD +0 -608
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/WHEEL +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
diffusers/loaders/lora_pipeline.py
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -37,12 +37,16 @@ from .lora_base import ( # noqa
     LoraBaseMixin,
     _fetch_state_dict,
     _load_lora_into_text_encoder,
+    _pack_dict_with_prefix,
 )
 from .lora_conversion_utils import (
     _convert_bfl_flux_control_lora_to_diffusers,
     _convert_hunyuan_video_lora_to_diffusers,
     _convert_kohya_flux_lora_to_diffusers,
+    _convert_musubi_wan_lora_to_diffusers,
+    _convert_non_diffusers_hidream_lora_to_diffusers,
     _convert_non_diffusers_lora_to_diffusers,
+    _convert_non_diffusers_ltxv_lora_to_diffusers,
     _convert_non_diffusers_lumina2_lora_to_diffusers,
     _convert_non_diffusers_wan_lora_to_diffusers,
     _convert_xlabs_flux_lora_to_diffusers,
@@ -78,30 +82,36 @@ def _maybe_dequantize_weight_for_expanded_lora(model, module):
         from ..quantizers.gguf.utils import dequantize_gguf_tensor

     is_bnb_4bit_quantized = module.weight.__class__.__name__ == "Params4bit"
+    is_bnb_8bit_quantized = module.weight.__class__.__name__ == "Int8Params"
     is_gguf_quantized = module.weight.__class__.__name__ == "GGUFParameter"

     if is_bnb_4bit_quantized and not is_bitsandbytes_available():
         raise ValueError(
             "The checkpoint seems to have been quantized with `bitsandbytes` (4bits). Install `bitsandbytes` to load quantized checkpoints."
         )
+    if is_bnb_8bit_quantized and not is_bitsandbytes_available():
+        raise ValueError(
+            "The checkpoint seems to have been quantized with `bitsandbytes` (8bits). Install `bitsandbytes` to load quantized checkpoints."
+        )
     if is_gguf_quantized and not is_gguf_available():
         raise ValueError(
             "The checkpoint seems to have been quantized with `gguf`. Install `gguf` to load quantized checkpoints."
         )

     weight_on_cpu = False
-    if
+    if module.weight.device.type == "cpu":
         weight_on_cpu = True

-    if
+    device = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda"
+    if is_bnb_4bit_quantized or is_bnb_8bit_quantized:
         module_weight = dequantize_bnb_weight(
-            module.weight.
-            state=module.weight.quant_state,
+            module.weight.to(device) if weight_on_cpu else module.weight,
+            state=module.weight.quant_state if is_bnb_4bit_quantized else module.state,
             dtype=model.dtype,
         ).data
     elif is_gguf_quantized:
         module_weight = dequantize_gguf_tensor(
-            module.weight.
+            module.weight.to(device) if weight_on_cpu else module.weight,
         )
         module_weight = module_weight.to(model.dtype)
     else:
@@ -126,7 +136,7 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
     def load_lora_weights(
         self,
         pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
-        adapter_name=None,
+        adapter_name: Optional[str] = None,
         hotswap: bool = False,
         **kwargs,
     ):
@@ -153,7 +163,7 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
             low_cpu_mem_usage (`bool`, *optional*):
                 Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
                 weights.
-            hotswap
+            hotswap (`bool`, *optional*):
                 Defaults to `False`. Whether to substitute an existing (LoRA) adapter with the newly loaded adapter
                 in-place. This means that, instead of loading an additional adapter, this will take the existing
                 adapter weights and replace them with the weights of the new adapter. This can be faster and more
@@ -193,7 +203,8 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
             pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy()

         # First, ensure that the checkpoint is a compatible one and can be successfully loaded.
-
+        kwargs["return_lora_metadata"] = True
+        state_dict, network_alphas, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs)

         is_correct_format = all("lora" in key for key in state_dict.keys())
         if not is_correct_format:
@@ -204,6 +215,7 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
             network_alphas=network_alphas,
             unet=getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=self,
             low_cpu_mem_usage=low_cpu_mem_usage,
             hotswap=hotswap,
@@ -217,6 +229,7 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
             lora_scale=self.lora_scale,
             adapter_name=adapter_name,
             _pipeline=self,
+            metadata=metadata,
             low_cpu_mem_usage=low_cpu_mem_usage,
             hotswap=hotswap,
         )
@@ -273,6 +286,8 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
                 The subfolder location of a model file within a larger model repository on the Hub or locally.
             weight_name (`str`, *optional*, defaults to None):
                 Name of the serialized state dict file.
+            return_lora_metadata (`bool`, *optional*, defaults to False):
+                When enabled, additionally return the LoRA adapter metadata, typically found in the state dict.
         """
         # Load the main state dict first which has the LoRA layers for either of
         # UNet and text encoder or both.
@@ -286,18 +301,16 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
         weight_name = kwargs.pop("weight_name", None)
         unet_config = kwargs.pop("unet_config", None)
         use_safetensors = kwargs.pop("use_safetensors", None)
+        return_lora_metadata = kwargs.pop("return_lora_metadata", False)

         allow_pickle = False
         if use_safetensors is None:
             use_safetensors = True
             allow_pickle = True

-        user_agent = {
-            "file_type": "attn_procs_weights",
-            "framework": "pytorch",
-        }
+        user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"}

-        state_dict = _fetch_state_dict(
+        state_dict, metadata = _fetch_state_dict(
             pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict,
             weight_name=weight_name,
             use_safetensors=use_safetensors,
@@ -334,7 +347,8 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
             state_dict = _maybe_map_sgm_blocks_to_diffusers(state_dict, unet_config)
             state_dict, network_alphas = _convert_non_diffusers_lora_to_diffusers(state_dict)

-
+        out = (state_dict, network_alphas, metadata) if return_lora_metadata else (state_dict, network_alphas)
+        return out

     @classmethod
     def load_lora_into_unet(
@@ -346,6 +360,7 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
         _pipeline=None,
         low_cpu_mem_usage=False,
         hotswap: bool = False,
+        metadata=None,
     ):
         """
         This will load the LoRA layers specified in `state_dict` into `unet`.
@@ -367,29 +382,11 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
             low_cpu_mem_usage (`bool`, *optional*):
                 Speed up model loading only loading the pretrained LoRA weights and not initializing the random
                 weights.
-            hotswap
-
-
-
-
-                torch.compile, loading the new adapter does not require recompilation of the model. When using
-                hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
-
-                If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
-                to call an additional method before loading the adapter:
-
-                ```py
-                pipeline = ... # load diffusers pipeline
-                max_rank = ... # the highest rank among all LoRAs that you want to load
-                # call *before* compiling and loading the LoRA adapter
-                pipeline.enable_lora_hotswap(target_rank=max_rank)
-                pipeline.load_lora_weights(file_name)
-                # optionally compile the model now
-                ```
-
-                Note that hotswapping adapters of the text encoder is not yet supported. There are some further
-                limitations to this technique, which are documented here:
-                https://huggingface.co/docs/peft/main/en/package_reference/hotswap
+            hotswap (`bool`, *optional*):
+                See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
+            metadata (`dict`):
+                Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
+                from the state dict.
         """
         if not USE_PEFT_BACKEND:
             raise ValueError("PEFT backend is required for this method.")
@@ -408,6 +405,7 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
             prefix=cls.unet_name,
             network_alphas=network_alphas,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=_pipeline,
             low_cpu_mem_usage=low_cpu_mem_usage,
             hotswap=hotswap,
@@ -425,6 +423,7 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
         _pipeline=None,
         low_cpu_mem_usage=False,
         hotswap: bool = False,
+        metadata=None,
     ):
         """
         This will load the LoRA layers specified in `state_dict` into `text_encoder`
@@ -450,29 +449,11 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
             low_cpu_mem_usage (`bool`, *optional*):
                 Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
                 weights.
-            hotswap
-
-
-
-
-                torch.compile, loading the new adapter does not require recompilation of the model. When using
-                hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
-
-                If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
-                to call an additional method before loading the adapter:
-
-                ```py
-                pipeline = ... # load diffusers pipeline
-                max_rank = ... # the highest rank among all LoRAs that you want to load
-                # call *before* compiling and loading the LoRA adapter
-                pipeline.enable_lora_hotswap(target_rank=max_rank)
-                pipeline.load_lora_weights(file_name)
-                # optionally compile the model now
-                ```
-
-                Note that hotswapping adapters of the text encoder is not yet supported. There are some further
-                limitations to this technique, which are documented here:
-                https://huggingface.co/docs/peft/main/en/package_reference/hotswap
+            hotswap (`bool`, *optional*):
+                See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
+            metadata (`dict`):
+                Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
+                from the state dict.
         """
         _load_lora_into_text_encoder(
             state_dict=state_dict,
@@ -482,6 +463,7 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
             prefix=prefix,
             text_encoder_name=cls.text_encoder_name,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=_pipeline,
             low_cpu_mem_usage=low_cpu_mem_usage,
             hotswap=hotswap,
@@ -497,6 +479,8 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
         weight_name: str = None,
         save_function: Callable = None,
         safe_serialization: bool = True,
+        unet_lora_adapter_metadata=None,
+        text_encoder_lora_adapter_metadata=None,
     ):
         r"""
         Save the LoRA parameters corresponding to the UNet and text encoder.
@@ -519,8 +503,13 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
                 `DIFFUSERS_SAVE_MODE`.
             safe_serialization (`bool`, *optional*, defaults to `True`):
                 Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
+            unet_lora_adapter_metadata:
+                LoRA adapter metadata associated with the unet to be serialized with the state dict.
+            text_encoder_lora_adapter_metadata:
+                LoRA adapter metadata associated with the text encoder to be serialized with the state dict.
         """
         state_dict = {}
+        lora_adapter_metadata = {}

         if not (unet_lora_layers or text_encoder_lora_layers):
             raise ValueError("You must pass at least one of `unet_lora_layers` and `text_encoder_lora_layers`.")
@@ -531,6 +520,14 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
         if text_encoder_lora_layers:
             state_dict.update(cls.pack_weights(text_encoder_lora_layers, cls.text_encoder_name))

+        if unet_lora_adapter_metadata:
+            lora_adapter_metadata.update(_pack_dict_with_prefix(unet_lora_adapter_metadata, cls.unet_name))
+
+        if text_encoder_lora_adapter_metadata:
+            lora_adapter_metadata.update(
+                _pack_dict_with_prefix(text_encoder_lora_adapter_metadata, cls.text_encoder_name)
+            )
+
         # Save the model
         cls.write_lora_layers(
             state_dict=state_dict,
@@ -539,6 +536,7 @@ class StableDiffusionLoraLoaderMixin(LoraBaseMixin):
             weight_name=weight_name,
             save_function=save_function,
             safe_serialization=safe_serialization,
+            lora_adapter_metadata=lora_adapter_metadata,
         )

     def fuse_lora(
@@ -624,6 +622,7 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
         self,
         pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
         adapter_name: Optional[str] = None,
+        hotswap: bool = False,
         **kwargs,
     ):
         """
@@ -650,6 +649,8 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
             low_cpu_mem_usage (`bool`, *optional*):
                 Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
                 weights.
+            hotswap (`bool`, *optional*):
+                See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
             kwargs (`dict`, *optional*):
                 See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
         """
@@ -671,7 +672,8 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
             pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy()

         # First, ensure that the checkpoint is a compatible one and can be successfully loaded.
-
+        kwargs["return_lora_metadata"] = True
+        state_dict, network_alphas, metadata = self.lora_state_dict(
             pretrained_model_name_or_path_or_dict,
             unet_config=self.unet.config,
             **kwargs,
@@ -686,8 +688,10 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
             network_alphas=network_alphas,
             unet=self.unet,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=self,
             low_cpu_mem_usage=low_cpu_mem_usage,
+            hotswap=hotswap,
         )
         self.load_lora_into_text_encoder(
             state_dict,
@@ -696,8 +700,10 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
             prefix=self.text_encoder_name,
             lora_scale=self.lora_scale,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=self,
             low_cpu_mem_usage=low_cpu_mem_usage,
+            hotswap=hotswap,
         )
         self.load_lora_into_text_encoder(
             state_dict,
@@ -706,8 +712,10 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
             prefix=f"{self.text_encoder_name}_2",
             lora_scale=self.lora_scale,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=self,
             low_cpu_mem_usage=low_cpu_mem_usage,
+            hotswap=hotswap,
         )

     @classmethod
@@ -763,6 +771,8 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
                 The subfolder location of a model file within a larger model repository on the Hub or locally.
             weight_name (`str`, *optional*, defaults to None):
                 Name of the serialized state dict file.
+            return_lora_metadata (`bool`, *optional*, defaults to False):
+                When enabled, additionally return the LoRA adapter metadata, typically found in the state dict.
         """
         # Load the main state dict first which has the LoRA layers for either of
         # UNet and text encoder or both.
@@ -776,18 +786,16 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
         weight_name = kwargs.pop("weight_name", None)
         unet_config = kwargs.pop("unet_config", None)
         use_safetensors = kwargs.pop("use_safetensors", None)
+        return_lora_metadata = kwargs.pop("return_lora_metadata", False)

         allow_pickle = False
         if use_safetensors is None:
             use_safetensors = True
             allow_pickle = True

-        user_agent = {
-            "file_type": "attn_procs_weights",
-            "framework": "pytorch",
-        }
+        user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"}

-        state_dict = _fetch_state_dict(
+        state_dict, metadata = _fetch_state_dict(
             pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict,
             weight_name=weight_name,
             use_safetensors=use_safetensors,
@@ -824,7 +832,8 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
             state_dict = _maybe_map_sgm_blocks_to_diffusers(state_dict, unet_config)
             state_dict, network_alphas = _convert_non_diffusers_lora_to_diffusers(state_dict)

-
+        out = (state_dict, network_alphas, metadata) if return_lora_metadata else (state_dict, network_alphas)
+        return out

     @classmethod
     # Copied from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.load_lora_into_unet
@@ -837,6 +846,7 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
         _pipeline=None,
         low_cpu_mem_usage=False,
         hotswap: bool = False,
+        metadata=None,
     ):
         """
         This will load the LoRA layers specified in `state_dict` into `unet`.
@@ -858,29 +868,11 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
             low_cpu_mem_usage (`bool`, *optional*):
                 Speed up model loading only loading the pretrained LoRA weights and not initializing the random
                 weights.
-            hotswap
-
-
-
-
-                torch.compile, loading the new adapter does not require recompilation of the model. When using
-                hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
-
-                If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
-                to call an additional method before loading the adapter:
-
-                ```py
-                pipeline = ... # load diffusers pipeline
-                max_rank = ... # the highest rank among all LoRAs that you want to load
-                # call *before* compiling and loading the LoRA adapter
-                pipeline.enable_lora_hotswap(target_rank=max_rank)
-                pipeline.load_lora_weights(file_name)
-                # optionally compile the model now
-                ```
-
-                Note that hotswapping adapters of the text encoder is not yet supported. There are some further
-                limitations to this technique, which are documented here:
-                https://huggingface.co/docs/peft/main/en/package_reference/hotswap
+            hotswap (`bool`, *optional*):
+                See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
+            metadata (`dict`):
+                Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
+                from the state dict.
         """
         if not USE_PEFT_BACKEND:
             raise ValueError("PEFT backend is required for this method.")
@@ -899,6 +891,7 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
             prefix=cls.unet_name,
             network_alphas=network_alphas,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=_pipeline,
             low_cpu_mem_usage=low_cpu_mem_usage,
             hotswap=hotswap,
@@ -917,6 +910,7 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
         _pipeline=None,
         low_cpu_mem_usage=False,
         hotswap: bool = False,
+        metadata=None,
     ):
         """
         This will load the LoRA layers specified in `state_dict` into `text_encoder`
@@ -942,29 +936,11 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
             low_cpu_mem_usage (`bool`, *optional*):
                 Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
                 weights.
-            hotswap
-
-
-
-
-                torch.compile, loading the new adapter does not require recompilation of the model. When using
-                hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
-
-                If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
-                to call an additional method before loading the adapter:
-
-                ```py
-                pipeline = ... # load diffusers pipeline
-                max_rank = ... # the highest rank among all LoRAs that you want to load
-                # call *before* compiling and loading the LoRA adapter
-                pipeline.enable_lora_hotswap(target_rank=max_rank)
-                pipeline.load_lora_weights(file_name)
-                # optionally compile the model now
-                ```
-
-                Note that hotswapping adapters of the text encoder is not yet supported. There are some further
-                limitations to this technique, which are documented here:
-                https://huggingface.co/docs/peft/main/en/package_reference/hotswap
+            hotswap (`bool`, *optional*):
+                See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
+            metadata (`dict`):
+                Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
+                from the state dict.
         """
         _load_lora_into_text_encoder(
             state_dict=state_dict,
@@ -974,6 +950,7 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
             prefix=prefix,
             text_encoder_name=cls.text_encoder_name,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=_pipeline,
             low_cpu_mem_usage=low_cpu_mem_usage,
             hotswap=hotswap,
@@ -990,6 +967,9 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
         weight_name: str = None,
         save_function: Callable = None,
         safe_serialization: bool = True,
+        unet_lora_adapter_metadata=None,
+        text_encoder_lora_adapter_metadata=None,
+        text_encoder_2_lora_adapter_metadata=None,
     ):
         r"""
         Save the LoRA parameters corresponding to the UNet and text encoder.
@@ -1015,8 +995,15 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
                 `DIFFUSERS_SAVE_MODE`.
             safe_serialization (`bool`, *optional*, defaults to `True`):
                 Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
+            unet_lora_adapter_metadata:
+                LoRA adapter metadata associated with the unet to be serialized with the state dict.
+            text_encoder_lora_adapter_metadata:
+                LoRA adapter metadata associated with the text encoder to be serialized with the state dict.
+            text_encoder_2_lora_adapter_metadata:
+                LoRA adapter metadata associated with the second text encoder to be serialized with the state dict.
         """
         state_dict = {}
+        lora_adapter_metadata = {}

         if not (unet_lora_layers or text_encoder_lora_layers or text_encoder_2_lora_layers):
             raise ValueError(
@@ -1032,6 +1019,19 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
         if text_encoder_2_lora_layers:
             state_dict.update(cls.pack_weights(text_encoder_2_lora_layers, "text_encoder_2"))

+        if unet_lora_adapter_metadata is not None:
+            lora_adapter_metadata.update(_pack_dict_with_prefix(unet_lora_adapter_metadata, cls.unet_name))
+
+        if text_encoder_lora_adapter_metadata:
+            lora_adapter_metadata.update(
+                _pack_dict_with_prefix(text_encoder_lora_adapter_metadata, cls.text_encoder_name)
+            )
+
+        if text_encoder_2_lora_adapter_metadata:
+            lora_adapter_metadata.update(
+                _pack_dict_with_prefix(text_encoder_2_lora_adapter_metadata, "text_encoder_2")
+            )
+
         cls.write_lora_layers(
             state_dict=state_dict,
             save_directory=save_directory,
@@ -1039,6 +1039,7 @@ class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin):
             weight_name=weight_name,
             save_function=save_function,
             safe_serialization=safe_serialization,
+            lora_adapter_metadata=lora_adapter_metadata,
         )

     def fuse_lora(
@@ -1172,6 +1173,8 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
                 allowed by Git.
             subfolder (`str`, *optional*, defaults to `""`):
                 The subfolder location of a model file within a larger model repository on the Hub or locally.
+            return_lora_metadata (`bool`, *optional*, defaults to False):
+                When enabled, additionally return the LoRA adapter metadata, typically found in the state dict.

         """
         # Load the main state dict first which has the LoRA layers for either of
@@ -1185,18 +1188,16 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
         subfolder = kwargs.pop("subfolder", None)
         weight_name = kwargs.pop("weight_name", None)
         use_safetensors = kwargs.pop("use_safetensors", None)
+        return_lora_metadata = kwargs.pop("return_lora_metadata", False)

         allow_pickle = False
         if use_safetensors is None:
             use_safetensors = True
             allow_pickle = True

-        user_agent = {
-            "file_type": "attn_procs_weights",
-            "framework": "pytorch",
-        }
+        user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"}

-        state_dict = _fetch_state_dict(
+        state_dict, metadata = _fetch_state_dict(
             pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict,
             weight_name=weight_name,
             use_safetensors=use_safetensors,
@@ -1217,7 +1218,8 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
             logger.warning(warn_msg)
             state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k}

-
+        out = (state_dict, metadata) if return_lora_metadata else state_dict
+        return out

     def load_lora_weights(
         self,
@@ -1247,29 +1249,8 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
             low_cpu_mem_usage (`bool`, *optional*):
                 Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
                 weights.
-            hotswap
-
-                in-place. This means that, instead of loading an additional adapter, this will take the existing
-                adapter weights and replace them with the weights of the new adapter. This can be faster and more
-                memory efficient. However, the main advantage of hotswapping is that when the model is compiled with
-                torch.compile, loading the new adapter does not require recompilation of the model. When using
-                hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
-
-                If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
-                to call an additional method before loading the adapter:
-
-                ```py
-                pipeline = ... # load diffusers pipeline
-                max_rank = ... # the highest rank among all LoRAs that you want to load
|
-
# call *before* compiling and loading the LoRA adapter
|
1265
|
-
pipeline.enable_lora_hotswap(target_rank=max_rank)
|
1266
|
-
pipeline.load_lora_weights(file_name)
|
1267
|
-
# optionally compile the model now
|
1268
|
-
```
|
1269
|
-
|
1270
|
-
Note that hotswapping adapters of the text encoder is not yet supported. There are some further
|
1271
|
-
limitations to this technique, which are documented here:
|
1272
|
-
https://huggingface.co/docs/peft/main/en/package_reference/hotswap
|
1252
|
+
hotswap (`bool`, *optional*):
|
1253
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
1273
1254
|
kwargs (`dict`, *optional*):
|
1274
1255
|
See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
|
1275
1256
|
"""
|
@@ -1287,7 +1268,8 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
|
|
1287
1268
|
pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy()
|
1288
1269
|
|
1289
1270
|
# First, ensure that the checkpoint is a compatible one and can be successfully loaded.
|
1290
|
-
|
1271
|
+
kwargs["return_lora_metadata"] = True
|
1272
|
+
state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs)
|
1291
1273
|
|
1292
1274
|
is_correct_format = all("lora" in key for key in state_dict.keys())
|
1293
1275
|
if not is_correct_format:
|
@@ -1297,6 +1279,7 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
|
|
1297
1279
|
state_dict,
|
1298
1280
|
transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer,
|
1299
1281
|
adapter_name=adapter_name,
|
1282
|
+
metadata=metadata,
|
1300
1283
|
_pipeline=self,
|
1301
1284
|
low_cpu_mem_usage=low_cpu_mem_usage,
|
1302
1285
|
hotswap=hotswap,
|
@@ -1308,6 +1291,7 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
|
|
1308
1291
|
prefix=self.text_encoder_name,
|
1309
1292
|
lora_scale=self.lora_scale,
|
1310
1293
|
adapter_name=adapter_name,
|
1294
|
+
metadata=metadata,
|
1311
1295
|
_pipeline=self,
|
1312
1296
|
low_cpu_mem_usage=low_cpu_mem_usage,
|
1313
1297
|
hotswap=hotswap,
|
@@ -1319,6 +1303,7 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
|
|
1319
1303
|
prefix=f"{self.text_encoder_name}_2",
|
1320
1304
|
lora_scale=self.lora_scale,
|
1321
1305
|
adapter_name=adapter_name,
|
1306
|
+
metadata=metadata,
|
1322
1307
|
_pipeline=self,
|
1323
1308
|
low_cpu_mem_usage=low_cpu_mem_usage,
|
1324
1309
|
hotswap=hotswap,
|
@@ -1326,7 +1311,14 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
|
|
1326
1311
|
|
1327
1312
|
@classmethod
|
1328
1313
|
def load_lora_into_transformer(
|
1329
|
-
cls,
|
1314
|
+
cls,
|
1315
|
+
state_dict,
|
1316
|
+
transformer,
|
1317
|
+
adapter_name=None,
|
1318
|
+
_pipeline=None,
|
1319
|
+
low_cpu_mem_usage=False,
|
1320
|
+
hotswap: bool = False,
|
1321
|
+
metadata=None,
|
1330
1322
|
):
|
1331
1323
|
"""
|
1332
1324
|
This will load the LoRA layers specified in `state_dict` into `transformer`.
|
@@ -1344,29 +1336,11 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
|
|
1344
1336
|
low_cpu_mem_usage (`bool`, *optional*):
|
1345
1337
|
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
1346
1338
|
weights.
|
1347
|
-
hotswap
|
1348
|
-
|
1349
|
-
|
1350
|
-
|
1351
|
-
|
1352
|
-
torch.compile, loading the new adapter does not require recompilation of the model. When using
|
1353
|
-
hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
|
1354
|
-
|
1355
|
-
If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
|
1356
|
-
to call an additional method before loading the adapter:
|
1357
|
-
|
1358
|
-
```py
|
1359
|
-
pipeline = ... # load diffusers pipeline
|
1360
|
-
max_rank = ... # the highest rank among all LoRAs that you want to load
|
1361
|
-
# call *before* compiling and loading the LoRA adapter
|
1362
|
-
pipeline.enable_lora_hotswap(target_rank=max_rank)
|
1363
|
-
pipeline.load_lora_weights(file_name)
|
1364
|
-
# optionally compile the model now
|
1365
|
-
```
|
1366
|
-
|
1367
|
-
Note that hotswapping adapters of the text encoder is not yet supported. There are some further
|
1368
|
-
limitations to this technique, which are documented here:
|
1369
|
-
https://huggingface.co/docs/peft/main/en/package_reference/hotswap
|
1339
|
+
hotswap (`bool`, *optional*):
|
1340
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
1341
|
+
metadata (`dict`):
|
1342
|
+
Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
|
1343
|
+
from the state dict.
|
1370
1344
|
"""
|
1371
1345
|
if low_cpu_mem_usage and is_peft_version("<", "0.13.0"):
|
1372
1346
|
raise ValueError(
|
@@ -1379,6 +1353,7 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
|
|
1379
1353
|
state_dict,
|
1380
1354
|
network_alphas=None,
|
1381
1355
|
adapter_name=adapter_name,
|
1356
|
+
metadata=metadata,
|
1382
1357
|
_pipeline=_pipeline,
|
1383
1358
|
low_cpu_mem_usage=low_cpu_mem_usage,
|
1384
1359
|
hotswap=hotswap,
|
@@ -1397,6 +1372,7 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
|
|
1397
1372
|
_pipeline=None,
|
1398
1373
|
low_cpu_mem_usage=False,
|
1399
1374
|
hotswap: bool = False,
|
1375
|
+
metadata=None,
|
1400
1376
|
):
|
1401
1377
|
"""
|
1402
1378
|
This will load the LoRA layers specified in `state_dict` into `text_encoder`
|
@@ -1422,29 +1398,11 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
|
|
1422
1398
|
low_cpu_mem_usage (`bool`, *optional*):
|
1423
1399
|
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
1424
1400
|
weights.
|
1425
|
-
hotswap
|
1426
|
-
|
1427
|
-
|
1428
|
-
|
1429
|
-
|
1430
|
-
torch.compile, loading the new adapter does not require recompilation of the model. When using
|
1431
|
-
hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
|
1432
|
-
|
1433
|
-
If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
|
1434
|
-
to call an additional method before loading the adapter:
|
1435
|
-
|
1436
|
-
```py
|
1437
|
-
pipeline = ... # load diffusers pipeline
|
1438
|
-
max_rank = ... # the highest rank among all LoRAs that you want to load
|
1439
|
-
# call *before* compiling and loading the LoRA adapter
|
1440
|
-
pipeline.enable_lora_hotswap(target_rank=max_rank)
|
1441
|
-
pipeline.load_lora_weights(file_name)
|
1442
|
-
# optionally compile the model now
|
1443
|
-
```
|
1444
|
-
|
1445
|
-
Note that hotswapping adapters of the text encoder is not yet supported. There are some further
|
1446
|
-
limitations to this technique, which are documented here:
|
1447
|
-
https://huggingface.co/docs/peft/main/en/package_reference/hotswap
|
1401
|
+
hotswap (`bool`, *optional*):
|
1402
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
1403
|
+
metadata (`dict`):
|
1404
|
+
Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
|
1405
|
+
from the state dict.
|
1448
1406
|
"""
|
1449
1407
|
_load_lora_into_text_encoder(
|
1450
1408
|
state_dict=state_dict,
|
@@ -1454,6 +1412,7 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
|
|
1454
1412
|
prefix=prefix,
|
1455
1413
|
text_encoder_name=cls.text_encoder_name,
|
1456
1414
|
adapter_name=adapter_name,
|
1415
|
+
metadata=metadata,
|
1457
1416
|
_pipeline=_pipeline,
|
1458
1417
|
low_cpu_mem_usage=low_cpu_mem_usage,
|
1459
1418
|
hotswap=hotswap,
|
@@ -1471,6 +1430,9 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
|
|
1471
1430
|
weight_name: str = None,
|
1472
1431
|
save_function: Callable = None,
|
1473
1432
|
safe_serialization: bool = True,
|
1433
|
+
transformer_lora_adapter_metadata=None,
|
1434
|
+
text_encoder_lora_adapter_metadata=None,
|
1435
|
+
text_encoder_2_lora_adapter_metadata=None,
|
1474
1436
|
):
|
1475
1437
|
r"""
|
1476
1438
|
Save the LoRA parameters corresponding to the UNet and text encoder.
|
@@ -1496,8 +1458,15 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
|
|
1496
1458
|
`DIFFUSERS_SAVE_MODE`.
|
1497
1459
|
safe_serialization (`bool`, *optional*, defaults to `True`):
|
1498
1460
|
Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
|
1461
|
+
transformer_lora_adapter_metadata:
|
1462
|
+
LoRA adapter metadata associated with the transformer to be serialized with the state dict.
|
1463
|
+
text_encoder_lora_adapter_metadata:
|
1464
|
+
LoRA adapter metadata associated with the text encoder to be serialized with the state dict.
|
1465
|
+
text_encoder_2_lora_adapter_metadata:
|
1466
|
+
LoRA adapter metadata associated with the second text encoder to be serialized with the state dict.
|
1499
1467
|
"""
|
1500
1468
|
state_dict = {}
|
1469
|
+
lora_adapter_metadata = {}
|
1501
1470
|
|
1502
1471
|
if not (transformer_lora_layers or text_encoder_lora_layers or text_encoder_2_lora_layers):
|
1503
1472
|
raise ValueError(
|
@@ -1513,6 +1482,21 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
|
|
1513
1482
|
if text_encoder_2_lora_layers:
|
1514
1483
|
state_dict.update(cls.pack_weights(text_encoder_2_lora_layers, "text_encoder_2"))
|
1515
1484
|
|
1485
|
+
if transformer_lora_adapter_metadata is not None:
|
1486
|
+
lora_adapter_metadata.update(
|
1487
|
+
_pack_dict_with_prefix(transformer_lora_adapter_metadata, cls.transformer_name)
|
1488
|
+
)
|
1489
|
+
|
1490
|
+
if text_encoder_lora_adapter_metadata:
|
1491
|
+
lora_adapter_metadata.update(
|
1492
|
+
_pack_dict_with_prefix(text_encoder_lora_adapter_metadata, cls.text_encoder_name)
|
1493
|
+
)
|
1494
|
+
|
1495
|
+
if text_encoder_2_lora_adapter_metadata:
|
1496
|
+
lora_adapter_metadata.update(
|
1497
|
+
_pack_dict_with_prefix(text_encoder_2_lora_adapter_metadata, "text_encoder_2")
|
1498
|
+
)
|
1499
|
+
|
1516
1500
|
cls.write_lora_layers(
|
1517
1501
|
state_dict=state_dict,
|
1518
1502
|
save_directory=save_directory,
|
@@ -1520,6 +1504,7 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
|
|
1520
1504
|
weight_name=weight_name,
|
1521
1505
|
save_function=save_function,
|
1522
1506
|
safe_serialization=safe_serialization,
|
1507
|
+
lora_adapter_metadata=lora_adapter_metadata,
|
1523
1508
|
)
|
1524
1509
|
|
1525
1510
|
# Copied from diffusers.loaders.lora_pipeline.StableDiffusionXLLoraLoaderMixin.fuse_lora with unet->transformer
|
@@ -1592,25 +1577,20 @@ class SD3LoraLoaderMixin(LoraBaseMixin):
|
|
1592
1577
|
super().unfuse_lora(components=components, **kwargs)
|
1593
1578
|
|
1594
1579
|
|
1595
|
-
class
|
1580
|
+
class AuraFlowLoraLoaderMixin(LoraBaseMixin):
|
1596
1581
|
r"""
|
1597
|
-
Load LoRA layers into [`
|
1598
|
-
[`CLIPTextModel`](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel).
|
1599
|
-
|
1600
|
-
Specific to [`StableDiffusion3Pipeline`].
|
1582
|
+
Load LoRA layers into [`AuraFlowTransformer2DModel`] Specific to [`AuraFlowPipeline`].
|
1601
1583
|
"""
|
1602
1584
|
|
1603
|
-
_lora_loadable_modules = ["transformer"
|
1585
|
+
_lora_loadable_modules = ["transformer"]
|
1604
1586
|
transformer_name = TRANSFORMER_NAME
|
1605
|
-
text_encoder_name = TEXT_ENCODER_NAME
|
1606
|
-
_control_lora_supported_norm_keys = ["norm_q", "norm_k", "norm_added_q", "norm_added_k"]
|
1607
1587
|
|
1608
1588
|
@classmethod
|
1609
1589
|
@validate_hf_hub_args
|
1590
|
+
# Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.lora_state_dict
|
1610
1591
|
def lora_state_dict(
|
1611
1592
|
cls,
|
1612
1593
|
pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
|
1613
|
-
return_alphas: bool = False,
|
1614
1594
|
**kwargs,
|
1615
1595
|
):
|
1616
1596
|
r"""
|
@@ -1656,6 +1636,8 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
1656
1636
|
allowed by Git.
|
1657
1637
|
subfolder (`str`, *optional*, defaults to `""`):
|
1658
1638
|
The subfolder location of a model file within a larger model repository on the Hub or locally.
|
1639
|
+
return_lora_metadata (`bool`, *optional*, defaults to False):
|
1640
|
+
When enabled, additionally return the LoRA adapter metadata, typically found in the state dict.
|
1659
1641
|
|
1660
1642
|
"""
|
1661
1643
|
# Load the main state dict first which has the LoRA layers for either of
|
@@ -1669,18 +1651,16 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
1669
1651
|
subfolder = kwargs.pop("subfolder", None)
|
1670
1652
|
weight_name = kwargs.pop("weight_name", None)
|
1671
1653
|
use_safetensors = kwargs.pop("use_safetensors", None)
|
1654
|
+
return_lora_metadata = kwargs.pop("return_lora_metadata", False)
|
1672
1655
|
|
1673
1656
|
allow_pickle = False
|
1674
1657
|
if use_safetensors is None:
|
1675
1658
|
use_safetensors = True
|
1676
1659
|
allow_pickle = True
|
1677
1660
|
|
1678
|
-
user_agent = {
|
1679
|
-
"file_type": "attn_procs_weights",
|
1680
|
-
"framework": "pytorch",
|
1681
|
-
}
|
1661
|
+
user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"}
|
1682
1662
|
|
1683
|
-
state_dict = _fetch_state_dict(
|
1663
|
+
state_dict, metadata = _fetch_state_dict(
|
1684
1664
|
pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict,
|
1685
1665
|
weight_name=weight_name,
|
1686
1666
|
use_safetensors=use_safetensors,
|
@@ -1694,101 +1674,453 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
1694
1674
|
user_agent=user_agent,
|
1695
1675
|
allow_pickle=allow_pickle,
|
1696
1676
|
)
|
1677
|
+
|
1697
1678
|
is_dora_scale_present = any("dora_scale" in k for k in state_dict)
|
1698
1679
|
if is_dora_scale_present:
|
1699
1680
|
warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new."
|
1700
1681
|
logger.warning(warn_msg)
|
1701
1682
|
state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k}
|
1702
1683
|
|
1703
|
-
|
1704
|
-
|
1705
|
-
if is_kohya:
|
1706
|
-
state_dict = _convert_kohya_flux_lora_to_diffusers(state_dict)
|
1707
|
-
# Kohya already takes care of scaling the LoRA parameters with alpha.
|
1708
|
-
return (state_dict, None) if return_alphas else state_dict
|
1709
|
-
|
1710
|
-
is_xlabs = any("processor" in k for k in state_dict)
|
1711
|
-
if is_xlabs:
|
1712
|
-
state_dict = _convert_xlabs_flux_lora_to_diffusers(state_dict)
|
1713
|
-
# xlabs doesn't use `alpha`.
|
1714
|
-
return (state_dict, None) if return_alphas else state_dict
|
1715
|
-
|
1716
|
-
is_bfl_control = any("query_norm.scale" in k for k in state_dict)
|
1717
|
-
if is_bfl_control:
|
1718
|
-
state_dict = _convert_bfl_flux_control_lora_to_diffusers(state_dict)
|
1719
|
-
return (state_dict, None) if return_alphas else state_dict
|
1720
|
-
|
1721
|
-
# For state dicts like
|
1722
|
-
# https://huggingface.co/TheLastBen/Jon_Snow_Flux_LoRA
|
1723
|
-
keys = list(state_dict.keys())
|
1724
|
-
network_alphas = {}
|
1725
|
-
for k in keys:
|
1726
|
-
if "alpha" in k:
|
1727
|
-
alpha_value = state_dict.get(k)
|
1728
|
-
if (torch.is_tensor(alpha_value) and torch.is_floating_point(alpha_value)) or isinstance(
|
1729
|
-
alpha_value, float
|
1730
|
-
):
|
1731
|
-
network_alphas[k] = state_dict.pop(k)
|
1732
|
-
else:
|
1733
|
-
raise ValueError(
|
1734
|
-
f"The alpha key ({k}) seems to be incorrect. If you think this error is unexpected, please open as issue."
|
1735
|
-
)
|
1736
|
-
|
1737
|
-
if return_alphas:
|
1738
|
-
return state_dict, network_alphas
|
1739
|
-
else:
|
1740
|
-
return state_dict
|
1684
|
+
out = (state_dict, metadata) if return_lora_metadata else state_dict
|
1685
|
+
return out
|
1741
1686
|
|
1687
|
+
# Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights
|
1742
1688
|
def load_lora_weights(
|
1743
1689
|
self,
|
1744
1690
|
pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
|
1745
|
-
adapter_name=None,
|
1691
|
+
adapter_name: Optional[str] = None,
|
1746
1692
|
hotswap: bool = False,
|
1747
1693
|
**kwargs,
|
1748
1694
|
):
|
1749
1695
|
"""
|
1750
1696
|
Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.transformer` and
|
1751
|
-
`self.text_encoder`.
|
1752
|
-
|
1753
|
-
All kwargs are forwarded to `self.lora_state_dict`.
|
1754
|
-
|
1755
|
-
See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details on how the state dict is
|
1756
|
-
loaded.
|
1757
|
-
|
1697
|
+
`self.text_encoder`. All kwargs are forwarded to `self.lora_state_dict`. See
|
1698
|
+
[`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details on how the state dict is loaded.
|
1758
1699
|
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_transformer`] for more details on how the state
|
1759
1700
|
dict is loaded into `self.transformer`.
|
1760
1701
|
|
1761
1702
|
Parameters:
|
1762
1703
|
pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`):
|
1763
1704
|
See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
|
1764
|
-
kwargs (`dict`, *optional*):
|
1765
|
-
See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
|
1766
1705
|
adapter_name (`str`, *optional*):
|
1767
1706
|
Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
|
1768
1707
|
`default_{i}` where i is the total number of adapters being loaded.
|
1769
1708
|
low_cpu_mem_usage (`bool`, *optional*):
|
1770
|
-
|
1709
|
+
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
1771
1710
|
weights.
|
1772
|
-
hotswap
|
1773
|
-
|
1774
|
-
|
1775
|
-
|
1776
|
-
|
1777
|
-
|
1778
|
-
|
1779
|
-
|
1780
|
-
|
1781
|
-
|
1782
|
-
|
1783
|
-
|
1784
|
-
|
1785
|
-
|
1786
|
-
|
1787
|
-
|
1788
|
-
|
1789
|
-
|
1790
|
-
|
1791
|
-
|
1711
|
+
hotswap (`bool`, *optional*):
|
1712
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
1713
|
+
kwargs (`dict`, *optional*):
|
1714
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
|
1715
|
+
"""
|
1716
|
+
if not USE_PEFT_BACKEND:
|
1717
|
+
raise ValueError("PEFT backend is required for this method.")
|
1718
|
+
|
1719
|
+
low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA)
|
1720
|
+
if low_cpu_mem_usage and is_peft_version("<", "0.13.0"):
|
1721
|
+
raise ValueError(
|
1722
|
+
"`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`."
|
1723
|
+
)
|
1724
|
+
|
1725
|
+
# if a dict is passed, copy it instead of modifying it inplace
|
1726
|
+
if isinstance(pretrained_model_name_or_path_or_dict, dict):
|
1727
|
+
pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy()
|
1728
|
+
|
1729
|
+
# First, ensure that the checkpoint is a compatible one and can be successfully loaded.
|
1730
|
+
kwargs["return_lora_metadata"] = True
|
1731
|
+
state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs)
|
1732
|
+
|
1733
|
+
is_correct_format = all("lora" in key for key in state_dict.keys())
|
1734
|
+
if not is_correct_format:
|
1735
|
+
raise ValueError("Invalid LoRA checkpoint.")
|
1736
|
+
|
1737
|
+
self.load_lora_into_transformer(
|
1738
|
+
state_dict,
|
1739
|
+
transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer,
|
1740
|
+
adapter_name=adapter_name,
|
1741
|
+
metadata=metadata,
|
1742
|
+
_pipeline=self,
|
1743
|
+
low_cpu_mem_usage=low_cpu_mem_usage,
|
1744
|
+
hotswap=hotswap,
|
1745
|
+
)
|
1746
|
+
|
1747
|
+
@classmethod
|
1748
|
+
# Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->AuraFlowTransformer2DModel
|
1749
|
+
def load_lora_into_transformer(
|
1750
|
+
cls,
|
1751
|
+
state_dict,
|
1752
|
+
transformer,
|
1753
|
+
adapter_name=None,
|
1754
|
+
_pipeline=None,
|
1755
|
+
low_cpu_mem_usage=False,
|
1756
|
+
hotswap: bool = False,
|
1757
|
+
metadata=None,
|
1758
|
+
):
|
1759
|
+
"""
|
1760
|
+
This will load the LoRA layers specified in `state_dict` into `transformer`.
|
1761
|
+
|
1762
|
+
Parameters:
|
1763
|
+
state_dict (`dict`):
|
1764
|
+
A standard state dict containing the lora layer parameters. The keys can either be indexed directly
|
1765
|
+
into the unet or prefixed with an additional `unet` which can be used to distinguish between text
|
1766
|
+
encoder lora layers.
|
1767
|
+
transformer (`AuraFlowTransformer2DModel`):
|
1768
|
+
The Transformer model to load the LoRA layers into.
|
1769
|
+
adapter_name (`str`, *optional*):
|
1770
|
+
Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
|
1771
|
+
`default_{i}` where i is the total number of adapters being loaded.
|
1772
|
+
low_cpu_mem_usage (`bool`, *optional*):
|
1773
|
+
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
1774
|
+
weights.
|
1775
|
+
hotswap (`bool`, *optional*):
|
1776
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
1777
|
+
metadata (`dict`):
|
1778
|
+
Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
|
1779
|
+
from the state dict.
|
1780
|
+
"""
|
1781
|
+
if low_cpu_mem_usage and is_peft_version("<", "0.13.0"):
|
1782
|
+
raise ValueError(
|
1783
|
+
"`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`."
|
1784
|
+
)
|
1785
|
+
|
1786
|
+
# Load the layers corresponding to transformer.
|
1787
|
+
logger.info(f"Loading {cls.transformer_name}.")
|
1788
|
+
transformer.load_lora_adapter(
|
1789
|
+
state_dict,
|
1790
|
+
network_alphas=None,
|
1791
|
+
adapter_name=adapter_name,
|
1792
|
+
metadata=metadata,
|
1793
|
+
_pipeline=_pipeline,
|
1794
|
+
low_cpu_mem_usage=low_cpu_mem_usage,
|
1795
|
+
hotswap=hotswap,
|
1796
|
+
)
|
1797
|
+
|
1798
|
+
@classmethod
|
1799
|
+
# Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights
|
1800
|
+
def save_lora_weights(
|
1801
|
+
cls,
|
1802
|
+
save_directory: Union[str, os.PathLike],
|
1803
|
+
transformer_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
|
1804
|
+
is_main_process: bool = True,
|
1805
|
+
weight_name: str = None,
|
1806
|
+
save_function: Callable = None,
|
1807
|
+
safe_serialization: bool = True,
|
1808
|
+
transformer_lora_adapter_metadata: Optional[dict] = None,
|
1809
|
+
):
|
1810
|
+
r"""
|
1811
|
+
Save the LoRA parameters corresponding to the transformer.
|
1812
|
+
|
1813
|
+
Arguments:
|
1814
|
+
save_directory (`str` or `os.PathLike`):
|
1815
|
+
Directory to save LoRA parameters to. Will be created if it doesn't exist.
|
1816
|
+
transformer_lora_layers (`Dict[str, torch.nn.Module]` or `Dict[str, torch.Tensor]`):
|
1817
|
+
State dict of the LoRA layers corresponding to the `transformer`.
|
1818
|
+
is_main_process (`bool`, *optional*, defaults to `True`):
|
1819
|
+
Whether the process calling this is the main process or not. Useful during distributed training and you
|
1820
|
+
need to call this function on all processes. In this case, set `is_main_process=True` only on the main
|
1821
|
+
process to avoid race conditions.
|
1822
|
+
save_function (`Callable`):
|
1823
|
+
The function to use to save the state dictionary. Useful during distributed training when you need to
|
1824
|
+
replace `torch.save` with another method. Can be configured with the environment variable
|
1825
|
+
`DIFFUSERS_SAVE_MODE`.
|
1826
|
+
safe_serialization (`bool`, *optional*, defaults to `True`):
|
1827
|
+
Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
|
1828
|
+
transformer_lora_adapter_metadata:
|
1829
|
+
LoRA adapter metadata associated with the transformer to be serialized with the state dict.
|
1830
|
+
"""
|
1831
|
+
state_dict = {}
|
1832
|
+
lora_adapter_metadata = {}
|
1833
|
+
|
1834
|
+
if not transformer_lora_layers:
|
1835
|
+
raise ValueError("You must pass `transformer_lora_layers`.")
|
1836
|
+
|
1837
|
+
state_dict.update(cls.pack_weights(transformer_lora_layers, cls.transformer_name))
|
1838
|
+
|
1839
|
+
if transformer_lora_adapter_metadata is not None:
|
1840
|
+
lora_adapter_metadata.update(
|
1841
|
+
_pack_dict_with_prefix(transformer_lora_adapter_metadata, cls.transformer_name)
|
1842
|
+
)
|
1843
|
+
|
1844
|
+
# Save the model
|
1845
|
+
cls.write_lora_layers(
|
1846
|
+
state_dict=state_dict,
|
1847
|
+
save_directory=save_directory,
|
1848
|
+
is_main_process=is_main_process,
|
1849
|
+
weight_name=weight_name,
|
1850
|
+
save_function=save_function,
|
1851
|
+
safe_serialization=safe_serialization,
|
1852
|
+
lora_adapter_metadata=lora_adapter_metadata,
|
1853
|
+
)
|
1854
|
+
|
1855
|
+
# Copied from diffusers.loaders.lora_pipeline.SanaLoraLoaderMixin.fuse_lora
|
1856
|
+
def fuse_lora(
|
1857
|
+
self,
|
1858
|
+
components: List[str] = ["transformer"],
|
1859
|
+
lora_scale: float = 1.0,
|
1860
|
+
safe_fusing: bool = False,
|
1861
|
+
adapter_names: Optional[List[str]] = None,
|
1862
|
+
**kwargs,
|
1863
|
+
):
|
1864
|
+
r"""
|
1865
|
+
Fuses the LoRA parameters into the original parameters of the corresponding blocks.
|
1866
|
+
|
1867
|
+
<Tip warning={true}>
|
1868
|
+
|
1869
|
+
This is an experimental API.
|
1870
|
+
|
1871
|
+
</Tip>
|
1872
|
+
|
1873
|
+
Args:
|
1874
|
+
components: (`List[str]`): List of LoRA-injectable components to fuse the LoRAs into.
|
1875
|
+
lora_scale (`float`, defaults to 1.0):
|
1876
|
+
Controls how much to influence the outputs with the LoRA parameters.
|
1877
|
+
safe_fusing (`bool`, defaults to `False`):
|
1878
|
+
Whether to check fused weights for NaN values before fusing and if values are NaN not fusing them.
|
1879
|
+
adapter_names (`List[str]`, *optional*):
|
1880
|
+
Adapter names to be used for fusing. If nothing is passed, all active adapters will be fused.
|
1881
|
+
|
1882
|
+
Example:
|
1883
|
+
|
1884
|
+
```py
|
1885
|
+
from diffusers import DiffusionPipeline
|
1886
|
+
import torch
|
1887
|
+
|
1888
|
+
pipeline = DiffusionPipeline.from_pretrained(
|
1889
|
+
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
|
1890
|
+
).to("cuda")
|
1891
|
+
pipeline.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
|
1892
|
+
pipeline.fuse_lora(lora_scale=0.7)
|
1893
|
+
```
|
1894
|
+
"""
|
1895
|
+
super().fuse_lora(
|
1896
|
+
components=components,
|
1897
|
+
lora_scale=lora_scale,
|
1898
|
+
safe_fusing=safe_fusing,
|
1899
|
+
adapter_names=adapter_names,
|
1900
|
+
**kwargs,
|
1901
|
+
)
|
1902
|
+
|
1903
|
+
# Copied from diffusers.loaders.lora_pipeline.SanaLoraLoaderMixin.unfuse_lora
|
1904
|
+
def unfuse_lora(self, components: List[str] = ["transformer", "text_encoder"], **kwargs):
|
1905
|
+
r"""
|
1906
|
+
Reverses the effect of
|
1907
|
+
[`pipe.fuse_lora()`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.LoraBaseMixin.fuse_lora).
|
1908
|
+
|
1909
|
+
<Tip warning={true}>
|
1910
|
+
|
1911
|
+
This is an experimental API.
|
1912
|
+
|
1913
|
+
</Tip>
|
1914
|
+
|
1915
|
+
Args:
|
1916
|
+
components (`List[str]`): List of LoRA-injectable components to unfuse LoRA from.
|
1917
|
+
unfuse_transformer (`bool`, defaults to `True`): Whether to unfuse the UNet LoRA parameters.
|
1918
|
+
"""
|
1919
|
+
super().unfuse_lora(components=components, **kwargs)
|
1920
|
+
|
1921
|
+
|
1922
|
+
class FluxLoraLoaderMixin(LoraBaseMixin):
|
1923
|
+
r"""
|
1924
|
+
Load LoRA layers into [`FluxTransformer2DModel`],
|
1925
|
+
[`CLIPTextModel`](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel).
|
1926
|
+
|
1927
|
+
Specific to [`StableDiffusion3Pipeline`].
|
1928
|
+
"""
|
1929
|
+
|
1930
|
+
_lora_loadable_modules = ["transformer", "text_encoder"]
|
1931
|
+
transformer_name = TRANSFORMER_NAME
|
1932
|
+
text_encoder_name = TEXT_ENCODER_NAME
|
1933
|
+
_control_lora_supported_norm_keys = ["norm_q", "norm_k", "norm_added_q", "norm_added_k"]
|
1934
|
+
|
1935
|
+
@classmethod
|
1936
|
+
@validate_hf_hub_args
|
1937
|
+
def lora_state_dict(
|
1938
|
+
cls,
|
1939
|
+
pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
|
1940
|
+
return_alphas: bool = False,
|
1941
|
+
**kwargs,
|
1942
|
+
):
|
1943
|
+
r"""
|
1944
|
+
Return state dict for lora weights and the network alphas.
|
1945
|
+
|
1946
|
+
<Tip warning={true}>
|
1947
|
+
|
1948
|
+
We support loading A1111 formatted LoRA checkpoints in a limited capacity.
|
1949
|
+
|
1950
|
+
This function is experimental and might change in the future.
|
1951
|
+
|
1952
|
+
</Tip>
|
1953
|
+
|
1954
|
+
Parameters:
|
1955
|
+
pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`):
|
1956
|
+
Can be either:
|
1957
|
+
|
1958
|
+
- A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on
|
1959
|
+
the Hub.
|
1960
|
+
- A path to a *directory* (for example `./my_model_directory`) containing the model weights saved
|
1961
|
+
with [`ModelMixin.save_pretrained`].
|
1962
|
+
- A [torch state
|
1963
|
+
dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict).
|
1964
|
+
|
1965
|
+
cache_dir (`Union[str, os.PathLike]`, *optional*):
|
1966
|
+
Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
|
1967
|
+
is not used.
|
1968
|
+
force_download (`bool`, *optional*, defaults to `False`):
|
1969
|
+
Whether or not to force the (re-)download of the model weights and configuration files, overriding the
|
1970
|
+
cached versions if they exist.
|
1971
|
+
|
1972
|
+
proxies (`Dict[str, str]`, *optional*):
|
1973
|
+
A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
|
1974
|
+
'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
|
1975
|
+
local_files_only (`bool`, *optional*, defaults to `False`):
|
1976
|
+
Whether to only load local model weights and configuration files or not. If set to `True`, the model
|
1977
|
+
won't be downloaded from the Hub.
|
1978
|
+
token (`str` or *bool*, *optional*):
|
1979
|
+
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
1980
|
+
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
1981
|
+
revision (`str`, *optional*, defaults to `"main"`):
|
1982
|
+
The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
|
1983
|
+
allowed by Git.
|
1984
|
+
subfolder (`str`, *optional*, defaults to `""`):
|
1985
|
+
The subfolder location of a model file within a larger model repository on the Hub or locally.
|
1986
|
+
return_lora_metadata (`bool`, *optional*, defaults to False):
|
1987
|
+
When enabled, additionally return the LoRA adapter metadata, typically found in the state dict.
|
1988
|
+
"""
|
1989
|
+
# Load the main state dict first which has the LoRA layers for either of
|
1990
|
+
# transformer and text encoder or both.
|
1991
|
+
cache_dir = kwargs.pop("cache_dir", None)
|
1992
|
+
force_download = kwargs.pop("force_download", False)
|
1993
|
+
proxies = kwargs.pop("proxies", None)
|
1994
|
+
local_files_only = kwargs.pop("local_files_only", None)
|
1995
|
+
token = kwargs.pop("token", None)
|
1996
|
+
revision = kwargs.pop("revision", None)
|
1997
|
+
subfolder = kwargs.pop("subfolder", None)
|
1998
|
+
weight_name = kwargs.pop("weight_name", None)
|
1999
|
+
use_safetensors = kwargs.pop("use_safetensors", None)
|
2000
|
+
return_lora_metadata = kwargs.pop("return_lora_metadata", False)
|
2001
|
+
|
2002
|
+
allow_pickle = False
|
2003
|
+
if use_safetensors is None:
|
2004
|
+
use_safetensors = True
|
2005
|
+
allow_pickle = True
|
2006
|
+
|
2007
|
+
user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"}
|
2008
|
+
|
2009
|
+
state_dict, metadata = _fetch_state_dict(
|
2010
|
+
pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict,
|
2011
|
+
weight_name=weight_name,
|
2012
|
+
use_safetensors=use_safetensors,
|
2013
|
+
local_files_only=local_files_only,
|
2014
|
+
cache_dir=cache_dir,
|
2015
|
+
force_download=force_download,
|
2016
|
+
proxies=proxies,
|
2017
|
+
token=token,
|
2018
|
+
revision=revision,
|
2019
|
+
subfolder=subfolder,
|
2020
|
+
user_agent=user_agent,
|
2021
|
+
allow_pickle=allow_pickle,
|
2022
|
+
)
|
2023
|
+
is_dora_scale_present = any("dora_scale" in k for k in state_dict)
|
2024
|
+
if is_dora_scale_present:
|
2025
|
+
warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new."
|
2026
|
+
logger.warning(warn_msg)
|
2027
|
+
state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k}
|
2028
|
+
|
2029
|
+
# TODO (sayakpaul): to a follow-up to clean and try to unify the conditions.
|
2030
|
+
is_kohya = any(".lora_down.weight" in k for k in state_dict)
|
2031
|
+
if is_kohya:
|
2032
|
+
state_dict = _convert_kohya_flux_lora_to_diffusers(state_dict)
|
2033
|
+
# Kohya already takes care of scaling the LoRA parameters with alpha.
|
2034
|
+
return cls._prepare_outputs(
|
2035
|
+
state_dict,
|
2036
|
+
metadata=metadata,
|
2037
|
+
alphas=None,
|
2038
|
+
return_alphas=return_alphas,
|
2039
|
+
return_metadata=return_lora_metadata,
|
2040
|
+
)
|
2041
|
+
|
2042
|
+
is_xlabs = any("processor" in k for k in state_dict)
|
2043
|
+
if is_xlabs:
|
2044
|
+
state_dict = _convert_xlabs_flux_lora_to_diffusers(state_dict)
|
2045
|
+
# xlabs doesn't use `alpha`.
|
2046
|
+
return cls._prepare_outputs(
|
2047
|
+
state_dict,
|
2048
|
+
metadata=metadata,
|
2049
|
+
alphas=None,
|
2050
|
+
return_alphas=return_alphas,
|
2051
|
+
return_metadata=return_lora_metadata,
|
2052
|
+
)
|
2053
|
+
|
2054
|
+
is_bfl_control = any("query_norm.scale" in k for k in state_dict)
|
2055
|
+
if is_bfl_control:
|
2056
|
+
state_dict = _convert_bfl_flux_control_lora_to_diffusers(state_dict)
|
2057
|
+
return cls._prepare_outputs(
|
2058
|
+
state_dict,
|
2059
|
+
metadata=metadata,
|
2060
|
+
alphas=None,
|
2061
|
+
return_alphas=return_alphas,
|
2062
|
+
return_metadata=return_lora_metadata,
|
2063
|
+
)
|
2064
|
+
|
2065
|
+
# For state dicts like
|
2066
|
+
# https://huggingface.co/TheLastBen/Jon_Snow_Flux_LoRA
|
2067
|
+
keys = list(state_dict.keys())
|
2068
|
+
network_alphas = {}
|
2069
|
+
for k in keys:
|
2070
|
+
if "alpha" in k:
|
2071
|
+
alpha_value = state_dict.get(k)
|
2072
|
+
if (torch.is_tensor(alpha_value) and torch.is_floating_point(alpha_value)) or isinstance(
|
2073
|
+
alpha_value, float
|
2074
|
+
):
|
2075
|
+
network_alphas[k] = state_dict.pop(k)
|
2076
|
+
else:
|
2077
|
+
raise ValueError(
|
2078
|
+
f"The alpha key ({k}) seems to be incorrect. If you think this error is unexpected, please open as issue."
|
2079
|
+
)
|
2080
|
+
|
2081
|
+
if return_alphas or return_lora_metadata:
|
2082
|
+
return cls._prepare_outputs(
|
2083
|
+
state_dict,
|
2084
|
+
metadata=metadata,
|
2085
|
+
alphas=network_alphas,
|
2086
|
+
return_alphas=return_alphas,
|
2087
|
+
return_metadata=return_lora_metadata,
|
2088
|
+
)
|
2089
|
+
else:
|
2090
|
+
return state_dict
|
2091
|
+
|
2092
|
+
def load_lora_weights(
|
2093
|
+
self,
|
2094
|
+
pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
|
2095
|
+
adapter_name: Optional[str] = None,
|
2096
|
+
hotswap: bool = False,
|
2097
|
+
**kwargs,
|
2098
|
+
):
|
2099
|
+
"""
|
2100
|
+
Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.transformer` and
|
2101
|
+
`self.text_encoder`.
|
2102
|
+
|
2103
|
+
All kwargs are forwarded to `self.lora_state_dict`.
|
2104
|
+
|
2105
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details on how the state dict is
|
2106
|
+
loaded.
|
2107
|
+
|
2108
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_transformer`] for more details on how the state
|
2109
|
+
dict is loaded into `self.transformer`.
|
2110
|
+
|
2111
|
+
Parameters:
|
2112
|
+
pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`):
|
2113
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
|
2114
|
+
adapter_name (`str`, *optional*):
|
2115
|
+
Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
|
2116
|
+
`default_{i}` where i is the total number of adapters being loaded.
|
2117
|
+
low_cpu_mem_usage (`bool`, *optional*):
|
2118
|
+
`Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
2119
|
+
weights.
|
2120
|
+
hotswap (`bool`, *optional*):
|
2121
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
2122
|
+
kwargs (`dict`, *optional*):
|
2123
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
|
1792
2124
|
"""
|
1793
2125
|
if not USE_PEFT_BACKEND:
|
1794
2126
|
raise ValueError("PEFT backend is required for this method.")
|
@@ -1804,7 +2136,8 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
1804
2136
|
pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy()
|
1805
2137
|
|
1806
2138
|
# First, ensure that the checkpoint is a compatible one and can be successfully loaded.
|
1807
|
-
|
2139
|
+
kwargs["return_lora_metadata"] = True
|
2140
|
+
state_dict, network_alphas, metadata = self.lora_state_dict(
|
1808
2141
|
pretrained_model_name_or_path_or_dict, return_alphas=True, **kwargs
|
1809
2142
|
)
|
1810
2143
|
|
@@ -1855,6 +2188,7 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
1855
2188
|
network_alphas=network_alphas,
|
1856
2189
|
transformer=transformer,
|
1857
2190
|
adapter_name=adapter_name,
|
2191
|
+
metadata=metadata,
|
1858
2192
|
_pipeline=self,
|
1859
2193
|
low_cpu_mem_usage=low_cpu_mem_usage,
|
1860
2194
|
hotswap=hotswap,
|
@@ -1874,6 +2208,7 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
1874
2208
|
prefix=self.text_encoder_name,
|
1875
2209
|
lora_scale=self.lora_scale,
|
1876
2210
|
adapter_name=adapter_name,
|
2211
|
+
metadata=metadata,
|
1877
2212
|
_pipeline=self,
|
1878
2213
|
low_cpu_mem_usage=low_cpu_mem_usage,
|
1879
2214
|
hotswap=hotswap,
|
@@ -1886,6 +2221,7 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
1886
2221
|
network_alphas,
|
1887
2222
|
transformer,
|
1888
2223
|
adapter_name=None,
|
2224
|
+
metadata=None,
|
1889
2225
|
_pipeline=None,
|
1890
2226
|
low_cpu_mem_usage=False,
|
1891
2227
|
hotswap: bool = False,
|
@@ -1910,29 +2246,11 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
1910
2246
|
low_cpu_mem_usage (`bool`, *optional*):
|
1911
2247
|
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
1912
2248
|
weights.
|
1913
|
-
hotswap
|
1914
|
-
|
1915
|
-
|
1916
|
-
|
1917
|
-
|
1918
|
-
torch.compile, loading the new adapter does not require recompilation of the model. When using
|
1919
|
-
hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
|
1920
|
-
|
1921
|
-
If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
|
1922
|
-
to call an additional method before loading the adapter:
|
1923
|
-
|
1924
|
-
```py
|
1925
|
-
pipeline = ... # load diffusers pipeline
|
1926
|
-
max_rank = ... # the highest rank among all LoRAs that you want to load
|
1927
|
-
# call *before* compiling and loading the LoRA adapter
|
1928
|
-
pipeline.enable_lora_hotswap(target_rank=max_rank)
|
1929
|
-
pipeline.load_lora_weights(file_name)
|
1930
|
-
# optionally compile the model now
|
1931
|
-
```
|
1932
|
-
|
1933
|
-
Note that hotswapping adapters of the text encoder is not yet supported. There are some further
|
1934
|
-
limitations to this technique, which are documented here:
|
1935
|
-
https://huggingface.co/docs/peft/main/en/package_reference/hotswap
|
2249
|
+
hotswap (`bool`, *optional*):
|
2250
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
2251
|
+
metadata (`dict`):
|
2252
|
+
Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
|
2253
|
+
from the state dict.
|
1936
2254
|
"""
|
1937
2255
|
if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"):
|
1938
2256
|
raise ValueError(
|
@@ -1945,6 +2263,7 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
1945
2263
|
state_dict,
|
1946
2264
|
network_alphas=network_alphas,
|
1947
2265
|
adapter_name=adapter_name,
|
2266
|
+
metadata=metadata,
|
1948
2267
|
_pipeline=_pipeline,
|
1949
2268
|
low_cpu_mem_usage=low_cpu_mem_usage,
|
1950
2269
|
hotswap=hotswap,
|
@@ -1962,7 +2281,7 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
1962
2281
|
prefix = prefix or cls.transformer_name
|
1963
2282
|
for key in list(state_dict.keys()):
|
1964
2283
|
if key.split(".")[0] == prefix:
|
1965
|
-
state_dict[key
|
2284
|
+
state_dict[key.removeprefix(f"{prefix}.")] = state_dict.pop(key)
|
1966
2285
|
|
1967
2286
|
# Find invalid keys
|
1968
2287
|
transformer_state_dict = transformer.state_dict()
|
@@ -2017,6 +2336,7 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
2017
2336
|
_pipeline=None,
|
2018
2337
|
low_cpu_mem_usage=False,
|
2019
2338
|
hotswap: bool = False,
|
2339
|
+
metadata=None,
|
2020
2340
|
):
|
2021
2341
|
"""
|
2022
2342
|
This will load the LoRA layers specified in `state_dict` into `text_encoder`
|
@@ -2040,31 +2360,13 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
2040
2360
|
Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
|
2041
2361
|
`default_{i}` where i is the total number of adapters being loaded.
|
2042
2362
|
low_cpu_mem_usage (`bool`, *optional*):
|
2043
|
-
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
2044
|
-
weights.
|
2045
|
-
hotswap
|
2046
|
-
|
2047
|
-
|
2048
|
-
|
2049
|
-
|
2050
|
-
torch.compile, loading the new adapter does not require recompilation of the model. When using
|
2051
|
-
hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
|
2052
|
-
|
2053
|
-
If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
|
2054
|
-
to call an additional method before loading the adapter:
|
2055
|
-
|
2056
|
-
```py
|
2057
|
-
pipeline = ... # load diffusers pipeline
|
2058
|
-
max_rank = ... # the highest rank among all LoRAs that you want to load
|
2059
|
-
# call *before* compiling and loading the LoRA adapter
|
2060
|
-
pipeline.enable_lora_hotswap(target_rank=max_rank)
|
2061
|
-
pipeline.load_lora_weights(file_name)
|
2062
|
-
# optionally compile the model now
|
2063
|
-
```
|
2064
|
-
|
2065
|
-
Note that hotswapping adapters of the text encoder is not yet supported. There are some further
|
2066
|
-
limitations to this technique, which are documented here:
|
2067
|
-
https://huggingface.co/docs/peft/main/en/package_reference/hotswap
|
2363
|
+
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
2364
|
+
weights.
|
2365
|
+
hotswap (`bool`, *optional*):
|
2366
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
2367
|
+
metadata (`dict`):
|
2368
|
+
Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
|
2369
|
+
from the state dict.
|
2068
2370
|
"""
|
2069
2371
|
_load_lora_into_text_encoder(
|
2070
2372
|
state_dict=state_dict,
|
@@ -2074,6 +2376,7 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
2074
2376
|
prefix=prefix,
|
2075
2377
|
text_encoder_name=cls.text_encoder_name,
|
2076
2378
|
adapter_name=adapter_name,
|
2379
|
+
metadata=metadata,
|
2077
2380
|
_pipeline=_pipeline,
|
2078
2381
|
low_cpu_mem_usage=low_cpu_mem_usage,
|
2079
2382
|
hotswap=hotswap,
|
@@ -2090,6 +2393,8 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
2090
2393
|
weight_name: str = None,
|
2091
2394
|
save_function: Callable = None,
|
2092
2395
|
safe_serialization: bool = True,
|
2396
|
+
transformer_lora_adapter_metadata=None,
|
2397
|
+
text_encoder_lora_adapter_metadata=None,
|
2093
2398
|
):
|
2094
2399
|
r"""
|
2095
2400
|
Save the LoRA parameters corresponding to the UNet and text encoder.
|
@@ -2112,8 +2417,13 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
2112
2417
|
`DIFFUSERS_SAVE_MODE`.
|
2113
2418
|
safe_serialization (`bool`, *optional*, defaults to `True`):
|
2114
2419
|
Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
|
2420
|
+
transformer_lora_adapter_metadata:
|
2421
|
+
LoRA adapter metadata associated with the transformer to be serialized with the state dict.
|
2422
|
+
text_encoder_lora_adapter_metadata:
|
2423
|
+
LoRA adapter metadata associated with the text encoder to be serialized with the state dict.
|
2115
2424
|
"""
|
2116
2425
|
state_dict = {}
|
2426
|
+
lora_adapter_metadata = {}
|
2117
2427
|
|
2118
2428
|
if not (transformer_lora_layers or text_encoder_lora_layers):
|
2119
2429
|
raise ValueError("You must pass at least one of `transformer_lora_layers` and `text_encoder_lora_layers`.")
|
@@ -2124,6 +2434,16 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
2124
2434
|
if text_encoder_lora_layers:
|
2125
2435
|
state_dict.update(cls.pack_weights(text_encoder_lora_layers, cls.text_encoder_name))
|
2126
2436
|
|
2437
|
+
if transformer_lora_adapter_metadata:
|
2438
|
+
lora_adapter_metadata.update(
|
2439
|
+
_pack_dict_with_prefix(transformer_lora_adapter_metadata, cls.transformer_name)
|
2440
|
+
)
|
2441
|
+
|
2442
|
+
if text_encoder_lora_adapter_metadata:
|
2443
|
+
lora_adapter_metadata.update(
|
2444
|
+
_pack_dict_with_prefix(text_encoder_lora_adapter_metadata, cls.text_encoder_name)
|
2445
|
+
)
|
2446
|
+
|
2127
2447
|
# Save the model
|
2128
2448
|
cls.write_lora_layers(
|
2129
2449
|
state_dict=state_dict,
|
@@ -2132,6 +2452,7 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
2132
2452
|
weight_name=weight_name,
|
2133
2453
|
save_function=save_function,
|
2134
2454
|
safe_serialization=safe_serialization,
|
2455
|
+
lora_adapter_metadata=lora_adapter_metadata,
|
2135
2456
|
)
|
2136
2457
|
|
2137
2458
|
def fuse_lora(
|
@@ -2293,7 +2614,7 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
2293
2614
|
) -> bool:
|
2294
2615
|
"""
|
2295
2616
|
Control LoRA expands the shape of the input layer from (3072, 64) to (3072, 128). This method handles that and
|
2296
|
-
generalizes things a bit so that any parameter that needs expansion receives appropriate
|
2617
|
+
generalizes things a bit so that any parameter that needs expansion receives appropriate treatment.
|
2297
2618
|
"""
|
2298
2619
|
state_dict = {}
|
2299
2620
|
if lora_state_dict is not None:
|
@@ -2305,7 +2626,7 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
2305
2626
|
prefix = prefix or cls.transformer_name
|
2306
2627
|
for key in list(state_dict.keys()):
|
2307
2628
|
if key.split(".")[0] == prefix:
|
2308
|
-
state_dict[key
|
2629
|
+
state_dict[key.removeprefix(f"{prefix}.")] = state_dict.pop(key)
|
2309
2630
|
|
2310
2631
|
# Expand transformer parameter shapes if they don't match lora
|
2311
2632
|
has_param_with_shape_update = False
|
@@ -2423,14 +2744,13 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
2423
2744
|
if unexpected_modules:
|
2424
2745
|
logger.debug(f"Found unexpected modules: {unexpected_modules}. These will be ignored.")
|
2425
2746
|
|
2426
|
-
is_peft_loaded = getattr(transformer, "peft_config", None) is not None
|
2427
2747
|
for k in lora_module_names:
|
2428
2748
|
if k in unexpected_modules:
|
2429
2749
|
continue
|
2430
2750
|
|
2431
2751
|
base_param_name = (
|
2432
2752
|
f"{k.replace(prefix, '')}.base_layer.weight"
|
2433
|
-
if
|
2753
|
+
if f"{k.replace(prefix, '')}.base_layer.weight" in transformer_state_dict
|
2434
2754
|
else f"{k.replace(prefix, '')}.weight"
|
2435
2755
|
)
|
2436
2756
|
base_weight_param = transformer_state_dict[base_param_name]
|
@@ -2484,6 +2804,15 @@ class FluxLoraLoaderMixin(LoraBaseMixin):
|
|
2484
2804
|
|
2485
2805
|
raise ValueError("Either `base_module` or `base_weight_param_name` must be provided.")
|
2486
2806
|
|
2807
|
+
@staticmethod
|
2808
|
+
def _prepare_outputs(state_dict, metadata, alphas=None, return_alphas=False, return_metadata=False):
|
2809
|
+
outputs = [state_dict]
|
2810
|
+
if return_alphas:
|
2811
|
+
outputs.append(alphas)
|
2812
|
+
if return_metadata:
|
2813
|
+
outputs.append(metadata)
|
2814
|
+
return tuple(outputs) if (return_alphas or return_metadata) else state_dict
|
2815
|
+
|
2487
2816
|
|
2488
2817
|
# The reason why we subclass from `StableDiffusionLoraLoaderMixin` here is because Amused initially
|
2489
2818
|
# relied on `StableDiffusionLoraLoaderMixin` for its LoRA support.
|
@@ -2500,6 +2829,7 @@ class AmusedLoraLoaderMixin(StableDiffusionLoraLoaderMixin):
|
|
2500
2829
|
network_alphas,
|
2501
2830
|
transformer,
|
2502
2831
|
adapter_name=None,
|
2832
|
+
metadata=None,
|
2503
2833
|
_pipeline=None,
|
2504
2834
|
low_cpu_mem_usage=False,
|
2505
2835
|
hotswap: bool = False,
|
@@ -2524,143 +2854,380 @@ class AmusedLoraLoaderMixin(StableDiffusionLoraLoaderMixin):
|
|
2524
2854
|
low_cpu_mem_usage (`bool`, *optional*):
|
2525
2855
|
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
2526
2856
|
weights.
|
2527
|
-
hotswap
|
2528
|
-
|
2529
|
-
|
2530
|
-
|
2531
|
-
|
2532
|
-
|
2533
|
-
|
2534
|
-
|
2535
|
-
|
2536
|
-
|
2537
|
-
|
2538
|
-
|
2539
|
-
|
2540
|
-
|
2541
|
-
|
2542
|
-
|
2543
|
-
|
2544
|
-
|
2545
|
-
|
2546
|
-
|
2547
|
-
|
2548
|
-
|
2549
|
-
|
2857
|
+
hotswap (`bool`, *optional*):
|
2858
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
2859
|
+
metadata (`dict`):
|
2860
|
+
Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
|
2861
|
+
from the state dict.
|
2862
|
+
"""
|
2863
|
+
if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"):
|
2864
|
+
raise ValueError(
|
2865
|
+
"`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`."
|
2866
|
+
)
|
2867
|
+
|
2868
|
+
# Load the layers corresponding to transformer.
|
2869
|
+
logger.info(f"Loading {cls.transformer_name}.")
|
2870
|
+
transformer.load_lora_adapter(
|
2871
|
+
state_dict,
|
2872
|
+
network_alphas=network_alphas,
|
2873
|
+
adapter_name=adapter_name,
|
2874
|
+
metadata=metadata,
|
2875
|
+
_pipeline=_pipeline,
|
2876
|
+
low_cpu_mem_usage=low_cpu_mem_usage,
|
2877
|
+
hotswap=hotswap,
|
2878
|
+
)
|
2879
|
+
|
2880
|
+
@classmethod
|
2881
|
+
# Copied from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.load_lora_into_text_encoder
|
2882
|
+
def load_lora_into_text_encoder(
|
2883
|
+
cls,
|
2884
|
+
state_dict,
|
2885
|
+
network_alphas,
|
2886
|
+
text_encoder,
|
2887
|
+
prefix=None,
|
2888
|
+
lora_scale=1.0,
|
2889
|
+
adapter_name=None,
|
2890
|
+
_pipeline=None,
|
2891
|
+
low_cpu_mem_usage=False,
|
2892
|
+
hotswap: bool = False,
|
2893
|
+
metadata=None,
|
2894
|
+
):
|
2895
|
+
"""
|
2896
|
+
This will load the LoRA layers specified in `state_dict` into `text_encoder`
|
2897
|
+
|
2898
|
+
Parameters:
|
2899
|
+
state_dict (`dict`):
|
2900
|
+
A standard state dict containing the lora layer parameters. The key should be prefixed with an
|
2901
|
+
additional `text_encoder` to distinguish between unet lora layers.
|
2902
|
+
network_alphas (`Dict[str, float]`):
|
2903
|
+
The value of the network alpha used for stable learning and preventing underflow. This value has the
|
2904
|
+
same meaning as the `--network_alpha` option in the kohya-ss trainer script. Refer to [this
|
2905
|
+
link](https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning).
|
2906
|
+
text_encoder (`CLIPTextModel`):
|
2907
|
+
The text encoder model to load the LoRA layers into.
|
2908
|
+
prefix (`str`):
|
2909
|
+
Expected prefix of the `text_encoder` in the `state_dict`.
|
2910
|
+
lora_scale (`float`):
|
2911
|
+
How much to scale the output of the lora linear layer before it is added with the output of the regular
|
2912
|
+
lora layer.
|
2913
|
+
adapter_name (`str`, *optional*):
|
2914
|
+
Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
|
2915
|
+
`default_{i}` where i is the total number of adapters being loaded.
|
2916
|
+
low_cpu_mem_usage (`bool`, *optional*):
|
2917
|
+
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
2918
|
+
weights.
|
2919
|
+
hotswap (`bool`, *optional*):
|
2920
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
2921
|
+
metadata (`dict`):
|
2922
|
+
Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
|
2923
|
+
from the state dict.
|
2924
|
+
"""
|
2925
|
+
_load_lora_into_text_encoder(
|
2926
|
+
state_dict=state_dict,
|
2927
|
+
network_alphas=network_alphas,
|
2928
|
+
lora_scale=lora_scale,
|
2929
|
+
text_encoder=text_encoder,
|
2930
|
+
prefix=prefix,
|
2931
|
+
text_encoder_name=cls.text_encoder_name,
|
2932
|
+
adapter_name=adapter_name,
|
2933
|
+
metadata=metadata,
|
2934
|
+
_pipeline=_pipeline,
|
2935
|
+
low_cpu_mem_usage=low_cpu_mem_usage,
|
2936
|
+
hotswap=hotswap,
|
2937
|
+
)
|
2938
|
+
|
2939
|
+
@classmethod
|
2940
|
+
def save_lora_weights(
|
2941
|
+
cls,
|
2942
|
+
save_directory: Union[str, os.PathLike],
|
2943
|
+
text_encoder_lora_layers: Dict[str, torch.nn.Module] = None,
|
2944
|
+
transformer_lora_layers: Dict[str, torch.nn.Module] = None,
|
2945
|
+
is_main_process: bool = True,
|
2946
|
+
weight_name: str = None,
|
2947
|
+
save_function: Callable = None,
|
2948
|
+
safe_serialization: bool = True,
|
2949
|
+
):
|
2950
|
+
r"""
|
2951
|
+
Save the LoRA parameters corresponding to the UNet and text encoder.
|
2952
|
+
|
2953
|
+
Arguments:
|
2954
|
+
save_directory (`str` or `os.PathLike`):
|
2955
|
+
Directory to save LoRA parameters to. Will be created if it doesn't exist.
|
2956
|
+
unet_lora_layers (`Dict[str, torch.nn.Module]` or `Dict[str, torch.Tensor]`):
|
2957
|
+
State dict of the LoRA layers corresponding to the `unet`.
|
2958
|
+
text_encoder_lora_layers (`Dict[str, torch.nn.Module]` or `Dict[str, torch.Tensor]`):
|
2959
|
+
State dict of the LoRA layers corresponding to the `text_encoder`. Must explicitly pass the text
|
2960
|
+
encoder LoRA state dict because it comes from 🤗 Transformers.
|
2961
|
+
is_main_process (`bool`, *optional*, defaults to `True`):
|
2962
|
+
Whether the process calling this is the main process or not. Useful during distributed training and you
|
2963
|
+
need to call this function on all processes. In this case, set `is_main_process=True` only on the main
|
2964
|
+
process to avoid race conditions.
|
2965
|
+
save_function (`Callable`):
|
2966
|
+
The function to use to save the state dictionary. Useful during distributed training when you need to
|
2967
|
+
replace `torch.save` with another method. Can be configured with the environment variable
|
2968
|
+
`DIFFUSERS_SAVE_MODE`.
|
2969
|
+
safe_serialization (`bool`, *optional*, defaults to `True`):
|
2970
|
+
Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
|
2971
|
+
"""
|
2972
|
+
state_dict = {}
|
2973
|
+
|
2974
|
+
if not (transformer_lora_layers or text_encoder_lora_layers):
|
2975
|
+
raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.")
|
2976
|
+
|
2977
|
+
if transformer_lora_layers:
|
2978
|
+
state_dict.update(cls.pack_weights(transformer_lora_layers, cls.transformer_name))
|
2979
|
+
|
2980
|
+
if text_encoder_lora_layers:
|
2981
|
+
state_dict.update(cls.pack_weights(text_encoder_lora_layers, cls.text_encoder_name))
|
2982
|
+
|
2983
|
+
# Save the model
|
2984
|
+
cls.write_lora_layers(
|
2985
|
+
state_dict=state_dict,
|
2986
|
+
save_directory=save_directory,
|
2987
|
+
is_main_process=is_main_process,
|
2988
|
+
weight_name=weight_name,
|
2989
|
+
save_function=save_function,
|
2990
|
+
safe_serialization=safe_serialization,
|
2991
|
+
)
|
2992
|
+
|
2993
|
+
|
2994
|
+
class CogVideoXLoraLoaderMixin(LoraBaseMixin):
|
2995
|
+
r"""
|
2996
|
+
Load LoRA layers into [`CogVideoXTransformer3DModel`]. Specific to [`CogVideoXPipeline`].
|
2997
|
+
"""
|
2998
|
+
|
2999
|
+
_lora_loadable_modules = ["transformer"]
|
3000
|
+
transformer_name = TRANSFORMER_NAME
|
3001
|
+
|
3002
|
+
@classmethod
|
3003
|
+
@validate_hf_hub_args
|
3004
|
+
# Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.lora_state_dict
|
3005
|
+
def lora_state_dict(
|
3006
|
+
cls,
|
3007
|
+
pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
|
3008
|
+
**kwargs,
|
3009
|
+
):
|
3010
|
+
r"""
|
3011
|
+
Return state dict for lora weights and the network alphas.
|
3012
|
+
|
3013
|
+
<Tip warning={true}>
|
3014
|
+
|
3015
|
+
We support loading A1111 formatted LoRA checkpoints in a limited capacity.
|
3016
|
+
|
3017
|
+
This function is experimental and might change in the future.
|
3018
|
+
|
3019
|
+
</Tip>
|
3020
|
+
|
3021
|
+
Parameters:
|
3022
|
+
pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`):
|
3023
|
+
Can be either:
|
3024
|
+
|
3025
|
+
- A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on
|
3026
|
+
the Hub.
|
3027
|
+
- A path to a *directory* (for example `./my_model_directory`) containing the model weights saved
|
3028
|
+
with [`ModelMixin.save_pretrained`].
|
3029
|
+
- A [torch state
|
3030
|
+
dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict).
|
3031
|
+
|
3032
|
+
cache_dir (`Union[str, os.PathLike]`, *optional*):
|
3033
|
+
Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
|
3034
|
+
is not used.
|
3035
|
+
force_download (`bool`, *optional*, defaults to `False`):
|
3036
|
+
Whether or not to force the (re-)download of the model weights and configuration files, overriding the
|
3037
|
+
cached versions if they exist.
|
3038
|
+
|
3039
|
+
proxies (`Dict[str, str]`, *optional*):
|
3040
|
+
A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
|
3041
|
+
'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
|
3042
|
+
local_files_only (`bool`, *optional*, defaults to `False`):
|
3043
|
+
Whether to only load local model weights and configuration files or not. If set to `True`, the model
|
3044
|
+
won't be downloaded from the Hub.
|
3045
|
+
token (`str` or *bool*, *optional*):
|
3046
|
+
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
|
3047
|
+
`diffusers-cli login` (stored in `~/.huggingface`) is used.
|
3048
|
+
revision (`str`, *optional*, defaults to `"main"`):
|
3049
|
+
The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
|
3050
|
+
allowed by Git.
|
3051
|
+
subfolder (`str`, *optional*, defaults to `""`):
|
3052
|
+
The subfolder location of a model file within a larger model repository on the Hub or locally.
|
3053
|
+
return_lora_metadata (`bool`, *optional*, defaults to False):
|
3054
|
+
When enabled, additionally return the LoRA adapter metadata, typically found in the state dict.
|
3055
|
+
|
3056
|
+
"""
|
3057
|
+
# Load the main state dict first which has the LoRA layers for either of
|
3058
|
+
# transformer and text encoder or both.
|
3059
|
+
cache_dir = kwargs.pop("cache_dir", None)
|
3060
|
+
force_download = kwargs.pop("force_download", False)
|
3061
|
+
proxies = kwargs.pop("proxies", None)
|
3062
|
+
local_files_only = kwargs.pop("local_files_only", None)
|
3063
|
+
token = kwargs.pop("token", None)
|
3064
|
+
revision = kwargs.pop("revision", None)
|
3065
|
+
subfolder = kwargs.pop("subfolder", None)
|
3066
|
+
weight_name = kwargs.pop("weight_name", None)
|
3067
|
+
use_safetensors = kwargs.pop("use_safetensors", None)
|
3068
|
+
return_lora_metadata = kwargs.pop("return_lora_metadata", False)
|
3069
|
+
|
3070
|
+
allow_pickle = False
|
3071
|
+
if use_safetensors is None:
|
3072
|
+
use_safetensors = True
|
3073
|
+
allow_pickle = True
|
3074
|
+
|
3075
|
+
user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"}
|
3076
|
+
|
3077
|
+
state_dict, metadata = _fetch_state_dict(
|
3078
|
+
pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict,
|
3079
|
+
weight_name=weight_name,
|
3080
|
+
use_safetensors=use_safetensors,
|
3081
|
+
local_files_only=local_files_only,
|
3082
|
+
cache_dir=cache_dir,
|
3083
|
+
force_download=force_download,
|
3084
|
+
proxies=proxies,
|
3085
|
+
token=token,
|
3086
|
+
revision=revision,
|
3087
|
+
subfolder=subfolder,
|
3088
|
+
user_agent=user_agent,
|
3089
|
+
allow_pickle=allow_pickle,
|
3090
|
+
)
|
3091
|
+
|
3092
|
+
is_dora_scale_present = any("dora_scale" in k for k in state_dict)
|
3093
|
+
if is_dora_scale_present:
|
3094
|
+
warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new."
|
3095
|
+
logger.warning(warn_msg)
|
3096
|
+
state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k}
|
3097
|
+
|
3098
|
+
out = (state_dict, metadata) if return_lora_metadata else state_dict
|
3099
|
+
return out
|
3100
|
+
|
3101
|
+
def load_lora_weights(
|
3102
|
+
self,
|
3103
|
+
pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
|
3104
|
+
adapter_name: Optional[str] = None,
|
3105
|
+
hotswap: bool = False,
|
3106
|
+
**kwargs,
|
3107
|
+
):
|
3108
|
+
"""
|
3109
|
+
Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.transformer` and
|
3110
|
+
`self.text_encoder`. All kwargs are forwarded to `self.lora_state_dict`. See
|
3111
|
+
[`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details on how the state dict is loaded.
|
3112
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_transformer`] for more details on how the state
|
3113
|
+
dict is loaded into `self.transformer`.
|
3114
|
+
|
3115
|
+
Parameters:
|
3116
|
+
pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`):
|
3117
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
|
3118
|
+
adapter_name (`str`, *optional*):
|
3119
|
+
Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
|
3120
|
+
`default_{i}` where i is the total number of adapters being loaded.
|
3121
|
+
low_cpu_mem_usage (`bool`, *optional*):
|
3122
|
+
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
3123
|
+
weights.
|
3124
|
+
hotswap (`bool`, *optional*):
|
3125
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
3126
|
+
kwargs (`dict`, *optional*):
|
3127
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
|
2550
3128
|
"""
|
2551
|
-
if
|
3129
|
+
if not USE_PEFT_BACKEND:
|
3130
|
+
raise ValueError("PEFT backend is required for this method.")
|
3131
|
+
|
3132
|
+
low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA)
|
3133
|
+
if low_cpu_mem_usage and is_peft_version("<", "0.13.0"):
|
2552
3134
|
raise ValueError(
|
2553
3135
|
"`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`."
|
2554
3136
|
)
|
2555
3137
|
|
2556
|
-
#
|
2557
|
-
|
2558
|
-
|
3138
|
+
# if a dict is passed, copy it instead of modifying it inplace
|
3139
|
+
if isinstance(pretrained_model_name_or_path_or_dict, dict):
|
3140
|
+
pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy()
|
3141
|
+
|
3142
|
+
# First, ensure that the checkpoint is a compatible one and can be successfully loaded.
|
3143
|
+
kwargs["return_lora_metadata"] = True
|
3144
|
+
state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs)
|
3145
|
+
|
3146
|
+
is_correct_format = all("lora" in key for key in state_dict.keys())
|
3147
|
+
if not is_correct_format:
|
3148
|
+
raise ValueError("Invalid LoRA checkpoint.")
|
3149
|
+
|
3150
|
+
self.load_lora_into_transformer(
|
2559
3151
|
state_dict,
|
2560
|
-
|
3152
|
+
transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer,
|
2561
3153
|
adapter_name=adapter_name,
|
2562
|
-
|
3154
|
+
metadata=metadata,
|
3155
|
+
_pipeline=self,
|
2563
3156
|
low_cpu_mem_usage=low_cpu_mem_usage,
|
2564
3157
|
hotswap=hotswap,
|
2565
3158
|
)
|
2566
3159
|
|
2567
3160
|
@classmethod
|
2568
|
-
# Copied from diffusers.loaders.lora_pipeline.
|
2569
|
-
def
|
3161
|
+
# Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->CogVideoXTransformer3DModel
|
3162
|
+
def load_lora_into_transformer(
|
2570
3163
|
cls,
|
2571
3164
|
state_dict,
|
2572
|
-
|
2573
|
-
text_encoder,
|
2574
|
-
prefix=None,
|
2575
|
-
lora_scale=1.0,
|
3165
|
+
transformer,
|
2576
3166
|
adapter_name=None,
|
2577
3167
|
_pipeline=None,
|
2578
3168
|
low_cpu_mem_usage=False,
|
2579
3169
|
hotswap: bool = False,
|
3170
|
+
metadata=None,
|
2580
3171
|
):
|
2581
3172
|
"""
|
2582
|
-
This will load the LoRA layers specified in `state_dict` into `
|
3173
|
+
This will load the LoRA layers specified in `state_dict` into `transformer`.
|
2583
3174
|
|
2584
3175
|
Parameters:
|
2585
3176
|
state_dict (`dict`):
|
2586
|
-
A standard state dict containing the lora layer parameters. The
|
2587
|
-
additional `
|
2588
|
-
|
2589
|
-
|
2590
|
-
|
2591
|
-
link](https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning).
|
2592
|
-
text_encoder (`CLIPTextModel`):
|
2593
|
-
The text encoder model to load the LoRA layers into.
|
2594
|
-
prefix (`str`):
|
2595
|
-
Expected prefix of the `text_encoder` in the `state_dict`.
|
2596
|
-
lora_scale (`float`):
|
2597
|
-
How much to scale the output of the lora linear layer before it is added with the output of the regular
|
2598
|
-
lora layer.
|
3177
|
+
A standard state dict containing the lora layer parameters. The keys can either be indexed directly
|
3178
|
+
into the unet or prefixed with an additional `unet` which can be used to distinguish between text
|
3179
|
+
encoder lora layers.
|
3180
|
+
transformer (`CogVideoXTransformer3DModel`):
|
3181
|
+
The Transformer model to load the LoRA layers into.
|
2599
3182
|
adapter_name (`str`, *optional*):
|
2600
3183
|
Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
|
2601
3184
|
`default_{i}` where i is the total number of adapters being loaded.
|
2602
3185
|
low_cpu_mem_usage (`bool`, *optional*):
|
2603
3186
|
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
2604
3187
|
weights.
|
2605
|
-
hotswap
|
2606
|
-
|
2607
|
-
|
2608
|
-
|
2609
|
-
|
2610
|
-
torch.compile, loading the new adapter does not require recompilation of the model. When using
|
2611
|
-
hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
|
2612
|
-
|
2613
|
-
If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
|
2614
|
-
to call an additional method before loading the adapter:
|
2615
|
-
|
2616
|
-
```py
|
2617
|
-
pipeline = ... # load diffusers pipeline
|
2618
|
-
max_rank = ... # the highest rank among all LoRAs that you want to load
|
2619
|
-
# call *before* compiling and loading the LoRA adapter
|
2620
|
-
pipeline.enable_lora_hotswap(target_rank=max_rank)
|
2621
|
-
pipeline.load_lora_weights(file_name)
|
2622
|
-
# optionally compile the model now
|
2623
|
-
```
|
2624
|
-
|
2625
|
-
Note that hotswapping adapters of the text encoder is not yet supported. There are some further
|
2626
|
-
limitations to this technique, which are documented here:
|
2627
|
-
https://huggingface.co/docs/peft/main/en/package_reference/hotswap
|
3188
|
+
hotswap (`bool`, *optional*):
|
3189
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
3190
|
+
metadata (`dict`):
|
3191
|
+
Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
|
3192
|
+
from the state dict.
|
2628
3193
|
"""
|
2629
|
-
|
2630
|
-
|
2631
|
-
|
2632
|
-
|
2633
|
-
|
2634
|
-
|
2635
|
-
|
3194
|
+
if low_cpu_mem_usage and is_peft_version("<", "0.13.0"):
|
3195
|
+
raise ValueError(
|
3196
|
+
"`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`."
|
3197
|
+
)
|
3198
|
+
|
3199
|
+
# Load the layers corresponding to transformer.
|
3200
|
+
logger.info(f"Loading {cls.transformer_name}.")
|
3201
|
+
transformer.load_lora_adapter(
|
3202
|
+
state_dict,
|
3203
|
+
network_alphas=None,
|
2636
3204
|
adapter_name=adapter_name,
|
3205
|
+
metadata=metadata,
|
2637
3206
|
_pipeline=_pipeline,
|
2638
3207
|
low_cpu_mem_usage=low_cpu_mem_usage,
|
2639
3208
|
hotswap=hotswap,
|
2640
3209
|
)
|
2641
3210
|
|
2642
3211
|
@classmethod
|
3212
|
+
# Adapted from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.save_lora_weights without support for text encoder
|
2643
3213
|
def save_lora_weights(
|
2644
3214
|
cls,
|
2645
3215
|
save_directory: Union[str, os.PathLike],
|
2646
|
-
|
2647
|
-
transformer_lora_layers: Dict[str, torch.nn.Module] = None,
|
3216
|
+
transformer_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
|
2648
3217
|
is_main_process: bool = True,
|
2649
3218
|
weight_name: str = None,
|
2650
3219
|
save_function: Callable = None,
|
2651
3220
|
safe_serialization: bool = True,
|
3221
|
+
transformer_lora_adapter_metadata: Optional[dict] = None,
|
2652
3222
|
):
|
2653
3223
|
r"""
|
2654
|
-
Save the LoRA parameters corresponding to the
|
3224
|
+
Save the LoRA parameters corresponding to the transformer.
|
2655
3225
|
|
2656
3226
|
Arguments:
|
2657
3227
|
save_directory (`str` or `os.PathLike`):
|
2658
3228
|
Directory to save LoRA parameters to. Will be created if it doesn't exist.
|
2659
|
-
|
2660
|
-
State dict of the LoRA layers corresponding to the `
|
2661
|
-
text_encoder_lora_layers (`Dict[str, torch.nn.Module]` or `Dict[str, torch.Tensor]`):
|
2662
|
-
State dict of the LoRA layers corresponding to the `text_encoder`. Must explicitly pass the text
|
2663
|
-
encoder LoRA state dict because it comes from 🤗 Transformers.
|
3229
|
+
transformer_lora_layers (`Dict[str, torch.nn.Module]` or `Dict[str, torch.Tensor]`):
|
3230
|
+
State dict of the LoRA layers corresponding to the `transformer`.
|
2664
3231
|
is_main_process (`bool`, *optional*, defaults to `True`):
|
2665
3232
|
Whether the process calling this is the main process or not. Useful during distributed training and you
|
2666
3233
|
need to call this function on all processes. In this case, set `is_main_process=True` only on the main
|
@@ -2671,17 +3238,21 @@ class AmusedLoraLoaderMixin(StableDiffusionLoraLoaderMixin):
|
|
2671
3238
|
`DIFFUSERS_SAVE_MODE`.
|
2672
3239
|
safe_serialization (`bool`, *optional*, defaults to `True`):
|
2673
3240
|
Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
|
3241
|
+
transformer_lora_adapter_metadata:
|
3242
|
+
LoRA adapter metadata associated with the transformer to be serialized with the state dict.
|
2674
3243
|
"""
|
2675
3244
|
state_dict = {}
|
3245
|
+
lora_adapter_metadata = {}
|
2676
3246
|
|
2677
|
-
if not
|
2678
|
-
raise ValueError("You must pass
|
3247
|
+
if not transformer_lora_layers:
|
3248
|
+
raise ValueError("You must pass `transformer_lora_layers`.")
|
2679
3249
|
|
2680
|
-
|
2681
|
-
state_dict.update(cls.pack_weights(transformer_lora_layers, cls.transformer_name))
|
3250
|
+
state_dict.update(cls.pack_weights(transformer_lora_layers, cls.transformer_name))
|
2682
3251
|
|
2683
|
-
if
|
2684
|
-
|
3252
|
+
if transformer_lora_adapter_metadata is not None:
|
3253
|
+
lora_adapter_metadata.update(
|
3254
|
+
_pack_dict_with_prefix(transformer_lora_adapter_metadata, cls.transformer_name)
|
3255
|
+
)
|
2685
3256
|
|
2686
3257
|
# Save the model
|
2687
3258
|
cls.write_lora_layers(
|
@@ -2691,12 +3262,77 @@ class AmusedLoraLoaderMixin(StableDiffusionLoraLoaderMixin):
|
|
2691
3262
|
weight_name=weight_name,
|
2692
3263
|
save_function=save_function,
|
2693
3264
|
safe_serialization=safe_serialization,
|
3265
|
+
lora_adapter_metadata=lora_adapter_metadata,
|
2694
3266
|
)
|
2695
3267
|
|
3268
|
+
def fuse_lora(
|
3269
|
+
self,
|
3270
|
+
components: List[str] = ["transformer"],
|
3271
|
+
lora_scale: float = 1.0,
|
3272
|
+
safe_fusing: bool = False,
|
3273
|
+
adapter_names: Optional[List[str]] = None,
|
3274
|
+
**kwargs,
|
3275
|
+
):
|
3276
|
+
r"""
|
3277
|
+
Fuses the LoRA parameters into the original parameters of the corresponding blocks.
|
3278
|
+
|
3279
|
+
<Tip warning={true}>
|
2696
3280
|
|
2697
|
-
|
3281
|
+
This is an experimental API.
|
3282
|
+
|
3283
|
+
</Tip>
|
3284
|
+
|
3285
|
+
Args:
|
3286
|
+
components: (`List[str]`): List of LoRA-injectable components to fuse the LoRAs into.
|
3287
|
+
lora_scale (`float`, defaults to 1.0):
|
3288
|
+
Controls how much to influence the outputs with the LoRA parameters.
|
3289
|
+
safe_fusing (`bool`, defaults to `False`):
|
3290
|
+
Whether to check fused weights for NaN values before fusing and if values are NaN not fusing them.
|
3291
|
+
adapter_names (`List[str]`, *optional*):
|
3292
|
+
Adapter names to be used for fusing. If nothing is passed, all active adapters will be fused.
|
3293
|
+
|
3294
|
+
Example:
|
3295
|
+
|
3296
|
+
```py
|
3297
|
+
from diffusers import DiffusionPipeline
|
3298
|
+
import torch
|
3299
|
+
|
3300
|
+
pipeline = DiffusionPipeline.from_pretrained(
|
3301
|
+
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
|
3302
|
+
).to("cuda")
|
3303
|
+
pipeline.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
|
3304
|
+
pipeline.fuse_lora(lora_scale=0.7)
|
3305
|
+
```
|
3306
|
+
"""
|
3307
|
+
super().fuse_lora(
|
3308
|
+
components=components,
|
3309
|
+
lora_scale=lora_scale,
|
3310
|
+
safe_fusing=safe_fusing,
|
3311
|
+
adapter_names=adapter_names,
|
3312
|
+
**kwargs,
|
3313
|
+
)
|
3314
|
+
|
3315
|
+
def unfuse_lora(self, components: List[str] = ["transformer"], **kwargs):
|
3316
|
+
r"""
|
3317
|
+
Reverses the effect of
|
3318
|
+
[`pipe.fuse_lora()`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.LoraBaseMixin.fuse_lora).
|
3319
|
+
|
3320
|
+
<Tip warning={true}>
|
3321
|
+
|
3322
|
+
This is an experimental API.
|
3323
|
+
|
3324
|
+
</Tip>
|
3325
|
+
|
3326
|
+
Args:
|
3327
|
+
components (`List[str]`): List of LoRA-injectable components to unfuse LoRA from.
|
3328
|
+
unfuse_transformer (`bool`, defaults to `True`): Whether to unfuse the UNet LoRA parameters.
|
3329
|
+
"""
|
3330
|
+
super().unfuse_lora(components=components, **kwargs)
|
3331
|
+
|
3332
|
+
|
3333
|
+
class Mochi1LoraLoaderMixin(LoraBaseMixin):
|
2698
3334
|
r"""
|
2699
|
-
Load LoRA layers into [`
|
3335
|
+
Load LoRA layers into [`MochiTransformer3DModel`]. Specific to [`MochiPipeline`].
|
2700
3336
|
"""
|
2701
3337
|
|
2702
3338
|
_lora_loadable_modules = ["transformer"]
|
@@ -2753,6 +3389,8 @@ class CogVideoXLoraLoaderMixin(LoraBaseMixin):
|
|
2753
3389
|
allowed by Git.
|
2754
3390
|
subfolder (`str`, *optional*, defaults to `""`):
|
2755
3391
|
The subfolder location of a model file within a larger model repository on the Hub or locally.
|
3392
|
+
return_lora_metadata (`bool`, *optional*, defaults to False):
|
3393
|
+
When enabled, additionally return the LoRA adapter metadata, typically found in the state dict.
|
2756
3394
|
|
2757
3395
|
"""
|
2758
3396
|
# Load the main state dict first which has the LoRA layers for either of
|
@@ -2766,18 +3404,16 @@ class CogVideoXLoraLoaderMixin(LoraBaseMixin):
|
|
2766
3404
|
subfolder = kwargs.pop("subfolder", None)
|
2767
3405
|
weight_name = kwargs.pop("weight_name", None)
|
2768
3406
|
use_safetensors = kwargs.pop("use_safetensors", None)
|
3407
|
+
return_lora_metadata = kwargs.pop("return_lora_metadata", False)
|
2769
3408
|
|
2770
3409
|
allow_pickle = False
|
2771
3410
|
if use_safetensors is None:
|
2772
3411
|
use_safetensors = True
|
2773
3412
|
allow_pickle = True
|
2774
3413
|
|
2775
|
-
user_agent = {
|
2776
|
-
"file_type": "attn_procs_weights",
|
2777
|
-
"framework": "pytorch",
|
2778
|
-
}
|
3414
|
+
user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"}
|
2779
3415
|
|
2780
|
-
state_dict = _fetch_state_dict(
|
3416
|
+
state_dict, metadata = _fetch_state_dict(
|
2781
3417
|
pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict,
|
2782
3418
|
weight_name=weight_name,
|
2783
3419
|
use_safetensors=use_safetensors,
|
@@ -2798,10 +3434,16 @@ class CogVideoXLoraLoaderMixin(LoraBaseMixin):
|
|
2798
3434
|
logger.warning(warn_msg)
|
2799
3435
|
state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k}
|
2800
3436
|
|
2801
|
-
|
3437
|
+
out = (state_dict, metadata) if return_lora_metadata else state_dict
|
3438
|
+
return out
|
2802
3439
|
|
3440
|
+
# Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights
|
2803
3441
|
def load_lora_weights(
|
2804
|
-
self,
|
3442
|
+
self,
|
3443
|
+
pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
|
3444
|
+
adapter_name: Optional[str] = None,
|
3445
|
+
hotswap: bool = False,
|
3446
|
+
**kwargs,
|
2805
3447
|
):
|
2806
3448
|
"""
|
2807
3449
|
Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.transformer` and
|
@@ -2819,6 +3461,8 @@ class CogVideoXLoraLoaderMixin(LoraBaseMixin):
|
|
2819
3461
|
low_cpu_mem_usage (`bool`, *optional*):
|
2820
3462
|
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
2821
3463
|
weights.
|
3464
|
+
hotswap (`bool`, *optional*):
|
3465
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
2822
3466
|
kwargs (`dict`, *optional*):
|
2823
3467
|
See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
|
2824
3468
|
"""
|
@@ -2836,7 +3480,8 @@ class CogVideoXLoraLoaderMixin(LoraBaseMixin):
|
|
2836
3480
|
pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy()
|
2837
3481
|
|
2838
3482
|
# First, ensure that the checkpoint is a compatible one and can be successfully loaded.
|
2839
|
-
|
3483
|
+
kwargs["return_lora_metadata"] = True
|
3484
|
+
state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs)
|
2840
3485
|
|
2841
3486
|
is_correct_format = all("lora" in key for key in state_dict.keys())
|
2842
3487
|
if not is_correct_format:
|
@@ -2846,54 +3491,45 @@ class CogVideoXLoraLoaderMixin(LoraBaseMixin):
|
|
2846
3491
|
state_dict,
|
2847
3492
|
transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer,
|
2848
3493
|
adapter_name=adapter_name,
|
3494
|
+
metadata=metadata,
|
2849
3495
|
_pipeline=self,
|
2850
3496
|
low_cpu_mem_usage=low_cpu_mem_usage,
|
3497
|
+
hotswap=hotswap,
|
2851
3498
|
)
|
2852
3499
|
|
2853
3500
|
@classmethod
|
2854
|
-
# Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->
|
3501
|
+
# Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->MochiTransformer3DModel
|
2855
3502
|
def load_lora_into_transformer(
|
2856
|
-
cls,
|
3503
|
+
cls,
|
3504
|
+
state_dict,
|
3505
|
+
transformer,
|
3506
|
+
adapter_name=None,
|
3507
|
+
_pipeline=None,
|
3508
|
+
low_cpu_mem_usage=False,
|
3509
|
+
hotswap: bool = False,
|
3510
|
+
metadata=None,
|
2857
3511
|
):
|
2858
3512
|
"""
|
2859
3513
|
This will load the LoRA layers specified in `state_dict` into `transformer`.
|
2860
3514
|
|
2861
|
-
Parameters:
|
2862
|
-
state_dict (`dict`):
|
2863
|
-
A standard state dict containing the lora layer parameters. The keys can either be indexed directly
|
2864
|
-
into the unet or prefixed with an additional `unet` which can be used to distinguish between text
|
2865
|
-
encoder lora layers.
|
2866
|
-
transformer (`
|
2867
|
-
The Transformer model to load the LoRA layers into.
|
2868
|
-
adapter_name (`str`, *optional*):
|
2869
|
-
Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
|
2870
|
-
`default_{i}` where i is the total number of adapters being loaded.
|
2871
|
-
low_cpu_mem_usage (`bool`, *optional*):
|
2872
|
-
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
2873
|
-
weights.
|
2874
|
-
hotswap
|
2875
|
-
|
2876
|
-
|
2877
|
-
|
2878
|
-
|
2879
|
-
torch.compile, loading the new adapter does not require recompilation of the model. When using
|
2880
|
-
hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
|
2881
|
-
|
2882
|
-
If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
|
2883
|
-
to call an additional method before loading the adapter:
|
2884
|
-
|
2885
|
-
```py
|
2886
|
-
pipeline = ... # load diffusers pipeline
|
2887
|
-
max_rank = ... # the highest rank among all LoRAs that you want to load
|
2888
|
-
# call *before* compiling and loading the LoRA adapter
|
2889
|
-
pipeline.enable_lora_hotswap(target_rank=max_rank)
|
2890
|
-
pipeline.load_lora_weights(file_name)
|
2891
|
-
# optionally compile the model now
|
2892
|
-
```
|
2893
|
-
|
2894
|
-
Note that hotswapping adapters of the text encoder is not yet supported. There are some further
|
2895
|
-
limitations to this technique, which are documented here:
|
2896
|
-
https://huggingface.co/docs/peft/main/en/package_reference/hotswap
|
3515
|
+
Parameters:
|
3516
|
+
state_dict (`dict`):
|
3517
|
+
A standard state dict containing the lora layer parameters. The keys can either be indexed directly
|
3518
|
+
into the unet or prefixed with an additional `unet` which can be used to distinguish between text
|
3519
|
+
encoder lora layers.
|
3520
|
+
transformer (`MochiTransformer3DModel`):
|
3521
|
+
The Transformer model to load the LoRA layers into.
|
3522
|
+
adapter_name (`str`, *optional*):
|
3523
|
+
Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
|
3524
|
+
`default_{i}` where i is the total number of adapters being loaded.
|
3525
|
+
low_cpu_mem_usage (`bool`, *optional*):
|
3526
|
+
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
3527
|
+
weights.
|
3528
|
+
hotswap (`bool`, *optional*):
|
3529
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
3530
|
+
metadata (`dict`):
|
3531
|
+
Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
|
3532
|
+
from the state dict.
|
2897
3533
|
"""
|
2898
3534
|
if low_cpu_mem_usage and is_peft_version("<", "0.13.0"):
|
2899
3535
|
raise ValueError(
|
@@ -2906,13 +3542,14 @@ class CogVideoXLoraLoaderMixin(LoraBaseMixin):
|
|
2906
3542
|
state_dict,
|
2907
3543
|
network_alphas=None,
|
2908
3544
|
adapter_name=adapter_name,
|
3545
|
+
metadata=metadata,
|
2909
3546
|
_pipeline=_pipeline,
|
2910
3547
|
low_cpu_mem_usage=low_cpu_mem_usage,
|
2911
3548
|
hotswap=hotswap,
|
2912
3549
|
)
|
2913
3550
|
|
2914
3551
|
@classmethod
|
2915
|
-
#
|
3552
|
+
# Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights
|
2916
3553
|
def save_lora_weights(
|
2917
3554
|
cls,
|
2918
3555
|
save_directory: Union[str, os.PathLike],
|
@@ -2921,9 +3558,10 @@ class CogVideoXLoraLoaderMixin(LoraBaseMixin):
|
|
2921
3558
|
weight_name: str = None,
|
2922
3559
|
save_function: Callable = None,
|
2923
3560
|
safe_serialization: bool = True,
|
3561
|
+
transformer_lora_adapter_metadata: Optional[dict] = None,
|
2924
3562
|
):
|
2925
3563
|
r"""
|
2926
|
-
Save the LoRA parameters corresponding to the
|
3564
|
+
Save the LoRA parameters corresponding to the transformer.
|
2927
3565
|
|
2928
3566
|
Arguments:
|
2929
3567
|
save_directory (`str` or `os.PathLike`):
|
@@ -2940,14 +3578,21 @@ class CogVideoXLoraLoaderMixin(LoraBaseMixin):
|
|
2940
3578
|
`DIFFUSERS_SAVE_MODE`.
|
2941
3579
|
safe_serialization (`bool`, *optional*, defaults to `True`):
|
2942
3580
|
Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
|
3581
|
+
transformer_lora_adapter_metadata:
|
3582
|
+
LoRA adapter metadata associated with the transformer to be serialized with the state dict.
|
2943
3583
|
"""
|
2944
3584
|
state_dict = {}
|
3585
|
+
lora_adapter_metadata = {}
|
2945
3586
|
|
2946
3587
|
if not transformer_lora_layers:
|
2947
3588
|
raise ValueError("You must pass `transformer_lora_layers`.")
|
2948
3589
|
|
2949
|
-
|
2950
|
-
|
3590
|
+
state_dict.update(cls.pack_weights(transformer_lora_layers, cls.transformer_name))
|
3591
|
+
|
3592
|
+
if transformer_lora_adapter_metadata is not None:
|
3593
|
+
lora_adapter_metadata.update(
|
3594
|
+
_pack_dict_with_prefix(transformer_lora_adapter_metadata, cls.transformer_name)
|
3595
|
+
)
|
2951
3596
|
|
2952
3597
|
# Save the model
|
2953
3598
|
cls.write_lora_layers(
|
@@ -2957,8 +3602,10 @@ class CogVideoXLoraLoaderMixin(LoraBaseMixin):
|
|
2957
3602
|
weight_name=weight_name,
|
2958
3603
|
save_function=save_function,
|
2959
3604
|
safe_serialization=safe_serialization,
|
3605
|
+
lora_adapter_metadata=lora_adapter_metadata,
|
2960
3606
|
)
|
2961
3607
|
|
3608
|
+
# Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora
|
2962
3609
|
def fuse_lora(
|
2963
3610
|
self,
|
2964
3611
|
components: List[str] = ["transformer"],
|
@@ -3006,6 +3653,7 @@ class CogVideoXLoraLoaderMixin(LoraBaseMixin):
|
|
3006
3653
|
**kwargs,
|
3007
3654
|
)
|
3008
3655
|
|
3656
|
+
# Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.unfuse_lora
|
3009
3657
|
def unfuse_lora(self, components: List[str] = ["transformer"], **kwargs):
|
3010
3658
|
r"""
|
3011
3659
|
Reverses the effect of
|
@@ -3024,9 +3672,9 @@ class CogVideoXLoraLoaderMixin(LoraBaseMixin):
|
|
3024
3672
|
super().unfuse_lora(components=components, **kwargs)
|
3025
3673
|
|
3026
3674
|
|
3027
|
-
class
|
3675
|
+
class LTXVideoLoraLoaderMixin(LoraBaseMixin):
|
3028
3676
|
r"""
|
3029
|
-
Load LoRA layers into [`
|
3677
|
+
Load LoRA layers into [`LTXVideoTransformer3DModel`]. Specific to [`LTXPipeline`].
|
3030
3678
|
"""
|
3031
3679
|
|
3032
3680
|
_lora_loadable_modules = ["transformer"]
|
@@ -3034,7 +3682,6 @@ class Mochi1LoraLoaderMixin(LoraBaseMixin):
|
|
3034
3682
|
|
3035
3683
|
@classmethod
|
3036
3684
|
@validate_hf_hub_args
|
3037
|
-
# Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.lora_state_dict
|
3038
3685
|
def lora_state_dict(
|
3039
3686
|
cls,
|
3040
3687
|
pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
|
@@ -3083,7 +3730,8 @@ class Mochi1LoraLoaderMixin(LoraBaseMixin):
|
|
3083
3730
|
allowed by Git.
|
3084
3731
|
subfolder (`str`, *optional*, defaults to `""`):
|
3085
3732
|
The subfolder location of a model file within a larger model repository on the Hub or locally.
|
3086
|
-
|
3733
|
+
return_lora_metadata (`bool`, *optional*, defaults to False):
|
3734
|
+
When enabled, additionally return the LoRA adapter metadata, typically found in the state dict.
|
3087
3735
|
"""
|
3088
3736
|
# Load the main state dict first which has the LoRA layers for either of
|
3089
3737
|
# transformer and text encoder or both.
|
@@ -3096,18 +3744,16 @@ class Mochi1LoraLoaderMixin(LoraBaseMixin):
|
|
3096
3744
|
subfolder = kwargs.pop("subfolder", None)
|
3097
3745
|
weight_name = kwargs.pop("weight_name", None)
|
3098
3746
|
use_safetensors = kwargs.pop("use_safetensors", None)
|
3747
|
+
return_lora_metadata = kwargs.pop("return_lora_metadata", False)
|
3099
3748
|
|
3100
3749
|
allow_pickle = False
|
3101
3750
|
if use_safetensors is None:
|
3102
3751
|
use_safetensors = True
|
3103
3752
|
allow_pickle = True
|
3104
3753
|
|
3105
|
-
user_agent = {
|
3106
|
-
"file_type": "attn_procs_weights",
|
3107
|
-
"framework": "pytorch",
|
3108
|
-
}
|
3754
|
+
user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"}
|
3109
3755
|
|
3110
|
-
state_dict = _fetch_state_dict(
|
3756
|
+
state_dict, metadata = _fetch_state_dict(
|
3111
3757
|
pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict,
|
3112
3758
|
weight_name=weight_name,
|
3113
3759
|
use_safetensors=use_safetensors,
|
@@ -3128,11 +3774,20 @@ class Mochi1LoraLoaderMixin(LoraBaseMixin):
|
|
3128
3774
|
logger.warning(warn_msg)
|
3129
3775
|
state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k}
|
3130
3776
|
|
3131
|
-
|
3777
|
+
is_non_diffusers_format = any(k.startswith("diffusion_model.") for k in state_dict)
|
3778
|
+
if is_non_diffusers_format:
|
3779
|
+
state_dict = _convert_non_diffusers_ltxv_lora_to_diffusers(state_dict)
|
3780
|
+
|
3781
|
+
out = (state_dict, metadata) if return_lora_metadata else state_dict
|
3782
|
+
return out
|
3132
3783
|
|
3133
3784
|
# Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights
|
3134
3785
|
def load_lora_weights(
|
3135
|
-
self,
|
3786
|
+
self,
|
3787
|
+
pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
|
3788
|
+
adapter_name: Optional[str] = None,
|
3789
|
+
hotswap: bool = False,
|
3790
|
+
**kwargs,
|
3136
3791
|
):
|
3137
3792
|
"""
|
3138
3793
|
Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.transformer` and
|
@@ -3150,6 +3805,8 @@ class Mochi1LoraLoaderMixin(LoraBaseMixin):
|
|
3150
3805
|
low_cpu_mem_usage (`bool`, *optional*):
|
3151
3806
|
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
3152
3807
|
weights.
|
3808
|
+
hotswap (`bool`, *optional*):
|
3809
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
3153
3810
|
kwargs (`dict`, *optional*):
|
3154
3811
|
See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
|
3155
3812
|
"""
|
@@ -3167,7 +3824,8 @@ class Mochi1LoraLoaderMixin(LoraBaseMixin):
|
|
3167
3824
|
pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy()
|
3168
3825
|
|
3169
3826
|
# First, ensure that the checkpoint is a compatible one and can be successfully loaded.
|
3170
|
-
|
3827
|
+
kwargs["return_lora_metadata"] = True
|
3828
|
+
state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs)
|
3171
3829
|
|
3172
3830
|
is_correct_format = all("lora" in key for key in state_dict.keys())
|
3173
3831
|
if not is_correct_format:
|
@@ -3177,14 +3835,23 @@ class Mochi1LoraLoaderMixin(LoraBaseMixin):
|
|
3177
3835
|
state_dict,
|
3178
3836
|
transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer,
|
3179
3837
|
adapter_name=adapter_name,
|
3838
|
+
metadata=metadata,
|
3180
3839
|
_pipeline=self,
|
3181
3840
|
low_cpu_mem_usage=low_cpu_mem_usage,
|
3841
|
+
hotswap=hotswap,
|
3182
3842
|
)
|
3183
3843
|
|
3184
3844
|
@classmethod
|
3185
|
-
# Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->
|
3845
|
+
# Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->LTXVideoTransformer3DModel
|
3186
3846
|
def load_lora_into_transformer(
|
3187
|
-
cls,
|
3847
|
+
cls,
|
3848
|
+
state_dict,
|
3849
|
+
transformer,
|
3850
|
+
adapter_name=None,
|
3851
|
+
_pipeline=None,
|
3852
|
+
low_cpu_mem_usage=False,
|
3853
|
+
hotswap: bool = False,
|
3854
|
+
metadata=None,
|
3188
3855
|
):
|
3189
3856
|
"""
|
3190
3857
|
This will load the LoRA layers specified in `state_dict` into `transformer`.
|
@@ -3194,7 +3861,7 @@ class Mochi1LoraLoaderMixin(LoraBaseMixin):
|
|
3194
3861
|
A standard state dict containing the lora layer parameters. The keys can either be indexed directly
|
3195
3862
|
into the unet or prefixed with an additional `unet` which can be used to distinguish between text
|
3196
3863
|
encoder lora layers.
|
3197
|
-
transformer (`
|
3864
|
+
transformer (`LTXVideoTransformer3DModel`):
|
3198
3865
|
The Transformer model to load the LoRA layers into.
|
3199
3866
|
adapter_name (`str`, *optional*):
|
3200
3867
|
Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
|
@@ -3202,29 +3869,11 @@ class Mochi1LoraLoaderMixin(LoraBaseMixin):
|
|
3202
3869
|
low_cpu_mem_usage (`bool`, *optional*):
|
3203
3870
|
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
3204
3871
|
weights.
|
3205
|
-
hotswap
|
3206
|
-
|
3207
|
-
|
3208
|
-
|
3209
|
-
|
3210
|
-
torch.compile, loading the new adapter does not require recompilation of the model. When using
|
3211
|
-
hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
|
3212
|
-
|
3213
|
-
If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
|
3214
|
-
to call an additional method before loading the adapter:
|
3215
|
-
|
3216
|
-
```py
|
3217
|
-
pipeline = ... # load diffusers pipeline
|
3218
|
-
max_rank = ... # the highest rank among all LoRAs that you want to load
|
3219
|
-
# call *before* compiling and loading the LoRA adapter
|
3220
|
-
pipeline.enable_lora_hotswap(target_rank=max_rank)
|
3221
|
-
pipeline.load_lora_weights(file_name)
|
3222
|
-
# optionally compile the model now
|
3223
|
-
```
|
3224
|
-
|
3225
|
-
Note that hotswapping adapters of the text encoder is not yet supported. There are some further
|
3226
|
-
limitations to this technique, which are documented here:
|
3227
|
-
https://huggingface.co/docs/peft/main/en/package_reference/hotswap
|
3872
|
+
hotswap (`bool`, *optional*):
|
3873
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
3874
|
+
metadata (`dict`):
|
3875
|
+
Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
|
3876
|
+
from the state dict.
|
3228
3877
|
"""
|
3229
3878
|
if low_cpu_mem_usage and is_peft_version("<", "0.13.0"):
|
3230
3879
|
raise ValueError(
|
@@ -3237,6 +3886,7 @@ class Mochi1LoraLoaderMixin(LoraBaseMixin):
|
|
3237
3886
|
state_dict,
|
3238
3887
|
network_alphas=None,
|
3239
3888
|
adapter_name=adapter_name,
|
3889
|
+
metadata=metadata,
|
3240
3890
|
_pipeline=_pipeline,
|
3241
3891
|
low_cpu_mem_usage=low_cpu_mem_usage,
|
3242
3892
|
hotswap=hotswap,
|
@@ -3252,9 +3902,10 @@ class Mochi1LoraLoaderMixin(LoraBaseMixin):
|
|
3252
3902
|
weight_name: str = None,
|
3253
3903
|
save_function: Callable = None,
|
3254
3904
|
safe_serialization: bool = True,
|
3905
|
+
transformer_lora_adapter_metadata: Optional[dict] = None,
|
3255
3906
|
):
|
3256
3907
|
r"""
|
3257
|
-
Save the LoRA parameters corresponding to the
|
3908
|
+
Save the LoRA parameters corresponding to the transformer.
|
3258
3909
|
|
3259
3910
|
Arguments:
|
3260
3911
|
save_directory (`str` or `os.PathLike`):
|
@@ -3271,14 +3922,21 @@ class Mochi1LoraLoaderMixin(LoraBaseMixin):
|
|
3271
3922
|
`DIFFUSERS_SAVE_MODE`.
|
3272
3923
|
safe_serialization (`bool`, *optional*, defaults to `True`):
|
3273
3924
|
Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
|
3925
|
+
transformer_lora_adapter_metadata:
|
3926
|
+
LoRA adapter metadata associated with the transformer to be serialized with the state dict.
|
3274
3927
|
"""
|
3275
3928
|
state_dict = {}
|
3929
|
+
lora_adapter_metadata = {}
|
3276
3930
|
|
3277
3931
|
if not transformer_lora_layers:
|
3278
3932
|
raise ValueError("You must pass `transformer_lora_layers`.")
|
3279
3933
|
|
3280
|
-
|
3281
|
-
|
3934
|
+
state_dict.update(cls.pack_weights(transformer_lora_layers, cls.transformer_name))
|
3935
|
+
|
3936
|
+
if transformer_lora_adapter_metadata is not None:
|
3937
|
+
lora_adapter_metadata.update(
|
3938
|
+
_pack_dict_with_prefix(transformer_lora_adapter_metadata, cls.transformer_name)
|
3939
|
+
)
|
3282
3940
|
|
3283
3941
|
# Save the model
|
3284
3942
|
cls.write_lora_layers(
|
@@ -3288,6 +3946,7 @@ class Mochi1LoraLoaderMixin(LoraBaseMixin):
|
|
3288
3946
|
weight_name=weight_name,
|
3289
3947
|
save_function=save_function,
|
3290
3948
|
safe_serialization=safe_serialization,
|
3949
|
+
lora_adapter_metadata=lora_adapter_metadata,
|
3291
3950
|
)
|
3292
3951
|
|
3293
3952
|
# Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora
|
@@ -3357,9 +4016,9 @@ class Mochi1LoraLoaderMixin(LoraBaseMixin):
|
|
3357
4016
|
super().unfuse_lora(components=components, **kwargs)
|
3358
4017
|
|
3359
4018
|
|
3360
|
-
class
|
4019
|
+
class SanaLoraLoaderMixin(LoraBaseMixin):
|
3361
4020
|
r"""
|
3362
|
-
Load LoRA layers into [`
|
4021
|
+
Load LoRA layers into [`SanaTransformer2DModel`]. Specific to [`SanaPipeline`].
|
3363
4022
|
"""
|
3364
4023
|
|
3365
4024
|
_lora_loadable_modules = ["transformer"]
|
@@ -3367,7 +4026,7 @@ class LTXVideoLoraLoaderMixin(LoraBaseMixin):
|
|
3367
4026
|
|
3368
4027
|
@classmethod
|
3369
4028
|
@validate_hf_hub_args
|
3370
|
-
# Copied from diffusers.loaders.lora_pipeline.
|
4029
|
+
# Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.lora_state_dict
|
3371
4030
|
def lora_state_dict(
|
3372
4031
|
cls,
|
3373
4032
|
pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
|
@@ -3416,6 +4075,8 @@ class LTXVideoLoraLoaderMixin(LoraBaseMixin):
|
|
3416
4075
|
allowed by Git.
|
3417
4076
|
subfolder (`str`, *optional*, defaults to `""`):
|
3418
4077
|
The subfolder location of a model file within a larger model repository on the Hub or locally.
|
4078
|
+
return_lora_metadata (`bool`, *optional*, defaults to False):
|
4079
|
+
When enabled, additionally return the LoRA adapter metadata, typically found in the state dict.
|
3419
4080
|
|
3420
4081
|
"""
|
3421
4082
|
# Load the main state dict first which has the LoRA layers for either of
|
@@ -3429,18 +4090,16 @@ class LTXVideoLoraLoaderMixin(LoraBaseMixin):
|
|
3429
4090
|
subfolder = kwargs.pop("subfolder", None)
|
3430
4091
|
weight_name = kwargs.pop("weight_name", None)
|
3431
4092
|
use_safetensors = kwargs.pop("use_safetensors", None)
|
4093
|
+
return_lora_metadata = kwargs.pop("return_lora_metadata", False)
|
3432
4094
|
|
3433
4095
|
allow_pickle = False
|
3434
4096
|
if use_safetensors is None:
|
3435
4097
|
use_safetensors = True
|
3436
4098
|
allow_pickle = True
|
3437
4099
|
|
3438
|
-
user_agent = {
|
3439
|
-
"file_type": "attn_procs_weights",
|
3440
|
-
"framework": "pytorch",
|
3441
|
-
}
|
4100
|
+
user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"}
|
3442
4101
|
|
3443
|
-
state_dict = _fetch_state_dict(
|
4102
|
+
state_dict, metadata = _fetch_state_dict(
|
3444
4103
|
pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict,
|
3445
4104
|
weight_name=weight_name,
|
3446
4105
|
use_safetensors=use_safetensors,
|
@@ -3461,11 +4120,16 @@ class LTXVideoLoraLoaderMixin(LoraBaseMixin):
|
|
3461
4120
|
logger.warning(warn_msg)
|
3462
4121
|
state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k}
|
3463
4122
|
|
3464
|
-
|
4123
|
+
out = (state_dict, metadata) if return_lora_metadata else state_dict
|
4124
|
+
return out
|
3465
4125
|
|
3466
4126
|
# Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights
|
3467
4127
|
def load_lora_weights(
|
3468
|
-
self,
|
4128
|
+
self,
|
4129
|
+
pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
|
4130
|
+
adapter_name: Optional[str] = None,
|
4131
|
+
hotswap: bool = False,
|
4132
|
+
**kwargs,
|
3469
4133
|
):
|
3470
4134
|
"""
|
3471
4135
|
Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.transformer` and
|
@@ -3483,6 +4147,8 @@ class LTXVideoLoraLoaderMixin(LoraBaseMixin):
|
|
3483
4147
|
low_cpu_mem_usage (`bool`, *optional*):
|
3484
4148
|
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
3485
4149
|
weights.
|
4150
|
+
hotswap (`bool`, *optional*):
|
4151
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
3486
4152
|
kwargs (`dict`, *optional*):
|
3487
4153
|
See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
|
3488
4154
|
"""
|
@@ -3500,7 +4166,8 @@ class LTXVideoLoraLoaderMixin(LoraBaseMixin):
|
|
3500
4166
|
pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy()
|
3501
4167
|
|
3502
4168
|
# First, ensure that the checkpoint is a compatible one and can be successfully loaded.
|
3503
|
-
|
4169
|
+
kwargs["return_lora_metadata"] = True
|
4170
|
+
state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs)
|
3504
4171
|
|
3505
4172
|
is_correct_format = all("lora" in key for key in state_dict.keys())
|
3506
4173
|
if not is_correct_format:
|
@@ -3510,14 +4177,23 @@ class LTXVideoLoraLoaderMixin(LoraBaseMixin):
|
|
3510
4177
|
state_dict,
|
3511
4178
|
transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer,
|
3512
4179
|
adapter_name=adapter_name,
|
4180
|
+
metadata=metadata,
|
3513
4181
|
_pipeline=self,
|
3514
4182
|
low_cpu_mem_usage=low_cpu_mem_usage,
|
4183
|
+
hotswap=hotswap,
|
3515
4184
|
)
|
3516
4185
|
|
3517
4186
|
@classmethod
|
3518
|
-
# Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->
|
4187
|
+
# Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->SanaTransformer2DModel
|
3519
4188
|
def load_lora_into_transformer(
|
3520
|
-
cls,
|
4189
|
+
cls,
|
4190
|
+
state_dict,
|
4191
|
+
transformer,
|
4192
|
+
adapter_name=None,
|
4193
|
+
_pipeline=None,
|
4194
|
+
low_cpu_mem_usage=False,
|
4195
|
+
hotswap: bool = False,
|
4196
|
+
metadata=None,
|
3521
4197
|
):
|
3522
4198
|
"""
|
3523
4199
|
This will load the LoRA layers specified in `state_dict` into `transformer`.
|
@@ -3527,7 +4203,7 @@ class LTXVideoLoraLoaderMixin(LoraBaseMixin):
|
|
3527
4203
|
A standard state dict containing the lora layer parameters. The keys can either be indexed directly
|
3528
4204
|
into the unet or prefixed with an additional `unet` which can be used to distinguish between text
|
3529
4205
|
encoder lora layers.
|
3530
|
-
transformer (`
|
4206
|
+
transformer (`SanaTransformer2DModel`):
|
3531
4207
|
The Transformer model to load the LoRA layers into.
|
3532
4208
|
adapter_name (`str`, *optional*):
|
3533
4209
|
Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
|
@@ -3535,29 +4211,11 @@ class LTXVideoLoraLoaderMixin(LoraBaseMixin):
|
|
3535
4211
|
low_cpu_mem_usage (`bool`, *optional*):
|
3536
4212
|
Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
|
3537
4213
|
weights.
|
3538
|
-
hotswap
|
3539
|
-
|
3540
|
-
|
3541
|
-
|
3542
|
-
|
3543
|
-
torch.compile, loading the new adapter does not require recompilation of the model. When using
|
3544
|
-
hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
|
3545
|
-
|
3546
|
-
If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
|
3547
|
-
to call an additional method before loading the adapter:
|
3548
|
-
|
3549
|
-
```py
|
3550
|
-
pipeline = ... # load diffusers pipeline
|
3551
|
-
max_rank = ... # the highest rank among all LoRAs that you want to load
|
3552
|
-
# call *before* compiling and loading the LoRA adapter
|
3553
|
-
pipeline.enable_lora_hotswap(target_rank=max_rank)
|
3554
|
-
pipeline.load_lora_weights(file_name)
|
3555
|
-
# optionally compile the model now
|
3556
|
-
```
|
3557
|
-
|
3558
|
-
Note that hotswapping adapters of the text encoder is not yet supported. There are some further
|
3559
|
-
limitations to this technique, which are documented here:
|
3560
|
-
https://huggingface.co/docs/peft/main/en/package_reference/hotswap
|
4214
|
+
hotswap (`bool`, *optional*):
|
4215
|
+
See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
|
4216
|
+
metadata (`dict`):
|
4217
|
+
Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
|
4218
|
+
from the state dict.
|
3561
4219
|
"""
|
3562
4220
|
if low_cpu_mem_usage and is_peft_version("<", "0.13.0"):
|
3563
4221
|
raise ValueError(
|
@@ -3570,6 +4228,7 @@ class LTXVideoLoraLoaderMixin(LoraBaseMixin):
|
|
3570
4228
|
state_dict,
|
3571
4229
|
network_alphas=None,
|
3572
4230
|
adapter_name=adapter_name,
|
4231
|
+
metadata=metadata,
|
3573
4232
|
_pipeline=_pipeline,
|
3574
4233
|
low_cpu_mem_usage=low_cpu_mem_usage,
|
3575
4234
|
hotswap=hotswap,
|
@@ -3585,9 +4244,10 @@ class LTXVideoLoraLoaderMixin(LoraBaseMixin):
|
|
3585
4244
|
weight_name: str = None,
|
3586
4245
|
save_function: Callable = None,
|
3587
4246
|
safe_serialization: bool = True,
|
4247
|
+
transformer_lora_adapter_metadata: Optional[dict] = None,
|
3588
4248
|
):
|
3589
4249
|
r"""
|
3590
|
-
Save the LoRA parameters corresponding to the
|
4250
|
+
Save the LoRA parameters corresponding to the transformer.
|
3591
4251
|
|
3592
4252
|
Arguments:
|
3593
4253
|
save_directory (`str` or `os.PathLike`):
|
@@ -3604,14 +4264,21 @@ class LTXVideoLoraLoaderMixin(LoraBaseMixin):
|
|
3604
4264
|
`DIFFUSERS_SAVE_MODE`.
|
3605
4265
|
safe_serialization (`bool`, *optional*, defaults to `True`):
|
3606
4266
|
Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
|
4267
|
+
transformer_lora_adapter_metadata:
|
4268
|
+
LoRA adapter metadata associated with the transformer to be serialized with the state dict.
|
3607
4269
|
"""
|
3608
4270
|
state_dict = {}
|
4271
|
+
lora_adapter_metadata = {}
|
3609
4272
|
|
3610
4273
|
if not transformer_lora_layers:
|
3611
4274
|
raise ValueError("You must pass `transformer_lora_layers`.")
|
3612
4275
|
|
3613
|
-
|
3614
|
-
|
4276
|
+
state_dict.update(cls.pack_weights(transformer_lora_layers, cls.transformer_name))
|
4277
|
+
|
4278
|
+
if transformer_lora_adapter_metadata is not None:
|
4279
|
+
lora_adapter_metadata.update(
|
4280
|
+
_pack_dict_with_prefix(transformer_lora_adapter_metadata, cls.transformer_name)
|
4281
|
+
)
|
3615
4282
|
|
3616
4283
|
# Save the model
|
3617
4284
|
cls.write_lora_layers(
|
@@ -3621,6 +4288,7 @@ class LTXVideoLoraLoaderMixin(LoraBaseMixin):
|
|
3621
4288
|
weight_name=weight_name,
|
3622
4289
|
save_function=save_function,
|
3623
4290
|
safe_serialization=safe_serialization,
|
4291
|
+
lora_adapter_metadata=lora_adapter_metadata,
|
3624
4292
|
)
|
3625
4293
|
|
3626
4294
|
# Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora
|
@@ -3690,9 +4358,9 @@ class LTXVideoLoraLoaderMixin(LoraBaseMixin):
         super().unfuse_lora(components=components, **kwargs)


-class
+class HunyuanVideoLoraLoaderMixin(LoraBaseMixin):
     r"""
-    Load LoRA layers into [`
+    Load LoRA layers into [`HunyuanVideoTransformer3DModel`]. Specific to [`HunyuanVideoPipeline`].
     """

     _lora_loadable_modules = ["transformer"]
@@ -3700,7 +4368,6 @@ class SanaLoraLoaderMixin(LoraBaseMixin):

     @classmethod
     @validate_hf_hub_args
-    # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.lora_state_dict
     def lora_state_dict(
         cls,
         pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
@@ -3711,7 +4378,7 @@ class SanaLoraLoaderMixin(LoraBaseMixin):

         <Tip warning={true}>

-        We support loading
+        We support loading original format HunyuanVideo LoRA checkpoints.

         This function is experimental and might change in the future.

@@ -3749,7 +4416,8 @@ class SanaLoraLoaderMixin(LoraBaseMixin):
                 allowed by Git.
             subfolder (`str`, *optional*, defaults to `""`):
                 The subfolder location of a model file within a larger model repository on the Hub or locally.
-
+            return_lora_metadata (`bool`, *optional*, defaults to False):
+                When enabled, additionally return the LoRA adapter metadata, typically found in the state dict.
         """
         # Load the main state dict first which has the LoRA layers for either of
         # transformer and text encoder or both.
@@ -3762,18 +4430,16 @@ class SanaLoraLoaderMixin(LoraBaseMixin):
         subfolder = kwargs.pop("subfolder", None)
         weight_name = kwargs.pop("weight_name", None)
         use_safetensors = kwargs.pop("use_safetensors", None)
+        return_lora_metadata = kwargs.pop("return_lora_metadata", False)

         allow_pickle = False
         if use_safetensors is None:
             use_safetensors = True
             allow_pickle = True

-        user_agent = {
-            "file_type": "attn_procs_weights",
-            "framework": "pytorch",
-        }
+        user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"}

-        state_dict = _fetch_state_dict(
+        state_dict, metadata = _fetch_state_dict(
             pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict,
             weight_name=weight_name,
             use_safetensors=use_safetensors,
@@ -3794,11 +4460,20 @@ class SanaLoraLoaderMixin(LoraBaseMixin):
             logger.warning(warn_msg)
             state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k}

-
+        is_original_hunyuan_video = any("img_attn_qkv" in k for k in state_dict)
+        if is_original_hunyuan_video:
+            state_dict = _convert_hunyuan_video_lora_to_diffusers(state_dict)
+
+        out = (state_dict, metadata) if return_lora_metadata else state_dict
+        return out

     # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights
     def load_lora_weights(
-        self,
+        self,
+        pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
+        adapter_name: Optional[str] = None,
+        hotswap: bool = False,
+        **kwargs,
     ):
         """
         Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.transformer` and
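The new `return_lora_metadata` flag changes what `lora_state_dict` returns. A hedged usage sketch; the repository id and weight name are placeholders, not real checkpoints:

```py
from diffusers import HunyuanVideoPipeline

# Only `return_lora_metadata` mirrors the diff above; everything else is illustrative.
state_dict, metadata = HunyuanVideoPipeline.lora_state_dict(
    "some-user/some-hunyuanvideo-lora",
    weight_name="pytorch_lora_weights.safetensors",
    return_lora_metadata=True,
)
print(len(state_dict), metadata)  # metadata may be None if the file carries none
```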
@@ -3816,6 +4491,8 @@ class SanaLoraLoaderMixin(LoraBaseMixin):
             low_cpu_mem_usage (`bool`, *optional*):
                 Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
                 weights.
+            hotswap (`bool`, *optional*):
+                See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
             kwargs (`dict`, *optional*):
                 See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
         """
@@ -3833,7 +4510,8 @@ class SanaLoraLoaderMixin(LoraBaseMixin):
             pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy()

         # First, ensure that the checkpoint is a compatible one and can be successfully loaded.
-
+        kwargs["return_lora_metadata"] = True
+        state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs)

         is_correct_format = all("lora" in key for key in state_dict.keys())
         if not is_correct_format:
@@ -3843,14 +4521,23 @@ class SanaLoraLoaderMixin(LoraBaseMixin):
             state_dict,
             transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=self,
             low_cpu_mem_usage=low_cpu_mem_usage,
+            hotswap=hotswap,
         )

     @classmethod
-    # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->
+    # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->HunyuanVideoTransformer3DModel
     def load_lora_into_transformer(
-        cls,
+        cls,
+        state_dict,
+        transformer,
+        adapter_name=None,
+        _pipeline=None,
+        low_cpu_mem_usage=False,
+        hotswap: bool = False,
+        metadata=None,
     ):
         """
         This will load the LoRA layers specified in `state_dict` into `transformer`.
@@ -3860,7 +4547,7 @@ class SanaLoraLoaderMixin(LoraBaseMixin):
                 A standard state dict containing the lora layer parameters. The keys can either be indexed directly
                 into the unet or prefixed with an additional `unet` which can be used to distinguish between text
                 encoder lora layers.
-            transformer (`
+            transformer (`HunyuanVideoTransformer3DModel`):
                 The Transformer model to load the LoRA layers into.
             adapter_name (`str`, *optional*):
                 Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
@@ -3868,29 +4555,11 @@ class SanaLoraLoaderMixin(LoraBaseMixin):
             low_cpu_mem_usage (`bool`, *optional*):
                 Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
                 weights.
-            hotswap
-
-
-
-
-                torch.compile, loading the new adapter does not require recompilation of the model. When using
-                hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
-
-                If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
-                to call an additional method before loading the adapter:
-
-                ```py
-                pipeline = ...  # load diffusers pipeline
-                max_rank = ...  # the highest rank among all LoRAs that you want to load
-                # call *before* compiling and loading the LoRA adapter
-                pipeline.enable_lora_hotswap(target_rank=max_rank)
-                pipeline.load_lora_weights(file_name)
-                # optionally compile the model now
-                ```
-
-                Note that hotswapping adapters of the text encoder is not yet supported. There are some further
-                limitations to this technique, which are documented here:
-                https://huggingface.co/docs/peft/main/en/package_reference/hotswap
+            hotswap (`bool`, *optional*):
+                See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
+            metadata (`dict`):
+                Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
+                from the state dict.
         """
         if low_cpu_mem_usage and is_peft_version("<", "0.13.0"):
             raise ValueError(
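The long inline hotswapping explanation is replaced here by a cross-reference, but the workflow it described still applies. A hedged sketch of that flow, assuming `pipe` is an already-constructed pipeline and the adapter files and rank are placeholders:

```py
import torch

# pipe: a diffusers pipeline created elsewhere (placeholder)
pipe.enable_lora_hotswap(target_rank=64)              # call before compiling and before the first load
pipe.load_lora_weights("style_a.safetensors", adapter_name="style")
pipe.transformer = torch.compile(pipe.transformer)    # optional
# Later, swap the adapter weights in place under the same adapter_name; no recompilation is triggered.
pipe.load_lora_weights("style_b.safetensors", adapter_name="style", hotswap=True)
```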
@@ -3903,6 +4572,7 @@ class SanaLoraLoaderMixin(LoraBaseMixin):
             state_dict,
             network_alphas=None,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=_pipeline,
             low_cpu_mem_usage=low_cpu_mem_usage,
             hotswap=hotswap,
@@ -3918,9 +4588,10 @@ class SanaLoraLoaderMixin(LoraBaseMixin):
         weight_name: str = None,
         save_function: Callable = None,
         safe_serialization: bool = True,
+        transformer_lora_adapter_metadata: Optional[dict] = None,
     ):
         r"""
-        Save the LoRA parameters corresponding to the
+        Save the LoRA parameters corresponding to the transformer.

         Arguments:
             save_directory (`str` or `os.PathLike`):
@@ -3937,14 +4608,21 @@ class SanaLoraLoaderMixin(LoraBaseMixin):
                 `DIFFUSERS_SAVE_MODE`.
             safe_serialization (`bool`, *optional*, defaults to `True`):
                 Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
+            transformer_lora_adapter_metadata:
+                LoRA adapter metadata associated with the transformer to be serialized with the state dict.
         """
         state_dict = {}
+        lora_adapter_metadata = {}

         if not transformer_lora_layers:
             raise ValueError("You must pass `transformer_lora_layers`.")

-
-
+        state_dict.update(cls.pack_weights(transformer_lora_layers, cls.transformer_name))
+
+        if transformer_lora_adapter_metadata is not None:
+            lora_adapter_metadata.update(
+                _pack_dict_with_prefix(transformer_lora_adapter_metadata, cls.transformer_name)
+            )

         # Save the model
         cls.write_lora_layers(
@@ -3954,6 +4632,7 @@ class SanaLoraLoaderMixin(LoraBaseMixin):
             weight_name=weight_name,
             save_function=save_function,
             safe_serialization=safe_serialization,
+            lora_adapter_metadata=lora_adapter_metadata,
         )

     # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora
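`_pack_dict_with_prefix` is only called in this diff, never defined, so its exact behaviour is not shown here. An illustrative guess at what prefix-packing the adapter metadata could look like — the helper name and behaviour below are assumptions, not the library's implementation:

```py
def pack_dict_with_prefix(metadata: dict, prefix: str) -> dict:
    # Assumed behaviour: namespace every metadata key under the module name,
    # e.g. {"r": 16} -> {"transformer.r": 16}, so metadata for several modules can coexist.
    return {f"{prefix}.{key}": value for key, value in metadata.items()}

print(pack_dict_with_prefix({"r": 16, "lora_alpha": 16}, "transformer"))
```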
@@ -4023,9 +4702,9 @@ class SanaLoraLoaderMixin(LoraBaseMixin):
         super().unfuse_lora(components=components, **kwargs)


-class
+class Lumina2LoraLoaderMixin(LoraBaseMixin):
     r"""
-    Load LoRA layers into [`
+    Load LoRA layers into [`Lumina2Transformer2DModel`]. Specific to [`Lumina2Text2ImgPipeline`].
     """

     _lora_loadable_modules = ["transformer"]
@@ -4043,7 +4722,7 @@ class HunyuanVideoLoraLoaderMixin(LoraBaseMixin):

         <Tip warning={true}>

-        We support loading
+        We support loading A1111 formatted LoRA checkpoints in a limited capacity.

         This function is experimental and might change in the future.

@@ -4081,7 +4760,8 @@ class HunyuanVideoLoraLoaderMixin(LoraBaseMixin):
                 allowed by Git.
             subfolder (`str`, *optional*, defaults to `""`):
                 The subfolder location of a model file within a larger model repository on the Hub or locally.
-
+            return_lora_metadata (`bool`, *optional*, defaults to False):
+                When enabled, additionally return the LoRA adapter metadata, typically found in the state dict.
         """
         # Load the main state dict first which has the LoRA layers for either of
         # transformer and text encoder or both.
@@ -4094,18 +4774,16 @@ class HunyuanVideoLoraLoaderMixin(LoraBaseMixin):
         subfolder = kwargs.pop("subfolder", None)
         weight_name = kwargs.pop("weight_name", None)
         use_safetensors = kwargs.pop("use_safetensors", None)
+        return_lora_metadata = kwargs.pop("return_lora_metadata", False)

         allow_pickle = False
         if use_safetensors is None:
             use_safetensors = True
             allow_pickle = True

-        user_agent = {
-            "file_type": "attn_procs_weights",
-            "framework": "pytorch",
-        }
+        user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"}

-        state_dict = _fetch_state_dict(
+        state_dict, metadata = _fetch_state_dict(
             pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict,
             weight_name=weight_name,
             use_safetensors=use_safetensors,
@@ -4126,15 +4804,21 @@ class HunyuanVideoLoraLoaderMixin(LoraBaseMixin):
             logger.warning(warn_msg)
             state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k}

-
-
-
+        # conversion.
+        non_diffusers = any(k.startswith("diffusion_model.") for k in state_dict)
+        if non_diffusers:
+            state_dict = _convert_non_diffusers_lumina2_lora_to_diffusers(state_dict)

-
+        out = (state_dict, metadata) if return_lora_metadata else state_dict
+        return out

     # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights
     def load_lora_weights(
-        self,
+        self,
+        pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
+        adapter_name: Optional[str] = None,
+        hotswap: bool = False,
+        **kwargs,
     ):
         """
         Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.transformer` and
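The Lumina2 loader above now routes `diffusion_model.`-prefixed (non-diffusers) checkpoints through a converter before returning the state dict, so callers load as usual. A hedged end-to-end sketch; the model and LoRA repository ids are placeholders:

```py
import torch
from diffusers import Lumina2Text2ImgPipeline

pipe = Lumina2Text2ImgPipeline.from_pretrained("some-org/some-lumina2-model", torch_dtype=torch.bfloat16)
# Checkpoints whose keys start with "diffusion_model." are converted internally.
pipe.load_lora_weights("some-user/some-lumina2-lora", adapter_name="style")
```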
@@ -4152,6 +4836,8 @@ class HunyuanVideoLoraLoaderMixin(LoraBaseMixin):
             low_cpu_mem_usage (`bool`, *optional*):
                 Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
                 weights.
+            hotswap (`bool`, *optional*):
+                See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
             kwargs (`dict`, *optional*):
                 See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
         """
@@ -4169,7 +4855,8 @@ class HunyuanVideoLoraLoaderMixin(LoraBaseMixin):
             pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy()

         # First, ensure that the checkpoint is a compatible one and can be successfully loaded.
-
+        kwargs["return_lora_metadata"] = True
+        state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs)

         is_correct_format = all("lora" in key for key in state_dict.keys())
         if not is_correct_format:
@@ -4179,54 +4866,45 @@ class HunyuanVideoLoraLoaderMixin(LoraBaseMixin):
             state_dict,
             transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=self,
             low_cpu_mem_usage=low_cpu_mem_usage,
+            hotswap=hotswap,
         )

     @classmethod
-    # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->
+    # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->Lumina2Transformer2DModel
     def load_lora_into_transformer(
-        cls,
+        cls,
+        state_dict,
+        transformer,
+        adapter_name=None,
+        _pipeline=None,
+        low_cpu_mem_usage=False,
+        hotswap: bool = False,
+        metadata=None,
     ):
         """
-        This will load the LoRA layers specified in `state_dict` into `transformer`.
-
-        Parameters:
-            state_dict (`dict`):
-                A standard state dict containing the lora layer parameters. The keys can either be indexed directly
-                into the unet or prefixed with an additional `unet` which can be used to distinguish between text
-                encoder lora layers.
-            transformer (`
-                The Transformer model to load the LoRA layers into.
-            adapter_name (`str`, *optional*):
-                Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
-                `default_{i}` where i is the total number of adapters being loaded.
-            low_cpu_mem_usage (`bool`, *optional*):
-                Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
-                weights.
-            hotswap
-
-
-
-
-                torch.compile, loading the new adapter does not require recompilation of the model. When using
-                hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
-
-                If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
-                to call an additional method before loading the adapter:
-
-                ```py
-                pipeline = ...  # load diffusers pipeline
-                max_rank = ...  # the highest rank among all LoRAs that you want to load
-                # call *before* compiling and loading the LoRA adapter
-                pipeline.enable_lora_hotswap(target_rank=max_rank)
-                pipeline.load_lora_weights(file_name)
-                # optionally compile the model now
-                ```
-
-                Note that hotswapping adapters of the text encoder is not yet supported. There are some further
-                limitations to this technique, which are documented here:
-                https://huggingface.co/docs/peft/main/en/package_reference/hotswap
+        This will load the LoRA layers specified in `state_dict` into `transformer`.
+
+        Parameters:
+            state_dict (`dict`):
+                A standard state dict containing the lora layer parameters. The keys can either be indexed directly
+                into the unet or prefixed with an additional `unet` which can be used to distinguish between text
+                encoder lora layers.
+            transformer (`Lumina2Transformer2DModel`):
+                The Transformer model to load the LoRA layers into.
+            adapter_name (`str`, *optional*):
+                Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
+                `default_{i}` where i is the total number of adapters being loaded.
+            low_cpu_mem_usage (`bool`, *optional*):
+                Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
+                weights.
+            hotswap (`bool`, *optional*):
+                See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
+            metadata (`dict`):
+                Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
+                from the state dict.
         """
         if low_cpu_mem_usage and is_peft_version("<", "0.13.0"):
             raise ValueError(
@@ -4239,6 +4917,7 @@ class HunyuanVideoLoraLoaderMixin(LoraBaseMixin):
             state_dict,
             network_alphas=None,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=_pipeline,
             low_cpu_mem_usage=low_cpu_mem_usage,
             hotswap=hotswap,
@@ -4254,9 +4933,10 @@ class HunyuanVideoLoraLoaderMixin(LoraBaseMixin):
         weight_name: str = None,
         save_function: Callable = None,
         safe_serialization: bool = True,
+        transformer_lora_adapter_metadata: Optional[dict] = None,
     ):
         r"""
-        Save the LoRA parameters corresponding to the
+        Save the LoRA parameters corresponding to the transformer.

         Arguments:
             save_directory (`str` or `os.PathLike`):
@@ -4273,14 +4953,21 @@ class HunyuanVideoLoraLoaderMixin(LoraBaseMixin):
                 `DIFFUSERS_SAVE_MODE`.
             safe_serialization (`bool`, *optional*, defaults to `True`):
                 Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
+            transformer_lora_adapter_metadata:
+                LoRA adapter metadata associated with the transformer to be serialized with the state dict.
         """
         state_dict = {}
+        lora_adapter_metadata = {}

         if not transformer_lora_layers:
             raise ValueError("You must pass `transformer_lora_layers`.")

-
-
+        state_dict.update(cls.pack_weights(transformer_lora_layers, cls.transformer_name))
+
+        if transformer_lora_adapter_metadata is not None:
+            lora_adapter_metadata.update(
+                _pack_dict_with_prefix(transformer_lora_adapter_metadata, cls.transformer_name)
+            )

         # Save the model
         cls.write_lora_layers(
@@ -4290,9 +4977,10 @@ class HunyuanVideoLoraLoaderMixin(LoraBaseMixin):
             weight_name=weight_name,
             save_function=save_function,
             safe_serialization=safe_serialization,
+            lora_adapter_metadata=lora_adapter_metadata,
         )

-    # Copied from diffusers.loaders.lora_pipeline.
+    # Copied from diffusers.loaders.lora_pipeline.SanaLoraLoaderMixin.fuse_lora
     def fuse_lora(
         self,
         components: List[str] = ["transformer"],
@@ -4340,7 +5028,7 @@ class HunyuanVideoLoraLoaderMixin(LoraBaseMixin):
             **kwargs,
         )

-    # Copied from diffusers.loaders.lora_pipeline.
+    # Copied from diffusers.loaders.lora_pipeline.SanaLoraLoaderMixin.unfuse_lora
     def unfuse_lora(self, components: List[str] = ["transformer"], **kwargs):
         r"""
         Reverses the effect of
@@ -4359,9 +5047,9 @@ class HunyuanVideoLoraLoaderMixin(LoraBaseMixin):
         super().unfuse_lora(components=components, **kwargs)


-class
+class WanLoraLoaderMixin(LoraBaseMixin):
     r"""
-    Load LoRA layers into [`
+    Load LoRA layers into [`WanTransformer3DModel`]. Specific to [`WanPipeline`] and `[WanImageToVideoPipeline`].
     """

     _lora_loadable_modules = ["transformer"]
@@ -4417,7 +5105,8 @@ class Lumina2LoraLoaderMixin(LoraBaseMixin):
                 allowed by Git.
             subfolder (`str`, *optional*, defaults to `""`):
                 The subfolder location of a model file within a larger model repository on the Hub or locally.
-
+            return_lora_metadata (`bool`, *optional*, defaults to False):
+                When enabled, additionally return the LoRA adapter metadata, typically found in the state dict.
         """
         # Load the main state dict first which has the LoRA layers for either of
         # transformer and text encoder or both.
@@ -4430,18 +5119,16 @@ class Lumina2LoraLoaderMixin(LoraBaseMixin):
         subfolder = kwargs.pop("subfolder", None)
         weight_name = kwargs.pop("weight_name", None)
         use_safetensors = kwargs.pop("use_safetensors", None)
+        return_lora_metadata = kwargs.pop("return_lora_metadata", False)

         allow_pickle = False
         if use_safetensors is None:
             use_safetensors = True
             allow_pickle = True

-        user_agent = {
-            "file_type": "attn_procs_weights",
-            "framework": "pytorch",
-        }
+        user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"}

-        state_dict = _fetch_state_dict(
+        state_dict, metadata = _fetch_state_dict(
             pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict,
             weight_name=weight_name,
             use_safetensors=use_safetensors,
@@ -4455,6 +5142,10 @@ class Lumina2LoraLoaderMixin(LoraBaseMixin):
             user_agent=user_agent,
             allow_pickle=allow_pickle,
         )
+        if any(k.startswith("diffusion_model.") for k in state_dict):
+            state_dict = _convert_non_diffusers_wan_lora_to_diffusers(state_dict)
+        elif any(k.startswith("lora_unet_") for k in state_dict):
+            state_dict = _convert_musubi_wan_lora_to_diffusers(state_dict)

         is_dora_scale_present = any("dora_scale" in k for k in state_dict)
         if is_dora_scale_present:
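The Wan loader now recognises two non-diffusers key layouts before handing the state dict on. A small, self-contained illustration of the prefix checks; the example keys below are invented for demonstration:

```py
example_key_sets = {
    "original-style": ["diffusion_model.blocks.0.self_attn.q.lora_A.weight"],
    "musubi-style": ["lora_unet_blocks_0_self_attn_q.lora_down.weight"],
    "diffusers": ["transformer.blocks.0.attn1.to_q.lora_A.weight"],
}
for name, keys in example_key_sets.items():
    if any(k.startswith("diffusion_model.") for k in keys):
        route = "_convert_non_diffusers_wan_lora_to_diffusers"
    elif any(k.startswith("lora_unet_") for k in keys):
        route = "_convert_musubi_wan_lora_to_diffusers"
    else:
        route = "no conversion needed"
    print(name, "->", route)
```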
@@ -4462,16 +5153,63 @@ class Lumina2LoraLoaderMixin(LoraBaseMixin):
             logger.warning(warn_msg)
             state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k}

-
-
-
-
+        out = (state_dict, metadata) if return_lora_metadata else state_dict
+        return out
+
+    @classmethod
+    def _maybe_expand_t2v_lora_for_i2v(
+        cls,
+        transformer: torch.nn.Module,
+        state_dict,
+    ):
+        if transformer.config.image_dim is None:
+            return state_dict
+
+        target_device = transformer.device
+
+        if any(k.startswith("transformer.blocks.") for k in state_dict):
+            num_blocks = len({k.split("blocks.")[1].split(".")[0] for k in state_dict if "blocks." in k})
+            is_i2v_lora = any("add_k_proj" in k for k in state_dict) and any("add_v_proj" in k for k in state_dict)
+            has_bias = any(".lora_B.bias" in k for k in state_dict)
+
+            if is_i2v_lora:
+                return state_dict
+
+            for i in range(num_blocks):
+                for o, c in zip(["k_img", "v_img"], ["add_k_proj", "add_v_proj"]):
+                    # These keys should exist if the block `i` was part of the T2V LoRA.
+                    ref_key_lora_A = f"transformer.blocks.{i}.attn2.to_k.lora_A.weight"
+                    ref_key_lora_B = f"transformer.blocks.{i}.attn2.to_k.lora_B.weight"
+
+                    if ref_key_lora_A not in state_dict or ref_key_lora_B not in state_dict:
+                        continue
+
+                    state_dict[f"transformer.blocks.{i}.attn2.{c}.lora_A.weight"] = torch.zeros_like(
+                        state_dict[f"transformer.blocks.{i}.attn2.to_k.lora_A.weight"], device=target_device
+                    )
+                    state_dict[f"transformer.blocks.{i}.attn2.{c}.lora_B.weight"] = torch.zeros_like(
+                        state_dict[f"transformer.blocks.{i}.attn2.to_k.lora_B.weight"], device=target_device
+                    )
+
+                    # If the original LoRA had biases (indicated by has_bias)
+                    # AND the specific reference bias key exists for this block.
+
+                    ref_key_lora_B_bias = f"transformer.blocks.{i}.attn2.to_k.lora_B.bias"
+                    if has_bias and ref_key_lora_B_bias in state_dict:
+                        ref_lora_B_bias_tensor = state_dict[ref_key_lora_B_bias]
+                        state_dict[f"transformer.blocks.{i}.attn2.{c}.lora_B.bias"] = torch.zeros_like(
+                            ref_lora_B_bias_tensor,
+                            device=target_device,
+                        )

         return state_dict

-    # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights
     def load_lora_weights(
-        self,
+        self,
+        pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
+        adapter_name: Optional[str] = None,
+        hotswap: bool = False,
+        **kwargs,
     ):
         """
         Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.transformer` and
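`_maybe_expand_t2v_lora_for_i2v` pads a text-to-video LoRA with zero-valued image-attention matrices so it can be attached to the image-to-video transformer without changing its output. A toy, self-contained illustration of that zero-filling idea (the key names and shapes are made up):

```py
import torch

t2v = {
    "transformer.blocks.0.attn2.to_k.lora_A.weight": torch.randn(4, 64),
    "transformer.blocks.0.attn2.to_k.lora_B.weight": torch.randn(64, 4),
}
for proj in ("add_k_proj", "add_v_proj"):           # the I2V-only projections
    for mat in ("lora_A", "lora_B"):
        ref = t2v[f"transformer.blocks.0.attn2.to_k.{mat}.weight"]
        t2v[f"transformer.blocks.0.attn2.{proj}.{mat}.weight"] = torch.zeros_like(ref)
print(len(t2v))  # 6 keys: the original pair plus four zero-filled tensors
```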
@@ -4489,6 +5227,8 @@ class Lumina2LoraLoaderMixin(LoraBaseMixin):
             low_cpu_mem_usage (`bool`, *optional*):
                 Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
                 weights.
+            hotswap (`bool`, *optional*):
+                See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
             kwargs (`dict`, *optional*):
                 See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
         """
@@ -4506,8 +5246,13 @@ class Lumina2LoraLoaderMixin(LoraBaseMixin):
             pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy()

         # First, ensure that the checkpoint is a compatible one and can be successfully loaded.
-
-
+        kwargs["return_lora_metadata"] = True
+        state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs)
+        # convert T2V LoRA to I2V LoRA (when loaded to Wan I2V) by adding zeros for the additional (missing) _img layers
+        state_dict = self._maybe_expand_t2v_lora_for_i2v(
+            transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer,
+            state_dict=state_dict,
+        )
         is_correct_format = all("lora" in key for key in state_dict.keys())
         if not is_correct_format:
             raise ValueError("Invalid LoRA checkpoint.")
@@ -4516,14 +5261,23 @@ class Lumina2LoraLoaderMixin(LoraBaseMixin):
             state_dict,
             transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=self,
             low_cpu_mem_usage=low_cpu_mem_usage,
+            hotswap=hotswap,
         )

     @classmethod
-    # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->
+    # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->WanTransformer3DModel
     def load_lora_into_transformer(
-        cls,
+        cls,
+        state_dict,
+        transformer,
+        adapter_name=None,
+        _pipeline=None,
+        low_cpu_mem_usage=False,
+        hotswap: bool = False,
+        metadata=None,
     ):
         """
         This will load the LoRA layers specified in `state_dict` into `transformer`.
@@ -4533,7 +5287,7 @@ class Lumina2LoraLoaderMixin(LoraBaseMixin):
                 A standard state dict containing the lora layer parameters. The keys can either be indexed directly
                 into the unet or prefixed with an additional `unet` which can be used to distinguish between text
                 encoder lora layers.
-            transformer (`
+            transformer (`WanTransformer3DModel`):
                 The Transformer model to load the LoRA layers into.
             adapter_name (`str`, *optional*):
                 Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
@@ -4541,29 +5295,11 @@ class Lumina2LoraLoaderMixin(LoraBaseMixin):
             low_cpu_mem_usage (`bool`, *optional*):
                 Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
                 weights.
-            hotswap
-
-
-
-
-                torch.compile, loading the new adapter does not require recompilation of the model. When using
-                hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
-
-                If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
-                to call an additional method before loading the adapter:
-
-                ```py
-                pipeline = ...  # load diffusers pipeline
-                max_rank = ...  # the highest rank among all LoRAs that you want to load
-                # call *before* compiling and loading the LoRA adapter
-                pipeline.enable_lora_hotswap(target_rank=max_rank)
-                pipeline.load_lora_weights(file_name)
-                # optionally compile the model now
-                ```
-
-                Note that hotswapping adapters of the text encoder is not yet supported. There are some further
-                limitations to this technique, which are documented here:
-                https://huggingface.co/docs/peft/main/en/package_reference/hotswap
+            hotswap (`bool`, *optional*):
+                See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
+            metadata (`dict`):
+                Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
+                from the state dict.
         """
         if low_cpu_mem_usage and is_peft_version("<", "0.13.0"):
             raise ValueError(
@@ -4576,6 +5312,7 @@ class Lumina2LoraLoaderMixin(LoraBaseMixin):
             state_dict,
             network_alphas=None,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=_pipeline,
             low_cpu_mem_usage=low_cpu_mem_usage,
             hotswap=hotswap,
@@ -4591,9 +5328,10 @@ class Lumina2LoraLoaderMixin(LoraBaseMixin):
         weight_name: str = None,
         save_function: Callable = None,
         safe_serialization: bool = True,
+        transformer_lora_adapter_metadata: Optional[dict] = None,
     ):
         r"""
-        Save the LoRA parameters corresponding to the
+        Save the LoRA parameters corresponding to the transformer.

         Arguments:
             save_directory (`str` or `os.PathLike`):
@@ -4610,14 +5348,21 @@ class Lumina2LoraLoaderMixin(LoraBaseMixin):
                 `DIFFUSERS_SAVE_MODE`.
             safe_serialization (`bool`, *optional*, defaults to `True`):
                 Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
+            transformer_lora_adapter_metadata:
+                LoRA adapter metadata associated with the transformer to be serialized with the state dict.
         """
         state_dict = {}
+        lora_adapter_metadata = {}

         if not transformer_lora_layers:
             raise ValueError("You must pass `transformer_lora_layers`.")

-
-
+        state_dict.update(cls.pack_weights(transformer_lora_layers, cls.transformer_name))
+
+        if transformer_lora_adapter_metadata is not None:
+            lora_adapter_metadata.update(
+                _pack_dict_with_prefix(transformer_lora_adapter_metadata, cls.transformer_name)
+            )

         # Save the model
         cls.write_lora_layers(
@@ -4627,9 +5372,10 @@ class Lumina2LoraLoaderMixin(LoraBaseMixin):
             weight_name=weight_name,
             save_function=save_function,
             safe_serialization=safe_serialization,
+            lora_adapter_metadata=lora_adapter_metadata,
         )

-    # Copied from diffusers.loaders.lora_pipeline.
+    # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora
     def fuse_lora(
         self,
         components: List[str] = ["transformer"],
@@ -4677,7 +5423,7 @@ class Lumina2LoraLoaderMixin(LoraBaseMixin):
             **kwargs,
         )

-    # Copied from diffusers.loaders.lora_pipeline.
+    # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.unfuse_lora
     def unfuse_lora(self, components: List[str] = ["transformer"], **kwargs):
         r"""
         Reverses the effect of
@@ -4696,9 +5442,9 @@ class Lumina2LoraLoaderMixin(LoraBaseMixin):
         super().unfuse_lora(components=components, **kwargs)


-class
+class CogView4LoraLoaderMixin(LoraBaseMixin):
     r"""
-    Load LoRA layers into [`WanTransformer3DModel`]. Specific to [`
+    Load LoRA layers into [`WanTransformer3DModel`]. Specific to [`CogView4Pipeline`].
     """

     _lora_loadable_modules = ["transformer"]
@@ -4706,6 +5452,7 @@ class WanLoraLoaderMixin(LoraBaseMixin):

     @classmethod
     @validate_hf_hub_args
+    # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.lora_state_dict
     def lora_state_dict(
         cls,
         pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
@@ -4754,6 +5501,8 @@ class WanLoraLoaderMixin(LoraBaseMixin):
                 allowed by Git.
             subfolder (`str`, *optional*, defaults to `""`):
                 The subfolder location of a model file within a larger model repository on the Hub or locally.
+            return_lora_metadata (`bool`, *optional*, defaults to False):
+                When enabled, additionally return the LoRA adapter metadata, typically found in the state dict.

         """
         # Load the main state dict first which has the LoRA layers for either of
@@ -4767,18 +5516,16 @@ class WanLoraLoaderMixin(LoraBaseMixin):
         subfolder = kwargs.pop("subfolder", None)
         weight_name = kwargs.pop("weight_name", None)
         use_safetensors = kwargs.pop("use_safetensors", None)
+        return_lora_metadata = kwargs.pop("return_lora_metadata", False)

         allow_pickle = False
         if use_safetensors is None:
             use_safetensors = True
             allow_pickle = True

-        user_agent = {
-            "file_type": "attn_procs_weights",
-            "framework": "pytorch",
-        }
+        user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"}

-        state_dict = _fetch_state_dict(
+        state_dict, metadata = _fetch_state_dict(
             pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict,
             weight_name=weight_name,
             use_safetensors=use_safetensors,
@@ -4792,8 +5539,6 @@ class WanLoraLoaderMixin(LoraBaseMixin):
             user_agent=user_agent,
             allow_pickle=allow_pickle,
         )
-        if any(k.startswith("diffusion_model.") for k in state_dict):
-            state_dict = _convert_non_diffusers_wan_lora_to_diffusers(state_dict)

         is_dora_scale_present = any("dora_scale" in k for k in state_dict)
         if is_dora_scale_present:
@@ -4801,37 +5546,16 @@ class WanLoraLoaderMixin(LoraBaseMixin):
             logger.warning(warn_msg)
             state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k}

-
-
-    @classmethod
-    def _maybe_expand_t2v_lora_for_i2v(
-        cls,
-        transformer: torch.nn.Module,
-        state_dict,
-    ):
-        if transformer.config.image_dim is None:
-            return state_dict
-
-        if any(k.startswith("transformer.blocks.") for k in state_dict):
-            num_blocks = len({k.split("blocks.")[1].split(".")[0] for k in state_dict})
-            is_i2v_lora = any("add_k_proj" in k for k in state_dict) and any("add_v_proj" in k for k in state_dict)
-
-            if is_i2v_lora:
-                return state_dict
-
-            for i in range(num_blocks):
-                for o, c in zip(["k_img", "v_img"], ["add_k_proj", "add_v_proj"]):
-                    state_dict[f"transformer.blocks.{i}.attn2.{c}.lora_A.weight"] = torch.zeros_like(
-                        state_dict[f"transformer.blocks.{i}.attn2.to_k.lora_A.weight"]
-                    )
-                    state_dict[f"transformer.blocks.{i}.attn2.{c}.lora_B.weight"] = torch.zeros_like(
-                        state_dict[f"transformer.blocks.{i}.attn2.to_k.lora_B.weight"]
-                    )
-
-            return state_dict
+        out = (state_dict, metadata) if return_lora_metadata else state_dict
+        return out

+    # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights
     def load_lora_weights(
-        self,
+        self,
+        pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
+        adapter_name: Optional[str] = None,
+        hotswap: bool = False,
+        **kwargs,
     ):
         """
         Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.transformer` and
@@ -4849,6 +5573,8 @@ class WanLoraLoaderMixin(LoraBaseMixin):
             low_cpu_mem_usage (`bool`, *optional*):
                 Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
                 weights.
+            hotswap (`bool`, *optional*):
+                See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
             kwargs (`dict`, *optional*):
                 See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
         """
@@ -4866,12 +5592,9 @@ class WanLoraLoaderMixin(LoraBaseMixin):
             pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy()

         # First, ensure that the checkpoint is a compatible one and can be successfully loaded.
-
-
-
-            transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer,
-            state_dict=state_dict,
-        )
+        kwargs["return_lora_metadata"] = True
+        state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs)
+
         is_correct_format = all("lora" in key for key in state_dict.keys())
         if not is_correct_format:
             raise ValueError("Invalid LoRA checkpoint.")
@@ -4880,14 +5603,23 @@ class WanLoraLoaderMixin(LoraBaseMixin):
             state_dict,
             transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=self,
             low_cpu_mem_usage=low_cpu_mem_usage,
+            hotswap=hotswap,
         )

     @classmethod
-    # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->
+    # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->CogView4Transformer2DModel
     def load_lora_into_transformer(
-        cls,
+        cls,
+        state_dict,
+        transformer,
+        adapter_name=None,
+        _pipeline=None,
+        low_cpu_mem_usage=False,
+        hotswap: bool = False,
+        metadata=None,
     ):
         """
         This will load the LoRA layers specified in `state_dict` into `transformer`.
@@ -4897,7 +5629,7 @@ class WanLoraLoaderMixin(LoraBaseMixin):
                 A standard state dict containing the lora layer parameters. The keys can either be indexed directly
                 into the unet or prefixed with an additional `unet` which can be used to distinguish between text
                 encoder lora layers.
-            transformer (`
+            transformer (`CogView4Transformer2DModel`):
                 The Transformer model to load the LoRA layers into.
             adapter_name (`str`, *optional*):
                 Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
@@ -4905,29 +5637,11 @@ class WanLoraLoaderMixin(LoraBaseMixin):
             low_cpu_mem_usage (`bool`, *optional*):
                 Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
                 weights.
-            hotswap
-
-
-
-
-                torch.compile, loading the new adapter does not require recompilation of the model. When using
-                hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
-
-                If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
-                to call an additional method before loading the adapter:
-
-                ```py
-                pipeline = ...  # load diffusers pipeline
-                max_rank = ...  # the highest rank among all LoRAs that you want to load
-                # call *before* compiling and loading the LoRA adapter
-                pipeline.enable_lora_hotswap(target_rank=max_rank)
-                pipeline.load_lora_weights(file_name)
-                # optionally compile the model now
-                ```
-
-                Note that hotswapping adapters of the text encoder is not yet supported. There are some further
-                limitations to this technique, which are documented here:
-                https://huggingface.co/docs/peft/main/en/package_reference/hotswap
+            hotswap (`bool`, *optional*):
+                See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
+            metadata (`dict`):
+                Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
+                from the state dict.
         """
         if low_cpu_mem_usage and is_peft_version("<", "0.13.0"):
             raise ValueError(
@@ -4940,6 +5654,7 @@ class WanLoraLoaderMixin(LoraBaseMixin):
             state_dict,
             network_alphas=None,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=_pipeline,
             low_cpu_mem_usage=low_cpu_mem_usage,
             hotswap=hotswap,
@@ -4955,9 +5670,10 @@ class WanLoraLoaderMixin(LoraBaseMixin):
         weight_name: str = None,
         save_function: Callable = None,
         safe_serialization: bool = True,
+        transformer_lora_adapter_metadata: Optional[dict] = None,
     ):
         r"""
-        Save the LoRA parameters corresponding to the
+        Save the LoRA parameters corresponding to the transformer.

         Arguments:
             save_directory (`str` or `os.PathLike`):
@@ -4974,14 +5690,21 @@ class WanLoraLoaderMixin(LoraBaseMixin):
                 `DIFFUSERS_SAVE_MODE`.
             safe_serialization (`bool`, *optional*, defaults to `True`):
                 Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
+            transformer_lora_adapter_metadata:
+                LoRA adapter metadata associated with the transformer to be serialized with the state dict.
         """
         state_dict = {}
+        lora_adapter_metadata = {}

         if not transformer_lora_layers:
             raise ValueError("You must pass `transformer_lora_layers`.")

-
-
+        state_dict.update(cls.pack_weights(transformer_lora_layers, cls.transformer_name))
+
+        if transformer_lora_adapter_metadata is not None:
+            lora_adapter_metadata.update(
+                _pack_dict_with_prefix(transformer_lora_adapter_metadata, cls.transformer_name)
+            )

         # Save the model
         cls.write_lora_layers(
@@ -4991,6 +5714,7 @@ class WanLoraLoaderMixin(LoraBaseMixin):
             weight_name=weight_name,
             save_function=save_function,
             safe_serialization=safe_serialization,
+            lora_adapter_metadata=lora_adapter_metadata,
         )

     # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora
@@ -5060,9 +5784,9 @@ class WanLoraLoaderMixin(LoraBaseMixin):
         super().unfuse_lora(components=components, **kwargs)


-class
+class HiDreamImageLoraLoaderMixin(LoraBaseMixin):
     r"""
-    Load LoRA layers into [`
+    Load LoRA layers into [`HiDreamImageTransformer2DModel`]. Specific to [`HiDreamImagePipeline`].
     """

     _lora_loadable_modules = ["transformer"]
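The mixins above all expose the same save/load surface, so the metadata round trip looks the same for each of them. A hedged sketch using CogView4; the path, the layer dict, and the metadata values are placeholders:

```py
from diffusers import CogView4Pipeline

transformer_lora_layers = ...  # placeholder: dict of LoRA tensors from a trained adapter
CogView4Pipeline.save_lora_weights(
    save_directory="./cogview4-lora",
    transformer_lora_layers=transformer_lora_layers,
    transformer_lora_adapter_metadata={"r": 8, "lora_alpha": 8},
)
state_dict, metadata = CogView4Pipeline.lora_state_dict("./cogview4-lora", return_lora_metadata=True)
```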
@@ -5070,7 +5794,6 @@ class CogView4LoraLoaderMixin(LoraBaseMixin):

     @classmethod
     @validate_hf_hub_args
-    # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.lora_state_dict
     def lora_state_dict(
         cls,
         pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
@@ -5119,7 +5842,8 @@ class CogView4LoraLoaderMixin(LoraBaseMixin):
                 allowed by Git.
             subfolder (`str`, *optional*, defaults to `""`):
                 The subfolder location of a model file within a larger model repository on the Hub or locally.
-
+            return_lora_metadata (`bool`, *optional*, defaults to False):
+                When enabled, additionally return the LoRA adapter metadata, typically found in the state dict.
         """
         # Load the main state dict first which has the LoRA layers for either of
         # transformer and text encoder or both.
@@ -5132,18 +5856,16 @@ class CogView4LoraLoaderMixin(LoraBaseMixin):
         subfolder = kwargs.pop("subfolder", None)
         weight_name = kwargs.pop("weight_name", None)
         use_safetensors = kwargs.pop("use_safetensors", None)
+        return_lora_metadata = kwargs.pop("return_lora_metadata", False)

         allow_pickle = False
         if use_safetensors is None:
             use_safetensors = True
             allow_pickle = True

-        user_agent = {
-            "file_type": "attn_procs_weights",
-            "framework": "pytorch",
-        }
+        user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"}

-        state_dict = _fetch_state_dict(
+        state_dict, metadata = _fetch_state_dict(
             pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict,
             weight_name=weight_name,
             use_safetensors=use_safetensors,
@@ -5164,11 +5886,20 @@ class CogView4LoraLoaderMixin(LoraBaseMixin):
             logger.warning(warn_msg)
             state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k}

-
+        is_non_diffusers_format = any("diffusion_model" in k for k in state_dict)
+        if is_non_diffusers_format:
+            state_dict = _convert_non_diffusers_hidream_lora_to_diffusers(state_dict)
+
+        out = (state_dict, metadata) if return_lora_metadata else state_dict
+        return out

     # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights
     def load_lora_weights(
-        self,
+        self,
+        pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
+        adapter_name: Optional[str] = None,
+        hotswap: bool = False,
+        **kwargs,
     ):
         """
         Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.transformer` and
@@ -5186,6 +5917,8 @@ class CogView4LoraLoaderMixin(LoraBaseMixin):
             low_cpu_mem_usage (`bool`, *optional*):
                 Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
                 weights.
+            hotswap (`bool`, *optional*):
+                See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
             kwargs (`dict`, *optional*):
                 See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`].
         """
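The HiDream-Image loader above converts checkpoints whose keys contain `diffusion_model` before returning them. A hedged sketch of what a caller sees; the repository id is a placeholder:

```py
from diffusers import HiDreamImagePipeline

# Keys containing "diffusion_model" are converted to the diffusers layout inside lora_state_dict,
# so the returned dict should already use diffusers-style keys.
state_dict = HiDreamImagePipeline.lora_state_dict("some-user/some-hidream-lora")
print(any("diffusion_model" in k for k in state_dict))  # expected: False after conversion
```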
@@ -5203,7 +5936,8 @@ class CogView4LoraLoaderMixin(LoraBaseMixin):
             pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy()

         # First, ensure that the checkpoint is a compatible one and can be successfully loaded.
-
+        kwargs["return_lora_metadata"] = True
+        state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs)

         is_correct_format = all("lora" in key for key in state_dict.keys())
         if not is_correct_format:
@@ -5213,14 +5947,23 @@ class CogView4LoraLoaderMixin(LoraBaseMixin):
             state_dict,
             transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=self,
             low_cpu_mem_usage=low_cpu_mem_usage,
+            hotswap=hotswap,
         )

     @classmethod
-    # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->
+    # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->HiDreamImageTransformer2DModel
     def load_lora_into_transformer(
-        cls,
+        cls,
+        state_dict,
+        transformer,
+        adapter_name=None,
+        _pipeline=None,
+        low_cpu_mem_usage=False,
+        hotswap: bool = False,
+        metadata=None,
     ):
         """
         This will load the LoRA layers specified in `state_dict` into `transformer`.
@@ -5230,7 +5973,7 @@ class CogView4LoraLoaderMixin(LoraBaseMixin):
                 A standard state dict containing the lora layer parameters. The keys can either be indexed directly
                 into the unet or prefixed with an additional `unet` which can be used to distinguish between text
                 encoder lora layers.
-            transformer (`
+            transformer (`HiDreamImageTransformer2DModel`):
                 The Transformer model to load the LoRA layers into.
             adapter_name (`str`, *optional*):
                 Adapter name to be used for referencing the loaded adapter model. If not specified, it will use
@@ -5238,29 +5981,11 @@ class CogView4LoraLoaderMixin(LoraBaseMixin):
             low_cpu_mem_usage (`bool`, *optional*):
                 Speed up model loading by only loading the pretrained LoRA weights and not initializing the random
                 weights.
-            hotswap
-
-
-
-
-                torch.compile, loading the new adapter does not require recompilation of the model. When using
-                hotswapping, the passed `adapter_name` should be the name of an already loaded adapter.
-
-                If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need
-                to call an additional method before loading the adapter:
-
-                ```py
-                pipeline = ...  # load diffusers pipeline
-                max_rank = ...  # the highest rank among all LoRAs that you want to load
-                # call *before* compiling and loading the LoRA adapter
-                pipeline.enable_lora_hotswap(target_rank=max_rank)
-                pipeline.load_lora_weights(file_name)
-                # optionally compile the model now
-                ```
-
-                Note that hotswapping adapters of the text encoder is not yet supported. There are some further
-                limitations to this technique, which are documented here:
-                https://huggingface.co/docs/peft/main/en/package_reference/hotswap
+            hotswap (`bool`, *optional*):
+                See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`].
+            metadata (`dict`):
+                Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived
+                from the state dict.
         """
         if low_cpu_mem_usage and is_peft_version("<", "0.13.0"):
             raise ValueError(
@@ -5273,6 +5998,7 @@ class CogView4LoraLoaderMixin(LoraBaseMixin):
             state_dict,
             network_alphas=None,
             adapter_name=adapter_name,
+            metadata=metadata,
             _pipeline=_pipeline,
             low_cpu_mem_usage=low_cpu_mem_usage,
             hotswap=hotswap,
@@ -5288,9 +6014,10 @@ class CogView4LoraLoaderMixin(LoraBaseMixin):
         weight_name: str = None,
         save_function: Callable = None,
         safe_serialization: bool = True,
+        transformer_lora_adapter_metadata: Optional[dict] = None,
     ):
         r"""
-        Save the LoRA parameters corresponding to the
+        Save the LoRA parameters corresponding to the transformer.

         Arguments:
             save_directory (`str` or `os.PathLike`):
@@ -5307,14 +6034,21 @@ class CogView4LoraLoaderMixin(LoraBaseMixin):
                 `DIFFUSERS_SAVE_MODE`.
             safe_serialization (`bool`, *optional*, defaults to `True`):
                 Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
+            transformer_lora_adapter_metadata:
+                LoRA adapter metadata associated with the transformer to be serialized with the state dict.
         """
         state_dict = {}
+        lora_adapter_metadata = {}

         if not transformer_lora_layers:
             raise ValueError("You must pass `transformer_lora_layers`.")

-
-
+        state_dict.update(cls.pack_weights(transformer_lora_layers, cls.transformer_name))
+
+        if transformer_lora_adapter_metadata is not None:
+            lora_adapter_metadata.update(
+                _pack_dict_with_prefix(transformer_lora_adapter_metadata, cls.transformer_name)
+            )

         # Save the model
         cls.write_lora_layers(
@@ -5324,9 +6058,10 @@ class CogView4LoraLoaderMixin(LoraBaseMixin):
             weight_name=weight_name,
             save_function=save_function,
             safe_serialization=safe_serialization,
+            lora_adapter_metadata=lora_adapter_metadata,
         )

-    # Copied from diffusers.loaders.lora_pipeline.
+    # Copied from diffusers.loaders.lora_pipeline.SanaLoraLoaderMixin.fuse_lora
     def fuse_lora(
         self,
         components: List[str] = ["transformer"],
@@ -5374,7 +6109,7 @@ class CogView4LoraLoaderMixin(LoraBaseMixin):
             **kwargs,
         )

-    # Copied from diffusers.loaders.lora_pipeline.
+    # Copied from diffusers.loaders.lora_pipeline.SanaLoraLoaderMixin.unfuse_lora
     def unfuse_lora(self, components: List[str] = ["transformer"], **kwargs):
         r"""
         Reverses the effect of