diffusers 0.30.3__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. diffusers/__init__.py +97 -4
  2. diffusers/callbacks.py +56 -3
  3. diffusers/configuration_utils.py +13 -1
  4. diffusers/image_processor.py +282 -71
  5. diffusers/loaders/__init__.py +24 -3
  6. diffusers/loaders/ip_adapter.py +543 -16
  7. diffusers/loaders/lora_base.py +138 -125
  8. diffusers/loaders/lora_conversion_utils.py +647 -0
  9. diffusers/loaders/lora_pipeline.py +2216 -230
  10. diffusers/loaders/peft.py +380 -0
  11. diffusers/loaders/single_file_model.py +71 -4
  12. diffusers/loaders/single_file_utils.py +597 -10
  13. diffusers/loaders/textual_inversion.py +5 -3
  14. diffusers/loaders/transformer_flux.py +181 -0
  15. diffusers/loaders/transformer_sd3.py +89 -0
  16. diffusers/loaders/unet.py +56 -12
  17. diffusers/models/__init__.py +49 -12
  18. diffusers/models/activations.py +22 -9
  19. diffusers/models/adapter.py +53 -53
  20. diffusers/models/attention.py +98 -13
  21. diffusers/models/attention_flax.py +1 -1
  22. diffusers/models/attention_processor.py +2160 -346
  23. diffusers/models/autoencoders/__init__.py +5 -0
  24. diffusers/models/autoencoders/autoencoder_dc.py +620 -0
  25. diffusers/models/autoencoders/autoencoder_kl.py +73 -12
  26. diffusers/models/autoencoders/autoencoder_kl_allegro.py +1149 -0
  27. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +213 -105
  28. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +1176 -0
  29. diffusers/models/autoencoders/autoencoder_kl_ltx.py +1338 -0
  30. diffusers/models/autoencoders/autoencoder_kl_mochi.py +1166 -0
  31. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +3 -10
  32. diffusers/models/autoencoders/autoencoder_tiny.py +4 -2
  33. diffusers/models/autoencoders/vae.py +18 -5
  34. diffusers/models/controlnet.py +47 -802
  35. diffusers/models/controlnet_flux.py +70 -0
  36. diffusers/models/controlnet_sd3.py +26 -376
  37. diffusers/models/controlnet_sparsectrl.py +46 -719
  38. diffusers/models/controlnets/__init__.py +23 -0
  39. diffusers/models/controlnets/controlnet.py +872 -0
  40. diffusers/models/{controlnet_flax.py → controlnets/controlnet_flax.py} +5 -5
  41. diffusers/models/controlnets/controlnet_flux.py +536 -0
  42. diffusers/models/{controlnet_hunyuan.py → controlnets/controlnet_hunyuan.py} +7 -7
  43. diffusers/models/controlnets/controlnet_sd3.py +489 -0
  44. diffusers/models/controlnets/controlnet_sparsectrl.py +788 -0
  45. diffusers/models/controlnets/controlnet_union.py +832 -0
  46. diffusers/models/{controlnet_xs.py → controlnets/controlnet_xs.py} +14 -13
  47. diffusers/models/controlnets/multicontrolnet.py +183 -0
  48. diffusers/models/embeddings.py +996 -92
  49. diffusers/models/embeddings_flax.py +23 -9
  50. diffusers/models/model_loading_utils.py +264 -14
  51. diffusers/models/modeling_flax_utils.py +1 -1
  52. diffusers/models/modeling_utils.py +334 -51
  53. diffusers/models/normalization.py +157 -13
  54. diffusers/models/transformers/__init__.py +6 -0
  55. diffusers/models/transformers/auraflow_transformer_2d.py +3 -2
  56. diffusers/models/transformers/cogvideox_transformer_3d.py +69 -13
  57. diffusers/models/transformers/dit_transformer_2d.py +1 -1
  58. diffusers/models/transformers/latte_transformer_3d.py +4 -4
  59. diffusers/models/transformers/pixart_transformer_2d.py +10 -2
  60. diffusers/models/transformers/sana_transformer.py +488 -0
  61. diffusers/models/transformers/stable_audio_transformer.py +1 -1
  62. diffusers/models/transformers/transformer_2d.py +1 -1
  63. diffusers/models/transformers/transformer_allegro.py +422 -0
  64. diffusers/models/transformers/transformer_cogview3plus.py +386 -0
  65. diffusers/models/transformers/transformer_flux.py +189 -51
  66. diffusers/models/transformers/transformer_hunyuan_video.py +789 -0
  67. diffusers/models/transformers/transformer_ltx.py +469 -0
  68. diffusers/models/transformers/transformer_mochi.py +499 -0
  69. diffusers/models/transformers/transformer_sd3.py +112 -18
  70. diffusers/models/transformers/transformer_temporal.py +1 -1
  71. diffusers/models/unets/unet_1d_blocks.py +1 -1
  72. diffusers/models/unets/unet_2d.py +8 -1
  73. diffusers/models/unets/unet_2d_blocks.py +88 -21
  74. diffusers/models/unets/unet_2d_condition.py +9 -9
  75. diffusers/models/unets/unet_3d_blocks.py +9 -7
  76. diffusers/models/unets/unet_motion_model.py +46 -68
  77. diffusers/models/unets/unet_spatio_temporal_condition.py +23 -0
  78. diffusers/models/unets/unet_stable_cascade.py +2 -2
  79. diffusers/models/unets/uvit_2d.py +1 -1
  80. diffusers/models/upsampling.py +14 -6
  81. diffusers/pipelines/__init__.py +69 -6
  82. diffusers/pipelines/allegro/__init__.py +48 -0
  83. diffusers/pipelines/allegro/pipeline_allegro.py +938 -0
  84. diffusers/pipelines/allegro/pipeline_output.py +23 -0
  85. diffusers/pipelines/animatediff/__init__.py +2 -0
  86. diffusers/pipelines/animatediff/pipeline_animatediff.py +45 -21
  87. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +52 -22
  88. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +18 -4
  89. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +3 -1
  90. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +104 -72
  91. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
  92. diffusers/pipelines/audioldm2/modeling_audioldm2.py +3 -3
  93. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +2 -9
  94. diffusers/pipelines/auto_pipeline.py +88 -10
  95. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  96. diffusers/pipelines/cogvideo/__init__.py +2 -0
  97. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +80 -39
  98. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +825 -0
  99. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +108 -50
  100. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +89 -50
  101. diffusers/pipelines/cogview3/__init__.py +47 -0
  102. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
  103. diffusers/pipelines/cogview3/pipeline_output.py +21 -0
  104. diffusers/pipelines/controlnet/__init__.py +86 -80
  105. diffusers/pipelines/controlnet/multicontrolnet.py +7 -178
  106. diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -3
  107. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +9 -2
  108. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +9 -2
  109. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +37 -15
  110. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +12 -4
  111. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +9 -4
  112. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +1790 -0
  113. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +1501 -0
  114. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +1627 -0
  115. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +22 -4
  116. diffusers/pipelines/controlnet_sd3/__init__.py +4 -0
  117. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +56 -20
  118. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
  119. diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
  120. diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
  121. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +16 -4
  122. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +1 -1
  123. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +32 -9
  124. diffusers/pipelines/flux/__init__.py +23 -1
  125. diffusers/pipelines/flux/modeling_flux.py +47 -0
  126. diffusers/pipelines/flux/pipeline_flux.py +256 -48
  127. diffusers/pipelines/flux/pipeline_flux_control.py +889 -0
  128. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +945 -0
  129. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1141 -0
  130. diffusers/pipelines/flux/pipeline_flux_controlnet.py +1006 -0
  131. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +998 -0
  132. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1204 -0
  133. diffusers/pipelines/flux/pipeline_flux_fill.py +969 -0
  134. diffusers/pipelines/flux/pipeline_flux_img2img.py +856 -0
  135. diffusers/pipelines/flux/pipeline_flux_inpaint.py +1022 -0
  136. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +492 -0
  137. diffusers/pipelines/flux/pipeline_output.py +16 -0
  138. diffusers/pipelines/free_noise_utils.py +365 -5
  139. diffusers/pipelines/hunyuan_video/__init__.py +48 -0
  140. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +687 -0
  141. diffusers/pipelines/hunyuan_video/pipeline_output.py +20 -0
  142. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +20 -4
  143. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +9 -9
  144. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +2 -2
  145. diffusers/pipelines/kolors/pipeline_kolors.py +1 -1
  146. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +14 -11
  147. diffusers/pipelines/kolors/text_encoder.py +2 -2
  148. diffusers/pipelines/kolors/tokenizer.py +4 -0
  149. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +1 -1
  150. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +1 -1
  151. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  152. diffusers/pipelines/latte/pipeline_latte.py +2 -2
  153. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +15 -3
  154. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +15 -3
  155. diffusers/pipelines/ltx/__init__.py +50 -0
  156. diffusers/pipelines/ltx/pipeline_ltx.py +789 -0
  157. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +885 -0
  158. diffusers/pipelines/ltx/pipeline_output.py +20 -0
  159. diffusers/pipelines/lumina/pipeline_lumina.py +3 -10
  160. diffusers/pipelines/mochi/__init__.py +48 -0
  161. diffusers/pipelines/mochi/pipeline_mochi.py +748 -0
  162. diffusers/pipelines/mochi/pipeline_output.py +20 -0
  163. diffusers/pipelines/pag/__init__.py +13 -0
  164. diffusers/pipelines/pag/pag_utils.py +8 -2
  165. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +2 -3
  166. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1543 -0
  167. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +3 -5
  168. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1683 -0
  169. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +22 -6
  170. diffusers/pipelines/pag/pipeline_pag_kolors.py +1 -1
  171. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +7 -14
  172. diffusers/pipelines/pag/pipeline_pag_sana.py +886 -0
  173. diffusers/pipelines/pag/pipeline_pag_sd.py +18 -6
  174. diffusers/pipelines/pag/pipeline_pag_sd_3.py +18 -9
  175. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +1058 -0
  176. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +5 -1
  177. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1094 -0
  178. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +1356 -0
  179. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +18 -6
  180. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +31 -16
  181. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +42 -19
  182. diffusers/pipelines/pia/pipeline_pia.py +2 -0
  183. diffusers/pipelines/pipeline_flax_utils.py +1 -1
  184. diffusers/pipelines/pipeline_loading_utils.py +250 -31
  185. diffusers/pipelines/pipeline_utils.py +158 -186
  186. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +7 -14
  187. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +7 -14
  188. diffusers/pipelines/sana/__init__.py +47 -0
  189. diffusers/pipelines/sana/pipeline_output.py +21 -0
  190. diffusers/pipelines/sana/pipeline_sana.py +884 -0
  191. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +12 -1
  192. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +35 -3
  193. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +2 -2
  194. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +46 -9
  195. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +1 -1
  196. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +1 -1
  197. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +241 -81
  198. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +228 -23
  199. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +82 -13
  200. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +60 -11
  201. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -1
  202. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
  203. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +16 -4
  204. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +16 -4
  205. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -12
  206. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +29 -22
  207. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +29 -22
  208. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +1 -1
  209. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +1 -1
  210. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +16 -4
  211. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +15 -3
  212. diffusers/pipelines/unidiffuser/modeling_uvit.py +2 -2
  213. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  214. diffusers/quantizers/__init__.py +16 -0
  215. diffusers/quantizers/auto.py +139 -0
  216. diffusers/quantizers/base.py +233 -0
  217. diffusers/quantizers/bitsandbytes/__init__.py +2 -0
  218. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +561 -0
  219. diffusers/quantizers/bitsandbytes/utils.py +306 -0
  220. diffusers/quantizers/gguf/__init__.py +1 -0
  221. diffusers/quantizers/gguf/gguf_quantizer.py +159 -0
  222. diffusers/quantizers/gguf/utils.py +456 -0
  223. diffusers/quantizers/quantization_config.py +669 -0
  224. diffusers/quantizers/torchao/__init__.py +15 -0
  225. diffusers/quantizers/torchao/torchao_quantizer.py +285 -0
  226. diffusers/schedulers/scheduling_ddim.py +4 -1
  227. diffusers/schedulers/scheduling_ddim_cogvideox.py +4 -1
  228. diffusers/schedulers/scheduling_ddim_parallel.py +4 -1
  229. diffusers/schedulers/scheduling_ddpm.py +6 -7
  230. diffusers/schedulers/scheduling_ddpm_parallel.py +6 -7
  231. diffusers/schedulers/scheduling_deis_multistep.py +102 -6
  232. diffusers/schedulers/scheduling_dpmsolver_multistep.py +113 -6
  233. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +111 -5
  234. diffusers/schedulers/scheduling_dpmsolver_sde.py +125 -10
  235. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +126 -7
  236. diffusers/schedulers/scheduling_edm_euler.py +8 -6
  237. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +4 -1
  238. diffusers/schedulers/scheduling_euler_discrete.py +92 -7
  239. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +153 -6
  240. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +4 -5
  241. diffusers/schedulers/scheduling_heun_discrete.py +114 -8
  242. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +116 -11
  243. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +110 -8
  244. diffusers/schedulers/scheduling_lcm.py +2 -6
  245. diffusers/schedulers/scheduling_lms_discrete.py +76 -1
  246. diffusers/schedulers/scheduling_repaint.py +1 -1
  247. diffusers/schedulers/scheduling_sasolver.py +102 -6
  248. diffusers/schedulers/scheduling_tcd.py +2 -6
  249. diffusers/schedulers/scheduling_unclip.py +4 -1
  250. diffusers/schedulers/scheduling_unipc_multistep.py +127 -5
  251. diffusers/training_utils.py +63 -19
  252. diffusers/utils/__init__.py +7 -1
  253. diffusers/utils/constants.py +1 -0
  254. diffusers/utils/dummy_pt_objects.py +240 -0
  255. diffusers/utils/dummy_torch_and_transformers_objects.py +435 -0
  256. diffusers/utils/dynamic_modules_utils.py +3 -3
  257. diffusers/utils/hub_utils.py +44 -40
  258. diffusers/utils/import_utils.py +98 -8
  259. diffusers/utils/loading_utils.py +28 -4
  260. diffusers/utils/peft_utils.py +6 -3
  261. diffusers/utils/testing_utils.py +115 -1
  262. diffusers/utils/torch_utils.py +3 -0
  263. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/METADATA +73 -72
  264. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/RECORD +268 -193
  265. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/WHEEL +1 -1
  266. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/LICENSE +0 -0
  267. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/entry_points.txt +0 -0
  268. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/top_level.txt +0 -0
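Items 214–225 in the listing above introduce a new `diffusers/quantizers` package (a shared quantization config plus bitsandbytes, GGUF, and torchao backends). As a rough orientation for what that module enables, here is a minimal sketch of quantized model loading; it assumes the `BitsAndBytesConfig` export and the `quantization_config` argument to `from_pretrained` shipped in this release, and the repo id is only an example.

```python
# Illustrative sketch only (not taken from the diff): assumes diffusers>=0.32
# exposes BitsAndBytesConfig from the new quantizers package and that model
# classes accept `quantization_config` in from_pretrained. Repo id is an example.
import torch
from diffusers import BitsAndBytesConfig, FluxTransformer2DModel

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # quantize linear layers to 4-bit
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for the actual matmuls
)
transformer = FluxTransformer2DModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",  # example repo id
    subfolder="transformer",
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
)
```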
@@ -33,17 +33,20 @@ from .unet_loader_utils import _maybe_expand_lora_scales
 
 
 if is_transformers_available():
-    from transformers import (
-        CLIPImageProcessor,
-        CLIPVisionModelWithProjection,
-    )
-
-    from ..models.attention_processor import (
-        AttnProcessor,
-        AttnProcessor2_0,
-        IPAdapterAttnProcessor,
-        IPAdapterAttnProcessor2_0,
-    )
+    from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection, SiglipImageProcessor, SiglipVisionModel
+
+    from ..models.attention_processor import (
+        AttnProcessor,
+        AttnProcessor2_0,
+        FluxAttnProcessor2_0,
+        FluxIPAdapterJointAttnProcessor2_0,
+        IPAdapterAttnProcessor,
+        IPAdapterAttnProcessor2_0,
+        IPAdapterXFormersAttnProcessor,
+        JointAttnProcessor2_0,
+        SD3IPAdapterJointAttnProcessor2_0,
+    )
+
 
 logger = logging.get_logger(__name__)
 
@@ -76,7 +79,7 @@ class IPAdapterMixin:
                 list is passed, it should have the same length as `weight_name`.
             weight_name (`str` or `List[str]`):
                 The name of the weight file to load. If a list is passed, it should have the same length as
-                `weight_name`.
+                `subfolder`.
             image_encoder_folder (`str`, *optional*, defaults to `image_encoder`):
                 The subfolder location of the image encoder within a larger model repository on the Hub or locally.
                 Pass `None` to not load the image encoder. If the image encoder is located in a folder inside
@@ -189,7 +192,7 @@ class IPAdapterMixin:
                 state_dict = pretrained_model_name_or_path_or_dict
 
             keys = list(state_dict.keys())
-            if keys != ["image_proj", "ip_adapter"]:
+            if "image_proj" not in keys and "ip_adapter" not in keys:
                 raise ValueError("Required keys are (`image_proj` and `ip_adapter`) missing from the state dict.")
 
             state_dicts.append(state_dict)
@@ -224,7 +227,11 @@ class IPAdapterMixin:
 
         # create feature extractor if it has not been registered to the pipeline yet
         if hasattr(self, "feature_extractor") and getattr(self, "feature_extractor", None) is None:
-            clip_image_size = self.image_encoder.config.image_size
+            # FaceID IP adapters don't need the image encoder so it's not present, in this case we default to 224
+            default_clip_size = 224
+            clip_image_size = (
+                self.image_encoder.config.image_size if self.image_encoder is not None else default_clip_size
+            )
             feature_extractor = CLIPImageProcessor(size=clip_image_size, crop_size=clip_image_size)
             self.register_modules(feature_extractor=feature_extractor)
 
@@ -280,7 +287,9 @@ class IPAdapterMixin:
         scale_configs = _maybe_expand_lora_scales(unet, scale, default_scale=0.0)
 
         for attn_name, attn_processor in unet.attn_processors.items():
-            if isinstance(attn_processor, (IPAdapterAttnProcessor, IPAdapterAttnProcessor2_0)):
+            if isinstance(
+                attn_processor, (IPAdapterAttnProcessor, IPAdapterAttnProcessor2_0, IPAdapterXFormersAttnProcessor)
+            ):
                 if len(scale_configs) != len(attn_processor.scale):
                     raise ValueError(
                         f"Cannot assign {len(scale_configs)} scale_configs to "
@@ -338,7 +347,525 @@ class IPAdapterMixin:
             )
             attn_procs[name] = (
                 attn_processor_class
-                if isinstance(value, (IPAdapterAttnProcessor, IPAdapterAttnProcessor2_0))
+                if isinstance(
+                    value, (IPAdapterAttnProcessor, IPAdapterAttnProcessor2_0, IPAdapterXFormersAttnProcessor)
+                )
                 else value.__class__()
             )
         self.unet.set_attn_processor(attn_procs)
+
+
+class FluxIPAdapterMixin:
+    """Mixin for handling Flux IP Adapters."""
+
+    @validate_hf_hub_args
+    def load_ip_adapter(
+        self,
+        pretrained_model_name_or_path_or_dict: Union[str, List[str], Dict[str, torch.Tensor]],
+        weight_name: Union[str, List[str]],
+        subfolder: Optional[Union[str, List[str]]] = "",
+        image_encoder_pretrained_model_name_or_path: Optional[str] = "image_encoder",
+        image_encoder_subfolder: Optional[str] = "",
+        image_encoder_dtype: torch.dtype = torch.float16,
+        **kwargs,
+    ):
+        """
+        Parameters:
+            pretrained_model_name_or_path_or_dict (`str` or `List[str]` or `os.PathLike` or `List[os.PathLike]` or `dict` or `List[dict]`):
+                Can be either:
+
+                - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on
+                  the Hub.
+                - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved
+                  with [`ModelMixin.save_pretrained`].
+                - A [torch state
+                  dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict).
+            subfolder (`str` or `List[str]`):
+                The subfolder location of a model file within a larger model repository on the Hub or locally. If a
+                list is passed, it should have the same length as `weight_name`.
+            weight_name (`str` or `List[str]`):
+                The name of the weight file to load. If a list is passed, it should have the same length as
+                `weight_name`.
+            image_encoder_pretrained_model_name_or_path (`str`, *optional*, defaults to `./image_encoder`):
+                Can be either:
+
+                - A string, the *model id* (for example `openai/clip-vit-large-patch14`) of a pretrained model
+                  hosted on the Hub.
+                - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved
+                  with [`ModelMixin.save_pretrained`].
+            cache_dir (`Union[str, os.PathLike]`, *optional*):
+                Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
+                is not used.
+            force_download (`bool`, *optional*, defaults to `False`):
+                Whether or not to force the (re-)download of the model weights and configuration files, overriding the
+                cached versions if they exist.
+
+            proxies (`Dict[str, str]`, *optional*):
+                A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
+                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
+            local_files_only (`bool`, *optional*, defaults to `False`):
+                Whether to only load local model weights and configuration files or not. If set to `True`, the model
+                won't be downloaded from the Hub.
+            token (`str` or *bool*, *optional*):
+                The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
+                `diffusers-cli login` (stored in `~/.huggingface`) is used.
+            revision (`str`, *optional*, defaults to `"main"`):
+                The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
+                allowed by Git.
+            low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
+                Speed up model loading only loading the pretrained weights and not initializing the weights. This also
+                tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model.
+                Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
+                argument to `True` will raise an error.
+        """
+
+        # handle the list inputs for multiple IP Adapters
+        if not isinstance(weight_name, list):
+            weight_name = [weight_name]
+
+        if not isinstance(pretrained_model_name_or_path_or_dict, list):
+            pretrained_model_name_or_path_or_dict = [pretrained_model_name_or_path_or_dict]
+        if len(pretrained_model_name_or_path_or_dict) == 1:
+            pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict * len(weight_name)
+
+        if not isinstance(subfolder, list):
+            subfolder = [subfolder]
+        if len(subfolder) == 1:
+            subfolder = subfolder * len(weight_name)
+
+        if len(weight_name) != len(pretrained_model_name_or_path_or_dict):
+            raise ValueError("`weight_name` and `pretrained_model_name_or_path_or_dict` must have the same length.")
+
+        if len(weight_name) != len(subfolder):
+            raise ValueError("`weight_name` and `subfolder` must have the same length.")
+
+        # Load the main state dict first.
+        cache_dir = kwargs.pop("cache_dir", None)
+        force_download = kwargs.pop("force_download", False)
+        proxies = kwargs.pop("proxies", None)
+        local_files_only = kwargs.pop("local_files_only", None)
+        token = kwargs.pop("token", None)
+        revision = kwargs.pop("revision", None)
+        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT)
+
+        if low_cpu_mem_usage and not is_accelerate_available():
+            low_cpu_mem_usage = False
+            logger.warning(
+                "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the"
+                " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install"
+                " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip"
+                " install accelerate\n```\n."
+            )
+
+        if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"):
+            raise NotImplementedError(
+                "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set"
+                " `low_cpu_mem_usage=False`."
+            )
+
+        user_agent = {
+            "file_type": "attn_procs_weights",
+            "framework": "pytorch",
+        }
+        state_dicts = []
+        for pretrained_model_name_or_path_or_dict, weight_name, subfolder in zip(
+            pretrained_model_name_or_path_or_dict, weight_name, subfolder
+        ):
+            if not isinstance(pretrained_model_name_or_path_or_dict, dict):
+                model_file = _get_model_file(
+                    pretrained_model_name_or_path_or_dict,
+                    weights_name=weight_name,
+                    cache_dir=cache_dir,
+                    force_download=force_download,
+                    proxies=proxies,
+                    local_files_only=local_files_only,
+                    token=token,
+                    revision=revision,
+                    subfolder=subfolder,
+                    user_agent=user_agent,
+                )
+                if weight_name.endswith(".safetensors"):
+                    state_dict = {"image_proj": {}, "ip_adapter": {}}
+                    with safe_open(model_file, framework="pt", device="cpu") as f:
+                        image_proj_keys = ["ip_adapter_proj_model.", "image_proj."]
+                        ip_adapter_keys = ["double_blocks.", "ip_adapter."]
+                        for key in f.keys():
+                            if any(key.startswith(prefix) for prefix in image_proj_keys):
+                                diffusers_name = ".".join(key.split(".")[1:])
+                                state_dict["image_proj"][diffusers_name] = f.get_tensor(key)
+                            elif any(key.startswith(prefix) for prefix in ip_adapter_keys):
+                                diffusers_name = (
+                                    ".".join(key.split(".")[1:])
+                                    .replace("ip_adapter_double_stream_k_proj", "to_k_ip")
+                                    .replace("ip_adapter_double_stream_v_proj", "to_v_ip")
+                                    .replace("processor.", "")
+                                )
+                                state_dict["ip_adapter"][diffusers_name] = f.get_tensor(key)
+                else:
+                    state_dict = load_state_dict(model_file)
+            else:
+                state_dict = pretrained_model_name_or_path_or_dict
+
+            keys = list(state_dict.keys())
+            if keys != ["image_proj", "ip_adapter"]:
+                raise ValueError("Required keys are (`image_proj` and `ip_adapter`) missing from the state dict.")
+
+            state_dicts.append(state_dict)
+
+        # load CLIP image encoder here if it has not been registered to the pipeline yet
+        if hasattr(self, "image_encoder") and getattr(self, "image_encoder", None) is None:
+            if image_encoder_pretrained_model_name_or_path is not None:
+                if not isinstance(pretrained_model_name_or_path_or_dict, dict):
+                    logger.info(f"loading image_encoder from {image_encoder_pretrained_model_name_or_path}")
+                    image_encoder = (
+                        CLIPVisionModelWithProjection.from_pretrained(
+                            image_encoder_pretrained_model_name_or_path,
+                            subfolder=image_encoder_subfolder,
+                            low_cpu_mem_usage=low_cpu_mem_usage,
+                            cache_dir=cache_dir,
+                            local_files_only=local_files_only,
+                        )
+                        .to(self.device, dtype=image_encoder_dtype)
+                        .eval()
+                    )
+                    self.register_modules(image_encoder=image_encoder)
+                else:
+                    raise ValueError(
+                        "`image_encoder` cannot be loaded because `pretrained_model_name_or_path_or_dict` is a state dict."
+                    )
+            else:
+                logger.warning(
+                    "image_encoder is not loaded since `image_encoder_folder=None` passed. You will not be able to use `ip_adapter_image` when calling the pipeline with IP-Adapter."
+                    "Use `ip_adapter_image_embeds` to pass pre-generated image embedding instead."
+                )
+
+        # create feature extractor if it has not been registered to the pipeline yet
+        if hasattr(self, "feature_extractor") and getattr(self, "feature_extractor", None) is None:
+            # FaceID IP adapters don't need the image encoder so it's not present, in this case we default to 224
+            default_clip_size = 224
+            clip_image_size = (
+                self.image_encoder.config.image_size if self.image_encoder is not None else default_clip_size
+            )
+            feature_extractor = CLIPImageProcessor(size=clip_image_size, crop_size=clip_image_size)
+            self.register_modules(feature_extractor=feature_extractor)
+
+        # load ip-adapter into transformer
+        self.transformer._load_ip_adapter_weights(state_dicts, low_cpu_mem_usage=low_cpu_mem_usage)
+
+    def set_ip_adapter_scale(self, scale: Union[float, List[float], List[List[float]]]):
+        """
+        Set IP-Adapter scales per-transformer block. Input `scale` could be a single config or a list of configs for
+        granular control over each IP-Adapter behavior. A config can be a float or a list.
+
+        `float` is converted to list and repeated for the number of blocks and the number of IP adapters. `List[float]`
+        length match the number of blocks, it is repeated for each IP adapter. `List[List[float]]` must match the
+        number of IP adapters and each must match the number of blocks.
+
+        Example:
+
+        ```py
+        # To use original IP-Adapter
+        scale = 1.0
+        pipeline.set_ip_adapter_scale(scale)
+
+
+        def LinearStrengthModel(start, finish, size):
+            return [(start + (finish - start) * (i / (size - 1))) for i in range(size)]
+
+
+        ip_strengths = LinearStrengthModel(0.3, 0.92, 19)
+        pipeline.set_ip_adapter_scale(ip_strengths)
+        ```
+        """
+        transformer = self.transformer
+        if not isinstance(scale, list):
+            scale = [[scale] * transformer.config.num_layers]
+        elif isinstance(scale, list) and isinstance(scale[0], int) or isinstance(scale[0], float):
+            if len(scale) != transformer.config.num_layers:
+                raise ValueError(f"Expected list of {transformer.config.num_layers} scales, got {len(scale)}.")
+            scale = [scale]
+
+        scale_configs = scale
+
+        key_id = 0
+        for attn_name, attn_processor in transformer.attn_processors.items():
+            if isinstance(attn_processor, (FluxIPAdapterJointAttnProcessor2_0)):
+                if len(scale_configs) != len(attn_processor.scale):
+                    raise ValueError(
+                        f"Cannot assign {len(scale_configs)} scale_configs to "
+                        f"{len(attn_processor.scale)} IP-Adapter."
+                    )
+                elif len(scale_configs) == 1:
+                    scale_configs = scale_configs * len(attn_processor.scale)
+                for i, scale_config in enumerate(scale_configs):
+                    attn_processor.scale[i] = scale_config[key_id]
+                key_id += 1
+
+    def unload_ip_adapter(self):
+        """
+        Unloads the IP Adapter weights
+
+        Examples:
+
+        ```python
+        >>> # Assuming `pipeline` is already loaded with the IP Adapter weights.
+        >>> pipeline.unload_ip_adapter()
+        >>> ...
+        ```
+        """
+        # remove CLIP image encoder
+        if hasattr(self, "image_encoder") and getattr(self, "image_encoder", None) is not None:
+            self.image_encoder = None
+            self.register_to_config(image_encoder=[None, None])
+
+        # remove feature extractor only when safety_checker is None as safety_checker uses
+        # the feature_extractor later
+        if not hasattr(self, "safety_checker"):
+            if hasattr(self, "feature_extractor") and getattr(self, "feature_extractor", None) is not None:
+                self.feature_extractor = None
+                self.register_to_config(feature_extractor=[None, None])
+
+        # remove hidden encoder
+        self.transformer.encoder_hid_proj = None
+        self.transformer.config.encoder_hid_dim_type = None
+
+        # restore original Transformer attention processors layers
+        attn_procs = {}
+        for name, value in self.transformer.attn_processors.items():
+            attn_processor_class = FluxAttnProcessor2_0()
+            attn_procs[name] = (
+                attn_processor_class if isinstance(value, (FluxIPAdapterJointAttnProcessor2_0)) else value.__class__()
+            )
+        self.transformer.set_attn_processor(attn_procs)
+
+
+class SD3IPAdapterMixin:
+    """Mixin for handling StableDiffusion 3 IP Adapters."""
+
+    @property
+    def is_ip_adapter_active(self) -> bool:
+        """Checks if IP-Adapter is loaded and scale > 0.
+
+        IP-Adapter scale controls the influence of the image prompt versus text prompt. When this value is set to 0,
+        the image context is irrelevant.
+
+        Returns:
+            `bool`: True when IP-Adapter is loaded and any layer has scale > 0.
+        """
+        scales = [
+            attn_proc.scale
+            for attn_proc in self.transformer.attn_processors.values()
+            if isinstance(attn_proc, SD3IPAdapterJointAttnProcessor2_0)
+        ]
+
+        return len(scales) > 0 and any(scale > 0 for scale in scales)
+
+    @validate_hf_hub_args
+    def load_ip_adapter(
+        self,
+        pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
+        weight_name: str = "ip-adapter.safetensors",
+        subfolder: Optional[str] = None,
+        image_encoder_folder: Optional[str] = "image_encoder",
+        **kwargs,
+    ) -> None:
+        """
+        Parameters:
+            pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`):
+                Can be either:
+                - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on
+                  the Hub.
+                - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved
+                  with [`ModelMixin.save_pretrained`].
+                - A [torch state
+                  dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict).
+            weight_name (`str`, defaults to "ip-adapter.safetensors"):
+                The name of the weight file to load. If a list is passed, it should have the same length as
+                `subfolder`.
+            subfolder (`str`, *optional*):
+                The subfolder location of a model file within a larger model repository on the Hub or locally. If a
+                list is passed, it should have the same length as `weight_name`.
+            image_encoder_folder (`str`, *optional*, defaults to `image_encoder`):
+                The subfolder location of the image encoder within a larger model repository on the Hub or locally.
+                Pass `None` to not load the image encoder. If the image encoder is located in a folder inside
+                `subfolder`, you only need to pass the name of the folder that contains image encoder weights, e.g.
+                `image_encoder_folder="image_encoder"`. If the image encoder is located in a folder other than
+                `subfolder`, you should pass the path to the folder that contains image encoder weights, for example,
+                `image_encoder_folder="different_subfolder/image_encoder"`.
+            cache_dir (`Union[str, os.PathLike]`, *optional*):
+                Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
+                is not used.
+            force_download (`bool`, *optional*, defaults to `False`):
+                Whether or not to force the (re-)download of the model weights and configuration files, overriding the
+                cached versions if they exist.
+            proxies (`Dict[str, str]`, *optional*):
+                A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
+                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
+            local_files_only (`bool`, *optional*, defaults to `False`):
+                Whether to only load local model weights and configuration files or not. If set to `True`, the model
+                won't be downloaded from the Hub.
+            token (`str` or *bool*, *optional*):
+                The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
+                `diffusers-cli login` (stored in `~/.huggingface`) is used.
+            revision (`str`, *optional*, defaults to `"main"`):
+                The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
+                allowed by Git.
+            low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
+                Speed up model loading only loading the pretrained weights and not initializing the weights. This also
+                tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model.
+                Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
+                argument to `True` will raise an error.
+        """
+        # Load the main state dict first
+        cache_dir = kwargs.pop("cache_dir", None)
+        force_download = kwargs.pop("force_download", False)
+        proxies = kwargs.pop("proxies", None)
+        local_files_only = kwargs.pop("local_files_only", None)
+        token = kwargs.pop("token", None)
+        revision = kwargs.pop("revision", None)
+        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT)
+
+        if low_cpu_mem_usage and not is_accelerate_available():
+            low_cpu_mem_usage = False
+            logger.warning(
+                "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the"
+                " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install"
+                " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip"
+                " install accelerate\n```\n."
+            )
+
+        if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"):
+            raise NotImplementedError(
+                "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set"
+                " `low_cpu_mem_usage=False`."
+            )
+
+        user_agent = {
+            "file_type": "attn_procs_weights",
+            "framework": "pytorch",
+        }
+
+        if not isinstance(pretrained_model_name_or_path_or_dict, dict):
+            model_file = _get_model_file(
+                pretrained_model_name_or_path_or_dict,
+                weights_name=weight_name,
+                cache_dir=cache_dir,
+                force_download=force_download,
+                proxies=proxies,
+                local_files_only=local_files_only,
+                token=token,
+                revision=revision,
+                subfolder=subfolder,
+                user_agent=user_agent,
+            )
+            if weight_name.endswith(".safetensors"):
+                state_dict = {"image_proj": {}, "ip_adapter": {}}
+                with safe_open(model_file, framework="pt", device="cpu") as f:
+                    for key in f.keys():
+                        if key.startswith("image_proj."):
+                            state_dict["image_proj"][key.replace("image_proj.", "")] = f.get_tensor(key)
+                        elif key.startswith("ip_adapter."):
+                            state_dict["ip_adapter"][key.replace("ip_adapter.", "")] = f.get_tensor(key)
+            else:
+                state_dict = load_state_dict(model_file)
+        else:
+            state_dict = pretrained_model_name_or_path_or_dict
+
+        keys = list(state_dict.keys())
+        if "image_proj" not in keys and "ip_adapter" not in keys:
+            raise ValueError("Required keys are (`image_proj` and `ip_adapter`) missing from the state dict.")
+
+        # Load image_encoder and feature_extractor here if they haven't been registered to the pipeline yet
+        if hasattr(self, "image_encoder") and getattr(self, "image_encoder", None) is None:
+            if image_encoder_folder is not None:
+                if not isinstance(pretrained_model_name_or_path_or_dict, dict):
+                    logger.info(f"loading image_encoder from {pretrained_model_name_or_path_or_dict}")
+                    if image_encoder_folder.count("/") == 0:
+                        image_encoder_subfolder = Path(subfolder, image_encoder_folder).as_posix()
+                    else:
+                        image_encoder_subfolder = Path(image_encoder_folder).as_posix()
+
+                    # Commons args for loading image encoder and image processor
+                    kwargs = {
+                        "low_cpu_mem_usage": low_cpu_mem_usage,
+                        "cache_dir": cache_dir,
+                        "local_files_only": local_files_only,
+                    }
+
+                    self.register_modules(
+                        feature_extractor=SiglipImageProcessor.from_pretrained(image_encoder_subfolder, **kwargs).to(
+                            self.device, dtype=self.dtype
+                        ),
+                        image_encoder=SiglipVisionModel.from_pretrained(image_encoder_subfolder, **kwargs).to(
+                            self.device, dtype=self.dtype
+                        ),
+                    )
+                else:
+                    raise ValueError(
+                        "`image_encoder` cannot be loaded because `pretrained_model_name_or_path_or_dict` is a state dict."
+                    )
+            else:
+                logger.warning(
+                    "image_encoder is not loaded since `image_encoder_folder=None` passed. You will not be able to use `ip_adapter_image` when calling the pipeline with IP-Adapter."
+                    "Use `ip_adapter_image_embeds` to pass pre-generated image embedding instead."
+                )
+
+        # Load IP-Adapter into transformer
+        self.transformer._load_ip_adapter_weights(state_dict, low_cpu_mem_usage=low_cpu_mem_usage)
+
+    def set_ip_adapter_scale(self, scale: float) -> None:
+        """
+        Set IP-Adapter scale, which controls image prompt conditioning. A value of 1.0 means the model is only
+        conditioned on the image prompt, and 0.0 only conditioned by the text prompt. Lowering this value encourages
+        the model to produce more diverse images, but they may not be as aligned with the image prompt.
+
+        Example:
+
+        ```python
+        >>> # Assuming `pipeline` is already loaded with the IP Adapter weights.
+        >>> pipeline.set_ip_adapter_scale(0.6)
+        >>> ...
+        ```
+
+        Args:
+            scale (float):
+                IP-Adapter scale to be set.
+
+        """
+        for attn_processor in self.transformer.attn_processors.values():
+            if isinstance(attn_processor, SD3IPAdapterJointAttnProcessor2_0):
+                attn_processor.scale = scale
+
+    def unload_ip_adapter(self) -> None:
+        """
+        Unloads the IP Adapter weights.
+
+        Example:
+
+        ```python
+        >>> # Assuming `pipeline` is already loaded with the IP Adapter weights.
+        >>> pipeline.unload_ip_adapter()
+        >>> ...
+        ```
+        """
+        # Remove image encoder
+        if hasattr(self, "image_encoder") and getattr(self, "image_encoder", None) is not None:
+            self.image_encoder = None
+            self.register_to_config(image_encoder=None)
+
+        # Remove feature extractor
+        if hasattr(self, "feature_extractor") and getattr(self, "feature_extractor", None) is not None:
+            self.feature_extractor = None
+            self.register_to_config(feature_extractor=None)
+
+        # Remove image projection
+        self.transformer.image_proj = None
+
+        # Restore original attention processors layers
+        attn_procs = {
+            name: (
+                JointAttnProcessor2_0() if isinstance(value, SD3IPAdapterJointAttnProcessor2_0) else value.__class__()
+            )
+            for name, value in self.transformer.attn_processors.items()
+        }
+        self.transformer.set_attn_processor(attn_procs)
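The new `FluxIPAdapterMixin` added above gives Flux pipelines the familiar `load_ip_adapter` / `set_ip_adapter_scale` / `unload_ip_adapter` surface. The following is a minimal usage sketch based on the method signatures in this diff; the repo ids, weight file name, reference image path, and the `ip_adapter_image` call argument are illustrative assumptions rather than values taken from the diff itself.

```python
# Minimal usage sketch for the FluxIPAdapterMixin API added above.
# Repo ids, weight file name, and the reference image path are placeholders.
import torch
from diffusers import FluxPipeline
from diffusers.utils import load_image

pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16).to("cuda")
pipe.load_ip_adapter(
    "XLabs-AI/flux-ip-adapter",  # placeholder IP-Adapter repository
    weight_name="ip_adapter.safetensors",
    image_encoder_pretrained_model_name_or_path="openai/clip-vit-large-patch14",
)
pipe.set_ip_adapter_scale(1.0)  # a float, a per-block list, or a list of lists (see docstring above)

image = pipe(
    prompt="a cat holding a sign that says hello world",
    ip_adapter_image=load_image("reference.png"),  # placeholder local reference image
    num_inference_steps=28,
    guidance_scale=3.5,
).images[0]
image.save("flux_ip_adapter.png")
```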