diffusers 0.29.2__py3-none-any.whl → 0.30.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. diffusers/__init__.py +94 -3
  2. diffusers/commands/env.py +1 -5
  3. diffusers/configuration_utils.py +4 -9
  4. diffusers/dependency_versions_table.py +2 -2
  5. diffusers/image_processor.py +1 -2
  6. diffusers/loaders/__init__.py +17 -2
  7. diffusers/loaders/ip_adapter.py +10 -7
  8. diffusers/loaders/lora_base.py +752 -0
  9. diffusers/loaders/lora_pipeline.py +2252 -0
  10. diffusers/loaders/peft.py +213 -5
  11. diffusers/loaders/single_file.py +3 -14
  12. diffusers/loaders/single_file_model.py +31 -10
  13. diffusers/loaders/single_file_utils.py +293 -8
  14. diffusers/loaders/textual_inversion.py +1 -6
  15. diffusers/loaders/unet.py +23 -208
  16. diffusers/models/__init__.py +20 -0
  17. diffusers/models/activations.py +22 -0
  18. diffusers/models/attention.py +386 -7
  19. diffusers/models/attention_processor.py +1937 -629
  20. diffusers/models/autoencoders/__init__.py +2 -0
  21. diffusers/models/autoencoders/autoencoder_kl.py +14 -3
  22. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +1271 -0
  23. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +1 -1
  24. diffusers/models/autoencoders/autoencoder_oobleck.py +464 -0
  25. diffusers/models/autoencoders/autoencoder_tiny.py +1 -0
  26. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  27. diffusers/models/autoencoders/vq_model.py +4 -4
  28. diffusers/models/controlnet.py +2 -3
  29. diffusers/models/controlnet_hunyuan.py +401 -0
  30. diffusers/models/controlnet_sd3.py +11 -11
  31. diffusers/models/controlnet_sparsectrl.py +789 -0
  32. diffusers/models/controlnet_xs.py +40 -10
  33. diffusers/models/downsampling.py +68 -0
  34. diffusers/models/embeddings.py +403 -36
  35. diffusers/models/model_loading_utils.py +1 -3
  36. diffusers/models/modeling_flax_utils.py +1 -6
  37. diffusers/models/modeling_utils.py +4 -16
  38. diffusers/models/normalization.py +203 -12
  39. diffusers/models/transformers/__init__.py +6 -0
  40. diffusers/models/transformers/auraflow_transformer_2d.py +543 -0
  41. diffusers/models/transformers/cogvideox_transformer_3d.py +485 -0
  42. diffusers/models/transformers/hunyuan_transformer_2d.py +19 -15
  43. diffusers/models/transformers/latte_transformer_3d.py +327 -0
  44. diffusers/models/transformers/lumina_nextdit2d.py +340 -0
  45. diffusers/models/transformers/pixart_transformer_2d.py +102 -1
  46. diffusers/models/transformers/prior_transformer.py +1 -1
  47. diffusers/models/transformers/stable_audio_transformer.py +458 -0
  48. diffusers/models/transformers/transformer_flux.py +455 -0
  49. diffusers/models/transformers/transformer_sd3.py +18 -4
  50. diffusers/models/unets/unet_1d_blocks.py +1 -1
  51. diffusers/models/unets/unet_2d_condition.py +8 -1
  52. diffusers/models/unets/unet_3d_blocks.py +51 -920
  53. diffusers/models/unets/unet_3d_condition.py +4 -1
  54. diffusers/models/unets/unet_i2vgen_xl.py +4 -1
  55. diffusers/models/unets/unet_kandinsky3.py +1 -1
  56. diffusers/models/unets/unet_motion_model.py +1330 -84
  57. diffusers/models/unets/unet_spatio_temporal_condition.py +1 -1
  58. diffusers/models/unets/unet_stable_cascade.py +1 -3
  59. diffusers/models/unets/uvit_2d.py +1 -1
  60. diffusers/models/upsampling.py +64 -0
  61. diffusers/models/vq_model.py +8 -4
  62. diffusers/optimization.py +1 -1
  63. diffusers/pipelines/__init__.py +100 -3
  64. diffusers/pipelines/animatediff/__init__.py +4 -0
  65. diffusers/pipelines/animatediff/pipeline_animatediff.py +50 -40
  66. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +1076 -0
  67. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +17 -27
  68. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +1008 -0
  69. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +51 -38
  70. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  71. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +1 -0
  72. diffusers/pipelines/aura_flow/__init__.py +48 -0
  73. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +591 -0
  74. diffusers/pipelines/auto_pipeline.py +97 -19
  75. diffusers/pipelines/cogvideo/__init__.py +48 -0
  76. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +746 -0
  77. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  78. diffusers/pipelines/controlnet/pipeline_controlnet.py +24 -30
  79. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +31 -30
  80. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +24 -153
  81. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +19 -28
  82. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +18 -28
  83. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +29 -32
  84. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -2
  85. diffusers/pipelines/controlnet_hunyuandit/__init__.py +48 -0
  86. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +1042 -0
  87. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +35 -0
  88. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +10 -6
  89. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +0 -4
  90. diffusers/pipelines/deepfloyd_if/pipeline_if.py +2 -2
  91. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +2 -2
  92. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +2 -2
  93. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +2 -2
  94. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +2 -2
  95. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +2 -2
  96. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -6
  97. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -6
  98. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +6 -6
  99. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
  100. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -10
  101. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +10 -6
  102. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +3 -3
  103. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +1 -1
  104. diffusers/pipelines/flux/__init__.py +47 -0
  105. diffusers/pipelines/flux/pipeline_flux.py +749 -0
  106. diffusers/pipelines/flux/pipeline_output.py +21 -0
  107. diffusers/pipelines/free_init_utils.py +2 -0
  108. diffusers/pipelines/free_noise_utils.py +236 -0
  109. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +2 -2
  110. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +2 -2
  111. diffusers/pipelines/kolors/__init__.py +54 -0
  112. diffusers/pipelines/kolors/pipeline_kolors.py +1070 -0
  113. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +1247 -0
  114. diffusers/pipelines/kolors/pipeline_output.py +21 -0
  115. diffusers/pipelines/kolors/text_encoder.py +889 -0
  116. diffusers/pipelines/kolors/tokenizer.py +334 -0
  117. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +30 -29
  118. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +23 -29
  119. diffusers/pipelines/latte/__init__.py +48 -0
  120. diffusers/pipelines/latte/pipeline_latte.py +881 -0
  121. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +4 -4
  122. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +0 -4
  123. diffusers/pipelines/lumina/__init__.py +48 -0
  124. diffusers/pipelines/lumina/pipeline_lumina.py +897 -0
  125. diffusers/pipelines/pag/__init__.py +67 -0
  126. diffusers/pipelines/pag/pag_utils.py +237 -0
  127. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1329 -0
  128. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +1612 -0
  129. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +953 -0
  130. diffusers/pipelines/pag/pipeline_pag_kolors.py +1136 -0
  131. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +872 -0
  132. diffusers/pipelines/pag/pipeline_pag_sd.py +1050 -0
  133. diffusers/pipelines/pag/pipeline_pag_sd_3.py +985 -0
  134. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +862 -0
  135. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +1333 -0
  136. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +1529 -0
  137. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +1753 -0
  138. diffusers/pipelines/pia/pipeline_pia.py +30 -37
  139. diffusers/pipelines/pipeline_flax_utils.py +4 -9
  140. diffusers/pipelines/pipeline_loading_utils.py +0 -3
  141. diffusers/pipelines/pipeline_utils.py +2 -14
  142. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +0 -1
  143. diffusers/pipelines/stable_audio/__init__.py +50 -0
  144. diffusers/pipelines/stable_audio/modeling_stable_audio.py +158 -0
  145. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +745 -0
  146. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +2 -0
  147. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  148. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +23 -29
  149. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +15 -8
  150. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +30 -29
  151. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +23 -152
  152. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +8 -4
  153. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +11 -11
  154. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +8 -6
  155. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +6 -6
  156. diffusers/pipelines/stable_diffusion_3/__init__.py +2 -0
  157. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +34 -3
  158. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +33 -7
  159. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +1201 -0
  160. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +3 -3
  161. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +6 -6
  162. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +5 -5
  163. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +5 -5
  164. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +6 -6
  165. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +0 -4
  166. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +23 -29
  167. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +27 -29
  168. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +3 -3
  169. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +17 -27
  170. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +26 -29
  171. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +17 -145
  172. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +0 -4
  173. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +6 -6
  174. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -28
  175. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +8 -6
  176. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +8 -6
  177. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +6 -4
  178. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +0 -4
  179. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
  180. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  181. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +5 -4
  182. diffusers/schedulers/__init__.py +8 -0
  183. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +572 -0
  184. diffusers/schedulers/scheduling_ddim.py +1 -1
  185. diffusers/schedulers/scheduling_ddim_cogvideox.py +449 -0
  186. diffusers/schedulers/scheduling_ddpm.py +1 -1
  187. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -1
  188. diffusers/schedulers/scheduling_deis_multistep.py +2 -2
  189. diffusers/schedulers/scheduling_dpm_cogvideox.py +489 -0
  190. diffusers/schedulers/scheduling_dpmsolver_multistep.py +1 -1
  191. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +1 -1
  192. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +64 -19
  193. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +2 -2
  194. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +63 -39
  195. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +321 -0
  196. diffusers/schedulers/scheduling_ipndm.py +1 -1
  197. diffusers/schedulers/scheduling_unipc_multistep.py +1 -1
  198. diffusers/schedulers/scheduling_utils.py +1 -3
  199. diffusers/schedulers/scheduling_utils_flax.py +1 -3
  200. diffusers/training_utils.py +99 -14
  201. diffusers/utils/__init__.py +2 -2
  202. diffusers/utils/dummy_pt_objects.py +210 -0
  203. diffusers/utils/dummy_torch_and_torchsde_objects.py +15 -0
  204. diffusers/utils/dummy_torch_and_transformers_and_sentencepiece_objects.py +47 -0
  205. diffusers/utils/dummy_torch_and_transformers_objects.py +315 -0
  206. diffusers/utils/dynamic_modules_utils.py +1 -11
  207. diffusers/utils/export_utils.py +50 -6
  208. diffusers/utils/hub_utils.py +45 -42
  209. diffusers/utils/import_utils.py +37 -15
  210. diffusers/utils/loading_utils.py +80 -3
  211. diffusers/utils/testing_utils.py +11 -8
  212. {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/METADATA +73 -83
  213. {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/RECORD +217 -164
  214. {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/WHEEL +1 -1
  215. diffusers/loaders/autoencoder.py +0 -146
  216. diffusers/loaders/controlnet.py +0 -136
  217. diffusers/loaders/lora.py +0 -1728
  218. {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/LICENSE +0 -0
  219. {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/entry_points.txt +0 -0
  220. {diffusers-0.29.2.dist-info → diffusers-0.30.1.dist-info}/top_level.txt +0 -0
@@ -30,9 +30,12 @@ from ...models.controlnet_sd3 import SD3ControlNetModel, SD3MultiControlNetModel
30
30
  from ...models.transformers import SD3Transformer2DModel
31
31
  from ...schedulers import FlowMatchEulerDiscreteScheduler
32
32
  from ...utils import (
33
+ USE_PEFT_BACKEND,
33
34
  is_torch_xla_available,
34
35
  logging,
35
36
  replace_example_docstring,
37
+ scale_lora_layers,
38
+ unscale_lora_layers,
36
39
  )
37
40
  from ...utils.torch_utils import randn_tensor
38
41
  from ..pipeline_utils import DiffusionPipeline
@@ -346,6 +349,7 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
346
349
  negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
347
350
  clip_skip: Optional[int] = None,
348
351
  max_sequence_length: int = 256,
352
+ lora_scale: Optional[float] = None,
349
353
  ):
350
354
  r"""
351
355
 
@@ -391,9 +395,22 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
391
395
  clip_skip (`int`, *optional*):
392
396
  Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
393
397
  the output of the pre-final layer will be used for computing the prompt embeddings.
398
+ lora_scale (`float`, *optional*):
399
+ A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
394
400
  """
395
401
  device = device or self._execution_device
396
402
 
403
+ # set lora scale so that monkey patched LoRA
404
+ # function of text encoder can correctly access it
405
+ if lora_scale is not None and isinstance(self, SD3LoraLoaderMixin):
406
+ self._lora_scale = lora_scale
407
+
408
+ # dynamically adjust the LoRA scale
409
+ if self.text_encoder is not None and USE_PEFT_BACKEND:
410
+ scale_lora_layers(self.text_encoder, lora_scale)
411
+ if self.text_encoder_2 is not None and USE_PEFT_BACKEND:
412
+ scale_lora_layers(self.text_encoder_2, lora_scale)
413
+
397
414
  prompt = [prompt] if isinstance(prompt, str) else prompt
398
415
  if prompt is not None:
399
416
  batch_size = len(prompt)
@@ -496,6 +513,16 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
496
513
  [negative_pooled_prompt_embed, negative_pooled_prompt_2_embed], dim=-1
497
514
  )
498
515
 
516
+ if self.text_encoder is not None:
517
+ if isinstance(self, SD3LoraLoaderMixin) and USE_PEFT_BACKEND:
518
+ # Retrieve the original scale by scaling back the LoRA layers
519
+ unscale_lora_layers(self.text_encoder, lora_scale)
520
+
521
+ if self.text_encoder_2 is not None:
522
+ if isinstance(self, SD3LoraLoaderMixin) and USE_PEFT_BACKEND:
523
+ # Retrieve the original scale by scaling back the LoRA layers
524
+ unscale_lora_layers(self.text_encoder_2, lora_scale)
525
+
499
526
  return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
500
527
 
501
528
  def check_inputs(
@@ -513,6 +540,7 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
513
540
  pooled_prompt_embeds=None,
514
541
  negative_pooled_prompt_embeds=None,
515
542
  callback_on_step_end_tensor_inputs=None,
543
+ max_sequence_length=None,
516
544
  ):
517
545
  if height % 8 != 0 or width % 8 != 0:
518
546
  raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
@@ -584,6 +612,9 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
584
612
  "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
585
613
  )
586
614
 
615
+ if max_sequence_length is not None and max_sequence_length > 512:
616
+ raise ValueError(f"`max_sequence_length` cannot be greater than 512 but is {max_sequence_length}")
617
+
587
618
  # Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3.StableDiffusion3Pipeline.prepare_latents
588
619
  def prepare_latents(
589
620
  self,
@@ -710,6 +741,7 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
710
741
  clip_skip: Optional[int] = None,
711
742
  callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
712
743
  callback_on_step_end_tensor_inputs: List[str] = ["latents"],
744
+ max_sequence_length: int = 256,
713
745
  ):
714
746
  r"""
715
747
  Function invoked when calling the pipeline for generation.
@@ -811,6 +843,7 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
811
843
  The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
812
844
  will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
813
845
  `._callback_tensor_inputs` attribute of your pipeline class.
846
+ max_sequence_length (`int` defaults to 256): Maximum sequence length to use with the `prompt`.
814
847
 
815
848
  Examples:
816
849
 
@@ -850,6 +883,7 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
850
883
  pooled_prompt_embeds=pooled_prompt_embeds,
851
884
  negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
852
885
  callback_on_step_end_tensor_inputs=callback_on_step_end_tensor_inputs,
886
+ max_sequence_length=max_sequence_length,
853
887
  )
854
888
 
855
889
  self._guidance_scale = guidance_scale
@@ -888,6 +922,7 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
888
922
  device=device,
889
923
  clip_skip=self.clip_skip,
890
924
  num_images_per_prompt=num_images_per_prompt,
925
+ max_sequence_length=max_sequence_length,
891
926
  )
892
927
 
893
928
  if self.do_classifier_free_guidance:
@@ -23,7 +23,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
23
23
 
24
24
  from ...callbacks import MultiPipelineCallbacks, PipelineCallback
25
25
  from ...image_processor import PipelineImageInput, VaeImageProcessor
26
- from ...loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
26
+ from ...loaders import FromSingleFileMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
27
27
  from ...models import AutoencoderKL, ControlNetXSAdapter, UNet2DConditionModel, UNetControlNetXSModel
28
28
  from ...models.lora import adjust_lora_scale_text_encoder
29
29
  from ...schedulers import KarrasDiffusionSchedulers
@@ -90,7 +90,11 @@ EXAMPLE_DOC_STRING = """
90
90
 
91
91
 
92
92
  class StableDiffusionControlNetXSPipeline(
93
- DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
93
+ DiffusionPipeline,
94
+ StableDiffusionMixin,
95
+ TextualInversionLoaderMixin,
96
+ StableDiffusionLoraLoaderMixin,
97
+ FromSingleFileMixin,
94
98
  ):
95
99
  r"""
96
100
  Pipeline for text-to-image generation using Stable Diffusion with ControlNet-XS guidance.
@@ -100,8 +104,8 @@ class StableDiffusionControlNetXSPipeline(
100
104
 
101
105
  The pipeline also inherits the following loading methods:
102
106
  - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
103
- - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
104
- - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
107
+ - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
108
+ - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
105
109
  - [`loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
106
110
 
107
111
  Args:
@@ -258,7 +262,7 @@ class StableDiffusionControlNetXSPipeline(
258
262
  """
259
263
  # set lora scale so that monkey patched LoRA
260
264
  # function of text encoder can correctly access it
261
- if lora_scale is not None and isinstance(self, LoraLoaderMixin):
265
+ if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
262
266
  self._lora_scale = lora_scale
263
267
 
264
268
  # dynamically adjust the LoRA scale
@@ -391,7 +395,7 @@ class StableDiffusionControlNetXSPipeline(
391
395
  negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
392
396
 
393
397
  if self.text_encoder is not None:
394
- if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
398
+ if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
395
399
  # Retrieve the original scale by scaling back the LoRA layers
396
400
  unscale_lora_layers(self.text_encoder, lora_scale)
397
401
 
@@ -34,8 +34,6 @@ from ...loaders import FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, Te
34
34
  from ...models import AutoencoderKL, ControlNetXSAdapter, UNet2DConditionModel, UNetControlNetXSModel
35
35
  from ...models.attention_processor import (
36
36
  AttnProcessor2_0,
37
- LoRAAttnProcessor2_0,
38
- LoRAXFormersAttnProcessor,
39
37
  XFormersAttnProcessor,
40
38
  )
41
39
  from ...models.lora import adjust_lora_scale_text_encoder
@@ -678,8 +676,6 @@ class StableDiffusionXLControlNetXSPipeline(
678
676
  (
679
677
  AttnProcessor2_0,
680
678
  XFormersAttnProcessor,
681
- LoRAXFormersAttnProcessor,
682
- LoRAAttnProcessor2_0,
683
679
  ),
684
680
  )
685
681
  # if xformers or torch_2_0 is used attention block does not need
@@ -7,7 +7,7 @@ from typing import Any, Callable, Dict, List, Optional, Union
7
7
  import torch
8
8
  from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
9
9
 
10
- from ...loaders import LoraLoaderMixin
10
+ from ...loaders import StableDiffusionLoraLoaderMixin
11
11
  from ...models import UNet2DConditionModel
12
12
  from ...schedulers import DDPMScheduler
13
13
  from ...utils import (
@@ -84,7 +84,7 @@ EXAMPLE_DOC_STRING = """
84
84
  """
85
85
 
86
86
 
87
- class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
87
+ class IFPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
88
88
  tokenizer: T5Tokenizer
89
89
  text_encoder: T5EncoderModel
90
90
 
@@ -9,7 +9,7 @@ import PIL.Image
9
9
  import torch
10
10
  from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
11
11
 
12
- from ...loaders import LoraLoaderMixin
12
+ from ...loaders import StableDiffusionLoraLoaderMixin
13
13
  from ...models import UNet2DConditionModel
14
14
  from ...schedulers import DDPMScheduler
15
15
  from ...utils import (
@@ -108,7 +108,7 @@ EXAMPLE_DOC_STRING = """
108
108
  """
109
109
 
110
110
 
111
- class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
111
+ class IFImg2ImgPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
112
112
  tokenizer: T5Tokenizer
113
113
  text_encoder: T5EncoderModel
114
114
 
@@ -10,7 +10,7 @@ import torch
10
10
  import torch.nn.functional as F
11
11
  from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
12
12
 
13
- from ...loaders import LoraLoaderMixin
13
+ from ...loaders import StableDiffusionLoraLoaderMixin
14
14
  from ...models import UNet2DConditionModel
15
15
  from ...schedulers import DDPMScheduler
16
16
  from ...utils import (
@@ -111,7 +111,7 @@ EXAMPLE_DOC_STRING = """
111
111
  """
112
112
 
113
113
 
114
- class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
114
+ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
115
115
  tokenizer: T5Tokenizer
116
116
  text_encoder: T5EncoderModel
117
117
 
@@ -9,7 +9,7 @@ import PIL.Image
9
9
  import torch
10
10
  from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
11
11
 
12
- from ...loaders import LoraLoaderMixin
12
+ from ...loaders import StableDiffusionLoraLoaderMixin
13
13
  from ...models import UNet2DConditionModel
14
14
  from ...schedulers import DDPMScheduler
15
15
  from ...utils import (
@@ -111,7 +111,7 @@ EXAMPLE_DOC_STRING = """
111
111
  """
112
112
 
113
113
 
114
- class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
114
+ class IFInpaintingPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
115
115
  tokenizer: T5Tokenizer
116
116
  text_encoder: T5EncoderModel
117
117
 
@@ -10,7 +10,7 @@ import torch
10
10
  import torch.nn.functional as F
11
11
  from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
12
12
 
13
- from ...loaders import LoraLoaderMixin
13
+ from ...loaders import StableDiffusionLoraLoaderMixin
14
14
  from ...models import UNet2DConditionModel
15
15
  from ...schedulers import DDPMScheduler
16
16
  from ...utils import (
@@ -113,7 +113,7 @@ EXAMPLE_DOC_STRING = """
113
113
  """
114
114
 
115
115
 
116
- class IFInpaintingSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
116
+ class IFInpaintingSuperResolutionPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
117
117
  tokenizer: T5Tokenizer
118
118
  text_encoder: T5EncoderModel
119
119
 
@@ -10,7 +10,7 @@ import torch
10
10
  import torch.nn.functional as F
11
11
  from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
12
12
 
13
- from ...loaders import LoraLoaderMixin
13
+ from ...loaders import StableDiffusionLoraLoaderMixin
14
14
  from ...models import UNet2DConditionModel
15
15
  from ...schedulers import DDPMScheduler
16
16
  from ...utils import (
@@ -69,7 +69,7 @@ EXAMPLE_DOC_STRING = """
69
69
  """
70
70
 
71
71
 
72
- class IFSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
72
+ class IFSuperResolutionPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
73
73
  tokenizer: T5Tokenizer
74
74
  text_encoder: T5EncoderModel
75
75
 
@@ -21,7 +21,12 @@ from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection, XLMR
21
21
 
22
22
  from ....configuration_utils import FrozenDict
23
23
  from ....image_processor import PipelineImageInput, VaeImageProcessor
24
- from ....loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
24
+ from ....loaders import (
25
+ FromSingleFileMixin,
26
+ IPAdapterMixin,
27
+ StableDiffusionLoraLoaderMixin,
28
+ TextualInversionLoaderMixin,
29
+ )
25
30
  from ....models import AutoencoderKL, ImageProjection, UNet2DConditionModel
26
31
  from ....models.lora import adjust_lora_scale_text_encoder
27
32
  from ....schedulers import KarrasDiffusionSchedulers
@@ -137,7 +142,7 @@ class AltDiffusionPipeline(
137
142
  DiffusionPipeline,
138
143
  StableDiffusionMixin,
139
144
  TextualInversionLoaderMixin,
140
- LoraLoaderMixin,
145
+ StableDiffusionLoraLoaderMixin,
141
146
  IPAdapterMixin,
142
147
  FromSingleFileMixin,
143
148
  ):
@@ -149,8 +154,8 @@ class AltDiffusionPipeline(
149
154
 
150
155
  The pipeline also inherits the following loading methods:
151
156
  - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
152
- - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
153
- - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
157
+ - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
158
+ - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
154
159
  - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
155
160
  - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
156
161
 
@@ -346,7 +351,7 @@ class AltDiffusionPipeline(
346
351
  """
347
352
  # set lora scale so that monkey patched LoRA
348
353
  # function of text encoder can correctly access it
349
- if lora_scale is not None and isinstance(self, LoraLoaderMixin):
354
+ if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
350
355
  self._lora_scale = lora_scale
351
356
 
352
357
  # dynamically adjust the LoRA scale
@@ -478,7 +483,7 @@ class AltDiffusionPipeline(
478
483
  negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
479
484
  negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
480
485
 
481
- if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
486
+ if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
482
487
  # Retrieve the original scale by scaling back the LoRA layers
483
488
  unscale_lora_layers(self.text_encoder, lora_scale)
484
489
 
@@ -23,7 +23,12 @@ from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection, XLMR
23
23
 
24
24
  from ....configuration_utils import FrozenDict
25
25
  from ....image_processor import PipelineImageInput, VaeImageProcessor
26
- from ....loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
26
+ from ....loaders import (
27
+ FromSingleFileMixin,
28
+ IPAdapterMixin,
29
+ StableDiffusionLoraLoaderMixin,
30
+ TextualInversionLoaderMixin,
31
+ )
27
32
  from ....models import AutoencoderKL, ImageProjection, UNet2DConditionModel
28
33
  from ....models.lora import adjust_lora_scale_text_encoder
29
34
  from ....schedulers import KarrasDiffusionSchedulers
@@ -178,7 +183,7 @@ class AltDiffusionImg2ImgPipeline(
178
183
  StableDiffusionMixin,
179
184
  TextualInversionLoaderMixin,
180
185
  IPAdapterMixin,
181
- LoraLoaderMixin,
186
+ StableDiffusionLoraLoaderMixin,
182
187
  FromSingleFileMixin,
183
188
  ):
184
189
  r"""
@@ -189,8 +194,8 @@ class AltDiffusionImg2ImgPipeline(
189
194
 
190
195
  The pipeline also inherits the following loading methods:
191
196
  - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
192
- - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
193
- - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
197
+ - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
198
+ - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
194
199
  - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
195
200
  - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
196
201
 
@@ -386,7 +391,7 @@ class AltDiffusionImg2ImgPipeline(
386
391
  """
387
392
  # set lora scale so that monkey patched LoRA
388
393
  # function of text encoder can correctly access it
389
- if lora_scale is not None and isinstance(self, LoraLoaderMixin):
394
+ if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
390
395
  self._lora_scale = lora_scale
391
396
 
392
397
  # dynamically adjust the LoRA scale
@@ -518,7 +523,7 @@ class AltDiffusionImg2ImgPipeline(
518
523
  negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
519
524
  negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
520
525
 
521
- if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
526
+ if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
522
527
  # Retrieve the original scale by scaling back the LoRA layers
523
528
  unscale_lora_layers(self.text_encoder, lora_scale)
524
529
 
@@ -23,7 +23,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
23
23
 
24
24
  from ....configuration_utils import FrozenDict
25
25
  from ....image_processor import PipelineImageInput, VaeImageProcessor
26
- from ....loaders import LoraLoaderMixin, TextualInversionLoaderMixin
26
+ from ....loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
27
27
  from ....models import AutoencoderKL, UNet2DConditionModel
28
28
  from ....models.lora import adjust_lora_scale_text_encoder
29
29
  from ....schedulers import DDIMScheduler
@@ -136,7 +136,7 @@ def compute_noise(scheduler, prev_latents, latents, timestep, noise_pred, eta):
136
136
  return noise
137
137
 
138
138
 
139
- class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
139
+ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin):
140
140
  r"""
141
141
  Pipeline for text-guided image to image generation using Stable Diffusion.
142
142
 
@@ -145,8 +145,8 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
145
145
 
146
146
  The pipeline also inherits the following loading methods:
147
147
  - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
148
- - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
149
- - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
148
+ - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
149
+ - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
150
150
 
151
151
  Args:
152
152
  vae ([`AutoencoderKL`]):
@@ -324,7 +324,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
324
324
  """
325
325
  # set lora scale so that monkey patched LoRA
326
326
  # function of text encoder can correctly access it
327
- if lora_scale is not None and isinstance(self, LoraLoaderMixin):
327
+ if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
328
328
  self._lora_scale = lora_scale
329
329
 
330
330
  # dynamically adjust the LoRA scale
@@ -457,7 +457,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
457
457
  negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
458
458
 
459
459
  if self.text_encoder is not None:
460
- if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
460
+ if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
461
461
  # Retrieve the original scale by scaling back the LoRA layers
462
462
  unscale_lora_layers(self.text_encoder, lora_scale)
463
463
 
@@ -23,7 +23,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
23
23
 
24
24
  from ....configuration_utils import FrozenDict
25
25
  from ....image_processor import VaeImageProcessor
26
- from ....loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
26
+ from ....loaders import FromSingleFileMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
27
27
  from ....models import AutoencoderKL, UNet2DConditionModel
28
28
  from ....models.lora import adjust_lora_scale_text_encoder
29
29
  from ....schedulers import KarrasDiffusionSchedulers
@@ -79,7 +79,7 @@ def preprocess_mask(mask, batch_size, scale_factor=8):
79
79
 
80
80
 
81
81
  class StableDiffusionInpaintPipelineLegacy(
82
- DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
82
+ DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin, FromSingleFileMixin
83
83
  ):
84
84
  r"""
85
85
  Pipeline for text-guided image inpainting using Stable Diffusion. *This is an experimental feature*.
@@ -89,11 +89,11 @@ class StableDiffusionInpaintPipelineLegacy(
89
89
 
90
90
  In addition the pipeline inherits the following loading methods:
91
91
  - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
92
- - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`]
92
+ - *LoRA*: [`loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`]
93
93
  - *Ckpt*: [`loaders.FromSingleFileMixin.from_single_file`]
94
94
 
95
95
  as well as the following saving methods:
96
- - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`]
96
+ - *LoRA*: [`loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`]
97
97
 
98
98
  Args:
99
99
  vae ([`AutoencoderKL`]):
@@ -294,7 +294,7 @@ class StableDiffusionInpaintPipelineLegacy(
294
294
  """
295
295
  # set lora scale so that monkey patched LoRA
296
296
  # function of text encoder can correctly access it
297
- if lora_scale is not None and isinstance(self, LoraLoaderMixin):
297
+ if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
298
298
  self._lora_scale = lora_scale
299
299
 
300
300
  # dynamically adjust the LoRA scale
@@ -427,7 +427,7 @@ class StableDiffusionInpaintPipelineLegacy(
427
427
  negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
428
428
 
429
429
  if self.text_encoder is not None:
430
- if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
430
+ if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
431
431
  # Retrieve the original scale by scaling back the LoRA layers
432
432
  unscale_lora_layers(self.text_encoder, lora_scale)
433
433
 
@@ -16,10 +16,10 @@ import inspect
16
16
  from typing import Any, Callable, Dict, List, Optional, Union
17
17
 
18
18
  import torch
19
- from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
19
+ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
20
20
 
21
21
  from ....image_processor import VaeImageProcessor
22
- from ....loaders import LoraLoaderMixin, TextualInversionLoaderMixin
22
+ from ....loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
23
23
  from ....models import AutoencoderKL, UNet2DConditionModel
24
24
  from ....models.lora import adjust_lora_scale_text_encoder
25
25
  from ....schedulers import PNDMScheduler
@@ -37,7 +37,7 @@ AUGS_CONST = ["A photo of ", "An image of ", "A picture of "]
37
37
 
38
38
 
39
39
  class StableDiffusionModelEditingPipeline(
40
- DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
40
+ DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin
41
41
  ):
42
42
  r"""
43
43
  Pipeline for text-to-image model editing.
@@ -47,8 +47,8 @@ class StableDiffusionModelEditingPipeline(
47
47
 
48
48
  The pipeline also inherits the following loading methods:
49
49
  - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
50
- - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
51
- - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
50
+ - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
51
+ - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
52
52
 
53
53
  Args:
54
54
  vae ([`AutoencoderKL`]):
@@ -66,8 +66,8 @@ class StableDiffusionModelEditingPipeline(
66
66
  Classification module that estimates whether generated images could be considered offensive or harmful.
67
67
  Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
68
68
  about a model's potential harms.
69
- feature_extractor ([`~transformers.CLIPFeatureExtractor`]):
70
- A `CLIPFeatureExtractor` to extract features from generated images; used as inputs to the `safety_checker`.
69
+ feature_extractor ([`~transformers.CLIPImageProcessor`]):
70
+ A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
71
71
  with_to_k ([`bool`]):
72
72
  Whether to edit the key projection matrices along with the value projection matrices.
73
73
  with_augs ([`list`]):
@@ -86,7 +86,7 @@ class StableDiffusionModelEditingPipeline(
86
86
  unet: UNet2DConditionModel,
87
87
  scheduler: SchedulerMixin,
88
88
  safety_checker: StableDiffusionSafetyChecker,
89
- feature_extractor: CLIPFeatureExtractor,
89
+ feature_extractor: CLIPImageProcessor,
90
90
  requires_safety_checker: bool = True,
91
91
  with_to_k: bool = True,
92
92
  with_augs: list = AUGS_CONST,
@@ -232,7 +232,7 @@ class StableDiffusionModelEditingPipeline(
232
232
  """
233
233
  # set lora scale so that monkey patched LoRA
234
234
  # function of text encoder can correctly access it
235
- if lora_scale is not None and isinstance(self, LoraLoaderMixin):
235
+ if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
236
236
  self._lora_scale = lora_scale
237
237
 
238
238
  # dynamically adjust the LoRA scale
@@ -365,7 +365,7 @@ class StableDiffusionModelEditingPipeline(
365
365
  negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
366
366
 
367
367
  if self.text_encoder is not None:
368
- if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
368
+ if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
369
369
  # Retrieve the original scale by scaling back the LoRA layers
370
370
  unscale_lora_layers(self.text_encoder, lora_scale)
371
371
 
@@ -19,7 +19,7 @@ import torch
19
19
  from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
20
20
 
21
21
  from ....image_processor import VaeImageProcessor
22
- from ....loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
22
+ from ....loaders import FromSingleFileMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
23
23
  from ....models import AutoencoderKL, UNet2DConditionModel
24
24
  from ....models.lora import adjust_lora_scale_text_encoder
25
25
  from ....schedulers import KarrasDiffusionSchedulers
@@ -63,7 +63,11 @@ EXAMPLE_DOC_STRING = """
63
63
 
64
64
 
65
65
  class StableDiffusionParadigmsPipeline(
66
- DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
66
+ DiffusionPipeline,
67
+ StableDiffusionMixin,
68
+ TextualInversionLoaderMixin,
69
+ StableDiffusionLoraLoaderMixin,
70
+ FromSingleFileMixin,
67
71
  ):
68
72
  r"""
69
73
  Pipeline for text-to-image generation using a parallelized version of Stable Diffusion.
@@ -73,8 +77,8 @@ class StableDiffusionParadigmsPipeline(
73
77
 
74
78
  The pipeline also inherits the following loading methods:
75
79
  - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
76
- - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
77
- - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
80
+ - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
81
+ - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
78
82
  - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
79
83
 
80
84
  Args:
@@ -223,7 +227,7 @@ class StableDiffusionParadigmsPipeline(
223
227
  """
224
228
  # set lora scale so that monkey patched LoRA
225
229
  # function of text encoder can correctly access it
226
- if lora_scale is not None and isinstance(self, LoraLoaderMixin):
230
+ if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
227
231
  self._lora_scale = lora_scale
228
232
 
229
233
  # dynamically adjust the LoRA scale
@@ -356,7 +360,7 @@ class StableDiffusionParadigmsPipeline(
356
360
  negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
357
361
 
358
362
  if self.text_encoder is not None:
359
- if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
363
+ if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
360
364
  # Retrieve the original scale by scaling back the LoRA layers
361
365
  unscale_lora_layers(self.text_encoder, lora_scale)
362
366
 
@@ -29,7 +29,7 @@ from transformers import (
29
29
  )
30
30
 
31
31
  from ....image_processor import PipelineImageInput, VaeImageProcessor
32
- from ....loaders import LoraLoaderMixin, TextualInversionLoaderMixin
32
+ from ....loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
33
33
  from ....models import AutoencoderKL, UNet2DConditionModel
34
34
  from ....models.attention_processor import Attention
35
35
  from ....models.lora import adjust_lora_scale_text_encoder
@@ -446,7 +446,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
446
446
  """
447
447
  # set lora scale so that monkey patched LoRA
448
448
  # function of text encoder can correctly access it
449
- if lora_scale is not None and isinstance(self, LoraLoaderMixin):
449
+ if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
450
450
  self._lora_scale = lora_scale
451
451
 
452
452
  # dynamically adjust the LoRA scale
@@ -579,7 +579,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
579
579
  negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
580
580
 
581
581
  if self.text_encoder is not None:
582
- if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
582
+ if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
583
583
  # Retrieve the original scale by scaling back the LoRA layers
584
584
  unscale_lora_layers(self.text_encoder, lora_scale)
585
585
 
@@ -837,7 +837,7 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
837
837
 
838
838
  def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
839
839
  if hasattr(module, "get_processor"):
840
- processors[f"{name}.processor"] = module.get_processor(return_deprecated_lora=True)
840
+ processors[f"{name}.processor"] = module.get_processor()
841
841
 
842
842
  for sub_name, child in module.named_children():
843
843
  fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)