diffusers 0.34.0__py3-none-any.whl → 0.35.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191)
  1. diffusers/__init__.py +98 -1
  2. diffusers/callbacks.py +35 -0
  3. diffusers/commands/custom_blocks.py +134 -0
  4. diffusers/commands/diffusers_cli.py +2 -0
  5. diffusers/commands/fp16_safetensors.py +1 -1
  6. diffusers/configuration_utils.py +11 -2
  7. diffusers/dependency_versions_table.py +3 -3
  8. diffusers/guiders/__init__.py +41 -0
  9. diffusers/guiders/adaptive_projected_guidance.py +188 -0
  10. diffusers/guiders/auto_guidance.py +190 -0
  11. diffusers/guiders/classifier_free_guidance.py +141 -0
  12. diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
  13. diffusers/guiders/frequency_decoupled_guidance.py +327 -0
  14. diffusers/guiders/guider_utils.py +309 -0
  15. diffusers/guiders/perturbed_attention_guidance.py +271 -0
  16. diffusers/guiders/skip_layer_guidance.py +262 -0
  17. diffusers/guiders/smoothed_energy_guidance.py +251 -0
  18. diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
  19. diffusers/hooks/__init__.py +17 -0
  20. diffusers/hooks/_common.py +56 -0
  21. diffusers/hooks/_helpers.py +293 -0
  22. diffusers/hooks/faster_cache.py +7 -6
  23. diffusers/hooks/first_block_cache.py +259 -0
  24. diffusers/hooks/group_offloading.py +292 -286
  25. diffusers/hooks/hooks.py +56 -1
  26. diffusers/hooks/layer_skip.py +263 -0
  27. diffusers/hooks/layerwise_casting.py +2 -7
  28. diffusers/hooks/pyramid_attention_broadcast.py +14 -11
  29. diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
  30. diffusers/hooks/utils.py +43 -0
  31. diffusers/loaders/__init__.py +6 -0
  32. diffusers/loaders/ip_adapter.py +255 -4
  33. diffusers/loaders/lora_base.py +63 -30
  34. diffusers/loaders/lora_conversion_utils.py +434 -53
  35. diffusers/loaders/lora_pipeline.py +834 -37
  36. diffusers/loaders/peft.py +28 -5
  37. diffusers/loaders/single_file_model.py +44 -11
  38. diffusers/loaders/single_file_utils.py +170 -2
  39. diffusers/loaders/transformer_flux.py +9 -10
  40. diffusers/loaders/transformer_sd3.py +6 -1
  41. diffusers/loaders/unet.py +22 -5
  42. diffusers/loaders/unet_loader_utils.py +5 -2
  43. diffusers/models/__init__.py +8 -0
  44. diffusers/models/attention.py +484 -3
  45. diffusers/models/attention_dispatch.py +1218 -0
  46. diffusers/models/attention_processor.py +105 -663
  47. diffusers/models/auto_model.py +2 -2
  48. diffusers/models/autoencoders/__init__.py +1 -0
  49. diffusers/models/autoencoders/autoencoder_dc.py +14 -1
  50. diffusers/models/autoencoders/autoencoder_kl.py +1 -1
  51. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +3 -1
  52. diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
  53. diffusers/models/autoencoders/autoencoder_kl_wan.py +370 -40
  54. diffusers/models/cache_utils.py +31 -9
  55. diffusers/models/controlnets/controlnet_flux.py +5 -5
  56. diffusers/models/controlnets/controlnet_union.py +4 -4
  57. diffusers/models/embeddings.py +26 -34
  58. diffusers/models/model_loading_utils.py +233 -1
  59. diffusers/models/modeling_flax_utils.py +1 -2
  60. diffusers/models/modeling_utils.py +159 -94
  61. diffusers/models/transformers/__init__.py +2 -0
  62. diffusers/models/transformers/transformer_chroma.py +16 -117
  63. diffusers/models/transformers/transformer_cogview4.py +36 -2
  64. diffusers/models/transformers/transformer_cosmos.py +11 -4
  65. diffusers/models/transformers/transformer_flux.py +372 -132
  66. diffusers/models/transformers/transformer_hunyuan_video.py +6 -0
  67. diffusers/models/transformers/transformer_ltx.py +104 -23
  68. diffusers/models/transformers/transformer_qwenimage.py +645 -0
  69. diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
  70. diffusers/models/transformers/transformer_wan.py +298 -85
  71. diffusers/models/transformers/transformer_wan_vace.py +15 -21
  72. diffusers/models/unets/unet_2d_condition.py +2 -1
  73. diffusers/modular_pipelines/__init__.py +83 -0
  74. diffusers/modular_pipelines/components_manager.py +1068 -0
  75. diffusers/modular_pipelines/flux/__init__.py +66 -0
  76. diffusers/modular_pipelines/flux/before_denoise.py +689 -0
  77. diffusers/modular_pipelines/flux/decoders.py +109 -0
  78. diffusers/modular_pipelines/flux/denoise.py +227 -0
  79. diffusers/modular_pipelines/flux/encoders.py +412 -0
  80. diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
  81. diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
  82. diffusers/modular_pipelines/modular_pipeline.py +2446 -0
  83. diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
  84. diffusers/modular_pipelines/node_utils.py +665 -0
  85. diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
  86. diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
  87. diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
  88. diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
  89. diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
  90. diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
  91. diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
  92. diffusers/modular_pipelines/wan/__init__.py +66 -0
  93. diffusers/modular_pipelines/wan/before_denoise.py +365 -0
  94. diffusers/modular_pipelines/wan/decoders.py +105 -0
  95. diffusers/modular_pipelines/wan/denoise.py +261 -0
  96. diffusers/modular_pipelines/wan/encoders.py +242 -0
  97. diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
  98. diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
  99. diffusers/pipelines/__init__.py +31 -0
  100. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +2 -3
  101. diffusers/pipelines/auto_pipeline.py +17 -13
  102. diffusers/pipelines/chroma/pipeline_chroma.py +5 -5
  103. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +5 -5
  104. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +9 -8
  105. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +9 -8
  106. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +10 -9
  107. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +9 -8
  108. diffusers/pipelines/cogview4/pipeline_cogview4.py +16 -15
  109. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +3 -2
  110. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +212 -93
  111. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +7 -3
  112. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +194 -92
  113. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +1 -1
  114. diffusers/pipelines/dit/pipeline_dit.py +3 -1
  115. diffusers/pipelines/flux/__init__.py +4 -0
  116. diffusers/pipelines/flux/pipeline_flux.py +34 -26
  117. diffusers/pipelines/flux/pipeline_flux_control.py +8 -8
  118. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +1 -1
  119. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1 -1
  120. diffusers/pipelines/flux/pipeline_flux_controlnet.py +1 -1
  121. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +1 -1
  122. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1 -1
  123. diffusers/pipelines/flux/pipeline_flux_fill.py +1 -1
  124. diffusers/pipelines/flux/pipeline_flux_img2img.py +1 -1
  125. diffusers/pipelines/flux/pipeline_flux_inpaint.py +1 -1
  126. diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
  127. diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
  128. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
  129. diffusers/pipelines/flux/pipeline_output.py +6 -4
  130. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +5 -5
  131. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +25 -24
  132. diffusers/pipelines/ltx/pipeline_ltx.py +13 -12
  133. diffusers/pipelines/ltx/pipeline_ltx_condition.py +10 -9
  134. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +13 -12
  135. diffusers/pipelines/mochi/pipeline_mochi.py +9 -8
  136. diffusers/pipelines/pipeline_flax_utils.py +2 -2
  137. diffusers/pipelines/pipeline_loading_utils.py +24 -2
  138. diffusers/pipelines/pipeline_utils.py +22 -15
  139. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +3 -1
  140. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +20 -0
  141. diffusers/pipelines/qwenimage/__init__.py +55 -0
  142. diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
  143. diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
  144. diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +849 -0
  145. diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
  146. diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
  147. diffusers/pipelines/sana/pipeline_sana_sprint.py +5 -5
  148. diffusers/pipelines/skyreels_v2/__init__.py +59 -0
  149. diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
  150. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
  151. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
  152. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
  153. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
  154. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
  155. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -1
  156. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +1 -1
  157. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +1 -1
  158. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +2 -1
  159. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +6 -5
  160. diffusers/pipelines/wan/pipeline_wan.py +78 -20
  161. diffusers/pipelines/wan/pipeline_wan_i2v.py +112 -32
  162. diffusers/pipelines/wan/pipeline_wan_vace.py +1 -2
  163. diffusers/quantizers/__init__.py +1 -177
  164. diffusers/quantizers/base.py +11 -0
  165. diffusers/quantizers/gguf/utils.py +92 -3
  166. diffusers/quantizers/pipe_quant_config.py +202 -0
  167. diffusers/quantizers/torchao/torchao_quantizer.py +26 -0
  168. diffusers/schedulers/scheduling_deis_multistep.py +8 -1
  169. diffusers/schedulers/scheduling_dpmsolver_multistep.py +6 -0
  170. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +6 -0
  171. diffusers/schedulers/scheduling_scm.py +0 -1
  172. diffusers/schedulers/scheduling_unipc_multistep.py +10 -1
  173. diffusers/schedulers/scheduling_utils.py +2 -2
  174. diffusers/schedulers/scheduling_utils_flax.py +1 -1
  175. diffusers/training_utils.py +78 -0
  176. diffusers/utils/__init__.py +10 -0
  177. diffusers/utils/constants.py +4 -0
  178. diffusers/utils/dummy_pt_objects.py +312 -0
  179. diffusers/utils/dummy_torch_and_transformers_objects.py +255 -0
  180. diffusers/utils/dynamic_modules_utils.py +84 -25
  181. diffusers/utils/hub_utils.py +33 -17
  182. diffusers/utils/import_utils.py +70 -0
  183. diffusers/utils/peft_utils.py +11 -8
  184. diffusers/utils/testing_utils.py +136 -10
  185. diffusers/utils/torch_utils.py +18 -0
  186. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/METADATA +6 -6
  187. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/RECORD +191 -127
  188. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/LICENSE +0 -0
  189. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/WHEEL +0 -0
  190. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/entry_points.txt +0 -0
  191. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/top_level.txt +0 -0
diffusers/modular_pipelines/wan/modular_pipeline.py
@@ -0,0 +1,90 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from ...loaders import WanLoraLoaderMixin
+from ...pipelines.pipeline_utils import StableDiffusionMixin
+from ...utils import logging
+from ..modular_pipeline import ModularPipeline
+
+
+logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
+
+
+class WanModularPipeline(
+    ModularPipeline,
+    StableDiffusionMixin,
+    WanLoraLoaderMixin,
+):
+    """
+    A ModularPipeline for Wan.
+
+    <Tip warning={true}>
+
+    This is an experimental feature and is likely to change in the future.
+
+    </Tip>
+    """
+
+    @property
+    def default_height(self):
+        return self.default_sample_height * self.vae_scale_factor_spatial
+
+    @property
+    def default_width(self):
+        return self.default_sample_width * self.vae_scale_factor_spatial
+
+    @property
+    def default_num_frames(self):
+        return (self.default_sample_num_frames - 1) * self.vae_scale_factor_temporal + 1
+
+    @property
+    def default_sample_height(self):
+        return 60
+
+    @property
+    def default_sample_width(self):
+        return 104
+
+    @property
+    def default_sample_num_frames(self):
+        return 21
+
+    @property
+    def vae_scale_factor_spatial(self):
+        vae_scale_factor = 8
+        if hasattr(self, "vae") and self.vae is not None:
+            vae_scale_factor = 2 ** len(self.vae.temperal_downsample)
+        return vae_scale_factor
+
+    @property
+    def vae_scale_factor_temporal(self):
+        vae_scale_factor = 4
+        if hasattr(self, "vae") and self.vae is not None:
+            vae_scale_factor = 2 ** sum(self.vae.temperal_downsample)
+        return vae_scale_factor
+
+    @property
+    def num_channels_transformer(self):
+        num_channels_transformer = 16
+        if hasattr(self, "transformer") and self.transformer is not None:
+            num_channels_transformer = self.transformer.config.in_channels
+        return num_channels_transformer
+
+    @property
+    def num_channels_latents(self):
+        num_channels_latents = 16
+        if hasattr(self, "vae") and self.vae is not None:
+            num_channels_latents = self.vae.config.z_dim
+        return num_channels_latents
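For orientation, the pixel-space defaults above are derived from the latent-space ("sample") defaults multiplied by the VAE scale factors. A minimal sketch of the arithmetic, using the fallback factors that apply when no VAE is attached (all values are read off the properties in this hunk):

```python
# Defaults from WanModularPipeline above; the scale factors fall back to
# 8 (spatial) and 4 (temporal) when no VAE is loaded.
default_sample_height = 60
default_sample_width = 104
default_sample_num_frames = 21
vae_scale_factor_spatial = 8
vae_scale_factor_temporal = 4

print(default_sample_height * vae_scale_factor_spatial)                 # 480
print(default_sample_width * vae_scale_factor_spatial)                  # 832
print((default_sample_num_frames - 1) * vae_scale_factor_temporal + 1)  # 81
```

So with no components loaded, the pipeline defaults to 480x832 video at 81 frames.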
diffusers/pipelines/__init__.py
@@ -140,6 +140,8 @@ else:
         "FluxFillPipeline",
         "FluxPriorReduxPipeline",
         "ReduxImageEncoder",
+        "FluxKontextPipeline",
+        "FluxKontextInpaintPipeline",
     ]
     _import_structure["audioldm"] = ["AudioLDMPipeline"]
     _import_structure["audioldm2"] = [
@@ -378,6 +380,19 @@ else:
         "WuerstchenPriorPipeline",
     ]
     _import_structure["wan"] = ["WanPipeline", "WanImageToVideoPipeline", "WanVideoToVideoPipeline", "WanVACEPipeline"]
+    _import_structure["skyreels_v2"] = [
+        "SkyReelsV2DiffusionForcingPipeline",
+        "SkyReelsV2DiffusionForcingImageToVideoPipeline",
+        "SkyReelsV2DiffusionForcingVideoToVideoPipeline",
+        "SkyReelsV2ImageToVideoPipeline",
+        "SkyReelsV2Pipeline",
+    ]
+    _import_structure["qwenimage"] = [
+        "QwenImagePipeline",
+        "QwenImageImg2ImgPipeline",
+        "QwenImageInpaintPipeline",
+        "QwenImageEditPipeline",
+    ]
 try:
     if not is_onnx_available():
         raise OptionalDependencyNotAvailable()
@@ -609,6 +624,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             FluxFillPipeline,
             FluxImg2ImgPipeline,
             FluxInpaintPipeline,
+            FluxKontextInpaintPipeline,
+            FluxKontextPipeline,
             FluxPipeline,
             FluxPriorReduxPipeline,
             ReduxImageEncoder,
@@ -692,6 +709,12 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from .paint_by_example import PaintByExamplePipeline
         from .pia import PIAPipeline
         from .pixart_alpha import PixArtAlphaPipeline, PixArtSigmaPipeline
+        from .qwenimage import (
+            QwenImageEditPipeline,
+            QwenImageImg2ImgPipeline,
+            QwenImageInpaintPipeline,
+            QwenImagePipeline,
+        )
         from .sana import SanaControlNetPipeline, SanaPipeline, SanaSprintImg2ImgPipeline, SanaSprintPipeline
         from .semantic_stable_diffusion import SemanticStableDiffusionPipeline
         from .shap_e import ShapEImg2ImgPipeline, ShapEPipeline
@@ -847,6 +870,14 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
                 SpectrogramDiffusionPipeline,
             )
 
+        from .skyreels_v2 import (
+            SkyReelsV2DiffusionForcingImageToVideoPipeline,
+            SkyReelsV2DiffusionForcingPipeline,
+            SkyReelsV2DiffusionForcingVideoToVideoPipeline,
+            SkyReelsV2ImageToVideoPipeline,
+            SkyReelsV2Pipeline,
+        )
+
 else:
     import sys
 
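These `_import_structure` entries make the new pipelines importable from the package root (lazily, unless `DIFFUSERS_SLOW_IMPORT` is set). A hedged usage sketch; the checkpoint ID is illustrative and not taken from this diff:

```python
import torch
from diffusers import QwenImagePipeline

# Illustrative checkpoint ID; any diffusers-format Qwen-Image checkpoint
# would be loaded the same way.
pipe = QwenImagePipeline.from_pretrained("Qwen/Qwen-Image", torch_dtype=torch.bfloat16)
pipe.to("cuda")

image = pipe(prompt="a watercolor lighthouse at dusk").images[0]
image.save("lighthouse.png")
```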
diffusers/pipelines/audioldm2/pipeline_audioldm2.py
@@ -312,15 +312,14 @@ class AudioLDM2Pipeline(DiffusionPipeline):
                 The sequence of generated hidden-states.
         """
         cache_position_kwargs = {}
-        if is_transformers_version("<", "4.52.0.dev0"):
+        if is_transformers_version("<", "4.52.1"):
             cache_position_kwargs["input_ids"] = inputs_embeds
-            cache_position_kwargs["model_kwargs"] = model_kwargs
         else:
             cache_position_kwargs["seq_length"] = inputs_embeds.shape[0]
             cache_position_kwargs["device"] = (
                 self.language_model.device if getattr(self, "language_model", None) is not None else self.device
             )
-            cache_position_kwargs["model_kwargs"] = model_kwargs
+        cache_position_kwargs["model_kwargs"] = model_kwargs
         max_new_tokens = max_new_tokens if max_new_tokens is not None else self.language_model.config.max_new_tokens
         model_kwargs = self.language_model._get_initial_cache_position(**cache_position_kwargs)
 
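The behavioral change is small: the version cutoff moves from `4.52.0.dev0` to `4.52.1`, and the `model_kwargs` entry, needed on both paths, is now assigned once after the branch. A standalone sketch of the corrected control flow (the helper comes from `diffusers.utils`; the function wrapper is for illustration only):

```python
from diffusers.utils import is_transformers_version

def build_cache_position_kwargs(inputs_embeds, model_kwargs, device):
    # Older transformers releases expect `input_ids`; newer ones take
    # `seq_length` and `device`. `model_kwargs` is common to both branches.
    kwargs = {}
    if is_transformers_version("<", "4.52.1"):
        kwargs["input_ids"] = inputs_embeds
    else:
        kwargs["seq_length"] = inputs_embeds.shape[0]
        kwargs["device"] = device
    kwargs["model_kwargs"] = model_kwargs
    return kwargs
```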
diffusers/pipelines/auto_pipeline.py
@@ -49,6 +49,7 @@ from .flux import (
     FluxControlPipeline,
     FluxImg2ImgPipeline,
     FluxInpaintPipeline,
+    FluxKontextPipeline,
     FluxPipeline,
 )
 from .hunyuandit import HunyuanDiTPipeline
@@ -142,6 +143,7 @@ AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict(
         ("flux", FluxPipeline),
         ("flux-control", FluxControlPipeline),
         ("flux-controlnet", FluxControlNetPipeline),
+        ("flux-kontext", FluxKontextPipeline),
         ("lumina", LuminaPipeline),
         ("lumina2", Lumina2Pipeline),
         ("chroma", ChromaPipeline),
@@ -171,6 +173,7 @@ AUTO_IMAGE2IMAGE_PIPELINES_MAPPING = OrderedDict(
         ("flux", FluxImg2ImgPipeline),
         ("flux-controlnet", FluxControlNetImg2ImgPipeline),
         ("flux-control", FluxControlImg2ImgPipeline),
+        ("flux-kontext", FluxKontextPipeline),
     ]
 )
 
@@ -248,14 +251,15 @@ def _get_connected_pipeline(pipeline_cls):
         return _get_task_class(AUTO_INPAINT_PIPELINES_MAPPING, pipeline_cls.__name__, throw_error_if_not_exist=False)
 
 
-def _get_task_class(mapping, pipeline_class_name, throw_error_if_not_exist: bool = True):
-    def get_model(pipeline_class_name):
-        for task_mapping in SUPPORTED_TASKS_MAPPINGS:
-            for model_name, pipeline in task_mapping.items():
-                if pipeline.__name__ == pipeline_class_name:
-                    return model_name
+def _get_model(pipeline_class_name):
+    for task_mapping in SUPPORTED_TASKS_MAPPINGS:
+        for model_name, pipeline in task_mapping.items():
+            if pipeline.__name__ == pipeline_class_name:
+                return model_name
+
 
-    model_name = get_model(pipeline_class_name)
+def _get_task_class(mapping, pipeline_class_name, throw_error_if_not_exist: bool = True):
+    model_name = _get_model(pipeline_class_name)
 
     if model_name is not None:
         task_class = mapping.get(model_name, None)
@@ -391,8 +395,8 @@ class AutoPipelineForText2Image(ConfigMixin):
 
         <Tip>
 
-        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with
-        `huggingface-cli login`.
+        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf
+        auth login`.
 
         </Tip>
 
@@ -686,8 +690,8 @@ class AutoPipelineForImage2Image(ConfigMixin):
 
         <Tip>
 
-        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with
-        `huggingface-cli login`.
+        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf
+        auth login`.
 
         </Tip>
 
@@ -996,8 +1000,8 @@ class AutoPipelineForInpainting(ConfigMixin):
 
         <Tip>
 
-        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with
-        `huggingface-cli login`.
+        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf
+        auth login`.
 
         </Tip>
 
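With `("flux-kontext", FluxKontextPipeline)` registered in both the text-to-image and image-to-image mappings, the auto classes can now resolve Kontext checkpoints. A hedged sketch; the checkpoint ID and image URL are illustrative:

```python
import torch
from diffusers import AutoPipelineForImage2Image
from diffusers.utils import load_image

# Illustrative checkpoint ID for a FLUX Kontext model.
pipe = AutoPipelineForImage2Image.from_pretrained(
    "black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16
).to("cuda")

init_image = load_image("https://example.com/input.png")  # placeholder URL
image = pipe(prompt="make it snow", image=init_image).images[0]
```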
diffusers/pipelines/chroma/pipeline_chroma.py
@@ -663,11 +663,11 @@ class ChromaPipeline(
                 their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
                 will be used.
             guidance_scale (`float`, *optional*, defaults to 3.5):
-                Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
-                `guidance_scale` is defined as `w` of equation 2. of [Imagen
-                Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
-                1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
-                usually at the expense of lower image quality.
+                Embedded guiddance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages
+                a model to generate images more aligned with `prompt` at the expense of lower image quality.
+
+                Guidance-distilled models approximates true classifer-free guidance for `guidance_scale` > 1. Refer to
+                the [paper](https://huggingface.co/papers/2210.03142) to learn more.
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
diffusers/pipelines/chroma/pipeline_chroma_img2img.py
@@ -725,11 +725,11 @@ class ChromaImg2ImgPipeline(
                 their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
                 will be used.
             guidance_scale (`float`, *optional*, defaults to 5.0):
-                Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
-                `guidance_scale` is defined as `w` of equation 2. of [Imagen
-                Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
-                1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
-                usually at the expense of lower image quality.
+                Embedded guiddance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages
+                a model to generate images more aligned with `prompt` at the expense of lower image quality.
+
+                Guidance-distilled models approximates true classifer-free guidance for `guidance_scale` > 1. Refer to
+                the [paper](https://huggingface.co/papers/2210.03142) to learn more.
             strength (`float, *optional*, defaults to 0.9):
                 Conceptually, indicates how much to transform the reference image. Must be between 0 and 1. image will
                 be used as a starting point, adding more noise to it the larger the strength. The number of denoising
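For readers comparing the old and new docstrings: classic classifier-free guidance runs the model twice per step and blends the two predictions, while a guidance-distilled model such as Chroma takes the scale as a direct input and approximates that blend in a single pass ("embedded guidance"). The blend itself is the standard formula, shown here as a sketch rather than Chroma's actual internals:

```python
def cfg_combine(noise_uncond, noise_cond, guidance_scale):
    # Classic classifier-free guidance: eps = eps_uncond + w * (eps_cond - eps_uncond).
    # A guidance-distilled model approximates this output from a single forward
    # pass that receives `guidance_scale` as conditioning.
    return noise_uncond + guidance_scale * (noise_cond - noise_uncond)
```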
diffusers/pipelines/cogvideo/pipeline_cogvideox.py
@@ -718,14 +718,15 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
                 timestep = t.expand(latent_model_input.shape[0])
 
                 # predict noise model_output
-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    image_rotary_emb=image_rotary_emb,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
+                with self.transformer.cache_context("cond_uncond"):
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        encoder_hidden_states=prompt_embeds,
+                        timestep=timestep,
+                        image_rotary_emb=image_rotary_emb,
+                        attention_kwargs=attention_kwargs,
+                        return_dict=False,
+                    )[0]
                 noise_pred = noise_pred.float()
 
                 # perform guidance
diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py
@@ -784,14 +784,15 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
                 timestep = t.expand(latent_model_input.shape[0])
 
                 # predict noise model_output
-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    image_rotary_emb=image_rotary_emb,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
+                with self.transformer.cache_context("cond_uncond"):
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        encoder_hidden_states=prompt_embeds,
+                        timestep=timestep,
+                        image_rotary_emb=image_rotary_emb,
+                        attention_kwargs=attention_kwargs,
+                        return_dict=False,
+                    )[0]
                 noise_pred = noise_pred.float()
 
                 # perform guidance
diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py
@@ -831,15 +831,16 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
                 timestep = t.expand(latent_model_input.shape[0])
 
                 # predict noise model_output
-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    ofs=ofs_emb,
-                    image_rotary_emb=image_rotary_emb,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
+                with self.transformer.cache_context("cond_uncond"):
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        encoder_hidden_states=prompt_embeds,
+                        timestep=timestep,
+                        ofs=ofs_emb,
+                        image_rotary_emb=image_rotary_emb,
+                        attention_kwargs=attention_kwargs,
+                        return_dict=False,
+                    )[0]
                 noise_pred = noise_pred.float()
 
                 # perform guidance
diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py
@@ -799,14 +799,15 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
                 timestep = t.expand(latent_model_input.shape[0])
 
                 # predict noise model_output
-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    image_rotary_emb=image_rotary_emb,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
+                with self.transformer.cache_context("cond_uncond"):
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        encoder_hidden_states=prompt_embeds,
+                        timestep=timestep,
+                        image_rotary_emb=image_rotary_emb,
+                        attention_kwargs=attention_kwargs,
+                        return_dict=False,
+                    )[0]
                 noise_pred = noise_pred.float()
 
                 # perform guidance
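The recurring change in these four hunks is identical: the transformer forward pass is wrapped in `cache_context("cond_uncond")` so that the caching hooks introduced in this release (see `hooks/first_block_cache.py` and the `cache_utils.py` changes in the file list) can attribute cached states to a named invocation. A hedged sketch of how a user might combine this with the new cache, assuming `FirstBlockCacheConfig` and its `threshold` field match the shipped API:

```python
import torch
from diffusers import CogVideoXPipeline, FirstBlockCacheConfig

pipe = CogVideoXPipeline.from_pretrained(
    "THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16
).to("cuda")

# Assumption: enable_cache comes from the CacheMixin on the transformer and
# accepts the new config; the threshold value here is arbitrary.
pipe.transformer.enable_cache(FirstBlockCacheConfig(threshold=0.2))

video = pipe(prompt="a panda playing guitar by a river").frames[0]
```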
diffusers/pipelines/cogview4/pipeline_cogview4.py
@@ -619,22 +619,10 @@ class CogView4Pipeline(DiffusionPipeline, CogView4LoraLoaderMixin):
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
                 timestep = t.expand(latents.shape[0])
 
-                noise_pred_cond = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    original_size=original_size,
-                    target_size=target_size,
-                    crop_coords=crops_coords_top_left,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
-
-                # perform guidance
-                if self.do_classifier_free_guidance:
-                    noise_pred_uncond = self.transformer(
+                with self.transformer.cache_context("cond"):
+                    noise_pred_cond = self.transformer(
                         hidden_states=latent_model_input,
-                        encoder_hidden_states=negative_prompt_embeds,
+                        encoder_hidden_states=prompt_embeds,
                         timestep=timestep,
                         original_size=original_size,
                         target_size=target_size,
@@ -643,6 +631,19 @@ class CogView4Pipeline(DiffusionPipeline, CogView4LoraLoaderMixin):
                         return_dict=False,
                     )[0]
 
+                # perform guidance
+                if self.do_classifier_free_guidance:
+                    with self.transformer.cache_context("uncond"):
+                        noise_pred_uncond = self.transformer(
+                            hidden_states=latent_model_input,
+                            encoder_hidden_states=negative_prompt_embeds,
+                            timestep=timestep,
+                            original_size=original_size,
+                            target_size=target_size,
+                            crop_coords=crops_coords_top_left,
+                            attention_kwargs=attention_kwargs,
+                            return_dict=False,
+                        )[0]
                     noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond)
                 else:
                     noise_pred = noise_pred_cond
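CogView4 differs from the CogVideoX pipelines above in that it runs the conditional and unconditional passes separately, so each now gets its own cache context and cached states from one branch cannot leak into the other. Schematically (the transformer call signature is simplified for illustration):

```python
def predict_noise_with_cfg(transformer, latents, t, prompt_embeds, negative_prompt_embeds, w):
    # Distinct cache contexts: the two branches see different embeddings,
    # so their cached intermediate states must be kept apart.
    with transformer.cache_context("cond"):
        noise_cond = transformer(latents, prompt_embeds, t)
    with transformer.cache_context("uncond"):
        noise_uncond = transformer(latents, negative_prompt_embeds, t)
    # Combine exactly as in the hunk above.
    return noise_uncond + w * (noise_cond - noise_uncond)
```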
diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py
@@ -29,7 +29,7 @@ from ...utils.torch_utils import randn_tensor
 from ..blip_diffusion.blip_image_processing import BlipImageProcessor
 from ..blip_diffusion.modeling_blip2 import Blip2QFormerModel
 from ..blip_diffusion.modeling_ctx_clip import ContextCLIPTextModel
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
 
 
 if is_torch_xla_available():
@@ -88,7 +88,7 @@ EXAMPLE_DOC_STRING = """
 """
 
 
-class BlipDiffusionControlNetPipeline(DiffusionPipeline):
+class BlipDiffusionControlNetPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
     """
     Pipeline for Canny Edge based Controlled subject-driven generation using Blip Diffusion.
 
@@ -116,6 +116,7 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
             Position of the context token in the text encoder.
     """
 
+    _last_supported_version = "0.33.1"
     model_cpu_offload_seq = "qformer->text_encoder->unet->vae"
 
     def __init__(
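The pipeline opts into the new deprecation machinery by inheriting `DeprecatedPipelineMixin` and pinning `_last_supported_version`. The mixin's implementation is not part of this hunk; a minimal sketch of the pattern that class attribute implies (assumed behavior, not the shipped code):

```python
import warnings

class DeprecatedPipelineMixinSketch:
    # Subclasses pin the last diffusers release in which they were fully supported.
    _last_supported_version = None

    def __init__(self, *args, **kwargs):
        version = self._last_supported_version or "an earlier release"
        warnings.warn(
            f"{type(self).__name__} is deprecated and no longer actively maintained; "
            f"it was last fully supported in diffusers {version}.",
            FutureWarning,
        )
        super().__init__(*args, **kwargs)
```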