diffusers 0.34.0__py3-none-any.whl → 0.35.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in that registry.
Files changed (191)
  1. diffusers/__init__.py +98 -1
  2. diffusers/callbacks.py +35 -0
  3. diffusers/commands/custom_blocks.py +134 -0
  4. diffusers/commands/diffusers_cli.py +2 -0
  5. diffusers/commands/fp16_safetensors.py +1 -1
  6. diffusers/configuration_utils.py +11 -2
  7. diffusers/dependency_versions_table.py +3 -3
  8. diffusers/guiders/__init__.py +41 -0
  9. diffusers/guiders/adaptive_projected_guidance.py +188 -0
  10. diffusers/guiders/auto_guidance.py +190 -0
  11. diffusers/guiders/classifier_free_guidance.py +141 -0
  12. diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
  13. diffusers/guiders/frequency_decoupled_guidance.py +327 -0
  14. diffusers/guiders/guider_utils.py +309 -0
  15. diffusers/guiders/perturbed_attention_guidance.py +271 -0
  16. diffusers/guiders/skip_layer_guidance.py +262 -0
  17. diffusers/guiders/smoothed_energy_guidance.py +251 -0
  18. diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
  19. diffusers/hooks/__init__.py +17 -0
  20. diffusers/hooks/_common.py +56 -0
  21. diffusers/hooks/_helpers.py +293 -0
  22. diffusers/hooks/faster_cache.py +7 -6
  23. diffusers/hooks/first_block_cache.py +259 -0
  24. diffusers/hooks/group_offloading.py +292 -286
  25. diffusers/hooks/hooks.py +56 -1
  26. diffusers/hooks/layer_skip.py +263 -0
  27. diffusers/hooks/layerwise_casting.py +2 -7
  28. diffusers/hooks/pyramid_attention_broadcast.py +14 -11
  29. diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
  30. diffusers/hooks/utils.py +43 -0
  31. diffusers/loaders/__init__.py +6 -0
  32. diffusers/loaders/ip_adapter.py +255 -4
  33. diffusers/loaders/lora_base.py +63 -30
  34. diffusers/loaders/lora_conversion_utils.py +434 -53
  35. diffusers/loaders/lora_pipeline.py +834 -37
  36. diffusers/loaders/peft.py +28 -5
  37. diffusers/loaders/single_file_model.py +44 -11
  38. diffusers/loaders/single_file_utils.py +170 -2
  39. diffusers/loaders/transformer_flux.py +9 -10
  40. diffusers/loaders/transformer_sd3.py +6 -1
  41. diffusers/loaders/unet.py +22 -5
  42. diffusers/loaders/unet_loader_utils.py +5 -2
  43. diffusers/models/__init__.py +8 -0
  44. diffusers/models/attention.py +484 -3
  45. diffusers/models/attention_dispatch.py +1218 -0
  46. diffusers/models/attention_processor.py +105 -663
  47. diffusers/models/auto_model.py +2 -2
  48. diffusers/models/autoencoders/__init__.py +1 -0
  49. diffusers/models/autoencoders/autoencoder_dc.py +14 -1
  50. diffusers/models/autoencoders/autoencoder_kl.py +1 -1
  51. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +3 -1
  52. diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
  53. diffusers/models/autoencoders/autoencoder_kl_wan.py +370 -40
  54. diffusers/models/cache_utils.py +31 -9
  55. diffusers/models/controlnets/controlnet_flux.py +5 -5
  56. diffusers/models/controlnets/controlnet_union.py +4 -4
  57. diffusers/models/embeddings.py +26 -34
  58. diffusers/models/model_loading_utils.py +233 -1
  59. diffusers/models/modeling_flax_utils.py +1 -2
  60. diffusers/models/modeling_utils.py +159 -94
  61. diffusers/models/transformers/__init__.py +2 -0
  62. diffusers/models/transformers/transformer_chroma.py +16 -117
  63. diffusers/models/transformers/transformer_cogview4.py +36 -2
  64. diffusers/models/transformers/transformer_cosmos.py +11 -4
  65. diffusers/models/transformers/transformer_flux.py +372 -132
  66. diffusers/models/transformers/transformer_hunyuan_video.py +6 -0
  67. diffusers/models/transformers/transformer_ltx.py +104 -23
  68. diffusers/models/transformers/transformer_qwenimage.py +645 -0
  69. diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
  70. diffusers/models/transformers/transformer_wan.py +298 -85
  71. diffusers/models/transformers/transformer_wan_vace.py +15 -21
  72. diffusers/models/unets/unet_2d_condition.py +2 -1
  73. diffusers/modular_pipelines/__init__.py +83 -0
  74. diffusers/modular_pipelines/components_manager.py +1068 -0
  75. diffusers/modular_pipelines/flux/__init__.py +66 -0
  76. diffusers/modular_pipelines/flux/before_denoise.py +689 -0
  77. diffusers/modular_pipelines/flux/decoders.py +109 -0
  78. diffusers/modular_pipelines/flux/denoise.py +227 -0
  79. diffusers/modular_pipelines/flux/encoders.py +412 -0
  80. diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
  81. diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
  82. diffusers/modular_pipelines/modular_pipeline.py +2446 -0
  83. diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
  84. diffusers/modular_pipelines/node_utils.py +665 -0
  85. diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
  86. diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
  87. diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
  88. diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
  89. diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
  90. diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
  91. diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
  92. diffusers/modular_pipelines/wan/__init__.py +66 -0
  93. diffusers/modular_pipelines/wan/before_denoise.py +365 -0
  94. diffusers/modular_pipelines/wan/decoders.py +105 -0
  95. diffusers/modular_pipelines/wan/denoise.py +261 -0
  96. diffusers/modular_pipelines/wan/encoders.py +242 -0
  97. diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
  98. diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
  99. diffusers/pipelines/__init__.py +31 -0
  100. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +2 -3
  101. diffusers/pipelines/auto_pipeline.py +17 -13
  102. diffusers/pipelines/chroma/pipeline_chroma.py +5 -5
  103. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +5 -5
  104. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +9 -8
  105. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +9 -8
  106. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +10 -9
  107. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +9 -8
  108. diffusers/pipelines/cogview4/pipeline_cogview4.py +16 -15
  109. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +3 -2
  110. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +212 -93
  111. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +7 -3
  112. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +194 -92
  113. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +1 -1
  114. diffusers/pipelines/dit/pipeline_dit.py +3 -1
  115. diffusers/pipelines/flux/__init__.py +4 -0
  116. diffusers/pipelines/flux/pipeline_flux.py +34 -26
  117. diffusers/pipelines/flux/pipeline_flux_control.py +8 -8
  118. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +1 -1
  119. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1 -1
  120. diffusers/pipelines/flux/pipeline_flux_controlnet.py +1 -1
  121. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +1 -1
  122. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1 -1
  123. diffusers/pipelines/flux/pipeline_flux_fill.py +1 -1
  124. diffusers/pipelines/flux/pipeline_flux_img2img.py +1 -1
  125. diffusers/pipelines/flux/pipeline_flux_inpaint.py +1 -1
  126. diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
  127. diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
  128. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
  129. diffusers/pipelines/flux/pipeline_output.py +6 -4
  130. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +5 -5
  131. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +25 -24
  132. diffusers/pipelines/ltx/pipeline_ltx.py +13 -12
  133. diffusers/pipelines/ltx/pipeline_ltx_condition.py +10 -9
  134. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +13 -12
  135. diffusers/pipelines/mochi/pipeline_mochi.py +9 -8
  136. diffusers/pipelines/pipeline_flax_utils.py +2 -2
  137. diffusers/pipelines/pipeline_loading_utils.py +24 -2
  138. diffusers/pipelines/pipeline_utils.py +22 -15
  139. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +3 -1
  140. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +20 -0
  141. diffusers/pipelines/qwenimage/__init__.py +55 -0
  142. diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
  143. diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
  144. diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
  145. diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
  146. diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
  147. diffusers/pipelines/sana/pipeline_sana_sprint.py +5 -5
  148. diffusers/pipelines/skyreels_v2/__init__.py +59 -0
  149. diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
  150. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
  151. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
  152. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
  153. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
  154. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
  155. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -1
  156. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +1 -1
  157. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +1 -1
  158. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +2 -1
  159. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +6 -5
  160. diffusers/pipelines/wan/pipeline_wan.py +78 -20
  161. diffusers/pipelines/wan/pipeline_wan_i2v.py +112 -32
  162. diffusers/pipelines/wan/pipeline_wan_vace.py +1 -2
  163. diffusers/quantizers/__init__.py +1 -177
  164. diffusers/quantizers/base.py +11 -0
  165. diffusers/quantizers/gguf/utils.py +92 -3
  166. diffusers/quantizers/pipe_quant_config.py +202 -0
  167. diffusers/quantizers/torchao/torchao_quantizer.py +26 -0
  168. diffusers/schedulers/scheduling_deis_multistep.py +8 -1
  169. diffusers/schedulers/scheduling_dpmsolver_multistep.py +6 -0
  170. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +6 -0
  171. diffusers/schedulers/scheduling_scm.py +0 -1
  172. diffusers/schedulers/scheduling_unipc_multistep.py +10 -1
  173. diffusers/schedulers/scheduling_utils.py +2 -2
  174. diffusers/schedulers/scheduling_utils_flax.py +1 -1
  175. diffusers/training_utils.py +78 -0
  176. diffusers/utils/__init__.py +10 -0
  177. diffusers/utils/constants.py +4 -0
  178. diffusers/utils/dummy_pt_objects.py +312 -0
  179. diffusers/utils/dummy_torch_and_transformers_objects.py +255 -0
  180. diffusers/utils/dynamic_modules_utils.py +84 -25
  181. diffusers/utils/hub_utils.py +33 -17
  182. diffusers/utils/import_utils.py +70 -0
  183. diffusers/utils/peft_utils.py +11 -8
  184. diffusers/utils/testing_utils.py +136 -10
  185. diffusers/utils/torch_utils.py +18 -0
  186. {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/METADATA +6 -6
  187. {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/RECORD +191 -127
  188. {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
  189. {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/WHEEL +0 -0
  190. {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
  191. {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
@@ -292,7 +292,7 @@ class FluxPriorReduxPipeline(DiffusionPipeline):
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
-        prompt_2: Union[str, List[str]],
+        prompt_2: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.FloatTensor] = None,
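
Note: the practical effect of this change is that `prompt_2` may simply be omitted, where previously it had to be passed. A minimal sketch of the new calling pattern; the model id is illustrative, and since the Redux repo ships without text encoders they would in practice be supplied via the `text_encoder=`/`tokenizer=` kwargs:

    import torch
    from diffusers import FluxPriorReduxPipeline

    # Illustrative setup only (assumes text encoders are available to the pipeline).
    pipe = FluxPriorReduxPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-Redux-dev", torch_dtype=torch.bfloat16
    )

    # prompt_2 now defaults to None and can be left out entirely.
    embeds = pipe.encode_prompt(prompt="a photo of a cat", num_images_per_prompt=1)
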
@@ -11,12 +11,14 @@ from ...utils import BaseOutput
 @dataclass
 class FluxPipelineOutput(BaseOutput):
     """
-    Output class for Stable Diffusion pipelines.
+    Output class for Flux image generation pipelines.

     Args:
-        images (`List[PIL.Image.Image]` or `np.ndarray`)
-            List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
-            num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline.
+        images (`List[PIL.Image.Image]` or `torch.Tensor` or `np.ndarray`)
+            List of denoised PIL images of length `batch_size` or numpy array or torch tensor of shape `(batch_size,
+            height, width, num_channels)`. PIL images or numpy array present the denoised images of the diffusion
+            pipeline. Torch tensors can represent either the denoised images or the intermediate latents ready to be
+            passed to the decoder.
     """

     images: Union[List[PIL.Image.Image], np.ndarray]
@@ -763,11 +763,11 @@ class HiDreamImagePipeline(DiffusionPipeline, HiDreamImageLoraLoaderMixin):
                 their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
                 will be used.
             guidance_scale (`float`, *optional*, defaults to 3.5):
-                Guidance scale as defined in [Classifier-Free Diffusion
-                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
-                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
-                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
-                the text `prompt`, usually at the expense of lower image quality.
+                Embedded guiddance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages
+                a model to generate images more aligned with `prompt` at the expense of lower image quality.
+
+                Guidance-distilled models approximates true classifer-free guidance for `guidance_scale` > 1. Refer to
+                the [paper](https://huggingface.co/papers/2210.03142) to learn more.
             negative_prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `true_cfg_scale` is
@@ -529,15 +529,14 @@ class HunyuanVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
                 their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
                 will be used.
             true_cfg_scale (`float`, *optional*, defaults to 1.0):
-                When > 1.0 and a provided `negative_prompt`, enables true classifier-free guidance.
+                True classifier-free guidance (guidance scale) is enabled when `true_cfg_scale` > 1 and
+                `negative_prompt` is provided.
             guidance_scale (`float`, defaults to `6.0`):
-                Guidance scale as defined in [Classifier-Free Diffusion
-                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
-                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
-                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
-                the text `prompt`, usually at the expense of lower image quality. Note that the only available
-                HunyuanVideo model is CFG-distilled, which means that traditional guidance between unconditional and
-                conditional latent is not applied.
+                Embedded guiddance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages
+                a model to generate images more aligned with `prompt` at the expense of lower image quality.
+
+                Guidance-distilled models approximates true classifer-free guidance for `guidance_scale` > 1. Refer to
+                the [paper](https://huggingface.co/papers/2210.03142) to learn more.
             num_videos_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
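
Note: the rewritten docs distinguish the embedded `guidance_scale` (a conditioning input fed to a guidance-distilled transformer) from `true_cfg_scale` (classic two-pass classifier-free guidance). The combination step for the latter is exactly the line that appears in the denoising-loop hunk below; in isolation, with toy tensors standing in for the two transformer outputs:

    import torch

    noise_pred = torch.randn(1, 16, 32, 32)      # conditional prediction
    neg_noise_pred = torch.randn(1, 16, 32, 32)  # negative/unconditional prediction
    true_cfg_scale = 6.0

    # Extrapolate from the unconditional prediction toward the conditional one;
    # identical to the line in HunyuanVideoPipeline's loop in the next hunk.
    noise_pred = neg_noise_pred + true_cfg_scale * (noise_pred - neg_noise_pred)
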
@@ -693,28 +692,30 @@ class HunyuanVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
                 timestep = t.expand(latents.shape[0]).to(latents.dtype)

-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    timestep=timestep,
-                    encoder_hidden_states=prompt_embeds,
-                    encoder_attention_mask=prompt_attention_mask,
-                    pooled_projections=pooled_prompt_embeds,
-                    guidance=guidance,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
-
-                if do_true_cfg:
-                    neg_noise_pred = self.transformer(
+                with self.transformer.cache_context("cond"):
+                    noise_pred = self.transformer(
                         hidden_states=latent_model_input,
                         timestep=timestep,
-                        encoder_hidden_states=negative_prompt_embeds,
-                        encoder_attention_mask=negative_prompt_attention_mask,
-                        pooled_projections=negative_pooled_prompt_embeds,
+                        encoder_hidden_states=prompt_embeds,
+                        encoder_attention_mask=prompt_attention_mask,
+                        pooled_projections=pooled_prompt_embeds,
                         guidance=guidance,
                         attention_kwargs=attention_kwargs,
                         return_dict=False,
                     )[0]
+
+                if do_true_cfg:
+                    with self.transformer.cache_context("uncond"):
+                        neg_noise_pred = self.transformer(
+                            hidden_states=latent_model_input,
+                            timestep=timestep,
+                            encoder_hidden_states=negative_prompt_embeds,
+                            encoder_attention_mask=negative_prompt_attention_mask,
+                            pooled_projections=negative_pooled_prompt_embeds,
+                            guidance=guidance,
+                            attention_kwargs=attention_kwargs,
+                            return_dict=False,
+                        )[0]
                     noise_pred = neg_noise_pred + true_cfg_scale * (noise_pred - neg_noise_pred)

                 # compute the previous noisy sample x_t -> x_t-1
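
Note: `cache_context` is new in 0.35.0 (see `diffusers/models/cache_utils.py` and the new `diffusers/hooks/first_block_cache.py` in the file list above). It labels each forward pass so caching hooks can keep separate state for the conditional and unconditional branches rather than letting one branch's cached activations leak into the other. A simplified, self-contained analogue of the bookkeeping, not diffusers' actual hook machinery:

    from contextlib import contextmanager

    class CacheContextMixin:
        _current_cache_context = None

        @contextmanager
        def cache_context(self, name: str):
            # Tag every forward pass inside the block with `name`.
            self._current_cache_context = name
            try:
                yield
            finally:
                self._current_cache_context = None

    class TinyCachedModel(CacheContextMixin):
        def __init__(self):
            self._caches = {}  # one cache bucket per context name

        def __call__(self, x):
            # A real caching hook would read/update this bucket.
            bucket = self._caches.setdefault(self._current_cache_context, [])
            bucket.append(x)
            return x

    model = TinyCachedModel()
    with model.cache_context("cond"):
        model(0.1)
    with model.cache_context("uncond"):
        model(0.2)
    assert set(model._caches) == {"cond", "uncond"}
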
@@ -757,18 +757,19 @@ class LTXPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLoraLoaderMixin):
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
                 timestep = t.expand(latent_model_input.shape[0])

-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    encoder_attention_mask=prompt_attention_mask,
-                    num_frames=latent_num_frames,
-                    height=latent_height,
-                    width=latent_width,
-                    rope_interpolation_scale=rope_interpolation_scale,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
+                with self.transformer.cache_context("cond_uncond"):
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        encoder_hidden_states=prompt_embeds,
+                        timestep=timestep,
+                        encoder_attention_mask=prompt_attention_mask,
+                        num_frames=latent_num_frames,
+                        height=latent_height,
+                        width=latent_width,
+                        rope_interpolation_scale=rope_interpolation_scale,
+                        attention_kwargs=attention_kwargs,
+                        return_dict=False,
+                    )[0]
                 noise_pred = noise_pred.float()

                 if self.do_classifier_free_guidance:
@@ -1177,15 +1177,16 @@ class LTXConditionPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLoraLoaderMixin):
                 if is_conditioning_image_or_video:
                     timestep = torch.min(timestep, (1 - conditioning_mask_model_input) * 1000.0)

-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    encoder_attention_mask=prompt_attention_mask,
-                    video_coords=video_coords,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
+                with self.transformer.cache_context("cond_uncond"):
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        encoder_hidden_states=prompt_embeds,
+                        timestep=timestep,
+                        encoder_attention_mask=prompt_attention_mask,
+                        video_coords=video_coords,
+                        attention_kwargs=attention_kwargs,
+                        return_dict=False,
+                    )[0]

                 if self.do_classifier_free_guidance:
                     noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
@@ -830,18 +830,19 @@ class LTXImageToVideoPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLoraLoaderMixin):
                 timestep = t.expand(latent_model_input.shape[0])
                 timestep = timestep.unsqueeze(-1) * (1 - conditioning_mask)

-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    encoder_attention_mask=prompt_attention_mask,
-                    num_frames=latent_num_frames,
-                    height=latent_height,
-                    width=latent_width,
-                    rope_interpolation_scale=rope_interpolation_scale,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
+                with self.transformer.cache_context("cond_uncond"):
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        encoder_hidden_states=prompt_embeds,
+                        timestep=timestep,
+                        encoder_attention_mask=prompt_attention_mask,
+                        num_frames=latent_num_frames,
+                        height=latent_height,
+                        width=latent_width,
+                        rope_interpolation_scale=rope_interpolation_scale,
+                        attention_kwargs=attention_kwargs,
+                        return_dict=False,
+                    )[0]
                 noise_pred = noise_pred.float()

                 if self.do_classifier_free_guidance:
@@ -671,14 +671,15 @@ class MochiPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
                 timestep = t.expand(latent_model_input.shape[0]).to(latents.dtype)

-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    encoder_attention_mask=prompt_attention_mask,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
+                with self.transformer.cache_context("cond_uncond"):
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        encoder_hidden_states=prompt_embeds,
+                        timestep=timestep,
+                        encoder_attention_mask=prompt_attention_mask,
+                        attention_kwargs=attention_kwargs,
+                        return_dict=False,
+                    )[0]
                 # Mochi CFG + Sampling runs in FP32
                 noise_pred = noise_pred.to(torch.float32)

@@ -278,8 +278,8 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):

        <Tip>

-       To use private or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models), log-in with
-       `huggingface-cli login`.
+       To use private or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models), log-in with `hf
+       auth login`.

        </Tip>

@@ -371,6 +371,22 @@ def maybe_raise_or_warn(
        )


+# a simpler version of get_class_obj_and_candidates, it won't work with custom code
+def simple_get_class_obj(library_name, class_name):
+    from diffusers import pipelines
+
+    is_pipeline_module = hasattr(pipelines, library_name)
+
+    if is_pipeline_module:
+        pipeline_module = getattr(pipelines, library_name)
+        class_obj = getattr(pipeline_module, class_name)
+    else:
+        library = importlib.import_module(library_name)
+        class_obj = getattr(library, class_name)
+
+    return class_obj
+
+
 def get_class_obj_and_candidates(
     library_name, class_name, importable_classes, pipelines, is_pipeline_module, component_name=None, cache_dir=None
 ):
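
Note: a quick sketch of what the new helper resolves, assuming it can be imported from `diffusers.pipelines.pipeline_loading_utils` where this hunk lives (as its comment says, it does not handle custom code):

    from diffusers.pipelines.pipeline_loading_utils import simple_get_class_obj

    # A library name matching a diffusers.pipelines submodule resolves there...
    cls = simple_get_class_obj("stable_diffusion", "StableDiffusionPipeline")

    # ...anything else is treated as an importable top-level library.
    cls = simple_get_class_obj("transformers", "CLIPTextModel")
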
@@ -452,7 +468,7 @@ def _get_pipeline_class(
        revision=revision,
    )

-   if class_obj.__name__ != "DiffusionPipeline":
+   if class_obj.__name__ != "DiffusionPipeline" and class_obj.__name__ != "ModularPipeline":
        return class_obj

    diffusers_module = importlib.import_module(class_obj.__module__.split(".")[0])
@@ -597,6 +613,9 @@ def _assign_components_to_devices(


 def _get_final_device_map(device_map, pipeline_class, passed_class_obj, init_dict, library, max_memory, **kwargs):
+    # TODO: seperate out different device_map methods when it gets to it.
+    if device_map != "balanced":
+        return device_map
     # To avoid circular import problem.
     from diffusers import pipelines

@@ -892,7 +911,10 @@ def _fetch_class_library_tuple(module):
     library = not_compiled_module.__module__

     # retrieve class_name
-    class_name = not_compiled_module.__class__.__name__
+    if isinstance(not_compiled_module, type):
+        class_name = not_compiled_module.__name__
+    else:
+        class_name = not_compiled_module.__class__.__name__

     return (library, class_name)

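Note: the new `isinstance(..., type)` branch lets the helper accept a class object as well as an instance (the modular pipelines added in this release register component classes directly). The distinction it guards against, in a toy example (`UNet` here is a hypothetical stand-in):

    class UNet:
        pass

    # Instances report their class name via __class__ ...
    assert UNet().__class__.__name__ == "UNet"
    # ... but for the class object itself, __class__ is `type`, so the old
    # code path would have recorded "type" instead of "UNet".
    assert UNet.__class__.__name__ == "type"
    assert UNet.__name__ == "UNet"
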
@@ -108,7 +108,7 @@ LIBRARIES = []
 for library in LOADABLE_CLASSES:
     LIBRARIES.append(library)

-SUPPORTED_DEVICE_MAP = ["balanced"]
+SUPPORTED_DEVICE_MAP = ["balanced"] + [get_device()]

 logger = logging.get_logger(__name__)

@@ -710,8 +710,8 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):

        <Tip>

-       To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with
-       `huggingface-cli login`.
+       To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf
+       auth login`.

        </Tip>

@@ -988,12 +988,15 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
        _maybe_warn_for_wrong_component_in_quant_config(init_dict, quantization_config)
        for name, (library_name, class_name) in logging.tqdm(init_dict.items(), desc="Loading pipeline components..."):
            # 7.1 device_map shenanigans
-           if final_device_map is not None and len(final_device_map) > 0:
-               component_device = final_device_map.get(name, None)
-               if component_device is not None:
-                   current_device_map = {"": component_device}
-               else:
-                   current_device_map = None
+           if final_device_map is not None:
+               if isinstance(final_device_map, dict) and len(final_device_map) > 0:
+                   component_device = final_device_map.get(name, None)
+                   if component_device is not None:
+                       current_device_map = {"": component_device}
+                   else:
+                       current_device_map = None
+               elif isinstance(final_device_map, str):
+                   current_device_map = final_device_map

            # 7.2 - now that JAX/Flax is an official framework of the library, we might load from Flax names
            class_name = class_name[4:] if class_name.startswith("Flax") else class_name
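
Note: combined with the `SUPPORTED_DEVICE_MAP` and `_get_final_device_map` hunks above, this appears to let `from_pretrained` accept a plain device string and forward it to every component. A hedged sketch of the resulting call (the model id is illustrative):

    import torch
    from diffusers import DiffusionPipeline

    # In 0.35.0 a bare device string is passed through to each component
    # instead of only the "balanced" strategy being accepted.
    pipe = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float16,
        device_map="cuda",
    )
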
@@ -1096,6 +1099,8 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
            model.register_to_config(_name_or_path=pretrained_model_name_or_path)
            if device_map is not None:
                setattr(model, "hf_device_map", final_device_map)
+           if quantization_config is not None:
+               setattr(model, "quantization_config", quantization_config)
            return model

    @property
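
Note: the attribute recorded here comes from the pipeline-level quantization config (see the new `diffusers/quantizers/pipe_quant_config.py`, +202 lines, in the file list). A hedged sketch of the intended flow, assuming `PipelineQuantizationConfig` is exposed from `diffusers.quantizers` with the documented backend/kwargs interface; the model id and kwargs are illustrative:

    import torch
    from diffusers import DiffusionPipeline
    from diffusers.quantizers import PipelineQuantizationConfig

    quant_config = PipelineQuantizationConfig(
        quant_backend="bitsandbytes_4bit",
        quant_kwargs={"load_in_4bit": True, "bnb_4bit_compute_dtype": torch.bfloat16},
        components_to_quantize=["transformer"],
    )
    pipe = DiffusionPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-dev",
        quantization_config=quant_config,
        torch_dtype=torch.bfloat16,
    )
    # After this change, each loaded component carries the config it was quantized with:
    print(pipe.transformer.quantization_config)
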
@@ -1428,8 +1433,8 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):

        <Tip>

-       To use private or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models), log-in with
-       `huggingface-cli login`.
+       To use private or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models), log-in with `hf
+       auth login`.

        </Tip>

@@ -1986,11 +1991,13 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
            f"{'' if k.startswith('_') else '_'}{k}": v for k, v in original_config.items() if k not in pipeline_kwargs
        }

+       optional_components = (
+           pipeline._optional_components
+           if hasattr(pipeline, "_optional_components") and pipeline._optional_components
+           else []
+       )
        missing_modules = (
-           set(expected_modules)
-           - set(pipeline._optional_components)
-           - set(pipeline_kwargs.keys())
-           - set(true_optional_modules)
+           set(expected_modules) - set(optional_components) - set(pipeline_kwargs.keys()) - set(true_optional_modules)
        )

        if len(missing_modules) > 0:
@@ -256,7 +256,9 @@ class PixArtAlphaPipeline(DiffusionPipeline):
            Tokenizer of class
            [T5Tokenizer](https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5Tokenizer).
        transformer ([`PixArtTransformer2DModel`]):
-           A text conditioned `PixArtTransformer2DModel` to denoise the encoded image latents.
+           A text conditioned `PixArtTransformer2DModel` to denoise the encoded image latents. Initially published as
+           [`Transformer2DModel`](https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS/blob/main/transformer/config.json#L2)
+           in the config, but the mismatch can be ignored.
        scheduler ([`SchedulerMixin`]):
            A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
    """
@@ -185,6 +185,26 @@ def retrieve_timesteps(
 class PixArtSigmaPipeline(DiffusionPipeline):
     r"""
     Pipeline for text-to-image generation using PixArt-Sigma.
+
+    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
+    library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
+
+    Args:
+        vae ([`AutoencoderKL`]):
+            Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
+        text_encoder ([`T5EncoderModel`]):
+            Frozen text-encoder. PixArt-Alpha uses
+            [T5](https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5EncoderModel), specifically the
+            [t5-v1_1-xxl](https://huggingface.co/PixArt-alpha/PixArt-alpha/tree/main/t5-v1_1-xxl) variant.
+        tokenizer (`T5Tokenizer`):
+            Tokenizer of class
+            [T5Tokenizer](https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5Tokenizer).
+        transformer ([`PixArtTransformer2DModel`]):
+            A text conditioned `PixArtTransformer2DModel` to denoise the encoded image latents. Initially published as
+            [`Transformer2DModel`](https://huggingface.co/PixArt-alpha/PixArt-Sigma-XL-2-1024-MS/blob/main/transformer/config.json#L2)
+            in the config, but the mismatch can be ignored.
+        scheduler ([`SchedulerMixin`]):
+            A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
     """

     bad_punct_regex = re.compile(
@@ -0,0 +1,55 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_additional_imports = {}
+_import_structure = {"pipeline_output": ["QwenImagePipelineOutput", "QwenImagePriorReduxPipelineOutput"]}
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["modeling_qwenimage"] = ["ReduxImageEncoder"]
+    _import_structure["pipeline_qwenimage"] = ["QwenImagePipeline"]
+    _import_structure["pipeline_qwenimage_edit"] = ["QwenImageEditPipeline"]
+    _import_structure["pipeline_qwenimage_img2img"] = ["QwenImageImg2ImgPipeline"]
+    _import_structure["pipeline_qwenimage_inpaint"] = ["QwenImageInpaintPipeline"]
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *  # noqa F403
+    else:
+        from .pipeline_qwenimage import QwenImagePipeline
+        from .pipeline_qwenimage_edit import QwenImageEditPipeline
+        from .pipeline_qwenimage_img2img import QwenImageImg2ImgPipeline
+        from .pipeline_qwenimage_inpaint import QwenImageInpaintPipeline
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
+    for name, value in _additional_imports.items():
+        setattr(sys.modules[__name__], name, value)
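
Note: the `_LazyModule` pattern used here defers the heavy pipeline imports until an attribute is first touched, which keeps `import diffusers` fast. A rough standalone analogue using PEP 562 module-level `__getattr__` (a simplification for illustration, not diffusers' actual implementation):

    # lazy_pkg/__init__.py -- hypothetical simplified analogue of _LazyModule
    import importlib

    _import_structure = {"pipeline_qwenimage": ["QwenImagePipeline"]}
    _attr_to_module = {a: m for m, attrs in _import_structure.items() for a in attrs}

    def __getattr__(name):
        # Import the submodule only when one of its attributes is requested.
        if name in _attr_to_module:
            module = importlib.import_module(f".{_attr_to_module[name]}", __name__)
            return getattr(module, name)
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
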
@@ -0,0 +1,21 @@
+from dataclasses import dataclass
+from typing import List, Union
+
+import numpy as np
+import PIL.Image
+
+from ...utils import BaseOutput
+
+
+@dataclass
+class QwenImagePipelineOutput(BaseOutput):
+    """
+    Output class for Stable Diffusion pipelines.
+
+    Args:
+        images (`List[PIL.Image.Image]` or `np.ndarray`)
+            List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
+            num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline.
+    """
+
+    images: Union[List[PIL.Image.Image], np.ndarray]
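
Note: to place the two new files above in context, a hedged end-to-end sketch of the QwenImage family introduced in 0.35.0 (the Hub id matches Qwen's published checkpoint; the generation arguments are illustrative):

    import torch
    from diffusers import QwenImagePipeline

    pipe = QwenImagePipeline.from_pretrained("Qwen/Qwen-Image", torch_dtype=torch.bfloat16)
    pipe.to("cuda")

    # __call__ returns a QwenImagePipelineOutput; by default `images` holds PIL images.
    output = pipe(prompt="a watercolor fox in a pine forest", num_inference_steps=30)
    output.images[0].save("fox.png")
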