diffusers 0.34.0__py3-none-any.whl → 0.35.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +98 -1
- diffusers/callbacks.py +35 -0
- diffusers/commands/custom_blocks.py +134 -0
- diffusers/commands/diffusers_cli.py +2 -0
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/configuration_utils.py +11 -2
- diffusers/dependency_versions_table.py +3 -3
- diffusers/guiders/__init__.py +41 -0
- diffusers/guiders/adaptive_projected_guidance.py +188 -0
- diffusers/guiders/auto_guidance.py +190 -0
- diffusers/guiders/classifier_free_guidance.py +141 -0
- diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
- diffusers/guiders/frequency_decoupled_guidance.py +327 -0
- diffusers/guiders/guider_utils.py +309 -0
- diffusers/guiders/perturbed_attention_guidance.py +271 -0
- diffusers/guiders/skip_layer_guidance.py +262 -0
- diffusers/guiders/smoothed_energy_guidance.py +251 -0
- diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
- diffusers/hooks/__init__.py +17 -0
- diffusers/hooks/_common.py +56 -0
- diffusers/hooks/_helpers.py +293 -0
- diffusers/hooks/faster_cache.py +7 -6
- diffusers/hooks/first_block_cache.py +259 -0
- diffusers/hooks/group_offloading.py +292 -286
- diffusers/hooks/hooks.py +56 -1
- diffusers/hooks/layer_skip.py +263 -0
- diffusers/hooks/layerwise_casting.py +2 -7
- diffusers/hooks/pyramid_attention_broadcast.py +14 -11
- diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
- diffusers/hooks/utils.py +43 -0
- diffusers/loaders/__init__.py +6 -0
- diffusers/loaders/ip_adapter.py +255 -4
- diffusers/loaders/lora_base.py +63 -30
- diffusers/loaders/lora_conversion_utils.py +434 -53
- diffusers/loaders/lora_pipeline.py +834 -37
- diffusers/loaders/peft.py +28 -5
- diffusers/loaders/single_file_model.py +44 -11
- diffusers/loaders/single_file_utils.py +170 -2
- diffusers/loaders/transformer_flux.py +9 -10
- diffusers/loaders/transformer_sd3.py +6 -1
- diffusers/loaders/unet.py +22 -5
- diffusers/loaders/unet_loader_utils.py +5 -2
- diffusers/models/__init__.py +8 -0
- diffusers/models/attention.py +484 -3
- diffusers/models/attention_dispatch.py +1218 -0
- diffusers/models/attention_processor.py +105 -663
- diffusers/models/auto_model.py +2 -2
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_dc.py +14 -1
- diffusers/models/autoencoders/autoencoder_kl.py +1 -1
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +3 -1
- diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
- diffusers/models/autoencoders/autoencoder_kl_wan.py +370 -40
- diffusers/models/cache_utils.py +31 -9
- diffusers/models/controlnets/controlnet_flux.py +5 -5
- diffusers/models/controlnets/controlnet_union.py +4 -4
- diffusers/models/embeddings.py +26 -34
- diffusers/models/model_loading_utils.py +233 -1
- diffusers/models/modeling_flax_utils.py +1 -2
- diffusers/models/modeling_utils.py +159 -94
- diffusers/models/transformers/__init__.py +2 -0
- diffusers/models/transformers/transformer_chroma.py +16 -117
- diffusers/models/transformers/transformer_cogview4.py +36 -2
- diffusers/models/transformers/transformer_cosmos.py +11 -4
- diffusers/models/transformers/transformer_flux.py +372 -132
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -0
- diffusers/models/transformers/transformer_ltx.py +104 -23
- diffusers/models/transformers/transformer_qwenimage.py +645 -0
- diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
- diffusers/models/transformers/transformer_wan.py +298 -85
- diffusers/models/transformers/transformer_wan_vace.py +15 -21
- diffusers/models/unets/unet_2d_condition.py +2 -1
- diffusers/modular_pipelines/__init__.py +83 -0
- diffusers/modular_pipelines/components_manager.py +1068 -0
- diffusers/modular_pipelines/flux/__init__.py +66 -0
- diffusers/modular_pipelines/flux/before_denoise.py +689 -0
- diffusers/modular_pipelines/flux/decoders.py +109 -0
- diffusers/modular_pipelines/flux/denoise.py +227 -0
- diffusers/modular_pipelines/flux/encoders.py +412 -0
- diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
- diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
- diffusers/modular_pipelines/modular_pipeline.py +2446 -0
- diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
- diffusers/modular_pipelines/node_utils.py +665 -0
- diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
- diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
- diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
- diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
- diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
- diffusers/modular_pipelines/wan/__init__.py +66 -0
- diffusers/modular_pipelines/wan/before_denoise.py +365 -0
- diffusers/modular_pipelines/wan/decoders.py +105 -0
- diffusers/modular_pipelines/wan/denoise.py +261 -0
- diffusers/modular_pipelines/wan/encoders.py +242 -0
- diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
- diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
- diffusers/pipelines/__init__.py +31 -0
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +2 -3
- diffusers/pipelines/auto_pipeline.py +17 -13
- diffusers/pipelines/chroma/pipeline_chroma.py +5 -5
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +5 -5
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +9 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +9 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +10 -9
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +9 -8
- diffusers/pipelines/cogview4/pipeline_cogview4.py +16 -15
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +3 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +212 -93
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +7 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +194 -92
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +3 -1
- diffusers/pipelines/flux/__init__.py +4 -0
- diffusers/pipelines/flux/pipeline_flux.py +34 -26
- diffusers/pipelines/flux/pipeline_flux_control.py +8 -8
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_img2img.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +1 -1
- diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
- diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/flux/pipeline_output.py +6 -4
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +5 -5
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +25 -24
- diffusers/pipelines/ltx/pipeline_ltx.py +13 -12
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +10 -9
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +13 -12
- diffusers/pipelines/mochi/pipeline_mochi.py +9 -8
- diffusers/pipelines/pipeline_flax_utils.py +2 -2
- diffusers/pipelines/pipeline_loading_utils.py +24 -2
- diffusers/pipelines/pipeline_utils.py +22 -15
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +3 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +20 -0
- diffusers/pipelines/qwenimage/__init__.py +55 -0
- diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +5 -5
- diffusers/pipelines/skyreels_v2/__init__.py +59 -0
- diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +2 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +6 -5
- diffusers/pipelines/wan/pipeline_wan.py +78 -20
- diffusers/pipelines/wan/pipeline_wan_i2v.py +112 -32
- diffusers/pipelines/wan/pipeline_wan_vace.py +1 -2
- diffusers/quantizers/__init__.py +1 -177
- diffusers/quantizers/base.py +11 -0
- diffusers/quantizers/gguf/utils.py +92 -3
- diffusers/quantizers/pipe_quant_config.py +202 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +26 -0
- diffusers/schedulers/scheduling_deis_multistep.py +8 -1
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +6 -0
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +6 -0
- diffusers/schedulers/scheduling_scm.py +0 -1
- diffusers/schedulers/scheduling_unipc_multistep.py +10 -1
- diffusers/schedulers/scheduling_utils.py +2 -2
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/training_utils.py +78 -0
- diffusers/utils/__init__.py +10 -0
- diffusers/utils/constants.py +4 -0
- diffusers/utils/dummy_pt_objects.py +312 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +255 -0
- diffusers/utils/dynamic_modules_utils.py +84 -25
- diffusers/utils/hub_utils.py +33 -17
- diffusers/utils/import_utils.py +70 -0
- diffusers/utils/peft_utils.py +11 -8
- diffusers/utils/testing_utils.py +136 -10
- diffusers/utils/torch_utils.py +18 -0
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/METADATA +6 -6
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/RECORD +191 -127
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/WHEEL +0 -0
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.34.0.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
diffusers/pipelines/flux/pipeline_flux_prior_redux.py

@@ -292,7 +292,7 @@ class FluxPriorReduxPipeline(DiffusionPipeline):
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
-        prompt_2: Union[str, List[str]],
+        prompt_2: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.FloatTensor] = None,

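With this change `prompt_2` is optional rather than a required positional argument. A minimal sketch, assuming a loaded `FluxPriorReduxPipeline` bound to `pipe` and that an omitted `prompt_2` falls back to `prompt` as in the other Flux pipelines:

# Sketch only; `pipe` is an already-loaded FluxPriorReduxPipeline.
text_outputs = pipe.encode_prompt(
    prompt="a photo of a forest at dawn",  # prompt_2 now defaults to None
    num_images_per_prompt=1,
)
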
diffusers/pipelines/flux/pipeline_output.py

@@ -11,12 +11,14 @@ from ...utils import BaseOutput
 @dataclass
 class FluxPipelineOutput(BaseOutput):
     """
-    Output class for Stable Diffusion pipelines.
+    Output class for Flux image generation pipelines.
 
     Args:
-        images (`List[PIL.Image.Image]` or `np.ndarray`)
-            List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
-            num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline.
+        images (`List[PIL.Image.Image]` or `torch.Tensor` or `np.ndarray`)
+            List of denoised PIL images of length `batch_size` or numpy array or torch tensor of shape `(batch_size,
+            height, width, num_channels)`. PIL images or numpy array present the denoised images of the diffusion
+            pipeline. Torch tensors can represent either the denoised images or the intermediate latents ready to be
+            passed to the decoder.
     """
 
     images: Union[List[PIL.Image.Image], np.ndarray]

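The widened docstring covers the tensor case, which shows up when decoding is skipped. A sketch, assuming a loaded `FluxPipeline` as `pipe`:

# Sketch only; with output_type="latent" the VAE decode is skipped and
# `images` holds a torch.Tensor of latents instead of PIL images.
out = pipe("a cat wearing sunglasses", output_type="latent")
latents = out.images
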
diffusers/pipelines/hidream_image/pipeline_hidream_image.py

@@ -763,11 +763,11 @@ class HiDreamImagePipeline(DiffusionPipeline, HiDreamImageLoraLoaderMixin):
             their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
             will be used.
         guidance_scale (`float`, *optional*, defaults to 3.5):
-            Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://huggingface.co/papers/2207.12598).
-            `guidance_scale` is defined as `w` of equation 2. of [Imagen
-            Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting `guidance_scale >
-            1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
-            usually at the expense of lower image quality.
+            Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages
+            a model to generate images more aligned with `prompt` at the expense of lower image quality.
+
+            Guidance-distilled models approximate true classifier-free guidance for `guidance_scale` > 1. Refer to
+            the [paper](https://huggingface.co/papers/2210.03142) to learn more.
         negative_prompt (`str` or `List[str]`, *optional*):
             The prompt or prompts not to guide the image generation. If not defined, one has to pass
             `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `true_cfg_scale` is

diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py

@@ -529,15 +529,14 @@ class HunyuanVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
             their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
             will be used.
         true_cfg_scale (`float`, *optional*, defaults to 1.0):
-            When > 1.0 and a provided `negative_prompt`, enables true classifier-free guidance.
+            True classifier-free guidance (guidance scale) is enabled when `true_cfg_scale` > 1 and
+            `negative_prompt` is provided.
         guidance_scale (`float`, defaults to `6.0`):
-            Guidance scale as defined in [Classifier-Free Diffusion
-            Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
-            of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
-            `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
-            the text `prompt`, usually at the expense of lower image quality. Note that the only available
-            HunyuanVideo model is CFG-distilled, which means that traditional guidance between unconditional and
-            conditional latent is not applied.
+            Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages
+            a model to generate images more aligned with `prompt` at the expense of lower image quality.
+
+            Guidance-distilled models approximate true classifier-free guidance for `guidance_scale` > 1. Refer to
+            the [paper](https://huggingface.co/papers/2210.03142) to learn more.
         num_videos_per_prompt (`int`, *optional*, defaults to 1):
             The number of images to generate per prompt.
         generator (`torch.Generator` or `List[torch.Generator]`, *optional*):

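A sketch of how the two scales interact from the caller's side, assuming a loaded `HunyuanVideoPipeline` as `pipe`:

video = pipe(
    prompt="a panda playing guitar on a mountain top",
    negative_prompt="blurry, low quality",
    guidance_scale=6.0,  # embedded guidance; the released model is guidance-distilled
    true_cfg_scale=1.0,  # > 1 enables the extra negative-prompt pass shown in the next hunk
).frames[0]
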
@@ -693,28 +692,30 @@ class HunyuanVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
                 timestep = t.expand(latents.shape[0]).to(latents.dtype)
 
-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    timestep=timestep,
-                    encoder_hidden_states=prompt_embeds,
-                    encoder_attention_mask=prompt_attention_mask,
-                    pooled_projections=pooled_prompt_embeds,
-                    guidance=guidance,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
-
-                if do_true_cfg:
-                    neg_noise_pred = self.transformer(
-                        hidden_states=latent_model_input,
-                        timestep=timestep,
-                        encoder_hidden_states=negative_prompt_embeds,
-                        encoder_attention_mask=negative_prompt_attention_mask,
-                        pooled_projections=negative_pooled_prompt_embeds,
-                        guidance=guidance,
-                        attention_kwargs=attention_kwargs,
-                        return_dict=False,
-                    )[0]
+                with self.transformer.cache_context("cond"):
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        timestep=timestep,
+                        encoder_hidden_states=prompt_embeds,
+                        encoder_attention_mask=prompt_attention_mask,
+                        pooled_projections=pooled_prompt_embeds,
+                        guidance=guidance,
+                        attention_kwargs=attention_kwargs,
+                        return_dict=False,
+                    )[0]
+
+                if do_true_cfg:
+                    with self.transformer.cache_context("uncond"):
+                        neg_noise_pred = self.transformer(
+                            hidden_states=latent_model_input,
+                            timestep=timestep,
+                            encoder_hidden_states=negative_prompt_embeds,
+                            encoder_attention_mask=negative_prompt_attention_mask,
+                            pooled_projections=negative_pooled_prompt_embeds,
+                            guidance=guidance,
+                            attention_kwargs=attention_kwargs,
+                            return_dict=False,
+                        )[0]
                     noise_pred = neg_noise_pred + true_cfg_scale * (noise_pred - neg_noise_pred)
 
             # compute the previous noisy sample x_t -> x_t-1

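The named `cache_context("cond")` / `cache_context("uncond")` blocks let caching hooks keep separate state for the conditional and unconditional branches instead of mixing them. A sketch of how this pairs with the first-block cache added in this release; the `FirstBlockCacheConfig` name and its `threshold` knob are taken from `diffusers/hooks/first_block_cache.py` above and should be treated as assumptions:

import torch
from diffusers import HunyuanVideoPipeline, FirstBlockCacheConfig

pipe = HunyuanVideoPipeline.from_pretrained(
    "hunyuanvideo-community/HunyuanVideo", torch_dtype=torch.bfloat16
).to("cuda")
# Caching hooks now see the "cond"/"uncond" context names and track each branch separately.
pipe.transformer.enable_cache(FirstBlockCacheConfig(threshold=0.05))
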
diffusers/pipelines/ltx/pipeline_ltx.py

@@ -757,18 +757,19 @@ class LTXPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLoraLoaderMixin):
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
                 timestep = t.expand(latent_model_input.shape[0])
 
-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    encoder_attention_mask=prompt_attention_mask,
-                    num_frames=latent_num_frames,
-                    height=latent_height,
-                    width=latent_width,
-                    rope_interpolation_scale=rope_interpolation_scale,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
+                with self.transformer.cache_context("cond_uncond"):
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        encoder_hidden_states=prompt_embeds,
+                        timestep=timestep,
+                        encoder_attention_mask=prompt_attention_mask,
+                        num_frames=latent_num_frames,
+                        height=latent_height,
+                        width=latent_width,
+                        rope_interpolation_scale=rope_interpolation_scale,
+                        attention_kwargs=attention_kwargs,
+                        return_dict=False,
+                    )[0]
                 noise_pred = noise_pred.float()
 
                 if self.do_classifier_free_guidance:

diffusers/pipelines/ltx/pipeline_ltx_condition.py

@@ -1177,15 +1177,16 @@ class LTXConditionPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLoraLoaderMixin):
                 if is_conditioning_image_or_video:
                     timestep = torch.min(timestep, (1 - conditioning_mask_model_input) * 1000.0)
 
-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    encoder_attention_mask=prompt_attention_mask,
-                    video_coords=video_coords,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
+                with self.transformer.cache_context("cond_uncond"):
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        encoder_hidden_states=prompt_embeds,
+                        timestep=timestep,
+                        encoder_attention_mask=prompt_attention_mask,
+                        video_coords=video_coords,
+                        attention_kwargs=attention_kwargs,
+                        return_dict=False,
+                    )[0]
 
                 if self.do_classifier_free_guidance:
                     noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)

diffusers/pipelines/ltx/pipeline_ltx_image2video.py

@@ -830,18 +830,19 @@ class LTXImageToVideoPipeline(DiffusionPipeline, FromSingleFileMixin, LTXVideoLoraLoaderMixin):
                 timestep = t.expand(latent_model_input.shape[0])
                 timestep = timestep.unsqueeze(-1) * (1 - conditioning_mask)
 
-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    encoder_attention_mask=prompt_attention_mask,
-                    num_frames=latent_num_frames,
-                    height=latent_height,
-                    width=latent_width,
-                    rope_interpolation_scale=rope_interpolation_scale,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
+                with self.transformer.cache_context("cond_uncond"):
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        encoder_hidden_states=prompt_embeds,
+                        timestep=timestep,
+                        encoder_attention_mask=prompt_attention_mask,
+                        num_frames=latent_num_frames,
+                        height=latent_height,
+                        width=latent_width,
+                        rope_interpolation_scale=rope_interpolation_scale,
+                        attention_kwargs=attention_kwargs,
+                        return_dict=False,
+                    )[0]
                 noise_pred = noise_pred.float()
 
                 if self.do_classifier_free_guidance:

diffusers/pipelines/mochi/pipeline_mochi.py

@@ -671,14 +671,15 @@ class MochiPipeline(DiffusionPipeline, Mochi1LoraLoaderMixin):
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
                 timestep = t.expand(latent_model_input.shape[0]).to(latents.dtype)
 
-                noise_pred = self.transformer(
-                    hidden_states=latent_model_input,
-                    encoder_hidden_states=prompt_embeds,
-                    timestep=timestep,
-                    encoder_attention_mask=prompt_attention_mask,
-                    attention_kwargs=attention_kwargs,
-                    return_dict=False,
-                )[0]
+                with self.transformer.cache_context("cond_uncond"):
+                    noise_pred = self.transformer(
+                        hidden_states=latent_model_input,
+                        encoder_hidden_states=prompt_embeds,
+                        timestep=timestep,
+                        encoder_attention_mask=prompt_attention_mask,
+                        attention_kwargs=attention_kwargs,
+                        return_dict=False,
+                    )[0]
                 # Mochi CFG + Sampling runs in FP32
                 noise_pred = noise_pred.to(torch.float32)
 
diffusers/pipelines/pipeline_flax_utils.py

@@ -278,8 +278,8 @@ class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin):
 
         <Tip>
 
-        To use private or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models), log-in with
-        `huggingface-cli login`.
+        To use private or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models), log-in with `hf
+        auth login`.
 
         </Tip>
 
diffusers/pipelines/pipeline_loading_utils.py

@@ -371,6 +371,22 @@ def maybe_raise_or_warn(
         )
 
 
+# a simpler version of get_class_obj_and_candidates, it won't work with custom code
+def simple_get_class_obj(library_name, class_name):
+    from diffusers import pipelines
+
+    is_pipeline_module = hasattr(pipelines, library_name)
+
+    if is_pipeline_module:
+        pipeline_module = getattr(pipelines, library_name)
+        class_obj = getattr(pipeline_module, class_name)
+    else:
+        library = importlib.import_module(library_name)
+        class_obj = getattr(library, class_name)
+
+    return class_obj
+
+
 def get_class_obj_and_candidates(
     library_name, class_name, importable_classes, pipelines, is_pipeline_module, component_name=None, cache_dir=None
 ):

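A usage sketch for the helper: a name that exists under `diffusers.pipelines` resolves as a pipeline module, anything else is imported as a library (calls illustrative):

from diffusers.pipelines.pipeline_loading_utils import simple_get_class_obj

simple_get_class_obj("stable_diffusion", "StableDiffusionPipeline")  # pipeline-module path
simple_get_class_obj("diffusers", "UNet2DConditionModel")            # importlib path
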
@@ -452,7 +468,7 @@ def _get_pipeline_class(
             revision=revision,
         )
 
-    if class_obj.__name__ != "DiffusionPipeline":
+    if class_obj.__name__ != "DiffusionPipeline" and class_obj.__name__ != "ModularPipeline":
         return class_obj
 
     diffusers_module = importlib.import_module(class_obj.__module__.split(".")[0])

@@ -597,6 +613,9 @@ def _assign_components_to_devices(
 
 
 def _get_final_device_map(device_map, pipeline_class, passed_class_obj, init_dict, library, max_memory, **kwargs):
+    # TODO: separate out different device_map methods when it gets to it.
+    if device_map != "balanced":
+        return device_map
     # To avoid circular import problem.
     from diffusers import pipelines
 
@@ -892,7 +911,10 @@ def _fetch_class_library_tuple(module):
     library = not_compiled_module.__module__
 
     # retrieve class_name
-    class_name = not_compiled_module.__class__.__name__
+    if isinstance(not_compiled_module, type):
+        class_name = not_compiled_module.__name__
+    else:
+        class_name = not_compiled_module.__class__.__name__
 
     return (library, class_name)
 
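The new branch exists because a class object reports its metaclass through `__class__.__name__`; only instances should take that path. A minimal illustration:

class Foo:
    pass

Foo.__class__.__name__    # "type" -- what the old single code path produced for classes
Foo.__name__              # "Foo"  -- the new branch for class objects
Foo().__class__.__name__  # "Foo"  -- the unchanged branch for instances
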
diffusers/pipelines/pipeline_utils.py

@@ -108,7 +108,7 @@ LIBRARIES = []
 for library in LOADABLE_CLASSES:
     LIBRARIES.append(library)
 
-SUPPORTED_DEVICE_MAP = ["balanced"]
+SUPPORTED_DEVICE_MAP = ["balanced"] + [get_device()]
 
 logger = logging.get_logger(__name__)
 
@@ -710,8 +710,8 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
 
         <Tip>
 
-        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with
-        `huggingface-cli login`.
+        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf
+        auth login`.
 
         </Tip>
 
@@ -988,12 +988,15 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
         _maybe_warn_for_wrong_component_in_quant_config(init_dict, quantization_config)
         for name, (library_name, class_name) in logging.tqdm(init_dict.items(), desc="Loading pipeline components..."):
             # 7.1 device_map shenanigans
-            if final_device_map is not None and len(final_device_map) > 0:
-                component_device = final_device_map.get(name, None)
-                if component_device is not None:
-                    current_device_map = {"": component_device}
-                else:
-                    current_device_map = None
+            if final_device_map is not None:
+                if isinstance(final_device_map, dict) and len(final_device_map) > 0:
+                    component_device = final_device_map.get(name, None)
+                    if component_device is not None:
+                        current_device_map = {"": component_device}
+                    else:
+                        current_device_map = None
+                elif isinstance(final_device_map, str):
+                    current_device_map = final_device_map
 
             # 7.2 - now that JAX/Flax is an official framework of the library, we might load from Flax names
             class_name = class_name[4:] if class_name.startswith("Flax") else class_name

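Together with the `SUPPORTED_DEVICE_MAP` and `_get_final_device_map` hunks above, this lets a plain device string flow through component loading unchanged. A sketch (repo id illustrative):

import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    device_map="cuda",  # previously only "balanced" passed validation
)
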
@@ -1096,6 +1099,8 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
         model.register_to_config(_name_or_path=pretrained_model_name_or_path)
         if device_map is not None:
             setattr(model, "hf_device_map", final_device_map)
+        if quantization_config is not None:
+            setattr(model, "quantization_config", quantization_config)
         return model
 
     @property

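The new attribute records the pipeline-level quantization config (moved into `diffusers/quantizers/pipe_quant_config.py` in this release) on the returned pipeline. A sketch, assuming the documented `PipelineQuantizationConfig` API:

import torch
from diffusers import DiffusionPipeline
from diffusers.quantizers import PipelineQuantizationConfig

pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.bfloat16,
    quantization_config=PipelineQuantizationConfig(
        quant_backend="bitsandbytes_4bit",
        quant_kwargs={"load_in_4bit": True},
        components_to_quantize=["transformer"],
    ),
)
print(pipe.quantization_config)  # now set by the hunk above
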
@@ -1428,8 +1433,8 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
 
         <Tip>
 
-        To use private or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models), log-in with
-        `huggingface-cli login`.
+        To use private or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models), log-in with `hf
+        auth login`.
 
         </Tip>
 
@@ -1986,11 +1991,13 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin):
             f"{'' if k.startswith('_') else '_'}{k}": v for k, v in original_config.items() if k not in pipeline_kwargs
         }
 
+        optional_components = (
+            pipeline._optional_components
+            if hasattr(pipeline, "_optional_components") and pipeline._optional_components
+            else []
+        )
         missing_modules = (
-            set(expected_modules)
-            - set(pipeline._optional_components)
-            - set(pipeline_kwargs.keys())
-            - set(true_optional_modules)
+            set(expected_modules) - set(optional_components) - set(pipeline_kwargs.keys()) - set(true_optional_modules)
         )
 
         if len(missing_modules) > 0:

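The guard matters for pipelines that never define `_optional_components`; the typical caller is `from_pipe`, which rebuilds one pipeline from another's components. A sketch (model id illustrative):

import torch
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
)
img2img = StableDiffusionImg2ImgPipeline.from_pipe(pipe)  # reuses components, no reload
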
diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py

@@ -256,7 +256,9 @@ class PixArtAlphaPipeline(DiffusionPipeline):
             Tokenizer of class
             [T5Tokenizer](https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5Tokenizer).
         transformer ([`PixArtTransformer2DModel`]):
-            A text conditioned `PixArtTransformer2DModel` to denoise the encoded image latents.
+            A text conditioned `PixArtTransformer2DModel` to denoise the encoded image latents. Initially published as
+            [`Transformer2DModel`](https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS/blob/main/transformer/config.json#L2)
+            in the config, but the mismatch can be ignored.
         scheduler ([`SchedulerMixin`]):
             A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
     """

diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py

@@ -185,6 +185,26 @@ def retrieve_timesteps(
 class PixArtSigmaPipeline(DiffusionPipeline):
     r"""
     Pipeline for text-to-image generation using PixArt-Sigma.
+
+    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
+    library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
+
+    Args:
+        vae ([`AutoencoderKL`]):
+            Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
+        text_encoder ([`T5EncoderModel`]):
+            Frozen text-encoder. PixArt-Alpha uses
+            [T5](https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5EncoderModel), specifically the
+            [t5-v1_1-xxl](https://huggingface.co/PixArt-alpha/PixArt-alpha/tree/main/t5-v1_1-xxl) variant.
+        tokenizer (`T5Tokenizer`):
+            Tokenizer of class
+            [T5Tokenizer](https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5Tokenizer).
+        transformer ([`PixArtTransformer2DModel`]):
+            A text conditioned `PixArtTransformer2DModel` to denoise the encoded image latents. Initially published as
+            [`Transformer2DModel`](https://huggingface.co/PixArt-alpha/PixArt-Sigma-XL-2-1024-MS/blob/main/transformer/config.json#L2)
+            in the config, but the mismatch can be ignored.
+        scheduler ([`SchedulerMixin`]):
+            A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
     """
 
     bad_punct_regex = re.compile(

diffusers/pipelines/qwenimage/__init__.py (new file)

@@ -0,0 +1,55 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_additional_imports = {}
+_import_structure = {"pipeline_output": ["QwenImagePipelineOutput", "QwenImagePriorReduxPipelineOutput"]}
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["modeling_qwenimage"] = ["ReduxImageEncoder"]
+    _import_structure["pipeline_qwenimage"] = ["QwenImagePipeline"]
+    _import_structure["pipeline_qwenimage_edit"] = ["QwenImageEditPipeline"]
+    _import_structure["pipeline_qwenimage_img2img"] = ["QwenImageImg2ImgPipeline"]
+    _import_structure["pipeline_qwenimage_inpaint"] = ["QwenImageInpaintPipeline"]
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *  # noqa F403
+    else:
+        from .pipeline_qwenimage import QwenImagePipeline
+        from .pipeline_qwenimage_edit import QwenImageEditPipeline
+        from .pipeline_qwenimage_img2img import QwenImageImg2ImgPipeline
+        from .pipeline_qwenimage_inpaint import QwenImageInpaintPipeline
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
+    for name, value in _additional_imports.items():
+        setattr(sys.modules[__name__], name, value)

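With the lazy module in place, the new pipelines are importable at package level. A usage sketch (the `Qwen/Qwen-Image` repo id and call signature follow the release announcement and are assumptions here):

import torch
from diffusers import QwenImagePipeline

pipe = QwenImagePipeline.from_pretrained("Qwen/Qwen-Image", torch_dtype=torch.bfloat16).to("cuda")
image = pipe(prompt="a coffee shop storefront with a neon sign", num_inference_steps=50).images[0]
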
diffusers/pipelines/qwenimage/pipeline_output.py (new file)

@@ -0,0 +1,21 @@
+from dataclasses import dataclass
+from typing import List, Union
+
+import numpy as np
+import PIL.Image
+
+from ...utils import BaseOutput
+
+
+@dataclass
+class QwenImagePipelineOutput(BaseOutput):
+    """
+    Output class for Stable Diffusion pipelines.
+
+    Args:
+        images (`List[PIL.Image.Image]` or `np.ndarray`)
+            List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
+            num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline.
+    """
+
+    images: Union[List[PIL.Image.Image], np.ndarray]