diffusers 0.27.2__py3-none-any.whl → 0.28.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +26 -1
- diffusers/callbacks.py +156 -0
- diffusers/commands/env.py +110 -6
- diffusers/configuration_utils.py +33 -11
- diffusers/dependency_versions_table.py +2 -1
- diffusers/image_processor.py +158 -45
- diffusers/loaders/__init__.py +2 -5
- diffusers/loaders/autoencoder.py +4 -4
- diffusers/loaders/controlnet.py +4 -4
- diffusers/loaders/ip_adapter.py +80 -22
- diffusers/loaders/lora.py +134 -20
- diffusers/loaders/lora_conversion_utils.py +46 -43
- diffusers/loaders/peft.py +4 -3
- diffusers/loaders/single_file.py +401 -170
- diffusers/loaders/single_file_model.py +290 -0
- diffusers/loaders/single_file_utils.py +616 -672
- diffusers/loaders/textual_inversion.py +41 -20
- diffusers/loaders/unet.py +168 -115
- diffusers/loaders/unet_loader_utils.py +163 -0
- diffusers/models/__init__.py +8 -0
- diffusers/models/activations.py +23 -3
- diffusers/models/attention.py +10 -11
- diffusers/models/attention_processor.py +475 -148
- diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
- diffusers/models/autoencoders/autoencoder_kl.py +18 -19
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
- diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
- diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
- diffusers/models/autoencoders/vae.py +23 -24
- diffusers/models/controlnet.py +12 -9
- diffusers/models/controlnet_flax.py +4 -4
- diffusers/models/controlnet_xs.py +1915 -0
- diffusers/models/downsampling.py +17 -18
- diffusers/models/embeddings.py +363 -32
- diffusers/models/model_loading_utils.py +177 -0
- diffusers/models/modeling_flax_pytorch_utils.py +2 -1
- diffusers/models/modeling_flax_utils.py +4 -4
- diffusers/models/modeling_outputs.py +14 -0
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +175 -99
- diffusers/models/normalization.py +2 -1
- diffusers/models/resnet.py +18 -23
- diffusers/models/transformer_temporal.py +3 -3
- diffusers/models/transformers/__init__.py +3 -0
- diffusers/models/transformers/dit_transformer_2d.py +240 -0
- diffusers/models/transformers/dual_transformer_2d.py +4 -4
- diffusers/models/transformers/hunyuan_transformer_2d.py +427 -0
- diffusers/models/transformers/pixart_transformer_2d.py +336 -0
- diffusers/models/transformers/prior_transformer.py +7 -7
- diffusers/models/transformers/t5_film_transformer.py +17 -19
- diffusers/models/transformers/transformer_2d.py +292 -184
- diffusers/models/transformers/transformer_temporal.py +10 -10
- diffusers/models/unets/unet_1d.py +5 -5
- diffusers/models/unets/unet_1d_blocks.py +29 -29
- diffusers/models/unets/unet_2d.py +6 -6
- diffusers/models/unets/unet_2d_blocks.py +137 -128
- diffusers/models/unets/unet_2d_condition.py +19 -15
- diffusers/models/unets/unet_2d_condition_flax.py +6 -5
- diffusers/models/unets/unet_3d_blocks.py +79 -77
- diffusers/models/unets/unet_3d_condition.py +13 -9
- diffusers/models/unets/unet_i2vgen_xl.py +14 -13
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +114 -14
- diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
- diffusers/models/unets/unet_stable_cascade.py +16 -13
- diffusers/models/upsampling.py +17 -20
- diffusers/models/vq_model.py +16 -15
- diffusers/pipelines/__init__.py +27 -3
- diffusers/pipelines/amused/pipeline_amused.py +12 -12
- diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
- diffusers/pipelines/animatediff/pipeline_output.py +3 -2
- diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
- diffusers/pipelines/auto_pipeline.py +21 -17
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
- diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
- diffusers/pipelines/controlnet_xs/__init__.py +68 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
- diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -18
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +7 -4
- diffusers/pipelines/free_init_utils.py +39 -38
- diffusers/pipelines/hunyuandit/__init__.py +48 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +881 -0
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
- diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
- diffusers/pipelines/marigold/__init__.py +50 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
- diffusers/pipelines/pia/pipeline_pia.py +39 -125
- diffusers/pipelines/pipeline_flax_utils.py +4 -4
- diffusers/pipelines/pipeline_loading_utils.py +269 -23
- diffusers/pipelines/pipeline_utils.py +266 -37
- diffusers/pipelines/pixart_alpha/__init__.py +8 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +69 -79
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
- diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
- diffusers/pipelines/shap_e/renderer.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +18 -18
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
- diffusers/pipelines/stable_diffusion/__init__.py +0 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
- diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -39
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
- diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
- diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
- diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
- diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
- diffusers/schedulers/__init__.py +2 -2
- diffusers/schedulers/deprecated/__init__.py +1 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
- diffusers/schedulers/scheduling_amused.py +5 -5
- diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
- diffusers/schedulers/scheduling_consistency_models.py +20 -26
- diffusers/schedulers/scheduling_ddim.py +22 -24
- diffusers/schedulers/scheduling_ddim_flax.py +2 -1
- diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
- diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
- diffusers/schedulers/scheduling_ddpm.py +20 -22
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
- diffusers/schedulers/scheduling_deis_multistep.py +42 -42
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +103 -77
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
- diffusers/schedulers/scheduling_dpmsolver_sde.py +23 -23
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +86 -65
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +75 -54
- diffusers/schedulers/scheduling_edm_euler.py +50 -31
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +23 -29
- diffusers/schedulers/scheduling_euler_discrete.py +160 -68
- diffusers/schedulers/scheduling_heun_discrete.py +57 -39
- diffusers/schedulers/scheduling_ipndm.py +8 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +19 -19
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +19 -19
- diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
- diffusers/schedulers/scheduling_lcm.py +21 -23
- diffusers/schedulers/scheduling_lms_discrete.py +24 -26
- diffusers/schedulers/scheduling_pndm.py +20 -20
- diffusers/schedulers/scheduling_repaint.py +20 -20
- diffusers/schedulers/scheduling_sasolver.py +55 -54
- diffusers/schedulers/scheduling_sde_ve.py +19 -19
- diffusers/schedulers/scheduling_tcd.py +39 -30
- diffusers/schedulers/scheduling_unclip.py +15 -15
- diffusers/schedulers/scheduling_unipc_multistep.py +111 -41
- diffusers/schedulers/scheduling_utils.py +14 -5
- diffusers/schedulers/scheduling_utils_flax.py +3 -3
- diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
- diffusers/training_utils.py +56 -1
- diffusers/utils/__init__.py +7 -0
- diffusers/utils/doc_utils.py +1 -0
- diffusers/utils/dummy_pt_objects.py +75 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +105 -0
- diffusers/utils/dynamic_modules_utils.py +24 -11
- diffusers/utils/hub_utils.py +3 -2
- diffusers/utils/import_utils.py +91 -0
- diffusers/utils/loading_utils.py +2 -2
- diffusers/utils/logging.py +1 -1
- diffusers/utils/peft_utils.py +32 -5
- diffusers/utils/state_dict_utils.py +11 -2
- diffusers/utils/testing_utils.py +71 -6
- diffusers/utils/torch_utils.py +1 -0
- diffusers/video_processor.py +113 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/METADATA +7 -7
- diffusers-0.28.1.dist-info/RECORD +419 -0
- diffusers-0.27.2.dist-info/RECORD +0 -399
- {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/LICENSE +0 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/WHEEL +0 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/entry_points.txt +0 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.1.dist-info}/top_level.txt +0 -0
diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py
CHANGED
@@ -197,7 +197,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
|
|
197
197
|
and not isinstance(image, list)
|
198
198
|
):
|
199
199
|
raise ValueError(
|
200
|
-
"`image` has to be of type `torch.
|
200
|
+
"`image` has to be of type `torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
|
201
201
|
f" {type(image)}"
|
202
202
|
)
|
203
203
|
|
@@ -214,7 +214,12 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
|
|
214
214
|
|
215
215
|
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
216
216
|
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
217
|
-
shape = (
|
217
|
+
shape = (
|
218
|
+
batch_size,
|
219
|
+
num_channels_latents,
|
220
|
+
int(height) // self.vae_scale_factor,
|
221
|
+
int(width) // self.vae_scale_factor,
|
222
|
+
)
|
218
223
|
if isinstance(generator, list) and len(generator) != batch_size:
|
219
224
|
raise ValueError(
|
220
225
|
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
@@ -242,10 +247,10 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
|
|
242
247
|
num_images_per_prompt: Optional[int] = 1,
|
243
248
|
eta: float = 0.0,
|
244
249
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
245
|
-
latents: Optional[torch.
|
250
|
+
latents: Optional[torch.Tensor] = None,
|
246
251
|
output_type: Optional[str] = "pil",
|
247
252
|
return_dict: bool = True,
|
248
|
-
callback: Optional[Callable[[int, int, torch.
|
253
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
249
254
|
callback_steps: int = 1,
|
250
255
|
**kwargs,
|
251
256
|
):
|
@@ -276,7 +281,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
|
|
276
281
|
generator (`torch.Generator`, *optional*):
|
277
282
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
278
283
|
generation deterministic.
|
279
|
-
latents (`torch.
|
284
|
+
latents (`torch.Tensor`, *optional*):
|
280
285
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
281
286
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
282
287
|
tensor is generated by sampling using the supplied random `generator`.
|
@@ -287,7 +292,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
|
|
287
292
|
plain tuple.
|
288
293
|
callback (`Callable`, *optional*):
|
289
294
|
A function that calls every `callback_steps` steps during inference. The function is called with the
|
290
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.
|
295
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
291
296
|
callback_steps (`int`, *optional*, defaults to 1):
|
292
297
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
293
298
|
every step.
|
diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py
CHANGED
@@ -300,7 +300,12 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
|
|
300
300
|
|
301
301
|
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
302
302
|
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
303
|
-
shape = (
|
303
|
+
shape = (
|
304
|
+
batch_size,
|
305
|
+
num_channels_latents,
|
306
|
+
int(height) // self.vae_scale_factor,
|
307
|
+
int(width) // self.vae_scale_factor,
|
308
|
+
)
|
304
309
|
if isinstance(generator, list) and len(generator) != batch_size:
|
305
310
|
raise ValueError(
|
306
311
|
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
@@ -328,10 +333,10 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
|
|
328
333
|
num_images_per_prompt: Optional[int] = 1,
|
329
334
|
eta: float = 0.0,
|
330
335
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
331
|
-
latents: Optional[torch.
|
336
|
+
latents: Optional[torch.Tensor] = None,
|
332
337
|
output_type: Optional[str] = "pil",
|
333
338
|
return_dict: bool = True,
|
334
|
-
callback: Optional[Callable[[int, int, torch.
|
339
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
335
340
|
callback_steps: int = 1,
|
336
341
|
**kwargs,
|
337
342
|
):
|
@@ -362,7 +367,7 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
|
|
362
367
|
generator (`torch.Generator`, *optional*):
|
363
368
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
364
369
|
generation deterministic.
|
365
|
-
latents (`torch.
|
370
|
+
latents (`torch.Tensor`, *optional*):
|
366
371
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
367
372
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
368
373
|
tensor is generated by sampling using the supplied random `generator`.
|
@@ -373,7 +378,7 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
|
|
373
378
|
plain tuple.
|
374
379
|
callback (`Callable`, *optional*):
|
375
380
|
A function that calls every `callback_steps` steps during inference. The function is called with the
|
376
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.
|
381
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
377
382
|
callback_steps (`int`, *optional*, defaults to 1):
|
378
383
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
379
384
|
every step.
|
@@ -169,10 +169,10 @@ class VQDiffusionPipeline(DiffusionPipeline):
|
|
169
169
|
truncation_rate: float = 1.0,
|
170
170
|
num_images_per_prompt: int = 1,
|
171
171
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
172
|
-
latents: Optional[torch.
|
172
|
+
latents: Optional[torch.Tensor] = None,
|
173
173
|
output_type: Optional[str] = "pil",
|
174
174
|
return_dict: bool = True,
|
175
|
-
callback: Optional[Callable[[int, int, torch.
|
175
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
176
176
|
callback_steps: int = 1,
|
177
177
|
) -> Union[ImagePipelineOutput, Tuple]:
|
178
178
|
"""
|
@@ -196,7 +196,7 @@ class VQDiffusionPipeline(DiffusionPipeline):
|
|
196
196
|
generator (`torch.Generator`, *optional*):
|
197
197
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
198
198
|
generation deterministic.
|
199
|
-
latents (`torch.
|
199
|
+
latents (`torch.Tensor` of shape (batch), *optional*):
|
200
200
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
201
201
|
generation. Must be valid embedding indices.If not provided, a latents tensor will be generated of
|
202
202
|
completely masked latent pixels.
|
@@ -206,7 +206,7 @@ class VQDiffusionPipeline(DiffusionPipeline):
|
|
206
206
|
Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
|
207
207
|
callback (`Callable`, *optional*):
|
208
208
|
A function that calls every `callback_steps` steps during inference. The function is called with the
|
209
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.
|
209
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
210
210
|
callback_steps (`int`, *optional*, defaults to 1):
|
211
211
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
212
212
|
every step.
|
@@ -301,7 +301,7 @@ class VQDiffusionPipeline(DiffusionPipeline):
|
|
301
301
|
|
302
302
|
return ImagePipelineOutput(images=image)
|
303
303
|
|
304
|
-
def truncate(self, log_p_x_0: torch.
|
304
|
+
def truncate(self, log_p_x_0: torch.Tensor, truncation_rate: float) -> torch.Tensor:
|
305
305
|
"""
|
306
306
|
Truncates `log_p_x_0` such that for each column vector, the total cumulative probability is `truncation_rate`
|
307
307
|
The lowest probabilities that would increase the cumulative probability above `truncation_rate` are set to
|
@@ -22,7 +22,7 @@ from typing import Dict, List, Optional, Tuple, Union
|
|
22
22
|
|
23
23
|
import torch
|
24
24
|
|
25
|
-
from ...models import AutoencoderKL,
|
25
|
+
from ...models import AutoencoderKL, DiTTransformer2DModel
|
26
26
|
from ...schedulers import KarrasDiffusionSchedulers
|
27
27
|
from ...utils.torch_utils import randn_tensor
|
28
28
|
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
@@ -36,8 +36,8 @@ class DiTPipeline(DiffusionPipeline):
|
|
36
36
|
implemented for all pipelines (downloading, saving, running on a particular device, etc.).
|
37
37
|
|
38
38
|
Parameters:
|
39
|
-
transformer ([`
|
40
|
-
A class conditioned `
|
39
|
+
transformer ([`DiTTransformer2DModel`]):
|
40
|
+
A class conditioned `DiTTransformer2DModel` to denoise the encoded image latents.
|
41
41
|
vae ([`AutoencoderKL`]):
|
42
42
|
Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.
|
43
43
|
scheduler ([`DDIMScheduler`]):
|
@@ -48,7 +48,7 @@ class DiTPipeline(DiffusionPipeline):
|
|
48
48
|
|
49
49
|
def __init__(
|
50
50
|
self,
|
51
|
-
transformer:
|
51
|
+
transformer: DiTTransformer2DModel,
|
52
52
|
vae: AutoencoderKL,
|
53
53
|
scheduler: KarrasDiffusionSchedulers,
|
54
54
|
id2label: Optional[Dict[int, str]] = None,
|
@@ -227,6 +227,9 @@ class DiTPipeline(DiffusionPipeline):
|
|
227
227
|
if output_type == "pil":
|
228
228
|
samples = self.numpy_to_pil(samples)
|
229
229
|
|
230
|
+
# Offload all models
|
231
|
+
self.maybe_free_model_hooks()
|
232
|
+
|
230
233
|
if not return_dict:
|
231
234
|
return (samples,)
|
232
235
|
|
@@ -41,20 +41,20 @@ class FreeInitMixin:
|
|
41
41
|
num_iters (`int`, *optional*, defaults to `3`):
|
42
42
|
Number of FreeInit noise re-initialization iterations.
|
43
43
|
use_fast_sampling (`bool`, *optional*, defaults to `False`):
|
44
|
-
Whether or not to speedup sampling procedure at the cost of probably lower quality results. Enables
|
45
|
-
|
44
|
+
Whether or not to speedup sampling procedure at the cost of probably lower quality results. Enables the
|
45
|
+
"Coarse-to-Fine Sampling" strategy, as mentioned in the paper, if set to `True`.
|
46
46
|
method (`str`, *optional*, defaults to `butterworth`):
|
47
|
-
Must be one of `butterworth`, `ideal` or `gaussian` to use as the filtering method for the
|
48
|
-
|
47
|
+
Must be one of `butterworth`, `ideal` or `gaussian` to use as the filtering method for the FreeInit low
|
48
|
+
pass filter.
|
49
49
|
order (`int`, *optional*, defaults to `4`):
|
50
50
|
Order of the filter used in `butterworth` method. Larger values lead to `ideal` method behaviour
|
51
51
|
whereas lower values lead to `gaussian` method behaviour.
|
52
52
|
spatial_stop_frequency (`float`, *optional*, defaults to `0.25`):
|
53
|
-
Normalized stop frequency for spatial dimensions. Must be between 0 to 1. Referred to as `d_s` in
|
54
|
-
|
53
|
+
Normalized stop frequency for spatial dimensions. Must be between 0 to 1. Referred to as `d_s` in the
|
54
|
+
original implementation.
|
55
55
|
temporal_stop_frequency (`float`, *optional*, defaults to `0.25`):
|
56
|
-
Normalized stop frequency for temporal dimensions. Must be between 0 to 1. Referred to as `d_t` in
|
57
|
-
|
56
|
+
Normalized stop frequency for temporal dimensions. Must be between 0 to 1. Referred to as `d_t` in the
|
57
|
+
original implementation.
|
58
58
|
"""
|
59
59
|
self._free_init_num_iters = num_iters
|
60
60
|
self._free_init_use_fast_sampling = use_fast_sampling
|
@@ -146,39 +146,40 @@ class FreeInitMixin:
|
|
146
146
|
):
|
147
147
|
if free_init_iteration == 0:
|
148
148
|
self._free_init_initial_noise = latents.detach().clone()
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
latents = latents.to(dtype)
|
149
|
+
else:
|
150
|
+
latent_shape = latents.shape
|
151
|
+
|
152
|
+
free_init_filter_shape = (1, *latent_shape[1:])
|
153
|
+
free_init_freq_filter = self._get_free_init_freq_filter(
|
154
|
+
shape=free_init_filter_shape,
|
155
|
+
device=device,
|
156
|
+
filter_type=self._free_init_method,
|
157
|
+
order=self._free_init_order,
|
158
|
+
spatial_stop_frequency=self._free_init_spatial_stop_frequency,
|
159
|
+
temporal_stop_frequency=self._free_init_temporal_stop_frequency,
|
160
|
+
)
|
161
|
+
|
162
|
+
current_diffuse_timestep = self.scheduler.config.num_train_timesteps - 1
|
163
|
+
diffuse_timesteps = torch.full((latent_shape[0],), current_diffuse_timestep).long()
|
164
|
+
|
165
|
+
z_t = self.scheduler.add_noise(
|
166
|
+
original_samples=latents, noise=self._free_init_initial_noise, timesteps=diffuse_timesteps.to(device)
|
167
|
+
).to(dtype=torch.float32)
|
168
|
+
|
169
|
+
z_rand = randn_tensor(
|
170
|
+
shape=latent_shape,
|
171
|
+
generator=generator,
|
172
|
+
device=device,
|
173
|
+
dtype=torch.float32,
|
174
|
+
)
|
175
|
+
latents = self._apply_freq_filter(z_t, z_rand, low_pass_filter=free_init_freq_filter)
|
176
|
+
latents = latents.to(dtype)
|
178
177
|
|
179
178
|
# Coarse-to-Fine Sampling for faster inference (can lead to lower quality)
|
180
179
|
if self._free_init_use_fast_sampling:
|
181
|
-
num_inference_steps =
|
180
|
+
num_inference_steps = max(
|
181
|
+
1, int(num_inference_steps / self._free_init_num_iters * (free_init_iteration + 1))
|
182
|
+
)
|
182
183
|
self.scheduler.set_timesteps(num_inference_steps, device=device)
|
183
184
|
|
184
185
|
return latents, self.scheduler.timesteps
|
@@ -0,0 +1,48 @@
|
|
1
|
+
from typing import TYPE_CHECKING
|
2
|
+
|
3
|
+
from ...utils import (
|
4
|
+
DIFFUSERS_SLOW_IMPORT,
|
5
|
+
OptionalDependencyNotAvailable,
|
6
|
+
_LazyModule,
|
7
|
+
get_objects_from_module,
|
8
|
+
is_torch_available,
|
9
|
+
is_transformers_available,
|
10
|
+
)
|
11
|
+
|
12
|
+
|
13
|
+
_dummy_objects = {}
|
14
|
+
_import_structure = {}
|
15
|
+
|
16
|
+
|
17
|
+
try:
|
18
|
+
if not (is_transformers_available() and is_torch_available()):
|
19
|
+
raise OptionalDependencyNotAvailable()
|
20
|
+
except OptionalDependencyNotAvailable:
|
21
|
+
from ...utils import dummy_torch_and_transformers_objects # noqa F403
|
22
|
+
|
23
|
+
_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
|
24
|
+
else:
|
25
|
+
_import_structure["pipeline_hunyuandit"] = ["HunyuanDiTPipeline"]
|
26
|
+
|
27
|
+
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
28
|
+
try:
|
29
|
+
if not (is_transformers_available() and is_torch_available()):
|
30
|
+
raise OptionalDependencyNotAvailable()
|
31
|
+
|
32
|
+
except OptionalDependencyNotAvailable:
|
33
|
+
from ...utils.dummy_torch_and_transformers_objects import *
|
34
|
+
else:
|
35
|
+
from .pipeline_hunyuandit import HunyuanDiTPipeline
|
36
|
+
|
37
|
+
else:
|
38
|
+
import sys
|
39
|
+
|
40
|
+
sys.modules[__name__] = _LazyModule(
|
41
|
+
__name__,
|
42
|
+
globals()["__file__"],
|
43
|
+
_import_structure,
|
44
|
+
module_spec=__spec__,
|
45
|
+
)
|
46
|
+
|
47
|
+
for name, value in _dummy_objects.items():
|
48
|
+
setattr(sys.modules[__name__], name, value)
|