diffusers 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +18 -1
- diffusers/callbacks.py +156 -0
- diffusers/commands/env.py +110 -6
- diffusers/configuration_utils.py +16 -11
- diffusers/dependency_versions_table.py +2 -1
- diffusers/image_processor.py +158 -45
- diffusers/loaders/__init__.py +2 -5
- diffusers/loaders/autoencoder.py +4 -4
- diffusers/loaders/controlnet.py +4 -4
- diffusers/loaders/ip_adapter.py +80 -22
- diffusers/loaders/lora.py +134 -20
- diffusers/loaders/lora_conversion_utils.py +46 -43
- diffusers/loaders/peft.py +4 -3
- diffusers/loaders/single_file.py +401 -170
- diffusers/loaders/single_file_model.py +290 -0
- diffusers/loaders/single_file_utils.py +616 -672
- diffusers/loaders/textual_inversion.py +41 -20
- diffusers/loaders/unet.py +168 -115
- diffusers/loaders/unet_loader_utils.py +163 -0
- diffusers/models/__init__.py +2 -0
- diffusers/models/activations.py +11 -3
- diffusers/models/attention.py +10 -11
- diffusers/models/attention_processor.py +367 -148
- diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
- diffusers/models/autoencoders/autoencoder_kl.py +18 -19
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
- diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
- diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
- diffusers/models/autoencoders/vae.py +23 -24
- diffusers/models/controlnet.py +12 -9
- diffusers/models/controlnet_flax.py +4 -4
- diffusers/models/controlnet_xs.py +1915 -0
- diffusers/models/downsampling.py +17 -18
- diffusers/models/embeddings.py +147 -24
- diffusers/models/model_loading_utils.py +149 -0
- diffusers/models/modeling_flax_pytorch_utils.py +2 -1
- diffusers/models/modeling_flax_utils.py +4 -4
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +118 -98
- diffusers/models/resnet.py +18 -23
- diffusers/models/transformer_temporal.py +3 -3
- diffusers/models/transformers/dual_transformer_2d.py +4 -4
- diffusers/models/transformers/prior_transformer.py +7 -7
- diffusers/models/transformers/t5_film_transformer.py +17 -19
- diffusers/models/transformers/transformer_2d.py +272 -156
- diffusers/models/transformers/transformer_temporal.py +10 -10
- diffusers/models/unets/unet_1d.py +5 -5
- diffusers/models/unets/unet_1d_blocks.py +29 -29
- diffusers/models/unets/unet_2d.py +6 -6
- diffusers/models/unets/unet_2d_blocks.py +137 -128
- diffusers/models/unets/unet_2d_condition.py +20 -15
- diffusers/models/unets/unet_2d_condition_flax.py +6 -5
- diffusers/models/unets/unet_3d_blocks.py +79 -77
- diffusers/models/unets/unet_3d_condition.py +13 -9
- diffusers/models/unets/unet_i2vgen_xl.py +14 -13
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +114 -14
- diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
- diffusers/models/unets/unet_stable_cascade.py +16 -13
- diffusers/models/upsampling.py +17 -20
- diffusers/models/vq_model.py +16 -15
- diffusers/pipelines/__init__.py +25 -3
- diffusers/pipelines/amused/pipeline_amused.py +12 -12
- diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
- diffusers/pipelines/animatediff/pipeline_output.py +3 -2
- diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
- diffusers/pipelines/auto_pipeline.py +21 -17
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
- diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
- diffusers/pipelines/controlnet_xs/__init__.py +68 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
- diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -21
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +3 -0
- diffusers/pipelines/free_init_utils.py +39 -38
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
- diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
- diffusers/pipelines/marigold/__init__.py +50 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
- diffusers/pipelines/pia/pipeline_pia.py +39 -125
- diffusers/pipelines/pipeline_flax_utils.py +4 -4
- diffusers/pipelines/pipeline_loading_utils.py +268 -23
- diffusers/pipelines/pipeline_utils.py +266 -37
- diffusers/pipelines/pixart_alpha/__init__.py +8 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
- diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
- diffusers/pipelines/shap_e/renderer.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +36 -22
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
- diffusers/pipelines/stable_diffusion/__init__.py +0 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
- diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -42
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
- diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
- diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
- diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
- diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
- diffusers/schedulers/__init__.py +2 -2
- diffusers/schedulers/deprecated/__init__.py +1 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
- diffusers/schedulers/scheduling_amused.py +5 -5
- diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
- diffusers/schedulers/scheduling_consistency_models.py +23 -25
- diffusers/schedulers/scheduling_ddim.py +22 -24
- diffusers/schedulers/scheduling_ddim_flax.py +2 -1
- diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
- diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
- diffusers/schedulers/scheduling_ddpm.py +20 -22
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
- diffusers/schedulers/scheduling_deis_multistep.py +46 -42
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -77
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
- diffusers/schedulers/scheduling_dpmsolver_sde.py +26 -22
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +90 -65
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +78 -53
- diffusers/schedulers/scheduling_edm_euler.py +53 -30
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +26 -28
- diffusers/schedulers/scheduling_euler_discrete.py +163 -67
- diffusers/schedulers/scheduling_heun_discrete.py +60 -38
- diffusers/schedulers/scheduling_ipndm.py +8 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +22 -18
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +22 -18
- diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
- diffusers/schedulers/scheduling_lcm.py +21 -23
- diffusers/schedulers/scheduling_lms_discrete.py +27 -25
- diffusers/schedulers/scheduling_pndm.py +20 -20
- diffusers/schedulers/scheduling_repaint.py +20 -20
- diffusers/schedulers/scheduling_sasolver.py +55 -54
- diffusers/schedulers/scheduling_sde_ve.py +19 -19
- diffusers/schedulers/scheduling_tcd.py +39 -30
- diffusers/schedulers/scheduling_unclip.py +15 -15
- diffusers/schedulers/scheduling_unipc_multistep.py +115 -41
- diffusers/schedulers/scheduling_utils.py +14 -5
- diffusers/schedulers/scheduling_utils_flax.py +3 -3
- diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
- diffusers/training_utils.py +56 -1
- diffusers/utils/__init__.py +7 -0
- diffusers/utils/doc_utils.py +1 -0
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
- diffusers/utils/dynamic_modules_utils.py +24 -11
- diffusers/utils/hub_utils.py +3 -2
- diffusers/utils/import_utils.py +91 -0
- diffusers/utils/loading_utils.py +2 -2
- diffusers/utils/logging.py +1 -1
- diffusers/utils/peft_utils.py +32 -5
- diffusers/utils/state_dict_utils.py +11 -2
- diffusers/utils/testing_utils.py +71 -6
- diffusers/utils/torch_utils.py +1 -0
- diffusers/video_processor.py +113 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/METADATA +7 -7
- diffusers-0.28.0.dist-info/RECORD +414 -0
- diffusers-0.27.1.dist-info/RECORD +0 -399
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/WHEEL +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
@@ -191,7 +191,12 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
|
|
191
191
|
|
192
192
|
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
193
193
|
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
194
|
-
shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
|
194
|
+
shape = (
|
195
|
+
batch_size,
|
196
|
+
num_channels_latents,
|
197
|
+
int(height) // self.vae_scale_factor,
|
198
|
+
int(width) // self.vae_scale_factor,
|
199
|
+
)
|
195
200
|
if isinstance(generator, list) and len(generator) != batch_size:
|
196
201
|
raise ValueError(
|
197
202
|
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
@@ -219,10 +224,10 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
|
|
219
224
|
num_images_per_prompt: int = 1,
|
220
225
|
eta: float = 0.0,
|
221
226
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
222
|
-
latents: Optional[torch.FloatTensor] = None,
|
227
|
+
latents: Optional[torch.Tensor] = None,
|
223
228
|
output_type: Optional[str] = "pil",
|
224
229
|
return_dict: bool = True,
|
225
|
-
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
230
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
226
231
|
callback_steps: int = 1,
|
227
232
|
editing_prompt: Optional[Union[str, List[str]]] = None,
|
228
233
|
editing_prompt_embeddings: Optional[torch.Tensor] = None,
|
@@ -263,7 +268,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
|
|
263
268
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
264
269
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
265
270
|
generation deterministic.
|
266
|
-
latents (`torch.FloatTensor`, *optional*):
|
271
|
+
latents (`torch.Tensor`, *optional*):
|
267
272
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
268
273
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
269
274
|
tensor is generated by sampling using the supplied random `generator`.
|
@@ -274,7 +279,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
|
|
274
279
|
plain tuple.
|
275
280
|
callback (`Callable`, *optional*):
|
276
281
|
A function that calls every `callback_steps` steps during inference. The function is called with the
|
277
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
282
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
278
283
|
callback_steps (`int`, *optional*, defaults to 1):
|
279
284
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
280
285
|
every step.
|
@@ -69,7 +69,7 @@ class ShapEPipelineOutput(BaseOutput):
|
|
69
69
|
Output class for [`ShapEPipeline`] and [`ShapEImg2ImgPipeline`].
|
70
70
|
|
71
71
|
Args:
|
72
|
-
images (`torch.FloatTensor`)
|
72
|
+
images (`torch.Tensor`)
|
73
73
|
A list of images for 3D rendering.
|
74
74
|
"""
|
75
75
|
|
@@ -187,7 +187,7 @@ class ShapEPipeline(DiffusionPipeline):
|
|
187
187
|
num_images_per_prompt: int = 1,
|
188
188
|
num_inference_steps: int = 25,
|
189
189
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
190
|
-
latents: Optional[torch.FloatTensor] = None,
|
190
|
+
latents: Optional[torch.Tensor] = None,
|
191
191
|
guidance_scale: float = 4.0,
|
192
192
|
frame_size: int = 64,
|
193
193
|
output_type: Optional[str] = "pil", # pil, np, latent, mesh
|
@@ -207,7 +207,7 @@ class ShapEPipeline(DiffusionPipeline):
|
|
207
207
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
208
208
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
209
209
|
generation deterministic.
|
210
|
-
latents (`torch.FloatTensor`, *optional*):
|
210
|
+
latents (`torch.Tensor`, *optional*):
|
211
211
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
212
212
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
213
213
|
tensor is generated by sampling using the supplied random `generator`.
|
@@ -70,7 +70,7 @@ class ShapEPipelineOutput(BaseOutput):
|
|
70
70
|
Output class for [`ShapEPipeline`] and [`ShapEImg2ImgPipeline`].
|
71
71
|
|
72
72
|
Args:
|
73
|
-
images (`torch.FloatTensor`)
|
73
|
+
images (`torch.Tensor`)
|
74
74
|
A list of images for 3D rendering.
|
75
75
|
"""
|
76
76
|
|
@@ -86,7 +86,7 @@ class ShapEImg2ImgPipeline(DiffusionPipeline):
|
|
86
86
|
|
87
87
|
Args:
|
88
88
|
prior ([`PriorTransformer`]):
|
89
|
-
The canonincal unCLIP prior to approximate the image embedding from the text embedding.
|
89
|
+
The canonical unCLIP prior to approximate the image embedding from the text embedding.
|
90
90
|
image_encoder ([`~transformers.CLIPVisionModel`]):
|
91
91
|
Frozen image-encoder.
|
92
92
|
image_processor ([`~transformers.CLIPImageProcessor`]):
|
@@ -169,7 +169,7 @@ class ShapEImg2ImgPipeline(DiffusionPipeline):
|
|
169
169
|
num_images_per_prompt: int = 1,
|
170
170
|
num_inference_steps: int = 25,
|
171
171
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
172
|
-
latents: Optional[torch.FloatTensor] = None,
|
172
|
+
latents: Optional[torch.Tensor] = None,
|
173
173
|
guidance_scale: float = 4.0,
|
174
174
|
frame_size: int = 64,
|
175
175
|
output_type: Optional[str] = "pil", # pil, np, latent, mesh
|
@@ -179,7 +179,7 @@ class ShapEImg2ImgPipeline(DiffusionPipeline):
|
|
179
179
|
The call function to the pipeline for generation.
|
180
180
|
|
181
181
|
Args:
|
182
|
-
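image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):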
|
182
|
+
image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
|
183
183
|
`Image` or tensor representing an image batch to be used as the starting point. Can also accept image
|
184
184
|
latents as image, but if passing latents directly it is not encoded again.
|
185
185
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
@@ -190,7 +190,7 @@ class ShapEImg2ImgPipeline(DiffusionPipeline):
|
|
190
190
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
191
191
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
192
192
|
generation deterministic.
|
193
|
-
latents (`torch.FloatTensor`, *optional*):
|
193
|
+
latents (`torch.Tensor`, *optional*):
|
194
194
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
195
195
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
196
196
|
tensor is generated by sampling using the supplied random `generator`.
|
@@ -239,15 +239,15 @@ class ShapEImg2ImgPipeline(DiffusionPipeline):
|
|
239
239
|
|
240
240
|
num_embeddings = self.prior.config.num_embeddings
|
241
241
|
embedding_dim = self.prior.config.embedding_dim
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
242
|
+
if latents is None:
|
243
|
+
latents = self.prepare_latents(
|
244
|
+
(batch_size, num_embeddings * embedding_dim),
|
245
|
+
image_embeds.dtype,
|
246
|
+
device,
|
247
|
+
generator,
|
248
|
+
latents,
|
249
|
+
self.scheduler,
|
250
|
+
)
|
251
251
|
|
252
252
|
# YiYi notes: for testing only to match ldm, we can directly create a latents with desired shape: batch_size, num_embeddings, embedding_dim
|
253
253
|
latents = latents.reshape(latents.shape[0], num_embeddings, embedding_dim)
|
@@ -844,7 +844,7 @@ class ShapERenderer(ModelMixin, ConfigMixin):
|
|
844
844
|
transmittance(t[i + 1]) := transmittance(t[i]). 4) The last term is integration to infinity (e.g. [t[-1],
|
845
845
|
math.inf]) that is evaluated by the void_model (i.e. we consider this space to be empty).
|
846
846
|
|
847
|
-
|
847
|
+
Args:
|
848
848
|
rays: [batch_size x ... x 2 x 3] origin and direction. sampler: disjoint volume integrals. n_samples:
|
849
849
|
number of ts to sample. prev_model_outputs: model outputs from the previous rendering step, including
|
850
850
|
|
@@ -100,8 +100,10 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
|
|
100
100
|
)
|
101
101
|
self.register_to_config(latent_dim_scale=latent_dim_scale)
|
102
102
|
|
103
|
-
def prepare_latents(self, image_embeddings, num_images_per_prompt, dtype, device, generator, latents, scheduler):
|
104
|
-
batch_size, channels, height, width = image_embeddings.shape
|
103
|
+
def prepare_latents(
|
104
|
+
self, batch_size, image_embeddings, num_images_per_prompt, dtype, device, generator, latents, scheduler
|
105
|
+
):
|
106
|
+
_, channels, height, width = image_embeddings.shape
|
105
107
|
latents_shape = (
|
106
108
|
batch_size * num_images_per_prompt,
|
107
109
|
4,
|
@@ -127,10 +129,10 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
|
|
127
129
|
do_classifier_free_guidance,
|
128
130
|
prompt=None,
|
129
131
|
negative_prompt=None,
|
130
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
131
|
-
prompt_embeds_pooled: Optional[torch.FloatTensor] = None,
|
132
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
133
|
-
negative_prompt_embeds_pooled: Optional[torch.FloatTensor] = None,
|
132
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
133
|
+
prompt_embeds_pooled: Optional[torch.Tensor] = None,
|
134
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
135
|
+
negative_prompt_embeds_pooled: Optional[torch.Tensor] = None,
|
134
136
|
):
|
135
137
|
if prompt_embeds is None:
|
136
138
|
# get prompt text embeddings
|
@@ -283,18 +285,18 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
|
|
283
285
|
@replace_example_docstring(EXAMPLE_DOC_STRING)
|
284
286
|
def __call__(
|
285
287
|
self,
|
286
|
-
image_embeddings: Union[torch.FloatTensor, List[torch.FloatTensor]],
|
288
|
+
image_embeddings: Union[torch.Tensor, List[torch.Tensor]],
|
287
289
|
prompt: Union[str, List[str]] = None,
|
288
290
|
num_inference_steps: int = 10,
|
289
291
|
guidance_scale: float = 0.0,
|
290
292
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
291
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
292
|
-
prompt_embeds_pooled: Optional[torch.FloatTensor] = None,
|
293
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
294
|
-
negative_prompt_embeds_pooled: Optional[torch.FloatTensor] = None,
|
293
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
294
|
+
prompt_embeds_pooled: Optional[torch.Tensor] = None,
|
295
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
296
|
+
negative_prompt_embeds_pooled: Optional[torch.Tensor] = None,
|
295
297
|
num_images_per_prompt: int = 1,
|
296
298
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
297
|
-
latents: Optional[torch.FloatTensor] = None,
|
299
|
+
latents: Optional[torch.Tensor] = None,
|
298
300
|
output_type: Optional[str] = "pil",
|
299
301
|
return_dict: bool = True,
|
300
302
|
callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
|
@@ -304,7 +306,7 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
|
|
304
306
|
Function invoked when calling the pipeline for generation.
|
305
307
|
|
306
308
|
Args:
|
307
|
-
image_embedding (`torch.FloatTensor` or `List[torch.FloatTensor]`):
|
309
|
+
image_embedding (`torch.Tensor` or `List[torch.Tensor]`):
|
308
310
|
Image Embeddings either extracted from an image or generated by a Prior Model.
|
309
311
|
prompt (`str` or `List[str]`):
|
310
312
|
The prompt or prompts to guide the image generation.
|
@@ -320,26 +322,26 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
|
|
320
322
|
negative_prompt (`str` or `List[str]`, *optional*):
|
321
323
|
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
|
322
324
|
if `decoder_guidance_scale` is less than `1`).
|
323
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
325
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
324
326
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
325
327
|
provided, text embeddings will be generated from `prompt` input argument.
|
326
|
-
prompt_embeds_pooled (`torch.FloatTensor`, *optional*):
|
328
|
+
prompt_embeds_pooled (`torch.Tensor`, *optional*):
|
327
329
|
Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
|
328
330
|
If not provided, pooled text embeddings will be generated from `prompt` input argument.
|
329
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
331
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
330
332
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
331
333
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
332
334
|
argument.
|
333
|
-
negative_prompt_embeds_pooled (`torch.FloatTensor`, *optional*):
|
335
|
+
negative_prompt_embeds_pooled (`torch.Tensor`, *optional*):
|
334
336
|
Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
335
|
-
weighting. If not provided, negative_prompt_embeds_pooled will be generated from `negative_prompt` input
|
336
|
-
argument.
|
337
|
+
weighting. If not provided, negative_prompt_embeds_pooled will be generated from `negative_prompt`
|
338
|
+
input argument.
|
337
339
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
338
340
|
The number of images to generate per prompt.
|
339
341
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
340
342
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
341
343
|
to make generation deterministic.
|
342
|
-
latents (`torch.FloatTensor`, *optional*):
|
344
|
+
latents (`torch.Tensor`, *optional*):
|
343
345
|
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
|
344
346
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
345
347
|
tensor will ge generated by sampling using the supplied random `generator`.
|
@@ -383,7 +385,19 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
|
|
383
385
|
)
|
384
386
|
if isinstance(image_embeddings, list):
|
385
387
|
image_embeddings = torch.cat(image_embeddings, dim=0)
|
386
|
-
|
388
|
+
|
389
|
+
if prompt is not None and isinstance(prompt, str):
|
390
|
+
batch_size = 1
|
391
|
+
elif prompt is not None and isinstance(prompt, list):
|
392
|
+
batch_size = len(prompt)
|
393
|
+
else:
|
394
|
+
batch_size = prompt_embeds.shape[0]
|
395
|
+
|
396
|
+
# Compute the effective number of images per prompt
|
397
|
+
# We must account for the fact that the image embeddings from the prior can be generated with num_images_per_prompt > 1
|
398
|
+
# This results in a case where a single prompt is associated with multiple image embeddings
|
399
|
+
# Divide the number of image embeddings by the batch size to determine if this is the case.
|
400
|
+
num_images_per_prompt = num_images_per_prompt * (image_embeddings.shape[0] // batch_size)
|
387
401
|
|
388
402
|
# 2. Encode caption
|
389
403
|
if prompt_embeds is None and negative_prompt_embeds is None:
|
@@ -417,7 +431,7 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
|
|
417
431
|
|
418
432
|
# 5. Prepare latents
|
419
433
|
latents = self.prepare_latents(
|
420
|
-
image_embeddings, num_images_per_prompt, dtype, device, generator, latents, self.scheduler
|
434
|
+
batch_size, image_embeddings, num_images_per_prompt, dtype, device, generator, latents, self.scheduler
|
421
435
|
)
|
422
436
|
|
423
437
|
# 6. Run denoising loop
|
@@ -31,7 +31,10 @@ TEXT2IMAGE_EXAMPLE_DOC_STRING = """
|
|
31
31
|
```py
|
32
32
|
>>> import torch
|
33
33
|
>>> from diffusers import StableCascadeCombinedPipeline
|
34
|
-
|
34
|
+
|
35
|
+
>>> pipe = StableCascadeCombinedPipeline.from_pretrained(
|
36
|
+
... "stabilityai/stable-cascade", variant="bf16", torch_dtype=torch.bfloat16
|
37
|
+
... )
|
35
38
|
>>> pipe.enable_model_cpu_offload()
|
36
39
|
>>> prompt = "an image of a shiba inu, donning a spacesuit and helmet"
|
37
40
|
>>> images = pipe(prompt=prompt)
|
@@ -68,6 +71,7 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
|
|
68
71
|
"""
|
69
72
|
|
70
73
|
_load_connected_pipes = True
|
74
|
+
_optional_components = ["prior_feature_extractor", "prior_image_encoder"]
|
71
75
|
|
72
76
|
def __init__(
|
73
77
|
self,
|
@@ -117,25 +121,25 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
|
|
117
121
|
def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
|
118
122
|
self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
|
119
123
|
|
120
|
-
def enable_model_cpu_offload(self, gpu_id=
|
124
|
+
def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
|
121
125
|
r"""
|
122
126
|
Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
|
123
127
|
to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
|
124
128
|
method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
|
125
129
|
`enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
|
126
130
|
"""
|
127
|
-
self.prior_pipe.enable_model_cpu_offload(gpu_id=gpu_id)
|
128
|
-
self.decoder_pipe.enable_model_cpu_offload(gpu_id=gpu_id)
|
131
|
+
self.prior_pipe.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
|
132
|
+
self.decoder_pipe.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
|
129
133
|
|
130
|
-
def enable_sequential_cpu_offload(self, gpu_id=
|
134
|
+
def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
|
131
135
|
r"""
|
132
136
|
Offloads all models (`unet`, `text_encoder`, `vae`, and `safety checker` state dicts) to CPU using 🤗
|
133
137
|
Accelerate, significantly reducing memory usage. Models are moved to a `torch.device('meta')` and loaded on a
|
134
138
|
GPU only when their specific submodule's `forward` method is called. Offloading happens on a submodule basis.
|
135
139
|
Memory savings are higher than using `enable_model_cpu_offload`, but performance is lower.
|
136
140
|
"""
|
137
|
-
self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
|
138
|
-
self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
|
141
|
+
self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
|
142
|
+
self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
|
139
143
|
|
140
144
|
def progress_bar(self, iterable=None, total=None):
|
141
145
|
self.prior_pipe.progress_bar(iterable=iterable, total=total)
|
@@ -158,13 +162,13 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
|
|
158
162
|
num_inference_steps: int = 12,
|
159
163
|
decoder_guidance_scale: float = 0.0,
|
160
164
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
161
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
162
|
-
prompt_embeds_pooled: Optional[torch.FloatTensor] = None,
|
163
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
164
|
-
negative_prompt_embeds_pooled: Optional[torch.FloatTensor] = None,
|
165
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
166
|
+
prompt_embeds_pooled: Optional[torch.Tensor] = None,
|
167
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
168
|
+
negative_prompt_embeds_pooled: Optional[torch.Tensor] = None,
|
165
169
|
num_images_per_prompt: int = 1,
|
166
170
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
167
|
-
latents: Optional[torch.FloatTensor] = None,
|
171
|
+
latents: Optional[torch.Tensor] = None,
|
168
172
|
output_type: Optional[str] = "pil",
|
169
173
|
return_dict: bool = True,
|
170
174
|
prior_callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
|
@@ -183,17 +187,17 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
|
|
183
187
|
negative_prompt (`str` or `List[str]`, *optional*):
|
184
188
|
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
|
185
189
|
if `guidance_scale` is less than `1`).
|
186
|
-
prompt_embeds (`torch.
|
190
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
187
191
|
Pre-generated text embeddings for the prior. Can be used to easily tweak text inputs, *e.g.* prompt
|
188
192
|
weighting. If not provided, text embeddings will be generated from `prompt` input argument.
|
189
|
-
prompt_embeds_pooled (`torch.
|
193
|
+
prompt_embeds_pooled (`torch.Tensor`, *optional*):
|
190
194
|
Pre-generated text embeddings for the prior. Can be used to easily tweak text inputs, *e.g.* prompt
|
191
195
|
weighting. If not provided, text embeddings will be generated from `prompt` input argument.
|
192
|
-
negative_prompt_embeds (`torch.
|
196
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
193
197
|
Pre-generated negative text embeddings for the prior. Can be used to easily tweak text inputs, *e.g.*
|
194
198
|
prompt weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt`
|
195
199
|
input argument.
|
196
|
-
negative_prompt_embeds_pooled (`torch.
|
200
|
+
negative_prompt_embeds_pooled (`torch.Tensor`, *optional*):
|
197
201
|
Pre-generated negative text embeddings for the prior. Can be used to easily tweak text inputs, *e.g.*
|
198
202
|
prompt weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt`
|
199
203
|
input argument.
|
@@ -226,7 +230,7 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
|
|
226
230
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
227
231
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
228
232
|
to make generation deterministic.
|
229
|
-
latents (`torch.FloatTensor`, *optional*):
|
233
|
+
latents (`torch.Tensor`, *optional*):
|
230
234
|
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
|
231
235
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
232
236
|
tensor will be generated by sampling using the supplied random `generator`.
|
@@ -242,7 +246,7 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
|
|
242
246
|
prior_callback_on_step_end_tensor_inputs (`List`, *optional*):
|
243
247
|
The list of tensor inputs for the `prior_callback_on_step_end` function. The tensors specified in the
|
244
248
|
list will be passed as `callback_kwargs` argument. You will only be able to include variables listed in
|
245
|
-
the `._callback_tensor_inputs` attribute of your
|
249
|
+
the `._callback_tensor_inputs` attribute of your pipeline class.
|
246
250
|
callback_on_step_end (`Callable`, *optional*):
|
247
251
|
A function that is called at the end of each denoising step during inference. The function is called
|
248
252
|
with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
|
@@ -251,7 +255,7 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
|
|
251
255
|
callback_on_step_end_tensor_inputs (`List`, *optional*):
|
252
256
|
The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
|
253
257
|
will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
|
254
|
-
`._callback_tensor_inputs` attribute of your
|
258
|
+
`._callback_tensor_inputs` attribute of your pipeline class.
|
255
259
|
|
256
260
|
Examples:
|
257
261
|
|
@@ -54,19 +54,19 @@ class StableCascadePriorPipelineOutput(BaseOutput):
|
|
54
54
|
Output class for StableCascadePriorPipeline.
|
55
55
|
|
56
56
|
Args:
|
57
|
-
image_embeddings (`torch.FloatTensor` or `np.ndarray`)
|
57
|
+
image_embeddings (`torch.Tensor` or `np.ndarray`):
|
58
58
|
Prior image embeddings for text prompt
|
59
|
-
prompt_embeds (`torch.
|
59
|
+
prompt_embeds (`torch.Tensor`):
|
60
60
|
Text embeddings for the prompt.
|
61
|
-
negative_prompt_embeds (`torch.
|
61
|
+
negative_prompt_embeds (`torch.Tensor`):
|
62
62
|
Text embeddings for the negative prompt.
|
63
63
|
"""
|
64
64
|
|
65
|
-
image_embeddings: Union[torch.
|
66
|
-
prompt_embeds: Union[torch.
|
67
|
-
prompt_embeds_pooled: Union[torch.
|
68
|
-
negative_prompt_embeds: Union[torch.
|
69
|
-
negative_prompt_embeds_pooled: Union[torch.
|
65
|
+
image_embeddings: Union[torch.Tensor, np.ndarray]
|
66
|
+
prompt_embeds: Union[torch.Tensor, np.ndarray]
|
67
|
+
prompt_embeds_pooled: Union[torch.Tensor, np.ndarray]
|
68
|
+
negative_prompt_embeds: Union[torch.Tensor, np.ndarray]
|
69
|
+
negative_prompt_embeds_pooled: Union[torch.Tensor, np.ndarray]
|
70
70
|
|
71
71
|
|
72
72
|
class StableCascadePriorPipeline(DiffusionPipeline):
|
@@ -80,7 +80,8 @@ class StableCascadePriorPipeline(DiffusionPipeline):
|
|
80
80
|
prior ([`StableCascadeUNet`]):
|
81
81
|
The Stable Cascade prior to approximate the image embedding from the text and/or image embedding.
|
82
82
|
text_encoder ([`CLIPTextModelWithProjection`]):
|
83
|
-
Frozen text-encoder
|
83
|
+
Frozen text-encoder
|
84
|
+
([laion/CLIP-ViT-bigG-14-laion2B-39B-b160k](https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k)).
|
84
85
|
feature_extractor ([`~transformers.CLIPImageProcessor`]):
|
85
86
|
Model that extracts features from generated images to be used as inputs for the `image_encoder`.
|
86
87
|
image_encoder ([`CLIPVisionModelWithProjection`]):
|
@@ -149,10 +150,10 @@ class StableCascadePriorPipeline(DiffusionPipeline):
|
|
149
150
|
do_classifier_free_guidance,
|
150
151
|
prompt=None,
|
151
152
|
negative_prompt=None,
|
152
|
-
prompt_embeds: Optional[torch.
|
153
|
-
prompt_embeds_pooled: Optional[torch.
|
154
|
-
negative_prompt_embeds: Optional[torch.
|
155
|
-
negative_prompt_embeds_pooled: Optional[torch.
|
153
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
154
|
+
prompt_embeds_pooled: Optional[torch.Tensor] = None,
|
155
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
156
|
+
negative_prompt_embeds_pooled: Optional[torch.Tensor] = None,
|
156
157
|
):
|
157
158
|
if prompt_embeds is None:
|
158
159
|
# get prompt text embeddings
|
@@ -373,14 +374,14 @@ class StableCascadePriorPipeline(DiffusionPipeline):
|
|
373
374
|
timesteps: List[float] = None,
|
374
375
|
guidance_scale: float = 4.0,
|
375
376
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
376
|
-
prompt_embeds: Optional[torch.
|
377
|
-
prompt_embeds_pooled: Optional[torch.
|
378
|
-
negative_prompt_embeds: Optional[torch.
|
379
|
-
negative_prompt_embeds_pooled: Optional[torch.
|
380
|
-
image_embeds: Optional[torch.
|
377
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
378
|
+
prompt_embeds_pooled: Optional[torch.Tensor] = None,
|
379
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
380
|
+
negative_prompt_embeds_pooled: Optional[torch.Tensor] = None,
|
381
|
+
image_embeds: Optional[torch.Tensor] = None,
|
381
382
|
num_images_per_prompt: Optional[int] = 1,
|
382
383
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
383
|
-
latents: Optional[torch.
|
384
|
+
latents: Optional[torch.Tensor] = None,
|
384
385
|
output_type: Optional[str] = "pt",
|
385
386
|
return_dict: bool = True,
|
386
387
|
callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
|
@@ -408,29 +409,29 @@ class StableCascadePriorPipeline(DiffusionPipeline):
|
|
408
409
|
negative_prompt (`str` or `List[str]`, *optional*):
|
409
410
|
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
|
410
411
|
if `decoder_guidance_scale` is less than `1`).
|
411
|
-
prompt_embeds (`torch.
|
412
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
412
413
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
413
414
|
provided, text embeddings will be generated from `prompt` input argument.
|
414
|
-
prompt_embeds_pooled (`torch.
|
415
|
+
prompt_embeds_pooled (`torch.Tensor`, *optional*):
|
415
416
|
Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
|
416
417
|
If not provided, pooled text embeddings will be generated from `prompt` input argument.
|
417
|
-
negative_prompt_embeds (`torch.
|
418
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
418
419
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
419
420
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
420
421
|
argument.
|
421
|
-
negative_prompt_embeds_pooled (`torch.
|
422
|
+
negative_prompt_embeds_pooled (`torch.Tensor`, *optional*):
|
422
423
|
Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
423
|
-
weighting. If not provided, negative_prompt_embeds_pooled will be generated from `negative_prompt`
|
424
|
-
argument.
|
425
|
-
image_embeds (`torch.
|
426
|
-
Pre-generated image embeddings. Can be used to easily tweak image inputs, *e.g.* prompt weighting.
|
427
|
-
|
424
|
+
weighting. If not provided, negative_prompt_embeds_pooled will be generated from `negative_prompt`
|
425
|
+
input argument.
|
426
|
+
image_embeds (`torch.Tensor`, *optional*):
|
427
|
+
Pre-generated image embeddings. Can be used to easily tweak image inputs, *e.g.* prompt weighting. If
|
428
|
+
not provided, image embeddings will be generated from `image` input argument if existing.
|
428
429
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
429
430
|
The number of images to generate per prompt.
|
430
431
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
431
432
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
432
433
|
to make generation deterministic.
|
433
|
-
latents (`torch.
|
434
|
+
latents (`torch.Tensor`, *optional*):
|
434
435
|
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
|
435
436
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
436
437
|
tensor will be generated by sampling using the supplied random `generator`.
|
@@ -452,9 +453,9 @@ class StableCascadePriorPipeline(DiffusionPipeline):
|
|
452
453
|
Examples:
|
453
454
|
|
454
455
|
Returns:
|
455
|
-
[`StableCascadePriorPipelineOutput`] or `tuple` [`StableCascadePriorPipelineOutput`] if
|
456
|
-
|
457
|
-
|
456
|
+
[`StableCascadePriorPipelineOutput`] or `tuple` [`StableCascadePriorPipelineOutput`] if `return_dict` is
|
457
|
+
True, otherwise a `tuple`. When returning a tuple, the first element is a list with the generated image
|
458
|
+
embeddings.
|
458
459
|
"""
|
459
460
|
|
460
461
|
# 0. Define commonly used variables
|
@@ -113,7 +113,6 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
|
113
113
|
from .pipeline_stable_diffusion import (
|
114
114
|
StableDiffusionPipeline,
|
115
115
|
StableDiffusionPipelineOutput,
|
116
|
-
StableDiffusionSafetyChecker,
|
117
116
|
)
|
118
117
|
from .pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipeline
|
119
118
|
from .pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipeline
|
@@ -12,7 +12,7 @@
|
|
12
12
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
13
|
# See the License for the specific language governing permissions and
|
14
14
|
# limitations under the License.
|
15
|
-
"""
|
15
|
+
"""Conversion script for the Stable Diffusion checkpoints."""
|
16
16
|
|
17
17
|
import re
|
18
18
|
from contextlib import nullcontext
|
@@ -557,7 +557,7 @@ def convert_ldm_unet_checkpoint(
|
|
557
557
|
paths, new_checkpoint, unet_state_dict, additional_replacements=[meta_path], config=config
|
558
558
|
)
|
559
559
|
|
560
|
-
output_block_list = {k: sorted(v) for k, v in output_block_list.items()}
|
560
|
+
output_block_list = {k: sorted(v) for k, v in sorted(output_block_list.items())}
|
561
561
|
if ["conv.bias", "conv.weight"] in output_block_list.values():
|
562
562
|
index = list(output_block_list.values()).index(["conv.bias", "conv.weight"])
|
563
563
|
new_checkpoint[f"up_blocks.{block_id}.upsamplers.0.conv.weight"] = unet_state_dict[
|
@@ -1153,6 +1153,8 @@ def download_from_original_stable_diffusion_ckpt(
|
|
1153
1153
|
controlnet: Optional[bool] = None,
|
1154
1154
|
adapter: Optional[bool] = None,
|
1155
1155
|
load_safety_checker: bool = True,
|
1156
|
+
safety_checker: Optional[StableDiffusionSafetyChecker] = None,
|
1157
|
+
feature_extractor: Optional[AutoFeatureExtractor] = None,
|
1156
1158
|
pipeline_class: DiffusionPipeline = None,
|
1157
1159
|
local_files_only=False,
|
1158
1160
|
vae_path=None,
|
@@ -1205,6 +1207,12 @@ def download_from_original_stable_diffusion_ckpt(
|
|
1205
1207
|
If `checkpoint_path` is in `safetensors` format, load checkpoint with safetensors instead of PyTorch.
|
1206
1208
|
load_safety_checker (`bool`, *optional*, defaults to `True`):
|
1207
1209
|
Whether to load the safety checker or not. Defaults to `True`.
|
1210
|
+
safety_checker (`StableDiffusionSafetyChecker`, *optional*, defaults to `None`):
|
1211
|
+
Safety checker to use. If this parameter is `None`, the function will load a new instance of
|
1212
|
+
[StableDiffusionSafetyChecker] by itself, if needed.
|
1213
|
+
feature_extractor (`AutoFeatureExtractor`, *optional*, defaults to `None`):
|
1214
|
+
Feature extractor to use. If this parameter is `None`, the function will load a new instance of
|
1215
|
+
[AutoFeatureExtractor] by itself, if needed.
|
1208
1216
|
pipeline_class (`str`, *optional*, defaults to `None`):
|
1209
1217
|
The pipeline class to use. Pass `None` to determine automatically.
|
1210
1218
|
local_files_only (`bool`, *optional*, defaults to `False`):
|
@@ -1530,8 +1538,8 @@ def download_from_original_stable_diffusion_ckpt(
|
|
1530
1538
|
unet=unet,
|
1531
1539
|
scheduler=scheduler,
|
1532
1540
|
controlnet=controlnet,
|
1533
|
-
safety_checker=
|
1534
|
-
feature_extractor=
|
1541
|
+
safety_checker=safety_checker,
|
1542
|
+
feature_extractor=feature_extractor,
|
1535
1543
|
)
|
1536
1544
|
if hasattr(pipe, "requires_safety_checker"):
|
1537
1545
|
pipe.requires_safety_checker = False
|
@@ -1551,8 +1559,8 @@ def download_from_original_stable_diffusion_ckpt(
|
|
1551
1559
|
unet=unet,
|
1552
1560
|
scheduler=scheduler,
|
1553
1561
|
low_res_scheduler=low_res_scheduler,
|
1554
|
-
safety_checker=
|
1555
|
-
feature_extractor=
|
1562
|
+
safety_checker=safety_checker,
|
1563
|
+
feature_extractor=feature_extractor,
|
1556
1564
|
)
|
1557
1565
|
|
1558
1566
|
else:
|
@@ -1562,8 +1570,8 @@ def download_from_original_stable_diffusion_ckpt(
|
|
1562
1570
|
tokenizer=tokenizer,
|
1563
1571
|
unet=unet,
|
1564
1572
|
scheduler=scheduler,
|
1565
|
-
safety_checker=
|
1566
|
-
feature_extractor=
|
1573
|
+
safety_checker=safety_checker,
|
1574
|
+
feature_extractor=feature_extractor,
|
1567
1575
|
)
|
1568
1576
|
if hasattr(pipe, "requires_safety_checker"):
|
1569
1577
|
pipe.requires_safety_checker = False
|
@@ -1684,9 +1692,6 @@ def download_from_original_stable_diffusion_ckpt(
|
|
1684
1692
|
feature_extractor = AutoFeatureExtractor.from_pretrained(
|
1685
1693
|
"CompVis/stable-diffusion-safety-checker", local_files_only=local_files_only
|
1686
1694
|
)
|
1687
|
-
else:
|
1688
|
-
safety_checker = None
|
1689
|
-
feature_extractor = None
|
1690
1695
|
|
1691
1696
|
if controlnet:
|
1692
1697
|
pipe = pipeline_class(
|
@@ -1838,6 +1843,8 @@ def download_controlnet_from_original_ckpt(
|
|
1838
1843
|
while "state_dict" in checkpoint:
|
1839
1844
|
checkpoint = checkpoint["state_dict"]
|
1840
1845
|
|
1846
|
+
with open(original_config_file, "r") as f:
|
1847
|
+
original_config_file = f.read()
|
1841
1848
|
original_config = yaml.safe_load(original_config_file)
|
1842
1849
|
|
1843
1850
|
if num_in_channels is not None:
|
@@ -288,7 +288,7 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
|
|
288
288
|
prompt (`str` or `List[str]`, *optional*):
|
289
289
|
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
|
290
290
|
instead.
|
291
|
-
image (`PIL.Image.Image` or List[`PIL.Image.Image`] or `torch.FloatTensor`):
|
291
|
+
image (`PIL.Image.Image` or List[`PIL.Image.Image`] or `torch.Tensor`):
|
292
292
|
`Image`, or tensor representing an image batch which will be upscaled. *
|
293
293
|
num_inference_steps (`int`, *optional*, defaults to 50):
|
294
294
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
@@ -329,7 +329,7 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
|
|
329
329
|
plain tuple.
|
330
330
|
callback (`Callable`, *optional*):
|
331
331
|
A function that will be called every `callback_steps` steps during inference. The function will be
|
332
|
-
called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
332
|
+
called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
333
333
|
callback_steps (`int`, *optional*, defaults to 1):
|
334
334
|
The frequency at which the `callback` function will be called. If not specified, the callback will be
|
335
335
|
called at every step.
|