diffusers 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +18 -1
- diffusers/callbacks.py +156 -0
- diffusers/commands/env.py +110 -6
- diffusers/configuration_utils.py +16 -11
- diffusers/dependency_versions_table.py +2 -1
- diffusers/image_processor.py +158 -45
- diffusers/loaders/__init__.py +2 -5
- diffusers/loaders/autoencoder.py +4 -4
- diffusers/loaders/controlnet.py +4 -4
- diffusers/loaders/ip_adapter.py +80 -22
- diffusers/loaders/lora.py +134 -20
- diffusers/loaders/lora_conversion_utils.py +46 -43
- diffusers/loaders/peft.py +4 -3
- diffusers/loaders/single_file.py +401 -170
- diffusers/loaders/single_file_model.py +290 -0
- diffusers/loaders/single_file_utils.py +616 -672
- diffusers/loaders/textual_inversion.py +41 -20
- diffusers/loaders/unet.py +168 -115
- diffusers/loaders/unet_loader_utils.py +163 -0
- diffusers/models/__init__.py +2 -0
- diffusers/models/activations.py +11 -3
- diffusers/models/attention.py +10 -11
- diffusers/models/attention_processor.py +367 -148
- diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
- diffusers/models/autoencoders/autoencoder_kl.py +18 -19
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
- diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
- diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
- diffusers/models/autoencoders/vae.py +23 -24
- diffusers/models/controlnet.py +12 -9
- diffusers/models/controlnet_flax.py +4 -4
- diffusers/models/controlnet_xs.py +1915 -0
- diffusers/models/downsampling.py +17 -18
- diffusers/models/embeddings.py +147 -24
- diffusers/models/model_loading_utils.py +149 -0
- diffusers/models/modeling_flax_pytorch_utils.py +2 -1
- diffusers/models/modeling_flax_utils.py +4 -4
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +118 -98
- diffusers/models/resnet.py +18 -23
- diffusers/models/transformer_temporal.py +3 -3
- diffusers/models/transformers/dual_transformer_2d.py +4 -4
- diffusers/models/transformers/prior_transformer.py +7 -7
- diffusers/models/transformers/t5_film_transformer.py +17 -19
- diffusers/models/transformers/transformer_2d.py +272 -156
- diffusers/models/transformers/transformer_temporal.py +10 -10
- diffusers/models/unets/unet_1d.py +5 -5
- diffusers/models/unets/unet_1d_blocks.py +29 -29
- diffusers/models/unets/unet_2d.py +6 -6
- diffusers/models/unets/unet_2d_blocks.py +137 -128
- diffusers/models/unets/unet_2d_condition.py +20 -15
- diffusers/models/unets/unet_2d_condition_flax.py +6 -5
- diffusers/models/unets/unet_3d_blocks.py +79 -77
- diffusers/models/unets/unet_3d_condition.py +13 -9
- diffusers/models/unets/unet_i2vgen_xl.py +14 -13
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +114 -14
- diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
- diffusers/models/unets/unet_stable_cascade.py +16 -13
- diffusers/models/upsampling.py +17 -20
- diffusers/models/vq_model.py +16 -15
- diffusers/pipelines/__init__.py +25 -3
- diffusers/pipelines/amused/pipeline_amused.py +12 -12
- diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
- diffusers/pipelines/animatediff/pipeline_output.py +3 -2
- diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
- diffusers/pipelines/auto_pipeline.py +21 -17
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
- diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
- diffusers/pipelines/controlnet_xs/__init__.py +68 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
- diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -21
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +3 -0
- diffusers/pipelines/free_init_utils.py +39 -38
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
- diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
- diffusers/pipelines/marigold/__init__.py +50 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
- diffusers/pipelines/pia/pipeline_pia.py +39 -125
- diffusers/pipelines/pipeline_flax_utils.py +4 -4
- diffusers/pipelines/pipeline_loading_utils.py +268 -23
- diffusers/pipelines/pipeline_utils.py +266 -37
- diffusers/pipelines/pixart_alpha/__init__.py +8 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
- diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
- diffusers/pipelines/shap_e/renderer.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +36 -22
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
- diffusers/pipelines/stable_diffusion/__init__.py +0 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
- diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -42
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
- diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
- diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
- diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
- diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
- diffusers/schedulers/__init__.py +2 -2
- diffusers/schedulers/deprecated/__init__.py +1 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
- diffusers/schedulers/scheduling_amused.py +5 -5
- diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
- diffusers/schedulers/scheduling_consistency_models.py +23 -25
- diffusers/schedulers/scheduling_ddim.py +22 -24
- diffusers/schedulers/scheduling_ddim_flax.py +2 -1
- diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
- diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
- diffusers/schedulers/scheduling_ddpm.py +20 -22
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
- diffusers/schedulers/scheduling_deis_multistep.py +46 -42
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -77
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
- diffusers/schedulers/scheduling_dpmsolver_sde.py +26 -22
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +90 -65
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +78 -53
- diffusers/schedulers/scheduling_edm_euler.py +53 -30
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +26 -28
- diffusers/schedulers/scheduling_euler_discrete.py +163 -67
- diffusers/schedulers/scheduling_heun_discrete.py +60 -38
- diffusers/schedulers/scheduling_ipndm.py +8 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +22 -18
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +22 -18
- diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
- diffusers/schedulers/scheduling_lcm.py +21 -23
- diffusers/schedulers/scheduling_lms_discrete.py +27 -25
- diffusers/schedulers/scheduling_pndm.py +20 -20
- diffusers/schedulers/scheduling_repaint.py +20 -20
- diffusers/schedulers/scheduling_sasolver.py +55 -54
- diffusers/schedulers/scheduling_sde_ve.py +19 -19
- diffusers/schedulers/scheduling_tcd.py +39 -30
- diffusers/schedulers/scheduling_unclip.py +15 -15
- diffusers/schedulers/scheduling_unipc_multistep.py +115 -41
- diffusers/schedulers/scheduling_utils.py +14 -5
- diffusers/schedulers/scheduling_utils_flax.py +3 -3
- diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
- diffusers/training_utils.py +56 -1
- diffusers/utils/__init__.py +7 -0
- diffusers/utils/doc_utils.py +1 -0
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
- diffusers/utils/dynamic_modules_utils.py +24 -11
- diffusers/utils/hub_utils.py +3 -2
- diffusers/utils/import_utils.py +91 -0
- diffusers/utils/loading_utils.py +2 -2
- diffusers/utils/logging.py +1 -1
- diffusers/utils/peft_utils.py +32 -5
- diffusers/utils/state_dict_utils.py +11 -2
- diffusers/utils/testing_utils.py +71 -6
- diffusers/utils/torch_utils.py +1 -0
- diffusers/video_processor.py +113 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/METADATA +7 -7
- diffusers-0.28.0.dist-info/RECORD +414 -0
- diffusers-0.27.1.dist-info/RECORD +0 -399
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/WHEEL +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
@@ -129,7 +129,7 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
|
|
129
129
|
movq ([`VQModel`]):
|
130
130
|
MoVQ Decoder to generate the image from the latents.
|
131
131
|
prior_prior ([`PriorTransformer`]):
|
132
|
-
The
|
132
|
+
The canonical unCLIP prior to approximate the image embedding from the text embedding.
|
133
133
|
prior_image_encoder ([`CLIPVisionModelWithProjection`]):
|
134
134
|
Frozen image-encoder.
|
135
135
|
prior_text_encoder ([`CLIPTextModelWithProjection`]):
|
@@ -143,6 +143,7 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
|
|
143
143
|
|
144
144
|
_load_connected_pipes = True
|
145
145
|
model_cpu_offload_seq = "text_encoder->unet->movq->prior_prior->prior_image_encoder->prior_text_encoder"
|
146
|
+
_exclude_from_cpu_offload = ["prior_prior"]
|
146
147
|
|
147
148
|
def __init__(
|
148
149
|
self,
|
@@ -225,9 +226,9 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
|
|
225
226
|
prior_guidance_scale: float = 4.0,
|
226
227
|
prior_num_inference_steps: int = 25,
|
227
228
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
228
|
-
latents: Optional[torch.FloatTensor] = None,
|
229
|
+
latents: Optional[torch.Tensor] = None,
|
229
230
|
output_type: Optional[str] = "pil",
|
230
|
-
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
231
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
231
232
|
callback_steps: int = 1,
|
232
233
|
return_dict: bool = True,
|
233
234
|
):
|
@@ -267,7 +268,7 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
|
|
267
268
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
268
269
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
269
270
|
to make generation deterministic.
|
270
|
-
latents (`torch.
|
271
|
+
latents (`torch.Tensor`, *optional*):
|
271
272
|
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
|
272
273
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
273
274
|
tensor will be generated by sampling using the supplied random `generator`.
|
@@ -276,7 +277,7 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
|
|
276
277
|
(`np.array`) or `"pt"` (`torch.Tensor`).
|
277
278
|
callback (`Callable`, *optional*):
|
278
279
|
A function that is called every `callback_steps` steps during inference. The function is called with the
|
279
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.
|
280
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
280
281
|
callback_steps (`int`, *optional*, defaults to 1):
|
281
282
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
282
283
|
every step.
|
@@ -346,7 +347,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
|
|
346
347
|
movq ([`VQModel`]):
|
347
348
|
MoVQ Decoder to generate the image from the latents.
|
348
349
|
prior_prior ([`PriorTransformer`]):
|
349
|
-
The
|
350
|
+
The canonical unCLIP prior to approximate the image embedding from the text embedding.
|
350
351
|
prior_image_encoder ([`CLIPVisionModelWithProjection`]):
|
351
352
|
Frozen image-encoder.
|
352
353
|
prior_text_encoder ([`CLIPTextModelWithProjection`]):
|
@@ -360,6 +361,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
|
|
360
361
|
|
361
362
|
_load_connected_pipes = True
|
362
363
|
model_cpu_offload_seq = "prior_text_encoder->prior_image_encoder->prior_prior->" "text_encoder->unet->movq"
|
364
|
+
_exclude_from_cpu_offload = ["prior_prior"]
|
363
365
|
|
364
366
|
def __init__(
|
365
367
|
self,
|
@@ -434,7 +436,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
|
|
434
436
|
def __call__(
|
435
437
|
self,
|
436
438
|
prompt: Union[str, List[str]],
|
437
|
-
image: Union[torch.
|
439
|
+
image: Union[torch.Tensor, PIL.Image.Image, List[torch.Tensor], List[PIL.Image.Image]],
|
438
440
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
439
441
|
num_inference_steps: int = 100,
|
440
442
|
guidance_scale: float = 4.0,
|
@@ -445,9 +447,9 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
|
|
445
447
|
prior_guidance_scale: float = 4.0,
|
446
448
|
prior_num_inference_steps: int = 25,
|
447
449
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
448
|
-
latents: Optional[torch.
|
450
|
+
latents: Optional[torch.Tensor] = None,
|
449
451
|
output_type: Optional[str] = "pil",
|
450
|
-
callback: Optional[Callable[[int, int, torch.
|
452
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
451
453
|
callback_steps: int = 1,
|
452
454
|
return_dict: bool = True,
|
453
455
|
):
|
@@ -457,7 +459,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
|
|
457
459
|
Args:
|
458
460
|
prompt (`str` or `List[str]`):
|
459
461
|
The prompt or prompts to guide the image generation.
|
460
|
-
image (`torch.
|
462
|
+
image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
|
461
463
|
`Image`, or tensor representing an image batch, that will be used as the starting point for the
|
462
464
|
process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
|
463
465
|
again.
|
@@ -497,7 +499,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
|
|
497
499
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
498
500
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
499
501
|
to make generation deterministic.
|
500
|
-
latents (`torch.
|
502
|
+
latents (`torch.Tensor`, *optional*):
|
501
503
|
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
|
502
504
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
503
505
|
tensor will be generated by sampling using the supplied random `generator`.
|
@@ -506,7 +508,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
|
|
506
508
|
(`np.array`) or `"pt"` (`torch.Tensor`).
|
507
509
|
callback (`Callable`, *optional*):
|
508
510
|
A function that is called every `callback_steps` steps during inference. The function is called with the
|
509
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.
|
511
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
510
512
|
callback_steps (`int`, *optional*, defaults to 1):
|
511
513
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
512
514
|
every step.
|
@@ -586,7 +588,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
|
|
586
588
|
movq ([`VQModel`]):
|
587
589
|
MoVQ Decoder to generate the image from the latents.
|
588
590
|
prior_prior ([`PriorTransformer`]):
|
589
|
-
The
|
591
|
+
The canonical unCLIP prior to approximate the image embedding from the text embedding.
|
590
592
|
prior_image_encoder ([`CLIPVisionModelWithProjection`]):
|
591
593
|
Frozen image-encoder.
|
592
594
|
prior_text_encoder ([`CLIPTextModelWithProjection`]):
|
@@ -600,6 +602,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
|
|
600
602
|
|
601
603
|
_load_connected_pipes = True
|
602
604
|
model_cpu_offload_seq = "prior_text_encoder->prior_image_encoder->prior_prior->text_encoder->unet->movq"
|
605
|
+
_exclude_from_cpu_offload = ["prior_prior"]
|
603
606
|
|
604
607
|
def __init__(
|
605
608
|
self,
|
@@ -674,8 +677,8 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
|
|
674
677
|
def __call__(
|
675
678
|
self,
|
676
679
|
prompt: Union[str, List[str]],
|
677
|
-
image: Union[torch.
|
678
|
-
mask_image: Union[torch.
|
680
|
+
image: Union[torch.Tensor, PIL.Image.Image, List[torch.Tensor], List[PIL.Image.Image]],
|
681
|
+
mask_image: Union[torch.Tensor, PIL.Image.Image, List[torch.Tensor], List[PIL.Image.Image]],
|
679
682
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
680
683
|
num_inference_steps: int = 100,
|
681
684
|
guidance_scale: float = 4.0,
|
@@ -685,9 +688,9 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
|
|
685
688
|
prior_guidance_scale: float = 4.0,
|
686
689
|
prior_num_inference_steps: int = 25,
|
687
690
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
688
|
-
latents: Optional[torch.
|
691
|
+
latents: Optional[torch.Tensor] = None,
|
689
692
|
output_type: Optional[str] = "pil",
|
690
|
-
callback: Optional[Callable[[int, int, torch.
|
693
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
691
694
|
callback_steps: int = 1,
|
692
695
|
return_dict: bool = True,
|
693
696
|
):
|
@@ -697,7 +700,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
|
|
697
700
|
Args:
|
698
701
|
prompt (`str` or `List[str]`):
|
699
702
|
The prompt or prompts to guide the image generation.
|
700
|
-
image (`torch.
|
703
|
+
image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
|
701
704
|
`Image`, or tensor representing an image batch, that will be used as the starting point for the
|
702
705
|
process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
|
703
706
|
again.
|
@@ -736,7 +739,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
|
|
736
739
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
737
740
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
738
741
|
to make generation deterministic.
|
739
|
-
latents (`torch.
|
742
|
+
latents (`torch.Tensor`, *optional*):
|
740
743
|
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
|
741
744
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
742
745
|
tensor will be generated by sampling using the supplied random `generator`.
|
@@ -745,7 +748,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
|
|
745
748
|
(`np.array`) or `"pt"` (`torch.Tensor`).
|
746
749
|
callback (`Callable`, *optional*):
|
747
750
|
A function that is called every `callback_steps` steps during inference. The function is called with the
|
748
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.
|
751
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
749
752
|
callback_steps (`int`, *optional*, defaults to 1):
|
750
753
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
751
754
|
every step.
|
@@ -266,10 +266,10 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline):
|
|
266
266
|
# add_noise method to override the one in the scheduler because it uses a different beta schedule for adding noise vs. sampling
|
267
267
|
def add_noise(
|
268
268
|
self,
|
269
|
-
original_samples: torch.FloatTensor,
|
270
|
-
noise: torch.FloatTensor,
|
269
|
+
original_samples: torch.Tensor,
|
270
|
+
noise: torch.Tensor,
|
271
271
|
timesteps: torch.IntTensor,
|
272
|
-
) -> torch.FloatTensor:
|
272
|
+
) -> torch.Tensor:
|
273
273
|
betas = torch.linspace(0.0001, 0.02, 1000, dtype=torch.float32)
|
274
274
|
alphas = 1.0 - betas
|
275
275
|
alphas_cumprod = torch.cumprod(alphas, dim=0)
|
@@ -295,9 +295,9 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline):
|
|
295
295
|
def __call__(
|
296
296
|
self,
|
297
297
|
prompt: Union[str, List[str]],
|
298
|
-
image: Union[torch.
|
299
|
-
image_embeds: torch.
|
300
|
-
negative_image_embeds: torch.
|
298
|
+
image: Union[torch.Tensor, PIL.Image.Image, List[torch.Tensor], List[PIL.Image.Image]],
|
299
|
+
image_embeds: torch.Tensor,
|
300
|
+
negative_image_embeds: torch.Tensor,
|
301
301
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
302
302
|
height: int = 512,
|
303
303
|
width: int = 512,
|
@@ -307,7 +307,7 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline):
|
|
307
307
|
num_images_per_prompt: int = 1,
|
308
308
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
309
309
|
output_type: Optional[str] = "pil",
|
310
|
-
callback: Optional[Callable[[int, int, torch.
|
310
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
311
311
|
callback_steps: int = 1,
|
312
312
|
return_dict: bool = True,
|
313
313
|
):
|
@@ -317,12 +317,12 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline):
|
|
317
317
|
Args:
|
318
318
|
prompt (`str` or `List[str]`):
|
319
319
|
The prompt or prompts to guide the image generation.
|
320
|
-
image (`torch.
|
320
|
+
image (`torch.Tensor`, `PIL.Image.Image`):
|
321
321
|
`Image`, or tensor representing an image batch, that will be used as the starting point for the
|
322
322
|
process.
|
323
|
-
image_embeds (`torch.
|
323
|
+
image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
|
324
324
|
The clip image embeddings for text prompt, that will be used to condition the image generation.
|
325
|
-
negative_image_embeds (`torch.
|
325
|
+
negative_image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
|
326
326
|
The clip image embeddings for negative text prompt, will be used to condition the image generation.
|
327
327
|
negative_prompt (`str` or `List[str]`, *optional*):
|
328
328
|
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
|
@@ -356,7 +356,7 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline):
|
|
356
356
|
(`np.array`) or `"pt"` (`torch.Tensor`).
|
357
357
|
callback (`Callable`, *optional*):
|
358
358
|
A function that is called every `callback_steps` steps during inference. The function is called with the
|
359
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.
|
359
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
360
360
|
callback_steps (`int`, *optional*, defaults to 1):
|
361
361
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
362
362
|
every step.
|
@@ -398,10 +398,10 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
|
|
398
398
|
def __call__(
|
399
399
|
self,
|
400
400
|
prompt: Union[str, List[str]],
|
401
|
-
image: Union[torch.FloatTensor, PIL.Image.Image],
|
402
|
-
mask_image: Union[torch.FloatTensor, PIL.Image.Image, np.ndarray],
|
403
|
-
image_embeds: torch.FloatTensor,
|
404
|
-
negative_image_embeds: torch.FloatTensor,
|
401
|
+
image: Union[torch.Tensor, PIL.Image.Image],
|
402
|
+
mask_image: Union[torch.Tensor, PIL.Image.Image, np.ndarray],
|
403
|
+
image_embeds: torch.Tensor,
|
404
|
+
negative_image_embeds: torch.Tensor,
|
405
405
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
406
406
|
height: int = 512,
|
407
407
|
width: int = 512,
|
@@ -409,9 +409,9 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
|
|
409
409
|
guidance_scale: float = 4.0,
|
410
410
|
num_images_per_prompt: int = 1,
|
411
411
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
412
|
-
latents: Optional[torch.FloatTensor] = None,
|
412
|
+
latents: Optional[torch.Tensor] = None,
|
413
413
|
output_type: Optional[str] = "pil",
|
414
|
-
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
414
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
415
415
|
callback_steps: int = 1,
|
416
416
|
return_dict: bool = True,
|
417
417
|
):
|
@@ -421,10 +421,10 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
|
|
421
421
|
Args:
|
422
422
|
prompt (`str` or `List[str]`):
|
423
423
|
The prompt or prompts to guide the image generation.
|
424
|
-
image (`torch.FloatTensor`, `PIL.Image.Image` or `np.ndarray`):
|
424
|
+
image (`torch.Tensor`, `PIL.Image.Image` or `np.ndarray`):
|
425
425
|
`Image`, or tensor representing an image batch, that will be used as the starting point for the
|
426
426
|
process.
|
427
|
-
mask_image (`PIL.Image.Image`,`torch.FloatTensor` or `np.ndarray`):
|
427
|
+
mask_image (`PIL.Image.Image`,`torch.Tensor` or `np.ndarray`):
|
428
428
|
`Image`, or a tensor representing an image batch, to mask `image`. White pixels in the mask will be
|
429
429
|
repainted, while black pixels will be preserved. You can pass a pytorch tensor as mask only if the
|
430
430
|
image you passed is a pytorch tensor, and it should contain one color channel (L) instead of 3, so the
|
@@ -432,9 +432,9 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
|
|
432
432
|
image or numpy array, mask should also be either a PIL image or numpy array. If it is a PIL image, it
|
433
433
|
will be converted to a single channel (luminance) before use. If it is a numpy array, the expected
|
434
434
|
shape is `(H, W)`.
|
435
|
-
image_embeds (`torch.FloatTensor` or `List[torch.FloatTensor]`):
|
435
|
+
image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
|
436
436
|
The clip image embeddings for text prompt, that will be used to condition the image generation.
|
437
|
-
negative_image_embeds (`torch.FloatTensor` or `List[torch.FloatTensor]`):
|
437
|
+
negative_image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
|
438
438
|
The clip image embeddings for negative text prompt, will be used to condition the image generation.
|
439
439
|
negative_prompt (`str` or `List[str]`, *optional*):
|
440
440
|
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
|
@@ -457,7 +457,7 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
|
|
457
457
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
458
458
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
459
459
|
to make generation deterministic.
|
460
|
-
latents (`torch.FloatTensor`, *optional*):
|
460
|
+
latents (`torch.Tensor`, *optional*):
|
461
461
|
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
|
462
462
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
463
463
|
tensor will be generated by sampling using the supplied random `generator`.
|
@@ -466,7 +466,7 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
|
|
466
466
|
(`np.array`) or `"pt"` (`torch.Tensor`).
|
467
467
|
callback (`Callable`, *optional*):
|
468
468
|
A function that is called every `callback_steps` steps during inference. The function is called with the
|
469
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
469
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
470
470
|
callback_steps (`int`, *optional*, defaults to 1):
|
471
471
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
472
472
|
every step.
|
@@ -115,14 +115,14 @@ class KandinskyPriorPipelineOutput(BaseOutput):
|
|
115
115
|
Output class for KandinskyPriorPipeline.
|
116
116
|
|
117
117
|
Args:
|
118
|
-
image_embeds (`torch.FloatTensor`)
|
118
|
+
image_embeds (`torch.Tensor`)
|
119
119
|
clip image embeddings for text prompt
|
120
120
|
negative_image_embeds (`torch.Tensor` or `np.ndarray`)
|
121
121
|
clip image embeddings for unconditional tokens
|
122
122
|
"""
|
123
123
|
|
124
|
-
image_embeds: Union[torch.FloatTensor, np.ndarray]
|
125
|
-
negative_image_embeds: Union[torch.FloatTensor, np.ndarray]
|
124
|
+
image_embeds: Union[torch.Tensor, np.ndarray]
|
125
|
+
negative_image_embeds: Union[torch.Tensor, np.ndarray]
|
126
126
|
|
127
127
|
|
128
128
|
class KandinskyPriorPipeline(DiffusionPipeline):
|
@@ -134,7 +134,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
|
|
134
134
|
|
135
135
|
Args:
|
136
136
|
prior ([`PriorTransformer`]):
|
137
|
-
The canonincal unCLIP prior to approximate the image embedding from the text embedding.
|
137
|
+
The canonical unCLIP prior to approximate the image embedding from the text embedding.
|
138
138
|
image_encoder ([`CLIPVisionModelWithProjection`]):
|
139
139
|
Frozen image-encoder.
|
140
140
|
text_encoder ([`CLIPTextModelWithProjection`]):
|
@@ -173,12 +173,12 @@ class KandinskyPriorPipeline(DiffusionPipeline):
|
|
173
173
|
@replace_example_docstring(EXAMPLE_INTERPOLATE_DOC_STRING)
|
174
174
|
def interpolate(
|
175
175
|
self,
|
176
|
-
images_and_prompts: List[Union[str, PIL.Image.Image, torch.FloatTensor]],
|
176
|
+
images_and_prompts: List[Union[str, PIL.Image.Image, torch.Tensor]],
|
177
177
|
weights: List[float],
|
178
178
|
num_images_per_prompt: int = 1,
|
179
179
|
num_inference_steps: int = 25,
|
180
180
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
181
|
-
latents: Optional[torch.FloatTensor] = None,
|
181
|
+
latents: Optional[torch.Tensor] = None,
|
182
182
|
negative_prior_prompt: Optional[str] = None,
|
183
183
|
negative_prompt: str = "",
|
184
184
|
guidance_scale: float = 4.0,
|
@@ -188,7 +188,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
|
|
188
188
|
Function invoked when using the prior pipeline for interpolation.
|
189
189
|
|
190
190
|
Args:
|
191
|
-
images_and_prompts (`List[Union[str, PIL.Image.Image, torch.FloatTensor]]`):
|
191
|
+
images_and_prompts (`List[Union[str, PIL.Image.Image, torch.Tensor]]`):
|
192
192
|
list of prompts and images to guide the image generation.
|
193
193
|
weights: (`List[float]`):
|
194
194
|
list of weights for each condition in `images_and_prompts`
|
@@ -200,7 +200,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
|
|
200
200
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
201
201
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
202
202
|
to make generation deterministic.
|
203
|
-
latents (`torch.FloatTensor`, *optional*):
|
203
|
+
latents (`torch.Tensor`, *optional*):
|
204
204
|
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
|
205
205
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
206
206
|
tensor will be generated by sampling using the supplied random `generator`.
|
@@ -403,7 +403,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
|
|
403
403
|
num_images_per_prompt: int = 1,
|
404
404
|
num_inference_steps: int = 25,
|
405
405
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
406
|
-
latents: Optional[torch.FloatTensor] = None,
|
406
|
+
latents: Optional[torch.Tensor] = None,
|
407
407
|
guidance_scale: float = 4.0,
|
408
408
|
output_type: Optional[str] = "pt",
|
409
409
|
return_dict: bool = True,
|
@@ -425,7 +425,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
|
|
425
425
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
426
426
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
427
427
|
to make generation deterministic.
|
428
|
-
latents (`torch.FloatTensor`, *optional*):
|
428
|
+
latents (`torch.Tensor`, *optional*):
|
429
429
|
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
|
430
430
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
431
431
|
tensor will be generated by sampling using the supplied random `generator`.
|
@@ -123,15 +123,15 @@ class KandinskyV22Pipeline(DiffusionPipeline):
|
|
123
123
|
@replace_example_docstring(EXAMPLE_DOC_STRING)
|
124
124
|
def __call__(
|
125
125
|
self,
|
126
|
-
image_embeds: Union[torch.FloatTensor, List[torch.FloatTensor]],
|
127
|
-
negative_image_embeds: Union[torch.FloatTensor, List[torch.FloatTensor]],
|
126
|
+
image_embeds: Union[torch.Tensor, List[torch.Tensor]],
|
127
|
+
negative_image_embeds: Union[torch.Tensor, List[torch.Tensor]],
|
128
128
|
height: int = 512,
|
129
129
|
width: int = 512,
|
130
130
|
num_inference_steps: int = 100,
|
131
131
|
guidance_scale: float = 4.0,
|
132
132
|
num_images_per_prompt: int = 1,
|
133
133
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
134
|
-
latents: Optional[torch.FloatTensor] = None,
|
134
|
+
latents: Optional[torch.Tensor] = None,
|
135
135
|
output_type: Optional[str] = "pil",
|
136
136
|
return_dict: bool = True,
|
137
137
|
callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
|
@@ -142,9 +142,9 @@ class KandinskyV22Pipeline(DiffusionPipeline):
|
|
142
142
|
Function invoked when calling the pipeline for generation.
|
143
143
|
|
144
144
|
Args:
|
145
|
-
image_embeds (`torch.FloatTensor` or `List[torch.FloatTensor]`):
|
145
|
+
image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
|
146
146
|
The clip image embeddings for text prompt, that will be used to condition the image generation.
|
147
|
-
negative_image_embeds (`torch.FloatTensor` or `List[torch.FloatTensor]`):
|
147
|
+
negative_image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
|
148
148
|
The clip image embeddings for negative text prompt, will be used to condition the image generation.
|
149
149
|
height (`int`, *optional*, defaults to 512):
|
150
150
|
The height in pixels of the generated image.
|
@@ -164,7 +164,7 @@ class KandinskyV22Pipeline(DiffusionPipeline):
|
|
164
164
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
165
165
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
166
166
|
to make generation deterministic.
|
167
|
-
latents (`torch.FloatTensor`, *optional*):
|
167
|
+
latents (`torch.Tensor`, *optional*):
|
168
168
|
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
|
169
169
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
170
170
|
tensor will be generated by sampling using the supplied random `generator`.
|