diffusers 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +18 -1
- diffusers/callbacks.py +156 -0
- diffusers/commands/env.py +110 -6
- diffusers/configuration_utils.py +16 -11
- diffusers/dependency_versions_table.py +2 -1
- diffusers/image_processor.py +158 -45
- diffusers/loaders/__init__.py +2 -5
- diffusers/loaders/autoencoder.py +4 -4
- diffusers/loaders/controlnet.py +4 -4
- diffusers/loaders/ip_adapter.py +80 -22
- diffusers/loaders/lora.py +134 -20
- diffusers/loaders/lora_conversion_utils.py +46 -43
- diffusers/loaders/peft.py +4 -3
- diffusers/loaders/single_file.py +401 -170
- diffusers/loaders/single_file_model.py +290 -0
- diffusers/loaders/single_file_utils.py +616 -672
- diffusers/loaders/textual_inversion.py +41 -20
- diffusers/loaders/unet.py +168 -115
- diffusers/loaders/unet_loader_utils.py +163 -0
- diffusers/models/__init__.py +2 -0
- diffusers/models/activations.py +11 -3
- diffusers/models/attention.py +10 -11
- diffusers/models/attention_processor.py +367 -148
- diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
- diffusers/models/autoencoders/autoencoder_kl.py +18 -19
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
- diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
- diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
- diffusers/models/autoencoders/vae.py +23 -24
- diffusers/models/controlnet.py +12 -9
- diffusers/models/controlnet_flax.py +4 -4
- diffusers/models/controlnet_xs.py +1915 -0
- diffusers/models/downsampling.py +17 -18
- diffusers/models/embeddings.py +147 -24
- diffusers/models/model_loading_utils.py +149 -0
- diffusers/models/modeling_flax_pytorch_utils.py +2 -1
- diffusers/models/modeling_flax_utils.py +4 -4
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +118 -98
- diffusers/models/resnet.py +18 -23
- diffusers/models/transformer_temporal.py +3 -3
- diffusers/models/transformers/dual_transformer_2d.py +4 -4
- diffusers/models/transformers/prior_transformer.py +7 -7
- diffusers/models/transformers/t5_film_transformer.py +17 -19
- diffusers/models/transformers/transformer_2d.py +272 -156
- diffusers/models/transformers/transformer_temporal.py +10 -10
- diffusers/models/unets/unet_1d.py +5 -5
- diffusers/models/unets/unet_1d_blocks.py +29 -29
- diffusers/models/unets/unet_2d.py +6 -6
- diffusers/models/unets/unet_2d_blocks.py +137 -128
- diffusers/models/unets/unet_2d_condition.py +20 -15
- diffusers/models/unets/unet_2d_condition_flax.py +6 -5
- diffusers/models/unets/unet_3d_blocks.py +79 -77
- diffusers/models/unets/unet_3d_condition.py +13 -9
- diffusers/models/unets/unet_i2vgen_xl.py +14 -13
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +114 -14
- diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
- diffusers/models/unets/unet_stable_cascade.py +16 -13
- diffusers/models/upsampling.py +17 -20
- diffusers/models/vq_model.py +16 -15
- diffusers/pipelines/__init__.py +25 -3
- diffusers/pipelines/amused/pipeline_amused.py +12 -12
- diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
- diffusers/pipelines/animatediff/pipeline_output.py +3 -2
- diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
- diffusers/pipelines/auto_pipeline.py +21 -17
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
- diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
- diffusers/pipelines/controlnet_xs/__init__.py +68 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
- diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -21
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +3 -0
- diffusers/pipelines/free_init_utils.py +39 -38
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
- diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
- diffusers/pipelines/marigold/__init__.py +50 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
- diffusers/pipelines/pia/pipeline_pia.py +39 -125
- diffusers/pipelines/pipeline_flax_utils.py +4 -4
- diffusers/pipelines/pipeline_loading_utils.py +268 -23
- diffusers/pipelines/pipeline_utils.py +266 -37
- diffusers/pipelines/pixart_alpha/__init__.py +8 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
- diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
- diffusers/pipelines/shap_e/renderer.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +36 -22
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
- diffusers/pipelines/stable_diffusion/__init__.py +0 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
- diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -42
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
- diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
- diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
- diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
- diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
- diffusers/schedulers/__init__.py +2 -2
- diffusers/schedulers/deprecated/__init__.py +1 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
- diffusers/schedulers/scheduling_amused.py +5 -5
- diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
- diffusers/schedulers/scheduling_consistency_models.py +23 -25
- diffusers/schedulers/scheduling_ddim.py +22 -24
- diffusers/schedulers/scheduling_ddim_flax.py +2 -1
- diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
- diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
- diffusers/schedulers/scheduling_ddpm.py +20 -22
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
- diffusers/schedulers/scheduling_deis_multistep.py +46 -42
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -77
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
- diffusers/schedulers/scheduling_dpmsolver_sde.py +26 -22
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +90 -65
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +78 -53
- diffusers/schedulers/scheduling_edm_euler.py +53 -30
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +26 -28
- diffusers/schedulers/scheduling_euler_discrete.py +163 -67
- diffusers/schedulers/scheduling_heun_discrete.py +60 -38
- diffusers/schedulers/scheduling_ipndm.py +8 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +22 -18
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +22 -18
- diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
- diffusers/schedulers/scheduling_lcm.py +21 -23
- diffusers/schedulers/scheduling_lms_discrete.py +27 -25
- diffusers/schedulers/scheduling_pndm.py +20 -20
- diffusers/schedulers/scheduling_repaint.py +20 -20
- diffusers/schedulers/scheduling_sasolver.py +55 -54
- diffusers/schedulers/scheduling_sde_ve.py +19 -19
- diffusers/schedulers/scheduling_tcd.py +39 -30
- diffusers/schedulers/scheduling_unclip.py +15 -15
- diffusers/schedulers/scheduling_unipc_multistep.py +115 -41
- diffusers/schedulers/scheduling_utils.py +14 -5
- diffusers/schedulers/scheduling_utils_flax.py +3 -3
- diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
- diffusers/training_utils.py +56 -1
- diffusers/utils/__init__.py +7 -0
- diffusers/utils/doc_utils.py +1 -0
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
- diffusers/utils/dynamic_modules_utils.py +24 -11
- diffusers/utils/hub_utils.py +3 -2
- diffusers/utils/import_utils.py +91 -0
- diffusers/utils/loading_utils.py +2 -2
- diffusers/utils/logging.py +1 -1
- diffusers/utils/peft_utils.py +32 -5
- diffusers/utils/state_dict_utils.py +11 -2
- diffusers/utils/testing_utils.py +71 -6
- diffusers/utils/torch_utils.py +1 -0
- diffusers/video_processor.py +113 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/METADATA +7 -7
- diffusers-0.28.0.dist-info/RECORD +414 -0
- diffusers-0.27.1.dist-info/RECORD +0 -399
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/WHEEL +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
@@ -197,7 +197,7 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
|
|
197
197
|
)
|
198
198
|
|
199
199
|
# verify batch size of prompt and image are same if image is a list or tensor or numpy array
|
200
|
-
if isinstance(image, list
|
200
|
+
if isinstance(image, (list, np.ndarray)):
|
201
201
|
if prompt is not None and isinstance(prompt, str):
|
202
202
|
batch_size = 1
|
203
203
|
elif prompt is not None and isinstance(prompt, list):
|
@@ -395,7 +395,7 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
|
|
395
395
|
[`schedulers.DDIMScheduler`], will be ignored for others.
|
396
396
|
generator (`np.random.RandomState`, *optional*):
|
397
397
|
A np.random.RandomState to make generation deterministic.
|
398
|
-
latents (`torch.
|
398
|
+
latents (`torch.Tensor`, *optional*):
|
399
399
|
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
|
400
400
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
401
401
|
tensor will ge generated by sampling using the supplied random `generator`.
|
@@ -469,7 +469,7 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
|
|
469
469
|
|
470
470
|
latents = self.prepare_latents(
|
471
471
|
batch_size * num_images_per_prompt,
|
472
|
-
self.num_latent_channels,
|
472
|
+
self.config.num_latent_channels,
|
473
473
|
height,
|
474
474
|
width,
|
475
475
|
latents_dtype,
|
@@ -498,12 +498,12 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
|
|
498
498
|
|
499
499
|
# 7. Check that sizes of image and latents match
|
500
500
|
num_channels_image = image.shape[1]
|
501
|
-
if self.num_latent_channels + num_channels_image != self.num_unet_input_channels:
|
501
|
+
if self.config.num_latent_channels + num_channels_image != self.config.num_unet_input_channels:
|
502
502
|
raise ValueError(
|
503
503
|
"Incorrect configuration settings! The config of `pipeline.unet` expects"
|
504
|
-
f" {self.num_unet_input_channels} but received `num_channels_latents`: {self.num_latent_channels} +"
|
504
|
+
f" {self.config.num_unet_input_channels} but received `num_channels_latents`: {self.config.num_latent_channels} +"
|
505
505
|
f" `num_channels_image`: {num_channels_image} "
|
506
|
-
f" = {self.num_latent_channels + num_channels_image}. Please verify the config of"
|
506
|
+
f" = {self.config.num_latent_channels + num_channels_image}. Please verify the config of"
|
507
507
|
" `pipeline.unet` or your `image` input."
|
508
508
|
)
|
509
509
|
|
@@ -11,7 +11,6 @@
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
|
-
|
15
14
|
import inspect
|
16
15
|
from typing import Any, Callable, Dict, List, Optional, Union
|
17
16
|
|
@@ -19,6 +18,7 @@ import torch
|
|
19
18
|
from packaging import version
|
20
19
|
from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
|
21
20
|
|
21
|
+
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
|
22
22
|
from ...configuration_utils import FrozenDict
|
23
23
|
from ...image_processor import PipelineImageInput, VaeImageProcessor
|
24
24
|
from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
|
@@ -75,6 +75,7 @@ def retrieve_timesteps(
|
|
75
75
|
num_inference_steps: Optional[int] = None,
|
76
76
|
device: Optional[Union[str, torch.device]] = None,
|
77
77
|
timesteps: Optional[List[int]] = None,
|
78
|
+
sigmas: Optional[List[float]] = None,
|
78
79
|
**kwargs,
|
79
80
|
):
|
80
81
|
"""
|
@@ -85,19 +86,23 @@ def retrieve_timesteps(
|
|
85
86
|
scheduler (`SchedulerMixin`):
|
86
87
|
The scheduler to get timesteps from.
|
87
88
|
num_inference_steps (`int`):
|
88
|
-
The number of diffusion steps used when generating samples with a pre-trained model. If used,
|
89
|
-
|
89
|
+
The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
|
90
|
+
must be `None`.
|
90
91
|
device (`str` or `torch.device`, *optional*):
|
91
92
|
The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
|
92
93
|
timesteps (`List[int]`, *optional*):
|
93
|
-
|
94
|
-
|
95
|
-
|
94
|
+
Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
|
95
|
+
`num_inference_steps` and `sigmas` must be `None`.
|
96
|
+
sigmas (`List[float]`, *optional*):
|
97
|
+
Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
|
98
|
+
`num_inference_steps` and `timesteps` must be `None`.
|
96
99
|
|
97
100
|
Returns:
|
98
101
|
`Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
|
99
102
|
second element is the number of inference steps.
|
100
103
|
"""
|
104
|
+
if timesteps is not None and sigmas is not None:
|
105
|
+
raise ValueError("Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values")
|
101
106
|
if timesteps is not None:
|
102
107
|
accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
|
103
108
|
if not accepts_timesteps:
|
@@ -108,6 +113,16 @@ def retrieve_timesteps(
|
|
108
113
|
scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
|
109
114
|
timesteps = scheduler.timesteps
|
110
115
|
num_inference_steps = len(timesteps)
|
116
|
+
elif sigmas is not None:
|
117
|
+
accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
|
118
|
+
if not accept_sigmas:
|
119
|
+
raise ValueError(
|
120
|
+
f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
|
121
|
+
f" sigmas schedules. Please check whether you are using the correct scheduler."
|
122
|
+
)
|
123
|
+
scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
|
124
|
+
timesteps = scheduler.timesteps
|
125
|
+
num_inference_steps = len(timesteps)
|
111
126
|
else:
|
112
127
|
scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
|
113
128
|
timesteps = scheduler.timesteps
|
@@ -259,8 +274,8 @@ class StableDiffusionPipeline(
|
|
259
274
|
num_images_per_prompt,
|
260
275
|
do_classifier_free_guidance,
|
261
276
|
negative_prompt=None,
|
262
|
-
prompt_embeds: Optional[torch.
|
263
|
-
negative_prompt_embeds: Optional[torch.
|
277
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
278
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
264
279
|
lora_scale: Optional[float] = None,
|
265
280
|
**kwargs,
|
266
281
|
):
|
@@ -291,8 +306,8 @@ class StableDiffusionPipeline(
|
|
291
306
|
num_images_per_prompt,
|
292
307
|
do_classifier_free_guidance,
|
293
308
|
negative_prompt=None,
|
294
|
-
prompt_embeds: Optional[torch.
|
295
|
-
negative_prompt_embeds: Optional[torch.
|
309
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
310
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
296
311
|
lora_scale: Optional[float] = None,
|
297
312
|
clip_skip: Optional[int] = None,
|
298
313
|
):
|
@@ -312,10 +327,10 @@ class StableDiffusionPipeline(
|
|
312
327
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
313
328
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
314
329
|
less than `1`).
|
315
|
-
prompt_embeds (`torch.
|
330
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
316
331
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
317
332
|
provided, text embeddings will be generated from `prompt` input argument.
|
318
|
-
negative_prompt_embeds (`torch.
|
333
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
319
334
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
320
335
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
321
336
|
argument.
|
@@ -652,7 +667,12 @@ class StableDiffusionPipeline(
|
|
652
667
|
)
|
653
668
|
|
654
669
|
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
655
|
-
shape = (
|
670
|
+
shape = (
|
671
|
+
batch_size,
|
672
|
+
num_channels_latents,
|
673
|
+
int(height) // self.vae_scale_factor,
|
674
|
+
int(width) // self.vae_scale_factor,
|
675
|
+
)
|
656
676
|
if isinstance(generator, list) and len(generator) != batch_size:
|
657
677
|
raise ValueError(
|
658
678
|
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
@@ -669,20 +689,22 @@ class StableDiffusionPipeline(
|
|
669
689
|
return latents
|
670
690
|
|
671
691
|
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
|
672
|
-
def get_guidance_scale_embedding(
|
692
|
+
def get_guidance_scale_embedding(
|
693
|
+
self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
|
694
|
+
) -> torch.Tensor:
|
673
695
|
"""
|
674
696
|
See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298
|
675
697
|
|
676
698
|
Args:
|
677
|
-
|
678
|
-
|
699
|
+
w (`torch.Tensor`):
|
700
|
+
Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
|
679
701
|
embedding_dim (`int`, *optional*, defaults to 512):
|
680
|
-
|
681
|
-
dtype:
|
682
|
-
|
702
|
+
Dimension of the embeddings to generate.
|
703
|
+
dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
|
704
|
+
Data type of the generated embeddings.
|
683
705
|
|
684
706
|
Returns:
|
685
|
-
`torch.
|
707
|
+
`torch.Tensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
|
686
708
|
"""
|
687
709
|
assert len(w.shape) == 1
|
688
710
|
w = w * 1000.0
|
@@ -737,22 +759,25 @@ class StableDiffusionPipeline(
|
|
737
759
|
width: Optional[int] = None,
|
738
760
|
num_inference_steps: int = 50,
|
739
761
|
timesteps: List[int] = None,
|
762
|
+
sigmas: List[float] = None,
|
740
763
|
guidance_scale: float = 7.5,
|
741
764
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
742
765
|
num_images_per_prompt: Optional[int] = 1,
|
743
766
|
eta: float = 0.0,
|
744
767
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
745
|
-
latents: Optional[torch.
|
746
|
-
prompt_embeds: Optional[torch.
|
747
|
-
negative_prompt_embeds: Optional[torch.
|
768
|
+
latents: Optional[torch.Tensor] = None,
|
769
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
770
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
748
771
|
ip_adapter_image: Optional[PipelineImageInput] = None,
|
749
|
-
ip_adapter_image_embeds: Optional[List[torch.
|
772
|
+
ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
|
750
773
|
output_type: Optional[str] = "pil",
|
751
774
|
return_dict: bool = True,
|
752
775
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
753
776
|
guidance_rescale: float = 0.0,
|
754
777
|
clip_skip: Optional[int] = None,
|
755
|
-
callback_on_step_end: Optional[
|
778
|
+
callback_on_step_end: Optional[
|
779
|
+
Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
|
780
|
+
] = None,
|
756
781
|
callback_on_step_end_tensor_inputs: List[str] = ["latents"],
|
757
782
|
**kwargs,
|
758
783
|
):
|
@@ -773,6 +798,10 @@ class StableDiffusionPipeline(
|
|
773
798
|
Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
|
774
799
|
in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
|
775
800
|
passed will be used. Must be in descending order.
|
801
|
+
sigmas (`List[float]`, *optional*):
|
802
|
+
Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
|
803
|
+
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
804
|
+
will be used.
|
776
805
|
guidance_scale (`float`, *optional*, defaults to 7.5):
|
777
806
|
A higher guidance scale value encourages the model to generate images closely linked to the text
|
778
807
|
`prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
|
@@ -787,22 +816,22 @@ class StableDiffusionPipeline(
|
|
787
816
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
788
817
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
789
818
|
generation deterministic.
|
790
|
-
latents (`torch.
|
819
|
+
latents (`torch.Tensor`, *optional*):
|
791
820
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
792
821
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
793
822
|
tensor is generated by sampling using the supplied random `generator`.
|
794
|
-
prompt_embeds (`torch.
|
823
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
795
824
|
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
796
825
|
provided, text embeddings are generated from the `prompt` input argument.
|
797
|
-
negative_prompt_embeds (`torch.
|
826
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
798
827
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
799
828
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
800
829
|
ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
|
801
|
-
ip_adapter_image_embeds (`List[torch.
|
802
|
-
Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
|
803
|
-
Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
|
804
|
-
if `do_classifier_free_guidance` is set to `True`.
|
805
|
-
|
830
|
+
ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
|
831
|
+
Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
|
832
|
+
IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. It should
|
833
|
+
contain the negative image embedding if `do_classifier_free_guidance` is set to `True`. If not
|
834
|
+
provided, embeddings are computed from the `ip_adapter_image` input argument.
|
806
835
|
output_type (`str`, *optional*, defaults to `"pil"`):
|
807
836
|
The output format of the generated image. Choose between `PIL.Image` or `np.array`.
|
808
837
|
return_dict (`bool`, *optional*, defaults to `True`):
|
@@ -818,11 +847,11 @@ class StableDiffusionPipeline(
|
|
818
847
|
clip_skip (`int`, *optional*):
|
819
848
|
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
|
820
849
|
the output of the pre-final layer will be used for computing the prompt embeddings.
|
821
|
-
callback_on_step_end (`Callable`, *optional*):
|
822
|
-
A function
|
823
|
-
with the following arguments: `callback_on_step_end(self:
|
824
|
-
callback_kwargs: Dict)`. `callback_kwargs` will include a
|
825
|
-
`callback_on_step_end_tensor_inputs`.
|
850
|
+
callback_on_step_end (`Callable`, `PipelineCallback`, `MultiPipelineCallbacks`, *optional*):
|
851
|
+
A function or a subclass of `PipelineCallback` or `MultiPipelineCallbacks` that is called at the end of
|
852
|
+
each denoising step during inference, with the following arguments: `callback_on_step_end(self:
|
853
|
+
DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)`. `callback_kwargs` will include a
|
854
|
+
list of all tensors as specified by `callback_on_step_end_tensor_inputs`.
|
826
855
|
callback_on_step_end_tensor_inputs (`List`, *optional*):
|
827
856
|
The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
|
828
857
|
will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
|
@@ -854,6 +883,9 @@ class StableDiffusionPipeline(
|
|
854
883
|
"Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
|
855
884
|
)
|
856
885
|
|
886
|
+
if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
|
887
|
+
callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
|
888
|
+
|
857
889
|
# 0. Default height and width to unet
|
858
890
|
height = height or self.unet.config.sample_size * self.vae_scale_factor
|
859
891
|
width = width or self.unet.config.sample_size * self.vae_scale_factor
|
@@ -922,7 +954,9 @@ class StableDiffusionPipeline(
|
|
922
954
|
)
|
923
955
|
|
924
956
|
# 4. Prepare timesteps
|
925
|
-
timesteps, num_inference_steps = retrieve_timesteps(
|
957
|
+
timesteps, num_inference_steps = retrieve_timesteps(
|
958
|
+
self.scheduler, num_inference_steps, device, timesteps, sigmas
|
959
|
+
)
|
926
960
|
|
927
961
|
# 5. Prepare latent variables
|
928
962
|
num_channels_latents = self.unet.config.in_channels
|
@@ -156,8 +156,8 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
|
|
156
156
|
num_images_per_prompt,
|
157
157
|
do_classifier_free_guidance,
|
158
158
|
negative_prompt=None,
|
159
|
-
prompt_embeds: Optional[torch.
|
160
|
-
negative_prompt_embeds: Optional[torch.
|
159
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
160
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
161
161
|
lora_scale: Optional[float] = None,
|
162
162
|
**kwargs,
|
163
163
|
):
|
@@ -189,8 +189,8 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
|
|
189
189
|
num_images_per_prompt,
|
190
190
|
do_classifier_free_guidance,
|
191
191
|
negative_prompt=None,
|
192
|
-
prompt_embeds: Optional[torch.
|
193
|
-
negative_prompt_embeds: Optional[torch.
|
192
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
193
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
194
194
|
lora_scale: Optional[float] = None,
|
195
195
|
clip_skip: Optional[int] = None,
|
196
196
|
):
|
@@ -210,10 +210,10 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
|
|
210
210
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
211
211
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
212
212
|
less than `1`).
|
213
|
-
prompt_embeds (`torch.
|
213
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
214
214
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
215
215
|
provided, text embeddings will be generated from `prompt` input argument.
|
216
|
-
negative_prompt_embeds (`torch.
|
216
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
217
217
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
218
218
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
219
219
|
argument.
|
@@ -548,8 +548,15 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
|
|
548
548
|
pixel_values = pixel_values.to(device=device)
|
549
549
|
# The DPT-Hybrid model uses batch-norm layers which are not compatible with fp16.
|
550
550
|
# So we use `torch.autocast` here for half precision inference.
|
551
|
-
|
552
|
-
|
551
|
+
if torch.backends.mps.is_available():
|
552
|
+
autocast_ctx = contextlib.nullcontext()
|
553
|
+
logger.warning(
|
554
|
+
"The DPT-Hybrid model uses batch-norm layers which are not compatible with fp16, but autocast is not yet supported on MPS."
|
555
|
+
)
|
556
|
+
else:
|
557
|
+
autocast_ctx = torch.autocast(device.type, dtype=dtype)
|
558
|
+
|
559
|
+
with autocast_ctx:
|
553
560
|
depth_map = self.depth_estimator(pixel_values).predicted_depth
|
554
561
|
else:
|
555
562
|
depth_map = depth_map.to(device=device, dtype=dtype)
|
@@ -602,7 +609,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
|
|
602
609
|
self,
|
603
610
|
prompt: Union[str, List[str]] = None,
|
604
611
|
image: PipelineImageInput = None,
|
605
|
-
depth_map: Optional[torch.
|
612
|
+
depth_map: Optional[torch.Tensor] = None,
|
606
613
|
strength: float = 0.8,
|
607
614
|
num_inference_steps: Optional[int] = 50,
|
608
615
|
guidance_scale: Optional[float] = 7.5,
|
@@ -610,8 +617,8 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
|
|
610
617
|
num_images_per_prompt: Optional[int] = 1,
|
611
618
|
eta: Optional[float] = 0.0,
|
612
619
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
613
|
-
prompt_embeds: Optional[torch.
|
614
|
-
negative_prompt_embeds: Optional[torch.
|
620
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
621
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
615
622
|
output_type: Optional[str] = "pil",
|
616
623
|
return_dict: bool = True,
|
617
624
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
@@ -626,10 +633,10 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
|
|
626
633
|
Args:
|
627
634
|
prompt (`str` or `List[str]`, *optional*):
|
628
635
|
The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
|
629
|
-
image (`torch.
|
636
|
+
image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
|
630
637
|
`Image` or tensor representing an image batch to be used as the starting point. Can accept image
|
631
638
|
latents as `image` only if `depth_map` is not `None`.
|
632
|
-
depth_map (`torch.
|
639
|
+
depth_map (`torch.Tensor`, *optional*):
|
633
640
|
Depth prediction to be used as additional conditioning for the image generation process. If not
|
634
641
|
defined, it automatically predicts the depth with `self.depth_estimator`.
|
635
642
|
strength (`float`, *optional*, defaults to 0.8):
|
@@ -655,10 +662,10 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
|
|
655
662
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
656
663
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
657
664
|
generation deterministic.
|
658
|
-
prompt_embeds (`torch.
|
665
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
659
666
|
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
660
667
|
provided, text embeddings are generated from the `prompt` input argument.
|
661
|
-
negative_prompt_embeds (`torch.
|
668
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
662
669
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
663
670
|
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
664
671
|
output_type (`str`, *optional*, defaults to `"pil"`):
|
@@ -700,8 +707,8 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
|
|
700
707
|
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
|
701
708
|
>>> init_image = Image.open(requests.get(url, stream=True).raw)
|
702
709
|
>>> prompt = "two tigers"
|
703
|
-
>>>
|
704
|
-
>>> image = pipe(prompt=prompt, image=init_image, negative_prompt=
|
710
|
+
>>> n_prompt = "bad, deformed, ugly, bad anatomy"
|
711
|
+
>>> image = pipe(prompt=prompt, image=init_image, negative_prompt=n_prompt, strength=0.7).images[0]
|
705
712
|
```
|
706
713
|
|
707
714
|
Returns:
|
@@ -207,7 +207,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
|
|
207
207
|
and not isinstance(image, list)
|
208
208
|
):
|
209
209
|
raise ValueError(
|
210
|
-
"`image` has to be of type `torch.
|
210
|
+
"`image` has to be of type `torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
|
211
211
|
f" {type(image)}"
|
212
212
|
)
|
213
213
|
|
@@ -224,7 +224,12 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
|
|
224
224
|
|
225
225
|
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
226
226
|
def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
|
227
|
-
shape = (
|
227
|
+
shape = (
|
228
|
+
batch_size,
|
229
|
+
num_channels_latents,
|
230
|
+
int(height) // self.vae_scale_factor,
|
231
|
+
int(width) // self.vae_scale_factor,
|
232
|
+
)
|
228
233
|
if isinstance(generator, list) and len(generator) != batch_size:
|
229
234
|
raise ValueError(
|
230
235
|
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
@@ -243,7 +248,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
|
|
243
248
|
@torch.no_grad()
|
244
249
|
def __call__(
|
245
250
|
self,
|
246
|
-
image: Union[PIL.Image.Image, List[PIL.Image.Image], torch.
|
251
|
+
image: Union[PIL.Image.Image, List[PIL.Image.Image], torch.Tensor],
|
247
252
|
height: Optional[int] = None,
|
248
253
|
width: Optional[int] = None,
|
249
254
|
num_inference_steps: int = 50,
|
@@ -251,17 +256,17 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
|
|
251
256
|
num_images_per_prompt: Optional[int] = 1,
|
252
257
|
eta: float = 0.0,
|
253
258
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
254
|
-
latents: Optional[torch.
|
259
|
+
latents: Optional[torch.Tensor] = None,
|
255
260
|
output_type: Optional[str] = "pil",
|
256
261
|
return_dict: bool = True,
|
257
|
-
callback: Optional[Callable[[int, int, torch.
|
262
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
258
263
|
callback_steps: int = 1,
|
259
264
|
):
|
260
265
|
r"""
|
261
266
|
The call function to the pipeline for generation.
|
262
267
|
|
263
268
|
Args:
|
264
|
-
image (`PIL.Image.Image` or `List[PIL.Image.Image]` or `torch.
|
269
|
+
image (`PIL.Image.Image` or `List[PIL.Image.Image]` or `torch.Tensor`):
|
265
270
|
Image or images to guide image generation. If you provide a tensor, it needs to be compatible with
|
266
271
|
[`CLIPImageProcessor`](https://huggingface.co/lambdalabs/sd-image-variations-diffusers/blob/main/feature_extractor/preprocessor_config.json).
|
267
272
|
height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
|
@@ -282,7 +287,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
|
|
282
287
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
283
288
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
284
289
|
generation deterministic.
|
285
|
-
latents (`torch.
|
290
|
+
latents (`torch.Tensor`, *optional*):
|
286
291
|
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
287
292
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
288
293
|
tensor is generated by sampling using the supplied random `generator`.
|
@@ -293,7 +298,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
|
|
293
298
|
plain tuple.
|
294
299
|
callback (`Callable`, *optional*):
|
295
300
|
A function that calls every `callback_steps` steps during inference. The function is called with the
|
296
|
-
following arguments: `callback(step: int, timestep: int, latents: torch.
|
301
|
+
following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
297
302
|
callback_steps (`int`, *optional*, defaults to 1):
|
298
303
|
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
299
304
|
every step.
|