diffusers 0.27.2__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +18 -1
- diffusers/callbacks.py +156 -0
- diffusers/commands/env.py +110 -6
- diffusers/configuration_utils.py +16 -11
- diffusers/dependency_versions_table.py +2 -1
- diffusers/image_processor.py +158 -45
- diffusers/loaders/__init__.py +2 -5
- diffusers/loaders/autoencoder.py +4 -4
- diffusers/loaders/controlnet.py +4 -4
- diffusers/loaders/ip_adapter.py +80 -22
- diffusers/loaders/lora.py +134 -20
- diffusers/loaders/lora_conversion_utils.py +46 -43
- diffusers/loaders/peft.py +4 -3
- diffusers/loaders/single_file.py +401 -170
- diffusers/loaders/single_file_model.py +290 -0
- diffusers/loaders/single_file_utils.py +616 -672
- diffusers/loaders/textual_inversion.py +41 -20
- diffusers/loaders/unet.py +168 -115
- diffusers/loaders/unet_loader_utils.py +163 -0
- diffusers/models/__init__.py +2 -0
- diffusers/models/activations.py +11 -3
- diffusers/models/attention.py +10 -11
- diffusers/models/attention_processor.py +367 -148
- diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
- diffusers/models/autoencoders/autoencoder_kl.py +18 -19
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
- diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
- diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
- diffusers/models/autoencoders/vae.py +23 -24
- diffusers/models/controlnet.py +12 -9
- diffusers/models/controlnet_flax.py +4 -4
- diffusers/models/controlnet_xs.py +1915 -0
- diffusers/models/downsampling.py +17 -18
- diffusers/models/embeddings.py +147 -24
- diffusers/models/model_loading_utils.py +149 -0
- diffusers/models/modeling_flax_pytorch_utils.py +2 -1
- diffusers/models/modeling_flax_utils.py +4 -4
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +118 -98
- diffusers/models/resnet.py +18 -23
- diffusers/models/transformer_temporal.py +3 -3
- diffusers/models/transformers/dual_transformer_2d.py +4 -4
- diffusers/models/transformers/prior_transformer.py +7 -7
- diffusers/models/transformers/t5_film_transformer.py +17 -19
- diffusers/models/transformers/transformer_2d.py +272 -156
- diffusers/models/transformers/transformer_temporal.py +10 -10
- diffusers/models/unets/unet_1d.py +5 -5
- diffusers/models/unets/unet_1d_blocks.py +29 -29
- diffusers/models/unets/unet_2d.py +6 -6
- diffusers/models/unets/unet_2d_blocks.py +137 -128
- diffusers/models/unets/unet_2d_condition.py +19 -15
- diffusers/models/unets/unet_2d_condition_flax.py +6 -5
- diffusers/models/unets/unet_3d_blocks.py +79 -77
- diffusers/models/unets/unet_3d_condition.py +13 -9
- diffusers/models/unets/unet_i2vgen_xl.py +14 -13
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +114 -14
- diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
- diffusers/models/unets/unet_stable_cascade.py +16 -13
- diffusers/models/upsampling.py +17 -20
- diffusers/models/vq_model.py +16 -15
- diffusers/pipelines/__init__.py +25 -3
- diffusers/pipelines/amused/pipeline_amused.py +12 -12
- diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
- diffusers/pipelines/animatediff/pipeline_output.py +3 -2
- diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
- diffusers/pipelines/auto_pipeline.py +21 -17
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
- diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
- diffusers/pipelines/controlnet_xs/__init__.py +68 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
- diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -18
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +3 -0
- diffusers/pipelines/free_init_utils.py +39 -38
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
- diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
- diffusers/pipelines/marigold/__init__.py +50 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
- diffusers/pipelines/pia/pipeline_pia.py +39 -125
- diffusers/pipelines/pipeline_flax_utils.py +4 -4
- diffusers/pipelines/pipeline_loading_utils.py +268 -23
- diffusers/pipelines/pipeline_utils.py +266 -37
- diffusers/pipelines/pixart_alpha/__init__.py +8 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
- diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
- diffusers/pipelines/shap_e/renderer.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +18 -18
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
- diffusers/pipelines/stable_diffusion/__init__.py +0 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
- diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -39
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
- diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
- diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
- diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
- diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
- diffusers/schedulers/__init__.py +2 -2
- diffusers/schedulers/deprecated/__init__.py +1 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
- diffusers/schedulers/scheduling_amused.py +5 -5
- diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
- diffusers/schedulers/scheduling_consistency_models.py +20 -26
- diffusers/schedulers/scheduling_ddim.py +22 -24
- diffusers/schedulers/scheduling_ddim_flax.py +2 -1
- diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
- diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
- diffusers/schedulers/scheduling_ddpm.py +20 -22
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
- diffusers/schedulers/scheduling_deis_multistep.py +42 -42
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +103 -77
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
- diffusers/schedulers/scheduling_dpmsolver_sde.py +23 -23
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +86 -65
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +75 -54
- diffusers/schedulers/scheduling_edm_euler.py +50 -31
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +23 -29
- diffusers/schedulers/scheduling_euler_discrete.py +160 -68
- diffusers/schedulers/scheduling_heun_discrete.py +57 -39
- diffusers/schedulers/scheduling_ipndm.py +8 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +19 -19
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +19 -19
- diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
- diffusers/schedulers/scheduling_lcm.py +21 -23
- diffusers/schedulers/scheduling_lms_discrete.py +24 -26
- diffusers/schedulers/scheduling_pndm.py +20 -20
- diffusers/schedulers/scheduling_repaint.py +20 -20
- diffusers/schedulers/scheduling_sasolver.py +55 -54
- diffusers/schedulers/scheduling_sde_ve.py +19 -19
- diffusers/schedulers/scheduling_tcd.py +39 -30
- diffusers/schedulers/scheduling_unclip.py +15 -15
- diffusers/schedulers/scheduling_unipc_multistep.py +111 -41
- diffusers/schedulers/scheduling_utils.py +14 -5
- diffusers/schedulers/scheduling_utils_flax.py +3 -3
- diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
- diffusers/training_utils.py +56 -1
- diffusers/utils/__init__.py +7 -0
- diffusers/utils/doc_utils.py +1 -0
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
- diffusers/utils/dynamic_modules_utils.py +24 -11
- diffusers/utils/hub_utils.py +3 -2
- diffusers/utils/import_utils.py +91 -0
- diffusers/utils/loading_utils.py +2 -2
- diffusers/utils/logging.py +1 -1
- diffusers/utils/peft_utils.py +32 -5
- diffusers/utils/state_dict_utils.py +11 -2
- diffusers/utils/testing_utils.py +71 -6
- diffusers/utils/torch_utils.py +1 -0
- diffusers/video_processor.py +113 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/METADATA +47 -47
- diffusers-0.28.0.dist-info/RECORD +414 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/WHEEL +1 -1
- diffusers-0.27.2.dist-info/RECORD +0 -399
- {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,6 @@ from ...models import UNet2DConditionModel
|
|
12
12
|
from ...schedulers import DDPMScheduler
|
13
13
|
from ...utils import (
|
14
14
|
BACKENDS_MAPPING,
|
15
|
-
is_accelerate_available,
|
16
15
|
is_bs4_available,
|
17
16
|
is_ftfy_available,
|
18
17
|
logging,
|
@@ -115,6 +114,7 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
115
114
|
|
116
115
|
_optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
|
117
116
|
model_cpu_offload_seq = "text_encoder->unet"
|
117
|
+
_exclude_from_cpu_offload = ["watermarker"]
|
118
118
|
|
119
119
|
def __init__(
|
120
120
|
self,
|
@@ -156,20 +156,6 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
156
156
|
)
|
157
157
|
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
158
158
|
|
159
|
-
def remove_all_hooks(self):
|
160
|
-
if is_accelerate_available():
|
161
|
-
from accelerate.hooks import remove_hook_from_module
|
162
|
-
else:
|
163
|
-
raise ImportError("Please install accelerate via `pip install accelerate`")
|
164
|
-
|
165
|
-
for model in [self.text_encoder, self.unet, self.safety_checker]:
|
166
|
-
if model is not None:
|
167
|
-
remove_hook_from_module(model, recurse=True)
|
168
|
-
|
169
|
-
self.unet_offload_hook = None
|
170
|
-
self.text_encoder_offload_hook = None
|
171
|
-
self.final_offload_hook = None
|
172
|
-
|
173
159
|
@torch.no_grad()
|
174
160
|
def encode_prompt(
|
175
161
|
self,
|
@@ -178,8 +164,8 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
178
164
|
num_images_per_prompt: int = 1,
|
179
165
|
device: Optional[torch.device] = None,
|
180
166
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
181
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
182
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
167
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
168
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
183
169
|
clean_caption: bool = False,
|
184
170
|
):
|
185
171
|
r"""
|
@@ -198,10 +184,10 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
198
184
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
199
185
|
`negative_prompt_embeds`. instead. If not defined, one has to pass `negative_prompt_embeds`. instead.
|
200
186
|
Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
|
201
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
187
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
202
188
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
203
189
|
provided, text embeddings will be generated from `prompt` input argument.
|
204
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
190
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
205
191
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
206
192
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
207
193
|
argument.
|
@@ -335,9 +321,6 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
335
321
|
nsfw_detected = None
|
336
322
|
watermark_detected = None
|
337
323
|
|
338
|
-
if hasattr(self, "unet_offload_hook") and self.unet_offload_hook is not None:
|
339
|
-
self.unet_offload_hook.offload()
|
340
|
-
|
341
324
|
return image, nsfw_detected, watermark_detected
|
342
325
|
|
343
326
|
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
|
@@ -566,11 +549,11 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
566
549
|
width: Optional[int] = None,
|
567
550
|
eta: float = 0.0,
|
568
551
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
569
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
570
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
552
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
553
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
571
554
|
output_type: Optional[str] = "pil",
|
572
555
|
return_dict: bool = True,
|
573
|
-
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
556
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
574
557
|
callback_steps: int = 1,
|
575
558
|
clean_caption: bool = True,
|
576
559
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
@@ -610,10 +593,10 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
610
593
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
611
594
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
612
595
|
to make generation deterministic.
|
613
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
596
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
614
597
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
615
598
|
provided, text embeddings will be generated from `prompt` input argument.
|
616
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
599
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
617
600
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
618
601
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
619
602
|
argument.
|
@@ -624,7 +607,7 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
624
607
|
Whether or not to return a [`~pipelines.stable_diffusion.IFPipelineOutput`] instead of a plain tuple.
|
625
608
|
callback (`Callable`, *optional*):
|
626
609
|
A function that will be called every `callback_steps` steps during inference. The function will be
|
627
|
-
called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
610
|
+
called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
628
611
|
callback_steps (`int`, *optional*, defaults to 1):
|
629
612
|
The frequency at which the `callback` function will be called. If not specified, the callback will be
|
630
613
|
called at every step.
|
@@ -691,6 +674,9 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
691
674
|
self.scheduler.set_timesteps(num_inference_steps, device=device)
|
692
675
|
timesteps = self.scheduler.timesteps
|
693
676
|
|
677
|
+
if hasattr(self.scheduler, "set_begin_index"):
|
678
|
+
self.scheduler.set_begin_index(0)
|
679
|
+
|
694
680
|
# 5. Prepare intermediate images
|
695
681
|
intermediate_images = self.prepare_intermediate_images(
|
696
682
|
batch_size * num_images_per_prompt,
|
@@ -15,7 +15,6 @@ from ...schedulers import DDPMScheduler
|
|
15
15
|
from ...utils import (
|
16
16
|
BACKENDS_MAPPING,
|
17
17
|
PIL_INTERPOLATION,
|
18
|
-
is_accelerate_available,
|
19
18
|
is_bs4_available,
|
20
19
|
is_ftfy_available,
|
21
20
|
logging,
|
@@ -139,6 +138,7 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
139
138
|
|
140
139
|
_optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
|
141
140
|
model_cpu_offload_seq = "text_encoder->unet"
|
141
|
+
_exclude_from_cpu_offload = ["watermarker"]
|
142
142
|
|
143
143
|
def __init__(
|
144
144
|
self,
|
@@ -180,21 +180,6 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
180
180
|
)
|
181
181
|
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
182
182
|
|
183
|
-
# Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.remove_all_hooks
|
184
|
-
def remove_all_hooks(self):
|
185
|
-
if is_accelerate_available():
|
186
|
-
from accelerate.hooks import remove_hook_from_module
|
187
|
-
else:
|
188
|
-
raise ImportError("Please install accelerate via `pip install accelerate`")
|
189
|
-
|
190
|
-
for model in [self.text_encoder, self.unet, self.safety_checker]:
|
191
|
-
if model is not None:
|
192
|
-
remove_hook_from_module(model, recurse=True)
|
193
|
-
|
194
|
-
self.unet_offload_hook = None
|
195
|
-
self.text_encoder_offload_hook = None
|
196
|
-
self.final_offload_hook = None
|
197
|
-
|
198
183
|
@torch.no_grad()
|
199
184
|
def encode_prompt(
|
200
185
|
self,
|
@@ -203,8 +188,8 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
203
188
|
num_images_per_prompt: int = 1,
|
204
189
|
device: Optional[torch.device] = None,
|
205
190
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
206
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
207
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
191
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
192
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
208
193
|
clean_caption: bool = False,
|
209
194
|
):
|
210
195
|
r"""
|
@@ -223,10 +208,10 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
223
208
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
224
209
|
`negative_prompt_embeds`. instead. If not defined, one has to pass `negative_prompt_embeds`. instead.
|
225
210
|
Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
|
226
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
211
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
227
212
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
228
213
|
provided, text embeddings will be generated from `prompt` input argument.
|
229
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
214
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
230
215
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
231
216
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
232
217
|
argument.
|
@@ -361,9 +346,6 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
361
346
|
nsfw_detected = None
|
362
347
|
watermark_detected = None
|
363
348
|
|
364
|
-
if hasattr(self, "unet_offload_hook") and self.unet_offload_hook is not None:
|
365
|
-
self.unet_offload_hook.offload()
|
366
|
-
|
367
349
|
return image, nsfw_detected, watermark_detected
|
368
350
|
|
369
351
|
# Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.prepare_extra_step_kwargs
|
@@ -439,7 +421,7 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
439
421
|
and not isinstance(check_image_type, np.ndarray)
|
440
422
|
):
|
441
423
|
raise ValueError(
|
442
|
-
"`image` has to be of type `torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
|
424
|
+
"`image` has to be of type `torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
|
443
425
|
f" {type(check_image_type)}"
|
444
426
|
)
|
445
427
|
|
@@ -613,7 +595,7 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
613
595
|
|
614
596
|
for image_ in image:
|
615
597
|
image_ = image_.convert("RGB")
|
616
|
-
image_ = resize(image_, self.unet.sample_size)
|
598
|
+
image_ = resize(image_, self.unet.config.sample_size)
|
617
599
|
image_ = np.array(image_)
|
618
600
|
image_ = image_.astype(np.float32)
|
619
601
|
image_ = image_ / 127.5 - 1
|
@@ -633,12 +615,15 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
633
615
|
|
634
616
|
return image
|
635
617
|
|
618
|
+
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.get_timesteps
|
636
619
|
def get_timesteps(self, num_inference_steps, strength):
|
637
620
|
# get the original timestep using init_timestep
|
638
621
|
init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
|
639
622
|
|
640
623
|
t_start = max(num_inference_steps - init_timestep, 0)
|
641
|
-
timesteps = self.scheduler.timesteps[t_start:]
|
624
|
+
timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
|
625
|
+
if hasattr(self.scheduler, "set_begin_index"):
|
626
|
+
self.scheduler.set_begin_index(t_start * self.scheduler.order)
|
642
627
|
|
643
628
|
return timesteps, num_inference_steps - t_start
|
644
629
|
|
@@ -680,11 +665,11 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
680
665
|
num_images_per_prompt: Optional[int] = 1,
|
681
666
|
eta: float = 0.0,
|
682
667
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
683
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
684
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
668
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
669
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
685
670
|
output_type: Optional[str] = "pil",
|
686
671
|
return_dict: bool = True,
|
687
|
-
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
672
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
688
673
|
callback_steps: int = 1,
|
689
674
|
clean_caption: bool = True,
|
690
675
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
@@ -696,7 +681,7 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
696
681
|
prompt (`str` or `List[str]`, *optional*):
|
697
682
|
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
|
698
683
|
instead.
|
699
|
-
image (`torch.FloatTensor` or `PIL.Image.Image`):
|
684
|
+
image (`torch.Tensor` or `PIL.Image.Image`):
|
700
685
|
`Image`, or tensor representing an image batch, that will be used as the starting point for the
|
701
686
|
process.
|
702
687
|
strength (`float`, *optional*, defaults to 0.7):
|
@@ -729,10 +714,10 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
729
714
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
730
715
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
731
716
|
to make generation deterministic.
|
732
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
717
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
733
718
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
734
719
|
provided, text embeddings will be generated from `prompt` input argument.
|
735
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
720
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
736
721
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
737
722
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
738
723
|
argument.
|
@@ -743,7 +728,7 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
743
728
|
Whether or not to return a [`~pipelines.stable_diffusion.IFPipelineOutput`] instead of a plain tuple.
|
744
729
|
callback (`Callable`, *optional*):
|
745
730
|
A function that will be called every `callback_steps` steps during inference. The function will be
|
746
|
-
called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
731
|
+
called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
747
732
|
callback_steps (`int`, *optional*, defaults to 1):
|
748
733
|
The frequency at which the `callback` function will be called. If not specified, the callback will be
|
749
734
|
called at every step.
|
@@ -16,7 +16,6 @@ from ...schedulers import DDPMScheduler
|
|
16
16
|
from ...utils import (
|
17
17
|
BACKENDS_MAPPING,
|
18
18
|
PIL_INTERPOLATION,
|
19
|
-
is_accelerate_available,
|
20
19
|
is_bs4_available,
|
21
20
|
is_ftfy_available,
|
22
21
|
logging,
|
@@ -143,6 +142,7 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
143
142
|
|
144
143
|
_optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor"]
|
145
144
|
model_cpu_offload_seq = "text_encoder->unet"
|
145
|
+
_exclude_from_cpu_offload = ["watermarker"]
|
146
146
|
|
147
147
|
def __init__(
|
148
148
|
self,
|
@@ -191,21 +191,6 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
191
191
|
)
|
192
192
|
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
193
193
|
|
194
|
-
# Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.remove_all_hooks
|
195
|
-
def remove_all_hooks(self):
|
196
|
-
if is_accelerate_available():
|
197
|
-
from accelerate.hooks import remove_hook_from_module
|
198
|
-
else:
|
199
|
-
raise ImportError("Please install accelerate via `pip install accelerate`")
|
200
|
-
|
201
|
-
for model in [self.text_encoder, self.unet, self.safety_checker]:
|
202
|
-
if model is not None:
|
203
|
-
remove_hook_from_module(model, recurse=True)
|
204
|
-
|
205
|
-
self.unet_offload_hook = None
|
206
|
-
self.text_encoder_offload_hook = None
|
207
|
-
self.final_offload_hook = None
|
208
|
-
|
209
194
|
# Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline._text_preprocessing
|
210
195
|
def _text_preprocessing(self, text, clean_caption=False):
|
211
196
|
if clean_caption and not is_bs4_available():
|
@@ -355,8 +340,8 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
355
340
|
num_images_per_prompt: int = 1,
|
356
341
|
device: Optional[torch.device] = None,
|
357
342
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
358
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
359
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
343
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
344
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
360
345
|
clean_caption: bool = False,
|
361
346
|
):
|
362
347
|
r"""
|
@@ -375,10 +360,10 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
375
360
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
376
361
|
`negative_prompt_embeds`. instead. If not defined, one has to pass `negative_prompt_embeds`. instead.
|
377
362
|
Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
|
378
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
363
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
379
364
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
380
365
|
provided, text embeddings will be generated from `prompt` input argument.
|
381
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
366
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
382
367
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
383
368
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
384
369
|
argument.
|
@@ -513,9 +498,6 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
513
498
|
nsfw_detected = None
|
514
499
|
watermark_detected = None
|
515
500
|
|
516
|
-
if hasattr(self, "unet_offload_hook") and self.unet_offload_hook is not None:
|
517
|
-
self.unet_offload_hook.offload()
|
518
|
-
|
519
501
|
return image, nsfw_detected, watermark_detected
|
520
502
|
|
521
503
|
# Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.prepare_extra_step_kwargs
|
@@ -594,7 +576,7 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
594
576
|
and not isinstance(check_image_type, np.ndarray)
|
595
577
|
):
|
596
578
|
raise ValueError(
|
597
|
-
"`image` has to be of type `torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
|
579
|
+
"`image` has to be of type `torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
|
598
580
|
f" {type(check_image_type)}"
|
599
581
|
)
|
600
582
|
|
@@ -625,7 +607,7 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
625
607
|
and not isinstance(check_image_type, np.ndarray)
|
626
608
|
):
|
627
609
|
raise ValueError(
|
628
|
-
"`original_image` has to be of type `torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
|
610
|
+
"`original_image` has to be of type `torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
|
629
611
|
f" {type(check_image_type)}"
|
630
612
|
)
|
631
613
|
|
@@ -662,7 +644,7 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
662
644
|
|
663
645
|
for image_ in image:
|
664
646
|
image_ = image_.convert("RGB")
|
665
|
-
image_ = resize(image_, self.unet.sample_size)
|
647
|
+
image_ = resize(image_, self.unet.config.sample_size)
|
666
648
|
image_ = np.array(image_)
|
667
649
|
image_ = image_.astype(np.float32)
|
668
650
|
image_ = image_ / 127.5 - 1
|
@@ -714,13 +696,15 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
714
696
|
|
715
697
|
return image
|
716
698
|
|
717
|
-
# Copied from diffusers.pipelines.deepfloyd_if.pipeline_if_img2img.IFImg2ImgPipeline.get_timesteps
|
699
|
+
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.get_timesteps
|
718
700
|
def get_timesteps(self, num_inference_steps, strength):
|
719
701
|
# get the original timestep using init_timestep
|
720
702
|
init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
|
721
703
|
|
722
704
|
t_start = max(num_inference_steps - init_timestep, 0)
|
723
|
-
timesteps = self.scheduler.timesteps[t_start:]
|
705
|
+
timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
|
706
|
+
if hasattr(self.scheduler, "set_begin_index"):
|
707
|
+
self.scheduler.set_begin_index(t_start * self.scheduler.order)
|
724
708
|
|
725
709
|
return timesteps, num_inference_steps - t_start
|
726
710
|
|
@@ -751,7 +735,7 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
751
735
|
@replace_example_docstring(EXAMPLE_DOC_STRING)
|
752
736
|
def __call__(
|
753
737
|
self,
|
754
|
-
image: Union[PIL.Image.Image, np.ndarray, torch.FloatTensor],
|
738
|
+
image: Union[PIL.Image.Image, np.ndarray, torch.Tensor],
|
755
739
|
original_image: Union[
|
756
740
|
PIL.Image.Image, torch.Tensor, np.ndarray, List[PIL.Image.Image], List[torch.Tensor], List[np.ndarray]
|
757
741
|
] = None,
|
@@ -764,11 +748,11 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
764
748
|
num_images_per_prompt: Optional[int] = 1,
|
765
749
|
eta: float = 0.0,
|
766
750
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
767
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
768
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
751
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
752
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
769
753
|
output_type: Optional[str] = "pil",
|
770
754
|
return_dict: bool = True,
|
771
|
-
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
755
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
772
756
|
callback_steps: int = 1,
|
773
757
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
774
758
|
noise_level: int = 250,
|
@@ -778,10 +762,10 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
778
762
|
Function invoked when calling the pipeline for generation.
|
779
763
|
|
780
764
|
Args:
|
781
|
-
image (`torch.FloatTensor` or `PIL.Image.Image`):
|
765
|
+
image (`torch.Tensor` or `PIL.Image.Image`):
|
782
766
|
`Image`, or tensor representing an image batch, that will be used as the starting point for the
|
783
767
|
process.
|
784
|
-
original_image (`torch.FloatTensor` or `PIL.Image.Image`):
|
768
|
+
original_image (`torch.Tensor` or `PIL.Image.Image`):
|
785
769
|
The original image that `image` was varied from.
|
786
770
|
strength (`float`, *optional*, defaults to 0.8):
|
787
771
|
Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image`
|
@@ -816,10 +800,10 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
816
800
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
817
801
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
818
802
|
to make generation deterministic.
|
819
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
803
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
820
804
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
821
805
|
provided, text embeddings will be generated from `prompt` input argument.
|
822
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
806
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
823
807
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
824
808
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
825
809
|
argument.
|
@@ -830,7 +814,7 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
830
814
|
Whether or not to return a [`~pipelines.stable_diffusion.IFPipelineOutput`] instead of a plain tuple.
|
831
815
|
callback (`Callable`, *optional*):
|
832
816
|
A function that will be called every `callback_steps` steps during inference. The function will be
|
833
|
-
called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
817
|
+
called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
834
818
|
callback_steps (`int`, *optional*, defaults to 1):
|
835
819
|
The frequency at which the `callback` function will be called. If not specified, the callback will be
|
836
820
|
called at every step.
|
@@ -1010,8 +994,6 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
1010
994
|
nsfw_detected = None
|
1011
995
|
watermark_detected = None
|
1012
996
|
|
1013
|
-
if hasattr(self, "unet_offload_hook") and self.unet_offload_hook is not None:
|
1014
|
-
self.unet_offload_hook.offload()
|
1015
997
|
else:
|
1016
998
|
# 10. Post-processing
|
1017
999
|
image = (image / 2 + 0.5).clamp(0, 1)
|
@@ -15,7 +15,6 @@ from ...schedulers import DDPMScheduler
|
|
15
15
|
from ...utils import (
|
16
16
|
BACKENDS_MAPPING,
|
17
17
|
PIL_INTERPOLATION,
|
18
|
-
is_accelerate_available,
|
19
18
|
is_bs4_available,
|
20
19
|
is_ftfy_available,
|
21
20
|
logging,
|
@@ -142,6 +141,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
142
141
|
|
143
142
|
_optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
|
144
143
|
model_cpu_offload_seq = "text_encoder->unet"
|
144
|
+
_exclude_from_cpu_offload = ["watermarker"]
|
145
145
|
|
146
146
|
def __init__(
|
147
147
|
self,
|
@@ -183,21 +183,6 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
183
183
|
)
|
184
184
|
self.register_to_config(requires_safety_checker=requires_safety_checker)
|
185
185
|
|
186
|
-
# Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.remove_all_hooks
|
187
|
-
def remove_all_hooks(self):
|
188
|
-
if is_accelerate_available():
|
189
|
-
from accelerate.hooks import remove_hook_from_module
|
190
|
-
else:
|
191
|
-
raise ImportError("Please install accelerate via `pip install accelerate`")
|
192
|
-
|
193
|
-
for model in [self.text_encoder, self.unet, self.safety_checker]:
|
194
|
-
if model is not None:
|
195
|
-
remove_hook_from_module(model, recurse=True)
|
196
|
-
|
197
|
-
self.unet_offload_hook = None
|
198
|
-
self.text_encoder_offload_hook = None
|
199
|
-
self.final_offload_hook = None
|
200
|
-
|
201
186
|
@torch.no_grad()
|
202
187
|
# Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.encode_prompt
|
203
188
|
def encode_prompt(
|
@@ -207,8 +192,8 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
207
192
|
num_images_per_prompt: int = 1,
|
208
193
|
device: Optional[torch.device] = None,
|
209
194
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
210
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
211
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
195
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
196
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
212
197
|
clean_caption: bool = False,
|
213
198
|
):
|
214
199
|
r"""
|
@@ -227,10 +212,10 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
227
212
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
228
213
|
`negative_prompt_embeds`. instead. If not defined, one has to pass `negative_prompt_embeds`. instead.
|
229
214
|
Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
|
230
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
215
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
231
216
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
232
217
|
provided, text embeddings will be generated from `prompt` input argument.
|
233
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
218
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
234
219
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
235
220
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
236
221
|
argument.
|
@@ -365,9 +350,6 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
365
350
|
nsfw_detected = None
|
366
351
|
watermark_detected = None
|
367
352
|
|
368
|
-
if hasattr(self, "unet_offload_hook") and self.unet_offload_hook is not None:
|
369
|
-
self.unet_offload_hook.offload()
|
370
|
-
|
371
353
|
return image, nsfw_detected, watermark_detected
|
372
354
|
|
373
355
|
# Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.prepare_extra_step_kwargs
|
@@ -446,7 +428,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
446
428
|
and not isinstance(check_image_type, np.ndarray)
|
447
429
|
):
|
448
430
|
raise ValueError(
|
449
|
-
"`image` has to be of type `torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
|
431
|
+
"`image` has to be of type `torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
|
450
432
|
f" {type(check_image_type)}"
|
451
433
|
)
|
452
434
|
|
@@ -477,7 +459,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
477
459
|
and not isinstance(check_image_type, np.ndarray)
|
478
460
|
):
|
479
461
|
raise ValueError(
|
480
|
-
"`mask_image` has to be of type `torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
|
462
|
+
"`mask_image` has to be of type `torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
|
481
463
|
f" {type(check_image_type)}"
|
482
464
|
)
|
483
465
|
|
@@ -654,7 +636,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
654
636
|
|
655
637
|
for image_ in image:
|
656
638
|
image_ = image_.convert("RGB")
|
657
|
-
image_ = resize(image_, self.unet.sample_size)
|
639
|
+
image_ = resize(image_, self.unet.config.sample_size)
|
658
640
|
image_ = np.array(image_)
|
659
641
|
image_ = image_.astype(np.float32)
|
660
642
|
image_ = image_ / 127.5 - 1
|
@@ -701,7 +683,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
701
683
|
|
702
684
|
for mask_image_ in mask_image:
|
703
685
|
mask_image_ = mask_image_.convert("L")
|
704
|
-
mask_image_ = resize(mask_image_, self.unet.sample_size)
|
686
|
+
mask_image_ = resize(mask_image_, self.unet.config.sample_size)
|
705
687
|
mask_image_ = np.array(mask_image_)
|
706
688
|
mask_image_ = mask_image_[None, None, :]
|
707
689
|
new_mask_image.append(mask_image_)
|
@@ -723,13 +705,15 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
723
705
|
|
724
706
|
return mask_image
|
725
707
|
|
726
|
-
# Copied from diffusers.pipelines.deepfloyd_if.pipeline_if_img2img.IFImg2ImgPipeline.get_timesteps
|
708
|
+
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.get_timesteps
|
727
709
|
def get_timesteps(self, num_inference_steps, strength):
|
728
710
|
# get the original timestep using init_timestep
|
729
711
|
init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
|
730
712
|
|
731
713
|
t_start = max(num_inference_steps - init_timestep, 0)
|
732
|
-
timesteps = self.scheduler.timesteps[t_start:]
|
714
|
+
timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
|
715
|
+
if hasattr(self.scheduler, "set_begin_index"):
|
716
|
+
self.scheduler.set_begin_index(t_start * self.scheduler.order)
|
733
717
|
|
734
718
|
return timesteps, num_inference_steps - t_start
|
735
719
|
|
@@ -776,11 +760,11 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
776
760
|
num_images_per_prompt: Optional[int] = 1,
|
777
761
|
eta: float = 0.0,
|
778
762
|
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
779
|
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
780
|
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
763
|
+
prompt_embeds: Optional[torch.Tensor] = None,
|
764
|
+
negative_prompt_embeds: Optional[torch.Tensor] = None,
|
781
765
|
output_type: Optional[str] = "pil",
|
782
766
|
return_dict: bool = True,
|
783
|
-
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
767
|
+
callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
|
784
768
|
callback_steps: int = 1,
|
785
769
|
clean_caption: bool = True,
|
786
770
|
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
@@ -792,7 +776,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
792
776
|
prompt (`str` or `List[str]`, *optional*):
|
793
777
|
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
|
794
778
|
instead.
|
795
|
-
image (`torch.FloatTensor` or `PIL.Image.Image`):
|
779
|
+
image (`torch.Tensor` or `PIL.Image.Image`):
|
796
780
|
`Image`, or tensor representing an image batch, that will be used as the starting point for the
|
797
781
|
process.
|
798
782
|
mask_image (`PIL.Image.Image`):
|
@@ -830,10 +814,10 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
830
814
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
831
815
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
832
816
|
to make generation deterministic.
|
833
|
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
817
|
+
prompt_embeds (`torch.Tensor`, *optional*):
|
834
818
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
835
819
|
provided, text embeddings will be generated from `prompt` input argument.
|
836
|
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
820
|
+
negative_prompt_embeds (`torch.Tensor`, *optional*):
|
837
821
|
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
838
822
|
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
839
823
|
argument.
|
@@ -844,7 +828,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
|
|
844
828
|
Whether or not to return a [`~pipelines.stable_diffusion.IFPipelineOutput`] instead of a plain tuple.
|
845
829
|
callback (`Callable`, *optional*):
|
846
830
|
A function that will be called every `callback_steps` steps during inference. The function will be
|
847
|
-
called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
831
|
+
called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
|
848
832
|
callback_steps (`int`, *optional*, defaults to 1):
|
849
833
|
The frequency at which the `callback` function will be called. If not specified, the callback will be
|
850
834
|
called at every step.
|