diffusers 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl
- diffusers/__init__.py +3 -1
- diffusers/commands/fp16_safetensors.py +2 -7
- diffusers/configuration_utils.py +23 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/loaders.py +62 -64
- diffusers/models/__init__.py +1 -0
- diffusers/models/activations.py +2 -0
- diffusers/models/attention.py +45 -1
- diffusers/models/autoencoder_tiny.py +193 -0
- diffusers/models/controlnet.py +1 -1
- diffusers/models/embeddings.py +56 -0
- diffusers/models/lora.py +0 -6
- diffusers/models/modeling_flax_utils.py +28 -2
- diffusers/models/modeling_utils.py +33 -16
- diffusers/models/transformer_2d.py +26 -9
- diffusers/models/unet_1d.py +2 -2
- diffusers/models/unet_2d_blocks.py +106 -56
- diffusers/models/unet_2d_condition.py +20 -5
- diffusers/models/vae.py +106 -1
- diffusers/pipelines/__init__.py +1 -0
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +10 -3
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -3
- diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
- diffusers/pipelines/auto_pipeline.py +33 -43
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -2
- diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +15 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +14 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +157 -10
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +43 -2
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +44 -2
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
- diffusers/pipelines/pipeline_flax_utils.py +41 -4
- diffusers/pipelines/pipeline_utils.py +60 -16
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/__init__.py +1 -0
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +81 -37
- diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +12 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +832 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +9 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +17 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +10 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +3 -5
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +75 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +76 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +1 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +10 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +10 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +11 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +1 -1
- diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +131 -28
- diffusers/schedulers/scheduling_consistency_models.py +70 -57
- diffusers/schedulers/scheduling_ddim.py +76 -71
- diffusers/schedulers/scheduling_ddim_inverse.py +76 -44
- diffusers/schedulers/scheduling_ddim_parallel.py +11 -8
- diffusers/schedulers/scheduling_ddpm.py +68 -67
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -15
- diffusers/schedulers/scheduling_deis_multistep.py +93 -85
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +118 -120
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +116 -109
- diffusers/schedulers/scheduling_dpmsolver_sde.py +57 -43
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +122 -121
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +54 -44
- diffusers/schedulers/scheduling_euler_discrete.py +63 -56
- diffusers/schedulers/scheduling_heun_discrete.py +57 -45
- diffusers/schedulers/scheduling_ipndm.py +27 -22
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +54 -41
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +52 -41
- diffusers/schedulers/scheduling_karras_ve.py +55 -45
- diffusers/schedulers/scheduling_lms_discrete.py +58 -52
- diffusers/schedulers/scheduling_pndm.py +77 -62
- diffusers/schedulers/scheduling_repaint.py +56 -38
- diffusers/schedulers/scheduling_sde_ve.py +62 -50
- diffusers/schedulers/scheduling_sde_vp.py +32 -11
- diffusers/schedulers/scheduling_unclip.py +3 -3
- diffusers/schedulers/scheduling_unipc_multistep.py +131 -91
- diffusers/schedulers/scheduling_utils.py +41 -35
- diffusers/schedulers/scheduling_utils_flax.py +8 -2
- diffusers/schedulers/scheduling_vq_diffusion.py +39 -68
- diffusers/utils/__init__.py +2 -2
- diffusers/utils/dummy_pt_objects.py +15 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
- diffusers/utils/hub_utils.py +105 -2
- diffusers/utils/import_utils.py +0 -4
- diffusers/utils/pil_utils.py +19 -0
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA +5 -7
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/RECORD +113 -112
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/WHEEL +1 -1
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/entry_points.txt +0 -1
- diffusers/models/cross_attention.py +0 -94
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/LICENSE +0 -0
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/top_level.txt +0 -0
diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py

@@ -58,8 +58,45 @@ logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
-        >>> #
-        >>>
+        >>> # !pip install opencv-python transformers accelerate
+        >>> from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL
+        >>> from diffusers.utils import load_image
+        >>> import numpy as np
+        >>> import torch
+
+        >>> import cv2
+        >>> from PIL import Image
+
+        >>> prompt = "aerial view, a futuristic research complex in a bright foggy jungle, hard lighting"
+        >>> negative_prompt = "low quality, bad quality, sketches"
+
+        >>> # download an image
+        >>> image = load_image(
+        ...     "https://hf.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/hf-logo.png"
+        ... )
+
+        >>> # initialize the models and pipeline
+        >>> controlnet_conditioning_scale = 0.5  # recommended for good generalization
+        >>> controlnet = ControlNetModel.from_pretrained(
+        ...     "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16
+        ... )
+        >>> vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
+        >>> pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+        ...     "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, vae=vae, torch_dtype=torch.float16
+        ... )
+        >>> pipe.enable_model_cpu_offload()
+
+        >>> # get canny image
+        >>> image = np.array(image)
+        >>> image = cv2.Canny(image, 100, 200)
+        >>> image = image[:, :, None]
+        >>> image = np.concatenate([image, image, image], axis=2)
+        >>> canny_image = Image.fromarray(image)
+
+        >>> # generate image
+        >>> image = pipe(
+        ...     prompt, controlnet_conditioning_scale=controlnet_conditioning_scale, image=canny_image
+        ... ).images[0]
         ```
 """

@@ -112,7 +149,7 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
         tokenizer: CLIPTokenizer,
         tokenizer_2: CLIPTokenizer,
         unet: UNet2DConditionModel,
-        controlnet: ControlNetModel,
+        controlnet: Union[ControlNetModel, List[ControlNetModel], Tuple[ControlNetModel], MultiControlNetModel],
         scheduler: KarrasDiffusionSchedulers,
         force_zeros_for_empty_prompt: bool = True,
         add_watermarker: Optional[bool] = None,

@@ -120,7 +157,7 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
         super().__init__()

         if isinstance(controlnet, (list, tuple)):
-
+            controlnet = MultiControlNetModel(controlnet)

         self.register_modules(
             vae=vae,
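With this change, `StableDiffusionXLControlNetPipeline` accepts a single `ControlNetModel`, a list/tuple of them, or a `MultiControlNetModel`; a plain list is wrapped automatically. A minimal sketch of the multi-ControlNet path (the depth checkpoint name and the placeholder conditioning image are illustrative assumptions, not taken from this diff):

```py
import torch
from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel
from diffusers.utils import load_image

# Two ControlNets; the constructor wraps the list in a MultiControlNetModel.
controlnets = [
    ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16),
    ControlNetModel.from_pretrained("diffusers/controlnet-depth-sdxl-1.0", torch_dtype=torch.float16),
]
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnets, torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()

# Placeholder conditioning image; in practice prepare one Canny map and one depth map.
cond = load_image(
    "https://hf.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/hf-logo.png"
)

# With multiple ControlNets, `image` and `controlnet_conditioning_scale` become per-ControlNet lists.
image = pipe(
    "aerial view, a futuristic research complex",
    image=[cond, cond],
    controlnet_conditioning_scale=[0.5, 0.5],
).images[0]
```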
@@ -305,7 +342,6 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa

             text_input_ids = text_inputs.input_ids
             untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
-            untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids

             if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
                 text_input_ids, untruncated_ids

@@ -432,6 +468,8 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
         negative_prompt_2=None,
         prompt_embeds=None,
         negative_prompt_embeds=None,
+        pooled_prompt_embeds=None,
+        negative_pooled_prompt_embeds=None,
         controlnet_conditioning_scale=1.0,
         control_guidance_start=0.0,
         control_guidance_end=1.0,

@@ -482,6 +520,25 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
                 f" {negative_prompt_embeds.shape}."
             )

+        if prompt_embeds is not None and pooled_prompt_embeds is None:
+            raise ValueError(
+                "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
+            )
+
+        if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None:
+            raise ValueError(
+                "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
+            )
+
+        # `prompt` needs more sophisticated handling when there are multiple
+        # conditionings.
+        if isinstance(self.controlnet, MultiControlNetModel):
+            if isinstance(prompt, list):
+                logger.warning(
+                    f"You have {len(self.controlnet.nets)} ControlNets and you have passed {len(prompt)}"
+                    " prompts. The conditionings will be fixed across the prompts."
+                )
+
         # Check `image`
         is_compiled = hasattr(F, "scaled_dot_product_attention") and isinstance(
             self.controlnet, torch._dynamo.eval_frame.OptimizedModule

@@ -492,6 +549,25 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
             and isinstance(self.controlnet._orig_mod, ControlNetModel)
         ):
             self.check_image(image, prompt, prompt_embeds)
+        elif (
+            isinstance(self.controlnet, MultiControlNetModel)
+            or is_compiled
+            and isinstance(self.controlnet._orig_mod, MultiControlNetModel)
+        ):
+            if not isinstance(image, list):
+                raise TypeError("For multiple controlnets: `image` must be type `list`")
+
+            # When `image` is a nested list:
+            # (e.g. [[canny_image_1, pose_image_1], [canny_image_2, pose_image_2]])
+            elif any(isinstance(i, list) for i in image):
+                raise ValueError("A single batch of multiple conditionings are supported at the moment.")
+            elif len(image) != len(self.controlnet.nets):
+                raise ValueError(
+                    f"For multiple controlnets: `image` must have the same length as the number of controlnets, but got {len(image)} images and {len(self.controlnet.nets)} ControlNets."
+                )
+
+            for image_ in image:
+                self.check_image(image_, prompt, prompt_embeds)
         else:
             assert False

@@ -503,14 +579,41 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
         ):
             if not isinstance(controlnet_conditioning_scale, float):
                 raise TypeError("For single controlnet: `controlnet_conditioning_scale` must be type `float`.")
+        elif (
+            isinstance(self.controlnet, MultiControlNetModel)
+            or is_compiled
+            and isinstance(self.controlnet._orig_mod, MultiControlNetModel)
+        ):
+            if isinstance(controlnet_conditioning_scale, list):
+                if any(isinstance(i, list) for i in controlnet_conditioning_scale):
+                    raise ValueError("A single batch of multiple conditionings are supported at the moment.")
+            elif isinstance(controlnet_conditioning_scale, list) and len(controlnet_conditioning_scale) != len(
+                self.controlnet.nets
+            ):
+                raise ValueError(
+                    "For multiple controlnets: When `controlnet_conditioning_scale` is specified as `list`, it must have"
+                    " the same length as the number of controlnets"
+                )
         else:
             assert False

+        if not isinstance(control_guidance_start, (tuple, list)):
+            control_guidance_start = [control_guidance_start]
+
+        if not isinstance(control_guidance_end, (tuple, list)):
+            control_guidance_end = [control_guidance_end]
+
         if len(control_guidance_start) != len(control_guidance_end):
             raise ValueError(
                 f"`control_guidance_start` has {len(control_guidance_start)} elements, but `control_guidance_end` has {len(control_guidance_end)} elements. Make sure to provide the same number of elements to each list."
             )

+        if isinstance(self.controlnet, MultiControlNetModel):
+            if len(control_guidance_start) != len(self.controlnet.nets):
+                raise ValueError(
+                    f"`control_guidance_start`: {control_guidance_start} has {len(control_guidance_start)} elements but there are {len(self.controlnet.nets)} controlnets available. Make sure to provide {len(self.controlnet.nets)}."
+                )
+
         for start, end in zip(control_guidance_start, control_guidance_end):
             if start >= end:
                 raise ValueError(

@@ -521,6 +624,7 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
             if end > 1.0:
                 raise ValueError(f"control guidance end: {end} can't be larger than 1.0.")

+    # Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image
     def check_image(self, image, prompt, prompt_embeds):
         image_is_pil = isinstance(image, PIL.Image.Image)
         image_is_tensor = isinstance(image, torch.Tensor)

@@ -558,6 +662,7 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
                 f"If image batch size is not 1, image batch size must be same as prompt batch size. image batch size: {image_batch_size}, prompt batch size: {prompt_batch_size}"
             )

+    # Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.prepare_image
     def prepare_image(
         self,
         image,

@@ -669,6 +774,8 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
         latents: Optional[torch.FloatTensor] = None,
         prompt_embeds: Optional[torch.FloatTensor] = None,
         negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+        negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,

@@ -739,6 +846,13 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
+            pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+                Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+                If not provided, pooled text embeddings will be generated from `prompt` input argument.
+            negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+                Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+                weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+                input argument.
             output_type (`str`, *optional*, defaults to `"pil"`):
                 The output format of the generate image. Choose between
                 [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
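The new `pooled_prompt_embeds`/`negative_pooled_prompt_embeds` arguments let callers reuse pre-computed SDXL embeddings; passing `prompt_embeds` without the matching pooled tensor now raises a `ValueError`. A minimal sketch, assuming `pipe` is an already-loaded `StableDiffusionXLControlNetPipeline`, `cond_image` is a prepared conditioning image, and the pipeline's `encode_prompt` helper returns the pooled embeddings alongside the per-token ones (as it does in 0.20.x):

```py
# Pre-compute the four embedding tensors once and reuse them across calls.
(
    prompt_embeds,
    negative_prompt_embeds,
    pooled_prompt_embeds,
    negative_pooled_prompt_embeds,
) = pipe.encode_prompt(
    prompt="aerial view, a futuristic research complex",
    negative_prompt="low quality, bad quality, sketches",
    device=pipe.device,
    num_images_per_prompt=1,
    do_classifier_free_guidance=True,
)

image = pipe(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_prompt_embeds,
    pooled_prompt_embeds=pooled_prompt_embeds,  # required whenever prompt_embeds is passed
    negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
    image=cond_image,  # conditioning image, assumed prepared beforehand
).images[0]
```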
@@ -754,7 +868,7 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
             cross_attention_kwargs (`dict`, *optional*):
                 A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                 `self.processor` in
-                [diffusers.
+                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
             controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
                 The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
                 to the residual in the original unet. If multiple ControlNets are specified in init, you can set the

@@ -810,6 +924,8 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
             negative_prompt_2,
             prompt_embeds,
             negative_prompt_embeds,
+            pooled_prompt_embeds,
+            negative_pooled_prompt_embeds,
             controlnet_conditioning_scale,
             control_guidance_start,
             control_guidance_end,

@@ -829,6 +945,9 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
         # corresponds to doing no classifier free guidance.
         do_classifier_free_guidance = guidance_scale > 1.0

+        if isinstance(controlnet, MultiControlNetModel) and isinstance(controlnet_conditioning_scale, float):
+            controlnet_conditioning_scale = [controlnet_conditioning_scale] * len(controlnet.nets)
+
         global_pool_conditions = (
             controlnet.config.global_pool_conditions
             if isinstance(controlnet, ControlNetModel)

@@ -855,6 +974,8 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
             negative_prompt_2,
             prompt_embeds=prompt_embeds,
             negative_prompt_embeds=negative_prompt_embeds,
+            pooled_prompt_embeds=pooled_prompt_embeds,
+            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
             lora_scale=text_encoder_lora_scale,
         )

@@ -872,6 +993,26 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
                 guess_mode=guess_mode,
             )
             height, width = image.shape[-2:]
+        elif isinstance(controlnet, MultiControlNetModel):
+            images = []
+
+            for image_ in image:
+                image_ = self.prepare_image(
+                    image=image_,
+                    width=width,
+                    height=height,
+                    batch_size=batch_size * num_images_per_prompt,
+                    num_images_per_prompt=num_images_per_prompt,
+                    device=device,
+                    dtype=controlnet.dtype,
+                    do_classifier_free_guidance=do_classifier_free_guidance,
+                    guess_mode=guess_mode,
+                )
+
+                images.append(image_)
+
+            image = images
+            height, width = image[0].shape[-2:]
         else:
             assert False

@@ -902,12 +1043,15 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
                 1.0 - float(i / len(timesteps) < s or (i + 1) / len(timesteps) > e)
                 for s, e in zip(control_guidance_start, control_guidance_end)
             ]
-            controlnet_keep.append(keeps[0] if
+            controlnet_keep.append(keeps[0] if isinstance(controlnet, ControlNetModel) else keeps)

-
+        # 7.2 Prepare added time ids & embeddings
+        if isinstance(image, list):
+            original_size = original_size or image[0].shape[-2:]
+        else:
+            original_size = original_size or image.shape[-2:]
         target_size = target_size or (height, width)

-        # 7.2 Prepare added time ids & embeddings
         add_text_embeds = pooled_prompt_embeds
         add_time_ids = self._get_add_time_ids(
             original_size, crops_coords_top_left, target_size, dtype=prompt_embeds.dtype

@@ -943,7 +1087,10 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
                 if isinstance(controlnet_keep[i], list):
                     cond_scale = [c * s for c, s in zip(controlnet_conditioning_scale, controlnet_keep[i])]
                 else:
-
+                    controlnet_cond_scale = controlnet_conditioning_scale
+                    if isinstance(controlnet_cond_scale, list):
+                        controlnet_cond_scale = controlnet_cond_scale[0]
+                    cond_scale = controlnet_cond_scale * controlnet_keep[i]

                 added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
                 down_block_res_samples, mid_block_res_sample = self.controlnet(
diffusers/pipelines/controlnet/pipeline_flax_controlnet.py

@@ -51,19 +51,11 @@ EXAMPLE_DOC_STRING = """
         >>> import jax.numpy as jnp
         >>> from flax.jax_utils import replicate
         >>> from flax.training.common_utils import shard
-        >>> from diffusers.utils import load_image
+        >>> from diffusers.utils import load_image, make_image_grid
         >>> from PIL import Image
         >>> from diffusers import FlaxStableDiffusionControlNetPipeline, FlaxControlNetModel


-        >>> def image_grid(imgs, rows, cols):
-        ...     w, h = imgs[0].size
-        ...     grid = Image.new("RGB", size=(cols * w, rows * h))
-        ...     for i, img in enumerate(imgs):
-        ...         grid.paste(img, box=(i % cols * w, i // cols * h))
-        ...     return grid
-
-
         >>> def create_key(seed=0):
         ...     return jax.random.PRNGKey(seed)

@@ -110,7 +102,7 @@ EXAMPLE_DOC_STRING = """
         ... ).images

         >>> output_images = pipe.numpy_to_pil(np.asarray(output.reshape((num_samples,) + output.shape[-3:])))
-        >>> output_images =
+        >>> output_images = make_image_grid(output_images, num_samples // 4, 4)
         >>> output_images.save("generated_image.png")
         ```
 """
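The Flax example now uses `make_image_grid`, which 0.20 adds to `diffusers.utils` (the new `pil_utils.py` lines in the file list above), instead of a locally defined helper. A small standalone sketch of the helper:

```py
from diffusers.utils import load_image, make_image_grid

# Tile four equally sized PIL images into a 2x2 grid (len(images) must equal rows * cols).
img = load_image(
    "https://hf.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/hf-logo.png"
)
grid = make_image_grid([img, img, img, img], rows=2, cols=2)
grid.save("grid.png")
```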
diffusers/pipelines/deepfloyd_if/pipeline_if.py

@@ -662,7 +662,7 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
             cross_attention_kwargs (`dict`, *optional*):
                 A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                 `self.processor` in
-                [diffusers.
+                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).

         Examples:

diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py

@@ -783,7 +783,7 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
             cross_attention_kwargs (`dict`, *optional*):
                 A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                 `self.processor` in
-                [diffusers.
+                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).

         Examples:

diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py

@@ -865,7 +865,7 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
             cross_attention_kwargs (`dict`, *optional*):
                 A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                 `self.processor` in
-                [diffusers.
+                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
             noise_level (`int`, *optional*, defaults to 250):
                 The amount of noise to add to the upscaled image. Must be in the range `[0, 1000)`
             clean_caption (`bool`, *optional*, defaults to `True`):

diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py

@@ -883,7 +883,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
             cross_attention_kwargs (`dict`, *optional*):
                 A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                 `self.processor` in
-                [diffusers.
+                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).

         Examples:

diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py

@@ -961,7 +961,7 @@ class IFInpaintingSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
             cross_attention_kwargs (`dict`, *optional*):
                 A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                 `self.processor` in
-                [diffusers.
+                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
             noise_level (`int`, *optional*, defaults to 0):
                 The amount of noise to add to the upscaled image. Must be in the range `[0, 1000)`
             clean_caption (`bool`, *optional*, defaults to `True`):

diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py

@@ -730,7 +730,7 @@ class IFSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
             cross_attention_kwargs (`dict`, *optional*):
                 A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                 `self.processor` in
-                [diffusers.
+                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
             noise_level (`int`, *optional*, defaults to 250):
                 The amount of noise to add to the upscaled image. Must be in the range `[0, 1000)`
             clean_caption (`bool`, *optional*, defaults to `True`):
diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py

@@ -188,6 +188,9 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
             movq=movq,
         )

+    def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+        self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
     def enable_model_cpu_offload(self, gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared

@@ -198,6 +201,16 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
         self.prior_pipe.enable_model_cpu_offload()
         self.decoder_pipe.enable_model_cpu_offload()

+    def enable_sequential_cpu_offload(self, gpu_id=0):
+        r"""
+        Offloads all models (`unet`, `text_encoder`, `vae`, and `safety checker` state dicts) to CPU using 🤗
+        Accelerate, significantly reducing memory usage. Models are moved to a `torch.device('meta')` and loaded on a
+        GPU only when their specific submodule's `forward` method is called. Offloading happens on a submodule basis.
+        Memory savings are higher than using `enable_model_cpu_offload`, but performance is lower.
+        """
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
         self.decoder_pipe.progress_bar(iterable=iterable, total=total)
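The Kandinsky combined pipelines now expose `enable_sequential_cpu_offload` (forwarded to both the prior and decoder sub-pipelines) and `enable_xformers_memory_efficient_attention` (forwarded to the decoder sub-pipeline). A minimal sketch, assuming the `kandinsky-community/kandinsky-2-1` combined checkpoint:

```py
import torch
from diffusers import AutoPipelineForText2Image

# The combined text-to-image pipeline wraps a prior pipeline and a decoder pipeline.
pipe = AutoPipelineForText2Image.from_pretrained(
    "kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16
)

# Lowest-memory option: submodules are loaded to the GPU one forward pass at a time.
pipe.enable_sequential_cpu_offload()
# pipe.enable_xformers_memory_efficient_attention()  # optional, requires xformers

image = pipe("a portrait of a cat wearing a spacesuit").images[0]
```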
@@ -398,6 +411,9 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
             movq=movq,
         )

+    def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+        self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
     def enable_model_cpu_offload(self, gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared

@@ -408,6 +424,17 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
         self.prior_pipe.enable_model_cpu_offload()
         self.decoder_pipe.enable_model_cpu_offload()

+    def enable_sequential_cpu_offload(self, gpu_id=0):
+        r"""
+        Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
+        text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
+        `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
+        Note that offloading happens on a submodule basis. Memory savings are higher than with
+        `enable_model_cpu_offload`, but performance is lower.
+        """
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
         self.decoder_pipe.progress_bar(iterable=iterable, total=total)

@@ -447,7 +474,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
                 The prompt or prompts to guide the image generation.
             image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                 `Image`, or tensor representing an image batch, that will be used as the starting point for the
-                process. Can also
+                process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
                 again.
             negative_prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored

@@ -630,6 +657,9 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
             movq=movq,
         )

+    def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+        self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
     def enable_model_cpu_offload(self, gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared

@@ -640,6 +670,17 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
         self.prior_pipe.enable_model_cpu_offload()
         self.decoder_pipe.enable_model_cpu_offload()

+    def enable_sequential_cpu_offload(self, gpu_id=0):
+        r"""
+        Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
+        text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
+        `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
+        Note that offloading happens on a submodule basis. Memory savings are higher than with
+        `enable_model_cpu_offload`, but performance is lower.
+        """
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
         self.decoder_pipe.progress_bar(iterable=iterable, total=total)

@@ -679,7 +720,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
                 The prompt or prompts to guide the image generation.
             image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                 `Image`, or tensor representing an image batch, that will be used as the starting point for the
-                process. Can also
+                process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
                 again.
             mask_image (`np.array`):
                 Tensor representing an image batch, to mask `image`. White pixels in the mask will be repainted, while
diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py

@@ -177,6 +177,9 @@ class KandinskyV22CombinedPipeline(DiffusionPipeline):
             movq=movq,
         )

+    def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+        self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
     def enable_model_cpu_offload(self, gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared

@@ -187,6 +190,17 @@ class KandinskyV22CombinedPipeline(DiffusionPipeline):
         self.prior_pipe.enable_model_cpu_offload()
         self.decoder_pipe.enable_model_cpu_offload()

+    def enable_sequential_cpu_offload(self, gpu_id=0):
+        r"""
+        Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
+        text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
+        `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
+        Note that offloading happens on a submodule basis. Memory savings are higher than with
+        `enable_model_cpu_offload`, but performance is lower.
+        """
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
         self.decoder_pipe.progress_bar(iterable=iterable, total=total)
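The Kandinsky 2.2 combined pipelines gain the same two methods. A short sketch, assuming the `kandinsky-community/kandinsky-2-2-decoder` combined checkpoint and that xformers is installed:

```py
import torch
from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained(
    "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
)
pipe.enable_xformers_memory_efficient_attention()  # forwarded to the decoder sub-pipeline
pipe.enable_sequential_cpu_offload()               # forwarded to both prior and decoder

image = pipe("a cinematic photo of a red panda astronaut").images[0]
```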
@@ -378,6 +392,9 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
             movq=movq,
         )

+    def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+        self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
     def enable_model_cpu_offload(self, gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared

@@ -388,6 +405,17 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
         self.prior_pipe.enable_model_cpu_offload()
         self.decoder_pipe.enable_model_cpu_offload()

+    def enable_sequential_cpu_offload(self, gpu_id=0):
+        r"""
+        Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
+        text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
+        `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
+        Note that offloading happens on a submodule basis. Memory savings are higher than with
+        `enable_model_cpu_offload`, but performance is lower.
+        """
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
         self.decoder_pipe.progress_bar(iterable=iterable, total=total)

@@ -427,7 +455,7 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
                 The prompt or prompts to guide the image generation.
             image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                 `Image`, or tensor representing an image batch, that will be used as the starting point for the
-                process. Can also
+                process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
                 again.
             negative_prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored

@@ -601,6 +629,9 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
             movq=movq,
         )

+    def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+        self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
     def enable_model_cpu_offload(self, gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared

@@ -611,6 +642,17 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
         self.prior_pipe.enable_model_cpu_offload()
         self.decoder_pipe.enable_model_cpu_offload()

+    def enable_sequential_cpu_offload(self, gpu_id=0):
+        r"""
+        Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
+        text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
+        `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
+        Note that offloading happens on a submodule basis. Memory savings are higher than with
+        `enable_model_cpu_offload`, but performance is lower.
+        """
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
         self.decoder_pipe.progress_bar(iterable=iterable, total=total)

@@ -650,7 +692,7 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
                 The prompt or prompts to guide the image generation.
             image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                 `Image`, or tensor representing an image batch, that will be used as the starting point for the
-                process. Can also
+                process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
                 again.
             mask_image (`np.array`):
                 Tensor representing an image batch, to mask `image`. White pixels in the mask will be repainted, while
diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py

@@ -258,7 +258,7 @@ class KandinskyV22ControlnetImg2ImgPipeline(DiffusionPipeline):
                 The clip image embeddings for text prompt, that will be used to condition the image generation.
             image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                 `Image`, or tensor representing an image batch, that will be used as the starting point for the
-                process. Can also
+                process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
                 again.
             strength (`float`, *optional*, defaults to 0.8):
                 Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image`
diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py

@@ -230,7 +230,7 @@ class KandinskyV22Img2ImgPipeline(DiffusionPipeline):
                 The clip image embeddings for text prompt, that will be used to condition the image generation.
             image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                 `Image`, or tensor representing an image batch, that will be used as the starting point for the
-                process. Can also
+                process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
                 again.
             strength (`float`, *optional*, defaults to 0.8):
                 Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image`
|