diffusers 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl
- diffusers/__init__.py +3 -1
- diffusers/commands/fp16_safetensors.py +2 -7
- diffusers/configuration_utils.py +23 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/loaders.py +62 -64
- diffusers/models/__init__.py +1 -0
- diffusers/models/activations.py +2 -0
- diffusers/models/attention.py +45 -1
- diffusers/models/autoencoder_tiny.py +193 -0
- diffusers/models/controlnet.py +1 -1
- diffusers/models/embeddings.py +56 -0
- diffusers/models/lora.py +0 -6
- diffusers/models/modeling_flax_utils.py +28 -2
- diffusers/models/modeling_utils.py +33 -16
- diffusers/models/transformer_2d.py +26 -9
- diffusers/models/unet_1d.py +2 -2
- diffusers/models/unet_2d_blocks.py +106 -56
- diffusers/models/unet_2d_condition.py +20 -5
- diffusers/models/vae.py +106 -1
- diffusers/pipelines/__init__.py +1 -0
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +10 -3
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -3
- diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
- diffusers/pipelines/auto_pipeline.py +33 -43
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -2
- diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +15 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +14 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +157 -10
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +43 -2
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +44 -2
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
- diffusers/pipelines/pipeline_flax_utils.py +41 -4
- diffusers/pipelines/pipeline_utils.py +60 -16
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/__init__.py +1 -0
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +81 -37
- diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +12 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +832 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +9 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +17 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +10 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +3 -5
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +75 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +76 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +1 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +10 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +10 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +11 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +1 -1
- diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +131 -28
- diffusers/schedulers/scheduling_consistency_models.py +70 -57
- diffusers/schedulers/scheduling_ddim.py +76 -71
- diffusers/schedulers/scheduling_ddim_inverse.py +76 -44
- diffusers/schedulers/scheduling_ddim_parallel.py +11 -8
- diffusers/schedulers/scheduling_ddpm.py +68 -67
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -15
- diffusers/schedulers/scheduling_deis_multistep.py +93 -85
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +118 -120
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +116 -109
- diffusers/schedulers/scheduling_dpmsolver_sde.py +57 -43
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +122 -121
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +54 -44
- diffusers/schedulers/scheduling_euler_discrete.py +63 -56
- diffusers/schedulers/scheduling_heun_discrete.py +57 -45
- diffusers/schedulers/scheduling_ipndm.py +27 -22
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +54 -41
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +52 -41
- diffusers/schedulers/scheduling_karras_ve.py +55 -45
- diffusers/schedulers/scheduling_lms_discrete.py +58 -52
- diffusers/schedulers/scheduling_pndm.py +77 -62
- diffusers/schedulers/scheduling_repaint.py +56 -38
- diffusers/schedulers/scheduling_sde_ve.py +62 -50
- diffusers/schedulers/scheduling_sde_vp.py +32 -11
- diffusers/schedulers/scheduling_unclip.py +3 -3
- diffusers/schedulers/scheduling_unipc_multistep.py +131 -91
- diffusers/schedulers/scheduling_utils.py +41 -35
- diffusers/schedulers/scheduling_utils_flax.py +8 -2
- diffusers/schedulers/scheduling_vq_diffusion.py +39 -68
- diffusers/utils/__init__.py +2 -2
- diffusers/utils/dummy_pt_objects.py +15 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
- diffusers/utils/hub_utils.py +105 -2
- diffusers/utils/import_utils.py +0 -4
- diffusers/utils/pil_utils.py +19 -0
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA +5 -7
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/RECORD +113 -112
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/WHEEL +1 -1
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/entry_points.txt +0 -1
- diffusers/models/cross_attention.py +0 -94
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/LICENSE +0 -0
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/top_level.txt +0 -0
diffusers/pipelines/auto_pipeline.py
@@ -158,16 +158,11 @@ def _get_signature_keys(obj):
 class AutoPipelineForText2Image(ConfigMixin):
     r"""
 
-
+    [`AutoPipelineForText2Image`] is a generic pipeline class that instantiates a text-to-image pipeline class. The
+    specific underlying pipeline class is automatically selected from either the
+    [`~AutoPipelineForText2Image.from_pretrained`] or [`~AutoPipelineForText2Image.from_pipe`] methods.
 
-
-    pipeline class in diffusers.
-
-    The pipeline type (for example [`StableDiffusionPipeline`]) is automatically selected when created with the
-    AutoPipelineForText2Image.from_pretrained(pretrained_model_name_or_path) or
-    AutoPipelineForText2Image.from_pipe(pipeline) class methods .
-
-    This class cannot be instantiated using __init__() (throws an error).
+    This class cannot be instantiated using `__init__()` (throws an error).
 
     Class attributes:
 
@@ -294,10 +289,10 @@ class AutoPipelineForText2Image(ConfigMixin):
         Examples:
 
         ```py
-        >>> from diffusers import
+        >>> from diffusers import AutoPipelineForText2Image
 
-        >>> pipeline =
-        >>>
+        >>> pipeline = AutoPipelineForText2Image.from_pretrained("runwayml/stable-diffusion-v1-5")
+        >>> image = pipeline(prompt).images[0]
         ```
         """
         config = cls.load_config(pretrained_model_or_path)
@@ -328,13 +323,14 @@ class AutoPipelineForText2Image(ConfigMixin):
             an instantiated `DiffusionPipeline` object
 
         ```py
-        >>> from diffusers import
+        >>> from diffusers import AutoPipelineForText2Image, AutoPipelineForImage2Image
 
        >>> pipe_i2i = AutoPipelineForImage2Image.from_pretrained(
        ...     "runwayml/stable-diffusion-v1-5", requires_safety_checker=False
        ... )
 
-        >>> pipe_t2i =
+        >>> pipe_t2i = AutoPipelineForText2Image.from_pipe(pipe_i2i)
+        >>> image = pipe_t2i(prompt).images[0]
         ```
         """
 
@@ -401,16 +397,11 @@ class AutoPipelineForImage2Image(ConfigMixin):
 class AutoPipelineForImage2Image(ConfigMixin):
     r"""
 
-
-
-    [
-    pipeline classes in diffusers.
+    [`AutoPipelineForImage2Image`] is a generic pipeline class that instantiates an image-to-image pipeline class. The
+    specific underlying pipeline class is automatically selected from either the
+    [`~AutoPipelineForImage2Image.from_pretrained`] or [`~AutoPipelineForImage2Image.from_pipe`] methods.
 
-
-    `AutoPipelineForImage2Image.from_pretrained(pretrained_model_name_or_path)` or
-    `AutoPipelineForImage2Image.from_pipe(pipeline)` class methods.
-
-    This class cannot be instantiated using __init__() (throws an error).
+    This class cannot be instantiated using `__init__()` (throws an error).
 
     Class attributes:
 
@@ -438,7 +429,8 @@ class AutoPipelineForImage2Image(ConfigMixin):
         2. Find the image-to-image pipeline linked to the pipeline class using pattern matching on pipeline class
            name.
 
-        If a `controlnet` argument is passed, it will instantiate a StableDiffusionControlNetImg2ImgPipeline
+        If a `controlnet` argument is passed, it will instantiate a [`StableDiffusionControlNetImg2ImgPipeline`]
+        object.
 
         The pipeline is set in evaluation mode (`model.eval()`) by default.
 
@@ -537,10 +529,10 @@ class AutoPipelineForImage2Image(ConfigMixin):
         Examples:
 
         ```py
-        >>> from diffusers import
+        >>> from diffusers import AutoPipelineForImage2Image
 
-        >>> pipeline =
-        >>>
+        >>> pipeline = AutoPipelineForImage2Image.from_pretrained("runwayml/stable-diffusion-v1-5")
+        >>> image = pipeline(prompt, image).images[0]
         ```
         """
         config = cls.load_config(pretrained_model_or_path)
@@ -573,13 +565,14 @@ class AutoPipelineForImage2Image(ConfigMixin):
         Examples:
 
         ```py
-        >>> from diffusers import
+        >>> from diffusers import AutoPipelineForText2Image, AutoPipelineForImage2Image
 
        >>> pipe_t2i = AutoPipelineForText2Image.from_pretrained(
        ...     "runwayml/stable-diffusion-v1-5", requires_safety_checker=False
        ... )
 
-        >>> pipe_i2i =
+        >>> pipe_i2i = AutoPipelineForImage2Image.from_pipe(pipe_t2i)
+        >>> image = pipe_i2i(prompt, image).images[0]
         ```
         """
 
@@ -646,16 +639,11 @@ class AutoPipelineForInpainting(ConfigMixin):
 class AutoPipelineForInpainting(ConfigMixin):
     r"""
 
-
-
-    [
-    pipeline class in diffusers.
-
-    The pipeline type (for example [`IFInpaintingPipeline`]) is automatically selected when created with the
-    AutoPipelineForInpainting.from_pretrained(pretrained_model_name_or_path) or
-    AutoPipelineForInpainting.from_pipe(pipeline) class methods .
+    [`AutoPipelineForInpainting`] is a generic pipeline class that instantiates an inpainting pipeline class. The
+    specific underlying pipeline class is automatically selected from either the
+    [`~AutoPipelineForInpainting.from_pretrained`] or [`~AutoPipelineForInpainting.from_pipe`] methods.
 
-    This class cannot be instantiated using __init__() (throws an error).
+    This class cannot be instantiated using `__init__()` (throws an error).
 
     Class attributes:
 
@@ -682,7 +670,8 @@ class AutoPipelineForInpainting(ConfigMixin):
            config object
         2. Find the inpainting pipeline linked to the pipeline class using pattern matching on pipeline class name.
 
-        If a `controlnet` argument is passed, it will instantiate a StableDiffusionControlNetInpaintPipeline
+        If a `controlnet` argument is passed, it will instantiate a [`StableDiffusionControlNetInpaintPipeline`]
+        object.
 
         The pipeline is set in evaluation mode (`model.eval()`) by default.
 
@@ -781,10 +770,10 @@ class AutoPipelineForInpainting(ConfigMixin):
         Examples:
 
         ```py
-        >>> from diffusers import
+        >>> from diffusers import AutoPipelineForInpainting
 
-        >>> pipeline =
-        >>>
+        >>> pipeline = AutoPipelineForInpainting.from_pretrained("runwayml/stable-diffusion-v1-5")
+        >>> image = pipeline(prompt, image=init_image, mask_image=mask_image).images[0]
         ```
         """
         config = cls.load_config(pretrained_model_or_path)
@@ -817,13 +806,14 @@ class AutoPipelineForInpainting(ConfigMixin):
         Examples:
 
         ```py
-        >>> from diffusers import
+        >>> from diffusers import AutoPipelineForText2Image, AutoPipelineForInpainting
 
        >>> pipe_t2i = AutoPipelineForText2Image.from_pretrained(
        ...     "DeepFloyd/IF-I-XL-v1.0", requires_safety_checker=False
        ... )
 
        >>> pipe_inpaint = AutoPipelineForInpainting.from_pipe(pipe_t2i)
+        >>> image = pipe_inpaint(prompt, image=init_image, mask_image=mask_image).images[0]
         ```
         """
         original_config = dict(pipeline.config)
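The rewritten docstrings all describe the same workflow, so one hedged sketch covers the three classes: load a pipeline once with `from_pretrained`, then reuse its components for another task with `from_pipe` instead of loading the checkpoint twice. The checkpoint name and prompts are illustrative.

```py
from diffusers import AutoPipelineForImage2Image, AutoPipelineForText2Image

# from_pretrained resolves the concrete pipeline class (here a Stable
# Diffusion text-to-image pipeline) from the checkpoint's model_index.json
pipe_t2i = AutoPipelineForText2Image.from_pretrained("runwayml/stable-diffusion-v1-5")

# from_pipe reuses the already-loaded components for the img2img task
pipe_i2i = AutoPipelineForImage2Image.from_pipe(pipe_t2i)

image = pipe_t2i("a photo of an astronaut").images[0]
image = pipe_i2i("make it a painting", image=image).images[0]
```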
diffusers/pipelines/controlnet/multicontrolnet.py
@@ -39,6 +39,7 @@ class MultiControlNetModel(ModelMixin):
         class_labels: Optional[torch.Tensor] = None,
         timestep_cond: Optional[torch.Tensor] = None,
         attention_mask: Optional[torch.Tensor] = None,
+        added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         guess_mode: bool = False,
         return_dict: bool = True,
@@ -53,6 +54,7 @@ class MultiControlNetModel(ModelMixin):
                 class_labels=class_labels,
                 timestep_cond=timestep_cond,
                 attention_mask=attention_mask,
+                added_cond_kwargs=added_cond_kwargs,
                 cross_attention_kwargs=cross_attention_kwargs,
                 guess_mode=guess_mode,
                 return_dict=return_dict,
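`added_cond_kwargs` is the dictionary of extra conditioning tensors that SDXL-style ControlNet pipelines now thread through to each wrapped `ControlNetModel`. A hedged sketch of its shape; the batch size is illustrative, and the widths (1280 and 6) are the usual SDXL values for pooled text embeddings and size ids:

```py
import torch

# pooled text embeddings plus the original/crop/target size ids
added_cond_kwargs = {
    "text_embeds": torch.zeros(2, 1280),
    "time_ids": torch.zeros(2, 6),
}
```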
@@ -75,7 +77,7 @@ class MultiControlNetModel(ModelMixin):
         save_directory: Union[str, os.PathLike],
         is_main_process: bool = True,
         save_function: Callable = None,
-        safe_serialization: bool = False,
+        safe_serialization: bool = True,
         variant: Optional[str] = None,
     ):
         """
@@ -93,7 +95,7 @@ class MultiControlNetModel(ModelMixin):
                 The function to use to save the state dictionary. Useful on distributed training like TPUs when one
                 need to replace `torch.save` by another method. Can be configured with the environment variable
                 `DIFFUSERS_SAVE_MODE`.
-            safe_serialization (`bool`, *optional*, defaults to `False`):
+            safe_serialization (`bool`, *optional*, defaults to `True`):
                 Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).
             variant (`str`, *optional*):
                 If specified, weights are saved in the format pytorch_model.<variant>.bin.
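The flipped default means `save_pretrained` now writes `safetensors` weights unless told otherwise. A minimal sketch, assuming any `ModelMixin` subclass behaves the same way; `UNet2DModel()` with its default config is just a convenient stand-in:

```py
from diffusers import UNet2DModel

model = UNet2DModel()

# 0.20.x default: writes diffusion_pytorch_model.safetensors
model.save_pretrained("./checkpoint")

# opt back into the pickle-based .bin format explicitly
model.save_pretrained("./checkpoint-bin", safe_serialization=False)
```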
diffusers/pipelines/controlnet/pipeline_controlnet.py
@@ -326,7 +326,14 @@ class StableDiffusionControlNetPipeline(
             )
             prompt_embeds = prompt_embeds[0]
 
-        prompt_embeds = prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)
+        if self.text_encoder is not None:
+            prompt_embeds_dtype = self.text_encoder.dtype
+        elif self.unet is not None:
+            prompt_embeds_dtype = self.unet.dtype
+        else:
+            prompt_embeds_dtype = prompt_embeds.dtype
+
+        prompt_embeds = prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)
 
        bs_embed, seq_len, _ = prompt_embeds.shape
        # duplicate text embeddings for each generation per prompt, using mps friendly method
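This makes the prompt-encoding path robust when the pipeline has no text encoder attached, for example when precomputed `prompt_embeds` are passed in: the target dtype now falls back from the text encoder to the UNet to the embeddings themselves. The same fallback, isolated as a hedged helper (the function name is illustrative):

```py
def resolve_prompt_embeds_dtype(pipe, prompt_embeds):
    # prefer the text encoder's dtype, then the unet's,
    # and finally whatever dtype the embeddings already have
    if getattr(pipe, "text_encoder", None) is not None:
        return pipe.text_encoder.dtype
    if getattr(pipe, "unet", None) is not None:
        return pipe.unet.dtype
    return prompt_embeds.dtype
```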
@@ -382,7 +389,7 @@ class StableDiffusionControlNetPipeline(
             # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
             seq_len = negative_prompt_embeds.shape[1]
 
-            negative_prompt_embeds = negative_prompt_embeds.to(dtype=prompt_embeds.dtype, device=device)
+            negative_prompt_embeds = negative_prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)
 
            negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
            negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
@@ -554,6 +561,12 @@ class StableDiffusionControlNetPipeline(
         else:
             assert False
 
+        if not isinstance(control_guidance_start, (tuple, list)):
+            control_guidance_start = [control_guidance_start]
+
+        if not isinstance(control_guidance_end, (tuple, list)):
+            control_guidance_end = [control_guidance_end]
+
         if len(control_guidance_start) != len(control_guidance_end):
             raise ValueError(
                 f"`control_guidance_start` has {len(control_guidance_start)} elements, but `control_guidance_end` has {len(control_guidance_end)} elements. Make sure to provide the same number of elements to each list."
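With this, `check_inputs` accepts scalar `control_guidance_start`/`control_guidance_end` values by normalizing them to one-element lists before the length comparison. The same normalization as a standalone hedged helper:

```py
def normalize_control_guidance(start, end):
    # scalars become one-element lists so the zip()-based
    # per-controlnet schedule downstream works uniformly
    if not isinstance(start, (tuple, list)):
        start = [start]
    if not isinstance(end, (tuple, list)):
        end = [end]
    if len(start) != len(end):
        raise ValueError(
            f"`control_guidance_start` has {len(start)} elements, but "
            f"`control_guidance_end` has {len(end)} elements."
        )
    return list(start), list(end)
```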
@@ -760,7 +773,7 @@ class StableDiffusionControlNetPipeline(
             cross_attention_kwargs (`dict`, *optional*):
                 A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                 `self.processor` in
-                [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
+                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
             controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
                 The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
                 to the residual in the original unet. If multiple ControlNets are specified in init, you can set the
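The docstring now points at the module that actually hosts the processors; `diffusers/models/cross_attention.py` was removed in this release (see the file list above). A hedged usage sketch of the forwarded kwargs, assuming `pipe` is a loaded `StableDiffusionControlNetPipeline` with LoRA weights so the built-in processors understand the `scale` key, and `control_image` is a prepared conditioning image:

```py
# the dict is handed to every AttentionProcessor via cross_attention_kwargs
image = pipe(
    "a castle at sunset",
    image=control_image,
    cross_attention_kwargs={"scale": 0.8},
).images[0]
```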
@@ -934,7 +947,10 @@ class StableDiffusionControlNetPipeline(
                 if isinstance(controlnet_keep[i], list):
                     cond_scale = [c * s for c, s in zip(controlnet_conditioning_scale, controlnet_keep[i])]
                 else:
-                    cond_scale = controlnet_conditioning_scale * controlnet_keep[i]
+                    controlnet_cond_scale = controlnet_conditioning_scale
+                    if isinstance(controlnet_cond_scale, list):
+                        controlnet_cond_scale = controlnet_cond_scale[0]
+                    cond_scale = controlnet_cond_scale * controlnet_keep[i]
 
                down_block_res_samples, mid_block_res_sample = self.controlnet(
                    control_model_input,
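Previously the `else` branch assumed a plain float; the new lines also tolerate a one-element list left over from MultiControlNet normalization by taking its first entry before multiplying with the per-step keep factor. Isolated as a hedged helper (argument names are illustrative):

```py
def compute_cond_scale(conditioning_scale, keep):
    # keep is either a per-controlnet list or a single float for this step
    if isinstance(keep, list):
        return [c * s for c, s in zip(conditioning_scale, keep)]
    if isinstance(conditioning_scale, list):
        conditioning_scale = conditioning_scale[0]
    return conditioning_scale * keep
```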
diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
@@ -352,7 +352,14 @@ class StableDiffusionControlNetImg2ImgPipeline(
             )
             prompt_embeds = prompt_embeds[0]
 
-        prompt_embeds = prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)
+        if self.text_encoder is not None:
+            prompt_embeds_dtype = self.text_encoder.dtype
+        elif self.unet is not None:
+            prompt_embeds_dtype = self.unet.dtype
+        else:
+            prompt_embeds_dtype = prompt_embeds.dtype
+
+        prompt_embeds = prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)
 
        bs_embed, seq_len, _ = prompt_embeds.shape
        # duplicate text embeddings for each generation per prompt, using mps friendly method
@@ -408,7 +415,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
             # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
             seq_len = negative_prompt_embeds.shape[1]
 
-            negative_prompt_embeds = negative_prompt_embeds.to(dtype=prompt_embeds.dtype, device=device)
+            negative_prompt_embeds = negative_prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)
 
            negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
            negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
@@ -790,7 +797,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
                 instead.
             image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
                     `List[List[torch.FloatTensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
-                The initial image will be used as the starting point for the image generation process. Can also
+                The initial image will be used as the starting point for the image generation process. Can also accept
                 image latents as `image`, if passing latents directly, it will not be encoded again.
             control_image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
                     `List[List[torch.FloatTensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
@@ -851,7 +858,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
             cross_attention_kwargs (`dict`, *optional*):
                 A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                 `self.processor` in
-                [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
+                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
             controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
                 The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
                 to the residual in the original unet. If multiple ControlNets are specified in init, you can set the
@@ -914,8 +921,6 @@ class StableDiffusionControlNetImg2ImgPipeline(
         # corresponds to doing no classifier free guidance.
         do_classifier_free_guidance = guidance_scale > 1.0
 
-        controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
-
         if isinstance(controlnet, MultiControlNetModel) and isinstance(controlnet_conditioning_scale, float):
             controlnet_conditioning_scale = [controlnet_conditioning_scale] * len(controlnet.nets)
 
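The deleted unwrap is not lost behavior: the `controlnet` variable used just below is still defined, so the possibly `torch.compile`-wrapped module must now be resolved once, earlier in `__call__`, instead of here. A hedged sketch of what that unwrap does, assuming the usual `_orig_mod` attribute set by `torch.compile`:

```py
def unwrap_compiled(module):
    # torch.compile stores the original module on `_orig_mod`;
    # plain modules are returned unchanged
    return getattr(module, "_orig_mod", module)
```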
@@ -1027,7 +1032,10 @@ class StableDiffusionControlNetImg2ImgPipeline(
                 if isinstance(controlnet_keep[i], list):
                     cond_scale = [c * s for c, s in zip(controlnet_conditioning_scale, controlnet_keep[i])]
                 else:
-                    cond_scale = controlnet_conditioning_scale * controlnet_keep[i]
+                    controlnet_cond_scale = controlnet_conditioning_scale
+                    if isinstance(controlnet_cond_scale, list):
+                        controlnet_cond_scale = controlnet_cond_scale[0]
+                    cond_scale = controlnet_cond_scale * controlnet_keep[i]
 
                down_block_res_samples, mid_block_res_sample = self.controlnet(
                    control_model_input,
diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
@@ -469,7 +469,14 @@ class StableDiffusionControlNetInpaintPipeline(
             )
             prompt_embeds = prompt_embeds[0]
 
-        prompt_embeds = prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)
+        if self.text_encoder is not None:
+            prompt_embeds_dtype = self.text_encoder.dtype
+        elif self.unet is not None:
+            prompt_embeds_dtype = self.unet.dtype
+        else:
+            prompt_embeds_dtype = prompt_embeds.dtype
+
+        prompt_embeds = prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)
 
        bs_embed, seq_len, _ = prompt_embeds.shape
        # duplicate text embeddings for each generation per prompt, using mps friendly method
@@ -525,7 +532,7 @@ class StableDiffusionControlNetInpaintPipeline(
             # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
             seq_len = negative_prompt_embeds.shape[1]
 
-            negative_prompt_embeds = negative_prompt_embeds.to(dtype=prompt_embeds.dtype, device=device)
+            negative_prompt_embeds = negative_prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)
 
            negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
            negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
@@ -1048,7 +1055,7 @@ class StableDiffusionControlNetInpaintPipeline(
             cross_attention_kwargs (`dict`, *optional*):
                 A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                 `self.processor` in
-                [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
+                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
             controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 0.5):
                 The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
                 to the residual in the original unet. If multiple ControlNets are specified in init, you can set the
@@ -1262,7 +1269,10 @@ class StableDiffusionControlNetInpaintPipeline(
                 if isinstance(controlnet_keep[i], list):
                     cond_scale = [c * s for c, s in zip(controlnet_conditioning_scale, controlnet_keep[i])]
                 else:
-                    cond_scale = controlnet_conditioning_scale * controlnet_keep[i]
+                    controlnet_cond_scale = controlnet_conditioning_scale
+                    if isinstance(controlnet_cond_scale, list):
+                        controlnet_cond_scale = controlnet_cond_scale[0]
+                    cond_scale = controlnet_cond_scale * controlnet_keep[i]
 
                down_block_res_samples, mid_block_res_sample = self.controlnet(
                    control_model_input,