diffusers 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl

Files changed (114)
  1. diffusers/__init__.py +3 -1
  2. diffusers/commands/fp16_safetensors.py +2 -7
  3. diffusers/configuration_utils.py +23 -1
  4. diffusers/dependency_versions_table.py +1 -1
  5. diffusers/loaders.py +62 -64
  6. diffusers/models/__init__.py +1 -0
  7. diffusers/models/activations.py +2 -0
  8. diffusers/models/attention.py +45 -1
  9. diffusers/models/autoencoder_tiny.py +193 -0
  10. diffusers/models/controlnet.py +1 -1
  11. diffusers/models/embeddings.py +56 -0
  12. diffusers/models/lora.py +0 -6
  13. diffusers/models/modeling_flax_utils.py +28 -2
  14. diffusers/models/modeling_utils.py +33 -16
  15. diffusers/models/transformer_2d.py +26 -9
  16. diffusers/models/unet_1d.py +2 -2
  17. diffusers/models/unet_2d_blocks.py +106 -56
  18. diffusers/models/unet_2d_condition.py +20 -5
  19. diffusers/models/vae.py +106 -1
  20. diffusers/pipelines/__init__.py +1 -0
  21. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +10 -3
  22. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -3
  23. diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
  24. diffusers/pipelines/auto_pipeline.py +33 -43
  25. diffusers/pipelines/controlnet/multicontrolnet.py +4 -2
  26. diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -4
  27. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +15 -7
  28. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +14 -4
  29. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +157 -10
  30. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -10
  31. diffusers/pipelines/deepfloyd_if/pipeline_if.py +1 -1
  32. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +1 -1
  33. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1 -1
  34. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1 -1
  35. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1 -1
  36. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +1 -1
  37. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +43 -2
  38. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +44 -2
  39. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
  40. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
  41. diffusers/pipelines/pipeline_flax_utils.py +41 -4
  42. diffusers/pipelines/pipeline_utils.py +60 -16
  43. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +2 -2
  44. diffusers/pipelines/stable_diffusion/__init__.py +1 -0
  45. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +81 -37
  46. diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +10 -3
  47. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -3
  48. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -3
  49. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +10 -3
  50. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +12 -5
  51. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +832 -0
  52. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -3
  53. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +10 -3
  54. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +10 -3
  55. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +9 -2
  56. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +17 -8
  57. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +10 -3
  58. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +10 -3
  59. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +10 -3
  60. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +10 -3
  61. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +10 -3
  62. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +10 -3
  63. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +10 -3
  64. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +10 -3
  65. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +3 -5
  66. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +75 -3
  67. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +76 -6
  68. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +1 -2
  69. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +10 -3
  70. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +10 -3
  71. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +11 -4
  72. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +1 -1
  73. diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +131 -28
  74. diffusers/schedulers/scheduling_consistency_models.py +70 -57
  75. diffusers/schedulers/scheduling_ddim.py +76 -71
  76. diffusers/schedulers/scheduling_ddim_inverse.py +76 -44
  77. diffusers/schedulers/scheduling_ddim_parallel.py +11 -8
  78. diffusers/schedulers/scheduling_ddpm.py +68 -67
  79. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -15
  80. diffusers/schedulers/scheduling_deis_multistep.py +93 -85
  81. diffusers/schedulers/scheduling_dpmsolver_multistep.py +118 -120
  82. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +116 -109
  83. diffusers/schedulers/scheduling_dpmsolver_sde.py +57 -43
  84. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +122 -121
  85. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +54 -44
  86. diffusers/schedulers/scheduling_euler_discrete.py +63 -56
  87. diffusers/schedulers/scheduling_heun_discrete.py +57 -45
  88. diffusers/schedulers/scheduling_ipndm.py +27 -22
  89. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +54 -41
  90. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +52 -41
  91. diffusers/schedulers/scheduling_karras_ve.py +55 -45
  92. diffusers/schedulers/scheduling_lms_discrete.py +58 -52
  93. diffusers/schedulers/scheduling_pndm.py +77 -62
  94. diffusers/schedulers/scheduling_repaint.py +56 -38
  95. diffusers/schedulers/scheduling_sde_ve.py +62 -50
  96. diffusers/schedulers/scheduling_sde_vp.py +32 -11
  97. diffusers/schedulers/scheduling_unclip.py +3 -3
  98. diffusers/schedulers/scheduling_unipc_multistep.py +131 -91
  99. diffusers/schedulers/scheduling_utils.py +41 -35
  100. diffusers/schedulers/scheduling_utils_flax.py +8 -2
  101. diffusers/schedulers/scheduling_vq_diffusion.py +39 -68
  102. diffusers/utils/__init__.py +2 -2
  103. diffusers/utils/dummy_pt_objects.py +15 -0
  104. diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
  105. diffusers/utils/hub_utils.py +105 -2
  106. diffusers/utils/import_utils.py +0 -4
  107. diffusers/utils/pil_utils.py +19 -0
  108. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA +5 -7
  109. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/RECORD +113 -112
  110. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/WHEEL +1 -1
  111. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/entry_points.txt +0 -1
  112. diffusers/models/cross_attention.py +0 -94
  113. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/LICENSE +0 -0
  114. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/top_level.txt +0 -0
diffusers/pipelines/auto_pipeline.py
@@ -158,16 +158,11 @@ def _get_signature_keys(obj):
 class AutoPipelineForText2Image(ConfigMixin):
     r"""
 
-    AutoPipeline for text-to-image generation.
+    [`AutoPipelineForText2Image`] is a generic pipeline class that instantiates a text-to-image pipeline class. The
+    specific underlying pipeline class is automatically selected from either the
+    [`~AutoPipelineForText2Image.from_pretrained`] or [`~AutoPipelineForText2Image.from_pipe`] methods.
 
-    [`AutoPipelineForText2Image`] is a generic pipeline class that will be instantiated as one of the text-to-image
-    pipeline class in diffusers.
-
-    The pipeline type (for example [`StableDiffusionPipeline`]) is automatically selected when created with the
-    AutoPipelineForText2Image.from_pretrained(pretrained_model_name_or_path) or
-    AutoPipelineForText2Image.from_pipe(pipeline) class methods .
-
-    This class cannot be instantiated using __init__() (throws an error).
+    This class cannot be instantiated using `__init__()` (throws an error).
 
     Class attributes:
 
@@ -294,10 +289,10 @@ class AutoPipelineForText2Image(ConfigMixin):
         Examples:
 
         ```py
-        >>> from diffusers import AutoPipelineForTextToImage
+        >>> from diffusers import AutoPipelineForText2Image
 
-        >>> pipeline = AutoPipelineForTextToImage.from_pretrained("runwayml/stable-diffusion-v1-5")
-        >>> print(pipeline.__class__)
+        >>> pipeline = AutoPipelineForText2Image.from_pretrained("runwayml/stable-diffusion-v1-5")
+        >>> image = pipeline(prompt).images[0]
         ```
         """
         config = cls.load_config(pretrained_model_or_path)
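The corrected example also fixes the import: `AutoPipelineForText2Image` is the actual exported name (the old `AutoPipelineForTextToImage` was a typo). A sketch of the behavior the rewritten docstring describes, where the concrete class comes from the checkpoint's config rather than from the caller:

```py
>>> # Sketch: the concrete pipeline class is inferred from the model's config;
>>> # "runwayml/stable-diffusion-v1-5" resolves to the plain SD pipeline.
>>> from diffusers import AutoPipelineForText2Image

>>> pipeline = AutoPipelineForText2Image.from_pretrained("runwayml/stable-diffusion-v1-5")
>>> pipeline.__class__.__name__
'StableDiffusionPipeline'
```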
@@ -328,13 +323,14 @@ class AutoPipelineForText2Image(ConfigMixin):
             an instantiated `DiffusionPipeline` object
 
         ```py
-        >>> from diffusers import AutoPipelineForTextToImage, AutoPipelineForImageToImage
+        >>> from diffusers import AutoPipelineForText2Image, AutoPipelineForImage2Image
 
         >>> pipe_i2i = AutoPipelineForImage2Image.from_pretrained(
         ...     "runwayml/stable-diffusion-v1-5", requires_safety_checker=False
         ... )
 
-        >>> pipe_t2i = AutoPipelineForTextToImage.from_pipe(pipe_t2i)
+        >>> pipe_t2i = AutoPipelineForText2Image.from_pipe(pipe_i2i)
+        >>> image = pipe_t2i(prompt).images[0]
         ```
         """
 
@@ -401,16 +397,11 @@ class AutoPipelineForText2Image(ConfigMixin):
 class AutoPipelineForImage2Image(ConfigMixin):
     r"""
 
-    AutoPipeline for image-to-image generation.
-
-    [`AutoPipelineForImage2Image`] is a generic pipeline class that will be instantiated as one of the image-to-image
-    pipeline classes in diffusers.
+    [`AutoPipelineForImage2Image`] is a generic pipeline class that instantiates an image-to-image pipeline class. The
+    specific underlying pipeline class is automatically selected from either the
+    [`~AutoPipelineForImage2Image.from_pretrained`] or [`~AutoPipelineForImage2Image.from_pipe`] methods.
 
-    The pipeline type (for example [`StableDiffusionImg2ImgPipeline`]) is automatically selected when created with the
-    `AutoPipelineForImage2Image.from_pretrained(pretrained_model_name_or_path)` or
-    `AutoPipelineForImage2Image.from_pipe(pipeline)` class methods.
-
-    This class cannot be instantiated using __init__() (throws an error).
+    This class cannot be instantiated using `__init__()` (throws an error).
 
     Class attributes:
 
@@ -438,7 +429,8 @@ class AutoPipelineForImage2Image(ConfigMixin):
         2. Find the image-to-image pipeline linked to the pipeline class using pattern matching on pipeline class
            name.
 
-        If a `controlnet` argument is passed, it will instantiate a StableDiffusionControlNetImg2ImgPipeline object.
+        If a `controlnet` argument is passed, it will instantiate a [`StableDiffusionControlNetImg2ImgPipeline`]
+        object.
 
         The pipeline is set in evaluation mode (`model.eval()`) by default.
 
@@ -537,10 +529,10 @@ class AutoPipelineForImage2Image(ConfigMixin):
         Examples:
 
         ```py
-        >>> from diffusers import AutoPipelineForTextToImage
+        >>> from diffusers import AutoPipelineForImage2Image
 
-        >>> pipeline = AutoPipelineForImageToImage.from_pretrained("runwayml/stable-diffusion-v1-5")
-        >>> print(pipeline.__class__)
+        >>> pipeline = AutoPipelineForImage2Image.from_pretrained("runwayml/stable-diffusion-v1-5")
+        >>> image = pipeline(prompt, image).images[0]
         ```
         """
         config = cls.load_config(pretrained_model_or_path)
@@ -573,13 +565,14 @@ class AutoPipelineForImage2Image(ConfigMixin):
         Examples:
 
         ```py
-        >>> from diffusers import AutoPipelineForTextToImage, AutoPipelineForImageToImage
+        >>> from diffusers import AutoPipelineForText2Image, AutoPipelineForImage2Image
 
         >>> pipe_t2i = AutoPipelineForText2Image.from_pretrained(
         ...     "runwayml/stable-diffusion-v1-5", requires_safety_checker=False
         ... )
 
-        >>> pipe_i2i = AutoPipelineForImageToImage.from_pipe(pipe_t2i)
+        >>> pipe_i2i = AutoPipelineForImage2Image.from_pipe(pipe_t2i)
+        >>> image = pipe_i2i(prompt, image).images[0]
         ```
         """
 
@@ -646,16 +639,11 @@ class AutoPipelineForImage2Image(ConfigMixin):
 class AutoPipelineForInpainting(ConfigMixin):
     r"""
 
-    AutoPipeline for inpainting generation.
-
-    [`AutoPipelineForInpainting`] is a generic pipeline class that will be instantiated as one of the inpainting
-    pipeline class in diffusers.
-
-    The pipeline type (for example [`IFInpaintingPipeline`]) is automatically selected when created with the
-    AutoPipelineForInpainting.from_pretrained(pretrained_model_name_or_path) or
-    AutoPipelineForInpainting.from_pipe(pipeline) class methods .
+    [`AutoPipelineForInpainting`] is a generic pipeline class that instantiates an inpainting pipeline class. The
+    specific underlying pipeline class is automatically selected from either the
+    [`~AutoPipelineForInpainting.from_pretrained`] or [`~AutoPipelineForInpainting.from_pipe`] methods.
 
-    This class cannot be instantiated using __init__() (throws an error).
+    This class cannot be instantiated using `__init__()` (throws an error).
 
     Class attributes:
 
@@ -682,7 +670,8 @@ class AutoPipelineForInpainting(ConfigMixin):
            config object
         2. Find the inpainting pipeline linked to the pipeline class using pattern matching on pipeline class name.
 
-        If a `controlnet` argument is passed, it will instantiate a StableDiffusionControlNetInpaintPipeline object.
+        If a `controlnet` argument is passed, it will instantiate a [`StableDiffusionControlNetInpaintPipeline`]
+        object.
 
         The pipeline is set in evaluation mode (`model.eval()`) by default.
 
@@ -781,10 +770,10 @@ class AutoPipelineForInpainting(ConfigMixin):
         Examples:
 
         ```py
-        >>> from diffusers import AutoPipelineForTextToImage
+        >>> from diffusers import AutoPipelineForInpainting
 
-        >>> pipeline = AutoPipelineForImageToImage.from_pretrained("runwayml/stable-diffusion-v1-5")
-        >>> print(pipeline.__class__)
+        >>> pipeline = AutoPipelineForInpainting.from_pretrained("runwayml/stable-diffusion-v1-5")
+        >>> image = pipeline(prompt, image=init_image, mask_image=mask_image).images[0]
         ```
         """
         config = cls.load_config(pretrained_model_or_path)
@@ -817,13 +806,14 @@ class AutoPipelineForInpainting(ConfigMixin):
         Examples:
 
         ```py
-        >>> from diffusers import AutoPipelineForTextToImage, AutoPipelineForInpainting
+        >>> from diffusers import AutoPipelineForText2Image, AutoPipelineForInpainting
 
         >>> pipe_t2i = AutoPipelineForText2Image.from_pretrained(
         ...     "DeepFloyd/IF-I-XL-v1.0", requires_safety_checker=False
         ... )
 
         >>> pipe_inpaint = AutoPipelineForInpainting.from_pipe(pipe_t2i)
+        >>> image = pipe_inpaint(prompt, image=init_image, mask_image=mask_image).images[0]
         ```
         """
         original_config = dict(pipeline.config)
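Since the example feeds in a DeepFloyd IF text-to-image pipeline, it also illustrates the class-name pattern matching described earlier. A sketch of the expected resolution:

```py
>>> # Sketch: IFPipeline maps to its inpainting counterpart by class-name
>>> # pattern matching, without reloading any weights.
>>> pipe_inpaint.__class__.__name__
'IFInpaintingPipeline'
```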
diffusers/pipelines/controlnet/multicontrolnet.py
@@ -39,6 +39,7 @@ class MultiControlNetModel(ModelMixin):
         class_labels: Optional[torch.Tensor] = None,
         timestep_cond: Optional[torch.Tensor] = None,
         attention_mask: Optional[torch.Tensor] = None,
+        added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         guess_mode: bool = False,
         return_dict: bool = True,
@@ -53,6 +54,7 @@ class MultiControlNetModel(ModelMixin):
                 class_labels=class_labels,
                 timestep_cond=timestep_cond,
                 attention_mask=attention_mask,
+                added_cond_kwargs=added_cond_kwargs,
                 cross_attention_kwargs=cross_attention_kwargs,
                 guess_mode=guess_mode,
                 return_dict=return_dict,
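These two hunks thread the new `added_cond_kwargs` argument through `MultiControlNetModel.forward` into each sub-ControlNet, which is what lets SDXL-style conditioning (pooled text embeddings and size/crop time ids) reach multi-ControlNet setups. A hypothetical call sketch; the variable names are illustrative, not taken from the diff:

```py
>>> # Hypothetical sketch: the new keyword fans out unchanged to every
>>> # sub-ControlNet. `pooled_prompt_embeds` / `add_time_ids` stand in for the
>>> # SDXL conditioning tensors a pipeline would normally prepare.
>>> down_samples, mid_sample = multi_controlnet(
...     sample,
...     timestep,
...     encoder_hidden_states=prompt_embeds,
...     controlnet_cond=control_images,
...     conditioning_scale=[1.0, 0.5],
...     added_cond_kwargs={"text_embeds": pooled_prompt_embeds, "time_ids": add_time_ids},
... )
```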
@@ -75,7 +77,7 @@ class MultiControlNetModel(ModelMixin):
         save_directory: Union[str, os.PathLike],
         is_main_process: bool = True,
         save_function: Callable = None,
-        safe_serialization: bool = False,
+        safe_serialization: bool = True,
         variant: Optional[str] = None,
     ):
         """
@@ -93,7 +95,7 @@ class MultiControlNetModel(ModelMixin):
                 The function to use to save the state dictionary. Useful on distributed training like TPUs when one
                 need to replace `torch.save` by another method. Can be configured with the environment variable
                 `DIFFUSERS_SAVE_MODE`.
-            safe_serialization (`bool`, *optional*, defaults to `False`):
+            safe_serialization (`bool`, *optional*, defaults to `True`):
                 Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).
             variant (`str`, *optional*):
                 If specified, weights are saved in the format pytorch_model.<variant>.bin.
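The flipped default means `save_pretrained` now writes `safetensors` files out of the box; callers that still need the legacy pickle-based `.bin` output must opt out explicitly. A sketch, assuming a loaded `MultiControlNetModel` instance named `multi_controlnet`:

```py
>>> # Sketch: restore the pre-0.20 behavior by opting out of safetensors.
>>> multi_controlnet.save_pretrained("./multi-controlnet", safe_serialization=False)
```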
diffusers/pipelines/controlnet/pipeline_controlnet.py
@@ -326,7 +326,14 @@ class StableDiffusionControlNetPipeline(
             )
             prompt_embeds = prompt_embeds[0]
 
-        prompt_embeds = prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)
+        if self.text_encoder is not None:
+            prompt_embeds_dtype = self.text_encoder.dtype
+        elif self.unet is not None:
+            prompt_embeds_dtype = self.unet.dtype
+        else:
+            prompt_embeds_dtype = prompt_embeds.dtype
+
+        prompt_embeds = prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)
 
         bs_embed, seq_len, _ = prompt_embeds.shape
         # duplicate text embeddings for each generation per prompt, using mps friendly method
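The same dtype-resolution block is repeated in the img2img and inpaint pipelines below; it lets prompt embeddings be cast correctly even when `text_encoder` is `None` (for example, after computing `prompt_embeds` ahead of time and freeing the encoder). Restated as a standalone helper (hypothetical, not a library function):

```py
>>> # Hypothetical helper restating the fallback chain: text encoder dtype if
>>> # present, else the UNet's dtype, else whatever the embeddings already use.
>>> def _resolve_prompt_embeds_dtype(text_encoder, unet, prompt_embeds):
...     if text_encoder is not None:
...         return text_encoder.dtype
...     if unet is not None:
...         return unet.dtype
...     return prompt_embeds.dtype
```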
@@ -382,7 +389,7 @@ class StableDiffusionControlNetPipeline(
             # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
             seq_len = negative_prompt_embeds.shape[1]
 
-            negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)
+            negative_prompt_embeds = negative_prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)
 
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
@@ -554,6 +561,12 @@ class StableDiffusionControlNetPipeline(
         else:
             assert False
 
+        if not isinstance(control_guidance_start, (tuple, list)):
+            control_guidance_start = [control_guidance_start]
+
+        if not isinstance(control_guidance_end, (tuple, list)):
+            control_guidance_end = [control_guidance_end]
+
         if len(control_guidance_start) != len(control_guidance_end):
             raise ValueError(
                 f"`control_guidance_start` has {len(control_guidance_start)} elements, but `control_guidance_end` has {len(control_guidance_end)} elements. Make sure to provide the same number of elements to each list."
@@ -760,7 +773,7 @@ class StableDiffusionControlNetPipeline(
             cross_attention_kwargs (`dict`, *optional*):
                 A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                 `self.processor` in
-                [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
+                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
             controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
                 The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
                 to the residual in the original unet. If multiple ControlNets are specified in init, you can set the
@@ -934,7 +947,10 @@ class StableDiffusionControlNetPipeline(
                 if isinstance(controlnet_keep[i], list):
                     cond_scale = [c * s for c, s in zip(controlnet_conditioning_scale, controlnet_keep[i])]
                 else:
-                    cond_scale = controlnet_conditioning_scale * controlnet_keep[i]
+                    controlnet_cond_scale = controlnet_conditioning_scale
+                    if isinstance(controlnet_cond_scale, list):
+                        controlnet_cond_scale = controlnet_cond_scale[0]
+                    cond_scale = controlnet_cond_scale * controlnet_keep[i]
 
                 down_block_res_samples, mid_block_res_sample = self.controlnet(
                     control_model_input,
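The motivation for the extra guard: `controlnet_keep[i]` is a plain float in the single-ControlNet branch, and multiplying a Python list by a float raises, so a list-valued `controlnet_conditioning_scale` with one ControlNet now falls back to its first element instead of crashing. Illustration:

```py
>>> # Why the guard matters: list * float is a TypeError in Python.
>>> [0.5] * 0.8
Traceback (most recent call last):
    ...
TypeError: can't multiply sequence by non-int of type 'float'
```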
diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
@@ -352,7 +352,14 @@ class StableDiffusionControlNetImg2ImgPipeline(
             )
             prompt_embeds = prompt_embeds[0]
 
-        prompt_embeds = prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)
+        if self.text_encoder is not None:
+            prompt_embeds_dtype = self.text_encoder.dtype
+        elif self.unet is not None:
+            prompt_embeds_dtype = self.unet.dtype
+        else:
+            prompt_embeds_dtype = prompt_embeds.dtype
+
+        prompt_embeds = prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)
 
         bs_embed, seq_len, _ = prompt_embeds.shape
         # duplicate text embeddings for each generation per prompt, using mps friendly method
@@ -408,7 +415,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
             # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
             seq_len = negative_prompt_embeds.shape[1]
 
-            negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)
+            negative_prompt_embeds = negative_prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)
 
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
@@ -790,7 +797,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
                 instead.
             image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
                 `List[List[torch.FloatTensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
-                The initial image will be used as the starting point for the image generation process. Can also accpet
+                The initial image will be used as the starting point for the image generation process. Can also accept
                 image latents as `image`, if passing latents directly, it will not be encoded again.
             control_image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
                 `List[List[torch.FloatTensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
@@ -851,7 +858,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
             cross_attention_kwargs (`dict`, *optional*):
                 A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                 `self.processor` in
-                [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
+                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
             controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
                 The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
                 to the residual in the original unet. If multiple ControlNets are specified in init, you can set the
@@ -914,8 +921,6 @@ class StableDiffusionControlNetImg2ImgPipeline(
         # corresponds to doing no classifier free guidance.
         do_classifier_free_guidance = guidance_scale > 1.0
 
-        controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
-
         if isinstance(controlnet, MultiControlNetModel) and isinstance(controlnet_conditioning_scale, float):
             controlnet_conditioning_scale = [controlnet_conditioning_scale] * len(controlnet.nets)
 
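The deleted lines duplicated a `torch.compile` unwrap that already runs earlier in `__call__`, so `controlnet` is still the unwrapped module at this point. The idiom itself, for reference (a sketch; `pipe` is an assumed loaded pipeline and `is_compiled_module` is the helper the pipeline module imports):

```py
>>> # The unwrap idiom the removed lines repeated: compiled modules keep the
>>> # original module under `_orig_mod`.
>>> controlnet = pipe.controlnet._orig_mod if is_compiled_module(pipe.controlnet) else pipe.controlnet
```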
@@ -1027,7 +1032,10 @@ class StableDiffusionControlNetImg2ImgPipeline(
                 if isinstance(controlnet_keep[i], list):
                     cond_scale = [c * s for c, s in zip(controlnet_conditioning_scale, controlnet_keep[i])]
                 else:
-                    cond_scale = controlnet_conditioning_scale * controlnet_keep[i]
+                    controlnet_cond_scale = controlnet_conditioning_scale
+                    if isinstance(controlnet_cond_scale, list):
+                        controlnet_cond_scale = controlnet_cond_scale[0]
+                    cond_scale = controlnet_cond_scale * controlnet_keep[i]
 
                 down_block_res_samples, mid_block_res_sample = self.controlnet(
                     control_model_input,
diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
@@ -469,7 +469,14 @@ class StableDiffusionControlNetInpaintPipeline(
             )
             prompt_embeds = prompt_embeds[0]
 
-        prompt_embeds = prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)
+        if self.text_encoder is not None:
+            prompt_embeds_dtype = self.text_encoder.dtype
+        elif self.unet is not None:
+            prompt_embeds_dtype = self.unet.dtype
+        else:
+            prompt_embeds_dtype = prompt_embeds.dtype
+
+        prompt_embeds = prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)
 
         bs_embed, seq_len, _ = prompt_embeds.shape
         # duplicate text embeddings for each generation per prompt, using mps friendly method
@@ -525,7 +532,7 @@ class StableDiffusionControlNetInpaintPipeline(
             # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
             seq_len = negative_prompt_embeds.shape[1]
 
-            negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)
+            negative_prompt_embeds = negative_prompt_embeds.to(dtype=prompt_embeds_dtype, device=device)
 
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
@@ -1048,7 +1055,7 @@ class StableDiffusionControlNetInpaintPipeline(
             cross_attention_kwargs (`dict`, *optional*):
                 A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                 `self.processor` in
-                [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
+                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
             controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 0.5):
                 The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
                 to the residual in the original unet. If multiple ControlNets are specified in init, you can set the
@@ -1262,7 +1269,10 @@ class StableDiffusionControlNetInpaintPipeline(
                 if isinstance(controlnet_keep[i], list):
                     cond_scale = [c * s for c, s in zip(controlnet_conditioning_scale, controlnet_keep[i])]
                 else:
-                    cond_scale = controlnet_conditioning_scale * controlnet_keep[i]
+                    controlnet_cond_scale = controlnet_conditioning_scale
+                    if isinstance(controlnet_cond_scale, list):
+                        controlnet_cond_scale = controlnet_cond_scale[0]
+                    cond_scale = controlnet_cond_scale * controlnet_keep[i]
 
                 down_block_res_samples, mid_block_res_sample = self.controlnet(
                     control_model_input,