diffusers 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl

Files changed (114)
  1. diffusers/__init__.py +3 -1
  2. diffusers/commands/fp16_safetensors.py +2 -7
  3. diffusers/configuration_utils.py +23 -1
  4. diffusers/dependency_versions_table.py +1 -1
  5. diffusers/loaders.py +62 -64
  6. diffusers/models/__init__.py +1 -0
  7. diffusers/models/activations.py +2 -0
  8. diffusers/models/attention.py +45 -1
  9. diffusers/models/autoencoder_tiny.py +193 -0
  10. diffusers/models/controlnet.py +1 -1
  11. diffusers/models/embeddings.py +56 -0
  12. diffusers/models/lora.py +0 -6
  13. diffusers/models/modeling_flax_utils.py +28 -2
  14. diffusers/models/modeling_utils.py +33 -16
  15. diffusers/models/transformer_2d.py +26 -9
  16. diffusers/models/unet_1d.py +2 -2
  17. diffusers/models/unet_2d_blocks.py +106 -56
  18. diffusers/models/unet_2d_condition.py +20 -5
  19. diffusers/models/vae.py +106 -1
  20. diffusers/pipelines/__init__.py +1 -0
  21. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +10 -3
  22. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -3
  23. diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
  24. diffusers/pipelines/auto_pipeline.py +33 -43
  25. diffusers/pipelines/controlnet/multicontrolnet.py +4 -2
  26. diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -4
  27. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +15 -7
  28. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +14 -4
  29. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +157 -10
  30. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -10
  31. diffusers/pipelines/deepfloyd_if/pipeline_if.py +1 -1
  32. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +1 -1
  33. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1 -1
  34. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1 -1
  35. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1 -1
  36. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +1 -1
  37. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +43 -2
  38. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +44 -2
  39. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
  40. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
  41. diffusers/pipelines/pipeline_flax_utils.py +41 -4
  42. diffusers/pipelines/pipeline_utils.py +60 -16
  43. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +2 -2
  44. diffusers/pipelines/stable_diffusion/__init__.py +1 -0
  45. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +81 -37
  46. diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +10 -3
  47. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -3
  48. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -3
  49. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +10 -3
  50. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +12 -5
  51. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +832 -0
  52. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -3
  53. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +10 -3
  54. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +10 -3
  55. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +9 -2
  56. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +17 -8
  57. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +10 -3
  58. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +10 -3
  59. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +10 -3
  60. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +10 -3
  61. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +10 -3
  62. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +10 -3
  63. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +10 -3
  64. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +10 -3
  65. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +3 -5
  66. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +75 -3
  67. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +76 -6
  68. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +1 -2
  69. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +10 -3
  70. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +10 -3
  71. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +11 -4
  72. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +1 -1
  73. diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +131 -28
  74. diffusers/schedulers/scheduling_consistency_models.py +70 -57
  75. diffusers/schedulers/scheduling_ddim.py +76 -71
  76. diffusers/schedulers/scheduling_ddim_inverse.py +76 -44
  77. diffusers/schedulers/scheduling_ddim_parallel.py +11 -8
  78. diffusers/schedulers/scheduling_ddpm.py +68 -67
  79. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -15
  80. diffusers/schedulers/scheduling_deis_multistep.py +93 -85
  81. diffusers/schedulers/scheduling_dpmsolver_multistep.py +118 -120
  82. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +116 -109
  83. diffusers/schedulers/scheduling_dpmsolver_sde.py +57 -43
  84. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +122 -121
  85. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +54 -44
  86. diffusers/schedulers/scheduling_euler_discrete.py +63 -56
  87. diffusers/schedulers/scheduling_heun_discrete.py +57 -45
  88. diffusers/schedulers/scheduling_ipndm.py +27 -22
  89. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +54 -41
  90. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +52 -41
  91. diffusers/schedulers/scheduling_karras_ve.py +55 -45
  92. diffusers/schedulers/scheduling_lms_discrete.py +58 -52
  93. diffusers/schedulers/scheduling_pndm.py +77 -62
  94. diffusers/schedulers/scheduling_repaint.py +56 -38
  95. diffusers/schedulers/scheduling_sde_ve.py +62 -50
  96. diffusers/schedulers/scheduling_sde_vp.py +32 -11
  97. diffusers/schedulers/scheduling_unclip.py +3 -3
  98. diffusers/schedulers/scheduling_unipc_multistep.py +131 -91
  99. diffusers/schedulers/scheduling_utils.py +41 -35
  100. diffusers/schedulers/scheduling_utils_flax.py +8 -2
  101. diffusers/schedulers/scheduling_vq_diffusion.py +39 -68
  102. diffusers/utils/__init__.py +2 -2
  103. diffusers/utils/dummy_pt_objects.py +15 -0
  104. diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
  105. diffusers/utils/hub_utils.py +105 -2
  106. diffusers/utils/import_utils.py +0 -4
  107. diffusers/utils/pil_utils.py +19 -0
  108. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA +5 -7
  109. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/RECORD +113 -112
  110. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/WHEEL +1 -1
  111. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/entry_points.txt +0 -1
  112. diffusers/models/cross_attention.py +0 -94
  113. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/LICENSE +0 -0
  114. {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/top_level.txt +0 -0
@@ -58,8 +58,45 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
  EXAMPLE_DOC_STRING = """
  Examples:
  ```py
- >>> # To be updated when there's a useful ControlNet checkpoint
- >>> # compatible with SDXL.
+ >>> # !pip install opencv-python transformers accelerate
+ >>> from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL
+ >>> from diffusers.utils import load_image
+ >>> import numpy as np
+ >>> import torch
+
+ >>> import cv2
+ >>> from PIL import Image
+
+ >>> prompt = "aerial view, a futuristic research complex in a bright foggy jungle, hard lighting"
+ >>> negative_prompt = "low quality, bad quality, sketches"
+
+ >>> # download an image
+ >>> image = load_image(
+ ... "https://hf.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/hf-logo.png"
+ ... )
+
+ >>> # initialize the models and pipeline
+ >>> controlnet_conditioning_scale = 0.5 # recommended for good generalization
+ >>> controlnet = ControlNetModel.from_pretrained(
+ ... "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16
+ ... )
+ >>> vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
+ >>> pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+ ... "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, vae=vae, torch_dtype=torch.float16
+ ... )
+ >>> pipe.enable_model_cpu_offload()
+
+ >>> # get canny image
+ >>> image = np.array(image)
+ >>> image = cv2.Canny(image, 100, 200)
+ >>> image = image[:, :, None]
+ >>> image = np.concatenate([image, image, image], axis=2)
+ >>> canny_image = Image.fromarray(image)
+
+ >>> # generate image
+ >>> image = pipe(
+ ... prompt, controlnet_conditioning_scale=controlnet_conditioning_scale, image=canny_image
+ ... ).images[0]
  ```
  """
 
@@ -112,7 +149,7 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
  tokenizer: CLIPTokenizer,
  tokenizer_2: CLIPTokenizer,
  unet: UNet2DConditionModel,
- controlnet: ControlNetModel,
+ controlnet: Union[ControlNetModel, List[ControlNetModel], Tuple[ControlNetModel], MultiControlNetModel],
  scheduler: KarrasDiffusionSchedulers,
  force_zeros_for_empty_prompt: bool = True,
  add_watermarker: Optional[bool] = None,
@@ -120,7 +157,7 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
  super().__init__()
 
  if isinstance(controlnet, (list, tuple)):
- raise ValueError("MultiControlNet is not yet supported.")
+ controlnet = MultiControlNetModel(controlnet)
 
  self.register_modules(
  vae=vae,
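With this hunk, passing a list or tuple of ControlNets no longer raises; the constructor wraps them in a `MultiControlNetModel`, mirroring the non-XL ControlNet pipeline. A hedged sketch of how that might be used: the depth checkpoint id and the placeholder conditioning images are illustrative assumptions, and the per-net `image` / `controlnet_conditioning_scale` list requirements come from the `check_inputs` hunks further down.

```py
# Sketch: multi-ControlNet with the SDXL pipeline, new in 0.20.
import torch
from PIL import Image
from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel

canny = ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16)
depth = ControlNetModel.from_pretrained("diffusers/controlnet-depth-sdxl-1.0", torch_dtype=torch.float16)  # assumed checkpoint id

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=[canny, depth],  # a list is now wrapped in MultiControlNetModel(controlnet)
    torch_dtype=torch.float16,
).to("cuda")

# Placeholder conditioning images; in practice these would be a Canny edge map and a depth map.
canny_map = Image.new("RGB", (1024, 1024))
depth_map = Image.new("RGB", (1024, 1024))

# For multiple ControlNets, `image` must be a list with one entry per net, and the
# conditioning scale may be a per-net list (enforced by check_inputs below).
image = pipe(
    "aerial view, a futuristic research complex in a jungle",
    image=[canny_map, depth_map],
    controlnet_conditioning_scale=[0.5, 0.5],
).images[0]
```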
@@ -305,7 +342,6 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
 
  text_input_ids = text_inputs.input_ids
  untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
- untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
 
  if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
  text_input_ids, untruncated_ids
@@ -432,6 +468,8 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
  negative_prompt_2=None,
  prompt_embeds=None,
  negative_prompt_embeds=None,
+ pooled_prompt_embeds=None,
+ negative_pooled_prompt_embeds=None,
  controlnet_conditioning_scale=1.0,
  control_guidance_start=0.0,
  control_guidance_end=1.0,
@@ -482,6 +520,25 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
  f" {negative_prompt_embeds.shape}."
  )
 
+ if prompt_embeds is not None and pooled_prompt_embeds is None:
+ raise ValueError(
+ "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
+ )
+
+ if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None:
+ raise ValueError(
+ "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
+ )
+
+ # `prompt` needs more sophisticated handling when there are multiple
+ # conditionings.
+ if isinstance(self.controlnet, MultiControlNetModel):
+ if isinstance(prompt, list):
+ logger.warning(
+ f"You have {len(self.controlnet.nets)} ControlNets and you have passed {len(prompt)}"
+ " prompts. The conditionings will be fixed across the prompts."
+ )
+
  # Check `image`
  is_compiled = hasattr(F, "scaled_dot_product_attention") and isinstance(
  self.controlnet, torch._dynamo.eval_frame.OptimizedModule
@@ -492,6 +549,25 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
  and isinstance(self.controlnet._orig_mod, ControlNetModel)
  ):
  self.check_image(image, prompt, prompt_embeds)
+ elif (
+ isinstance(self.controlnet, MultiControlNetModel)
+ or is_compiled
+ and isinstance(self.controlnet._orig_mod, MultiControlNetModel)
+ ):
+ if not isinstance(image, list):
+ raise TypeError("For multiple controlnets: `image` must be type `list`")
+
+ # When `image` is a nested list:
+ # (e.g. [[canny_image_1, pose_image_1], [canny_image_2, pose_image_2]])
+ elif any(isinstance(i, list) for i in image):
+ raise ValueError("A single batch of multiple conditionings are supported at the moment.")
+ elif len(image) != len(self.controlnet.nets):
+ raise ValueError(
+ f"For multiple controlnets: `image` must have the same length as the number of controlnets, but got {len(image)} images and {len(self.controlnet.nets)} ControlNets."
+ )
+
+ for image_ in image:
+ self.check_image(image_, prompt, prompt_embeds)
  else:
  assert False
 
@@ -503,14 +579,41 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
  ):
  if not isinstance(controlnet_conditioning_scale, float):
  raise TypeError("For single controlnet: `controlnet_conditioning_scale` must be type `float`.")
+ elif (
+ isinstance(self.controlnet, MultiControlNetModel)
+ or is_compiled
+ and isinstance(self.controlnet._orig_mod, MultiControlNetModel)
+ ):
+ if isinstance(controlnet_conditioning_scale, list):
+ if any(isinstance(i, list) for i in controlnet_conditioning_scale):
+ raise ValueError("A single batch of multiple conditionings are supported at the moment.")
+ elif isinstance(controlnet_conditioning_scale, list) and len(controlnet_conditioning_scale) != len(
+ self.controlnet.nets
+ ):
+ raise ValueError(
+ "For multiple controlnets: When `controlnet_conditioning_scale` is specified as `list`, it must have"
+ " the same length as the number of controlnets"
+ )
  else:
  assert False
 
+ if not isinstance(control_guidance_start, (tuple, list)):
+ control_guidance_start = [control_guidance_start]
+
+ if not isinstance(control_guidance_end, (tuple, list)):
+ control_guidance_end = [control_guidance_end]
+
  if len(control_guidance_start) != len(control_guidance_end):
  raise ValueError(
  f"`control_guidance_start` has {len(control_guidance_start)} elements, but `control_guidance_end` has {len(control_guidance_end)} elements. Make sure to provide the same number of elements to each list."
  )
 
+ if isinstance(self.controlnet, MultiControlNetModel):
+ if len(control_guidance_start) != len(self.controlnet.nets):
+ raise ValueError(
+ f"`control_guidance_start`: {control_guidance_start} has {len(control_guidance_start)} elements but there are {len(self.controlnet.nets)} controlnets available. Make sure to provide {len(self.controlnet.nets)}."
+ )
+
  for start, end in zip(control_guidance_start, control_guidance_end):
  if start >= end:
  raise ValueError(
@@ -521,6 +624,7 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
  if end > 1.0:
  raise ValueError(f"control guidance end: {end} can't be larger than 1.0.")
 
+ # Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image
  def check_image(self, image, prompt, prompt_embeds):
  image_is_pil = isinstance(image, PIL.Image.Image)
  image_is_tensor = isinstance(image, torch.Tensor)
@@ -558,6 +662,7 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
  f"If image batch size is not 1, image batch size must be same as prompt batch size. image batch size: {image_batch_size}, prompt batch size: {prompt_batch_size}"
  )
 
+ # Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.prepare_image
  def prepare_image(
  self,
  image,
@@ -669,6 +774,8 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
  latents: Optional[torch.FloatTensor] = None,
  prompt_embeds: Optional[torch.FloatTensor] = None,
  negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
  output_type: Optional[str] = "pil",
  return_dict: bool = True,
  callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
@@ -739,6 +846,13 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
  argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
  output_type (`str`, *optional*, defaults to `"pil"`):
  The output format of the generate image. Choose between
  [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -754,7 +868,7 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
  cross_attention_kwargs (`dict`, *optional*):
  A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
  `self.processor` in
- [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
+ [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
  controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
  The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
  to the residual in the original unet. If multiple ControlNets are specified in init, you can set the
@@ -810,6 +924,8 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
  negative_prompt_2,
  prompt_embeds,
  negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
  controlnet_conditioning_scale,
  control_guidance_start,
  control_guidance_end,
@@ -829,6 +945,9 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
  # corresponds to doing no classifier free guidance.
  do_classifier_free_guidance = guidance_scale > 1.0
 
+ if isinstance(controlnet, MultiControlNetModel) and isinstance(controlnet_conditioning_scale, float):
+ controlnet_conditioning_scale = [controlnet_conditioning_scale] * len(controlnet.nets)
+
  global_pool_conditions = (
  controlnet.config.global_pool_conditions
  if isinstance(controlnet, ControlNetModel)
@@ -855,6 +974,8 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
  negative_prompt_2,
  prompt_embeds=prompt_embeds,
  negative_prompt_embeds=negative_prompt_embeds,
+ pooled_prompt_embeds=pooled_prompt_embeds,
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
  lora_scale=text_encoder_lora_scale,
  )
 
@@ -872,6 +993,26 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
  guess_mode=guess_mode,
  )
  height, width = image.shape[-2:]
+ elif isinstance(controlnet, MultiControlNetModel):
+ images = []
+
+ for image_ in image:
+ image_ = self.prepare_image(
+ image=image_,
+ width=width,
+ height=height,
+ batch_size=batch_size * num_images_per_prompt,
+ num_images_per_prompt=num_images_per_prompt,
+ device=device,
+ dtype=controlnet.dtype,
+ do_classifier_free_guidance=do_classifier_free_guidance,
+ guess_mode=guess_mode,
+ )
+
+ images.append(image_)
+
+ image = images
+ height, width = image[0].shape[-2:]
  else:
  assert False
 
@@ -902,12 +1043,15 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
  1.0 - float(i / len(timesteps) < s or (i + 1) / len(timesteps) > e)
  for s, e in zip(control_guidance_start, control_guidance_end)
  ]
- controlnet_keep.append(keeps[0] if len(keeps) == 1 else keeps)
+ controlnet_keep.append(keeps[0] if isinstance(controlnet, ControlNetModel) else keeps)
 
- original_size = original_size or image.shape[-2:]
+ # 7.2 Prepare added time ids & embeddings
+ if isinstance(image, list):
+ original_size = original_size or image[0].shape[-2:]
+ else:
+ original_size = original_size or image.shape[-2:]
  target_size = target_size or (height, width)
 
- # 7.2 Prepare added time ids & embeddings
  add_text_embeds = pooled_prompt_embeds
  add_time_ids = self._get_add_time_ids(
  original_size, crops_coords_top_left, target_size, dtype=prompt_embeds.dtype
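For intuition, the `controlnet_keep` mask built above simply switches each ControlNet on for the fraction of the denoising schedule between its `control_guidance_start` and `control_guidance_end`. A standalone sketch of the same formula, with a toy timestep count and illustrative start/end values:

```py
# Standalone illustration of the controlnet_keep computation used in the hunk above.
timesteps = list(range(30))            # toy schedule of 30 denoising steps
control_guidance_start = [0.0, 0.5]    # first net active from the start, second from the midpoint
control_guidance_end = [0.5, 1.0]      # first net stops at the midpoint, second runs to the end

controlnet_keep = []
for i in range(len(timesteps)):
    keeps = [
        1.0 - float(i / len(timesteps) < s or (i + 1) / len(timesteps) > e)
        for s, e in zip(control_guidance_start, control_guidance_end)
    ]
    controlnet_keep.append(keeps)

print(controlnet_keep[0])   # [1.0, 0.0] -> only the first ControlNet is applied early on
print(controlnet_keep[-1])  # [0.0, 1.0] -> only the second ControlNet is applied at the end
```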
@@ -943,7 +1087,10 @@ class StableDiffusionXLControlNetPipeline(DiffusionPipeline, TextualInversionLoa
  if isinstance(controlnet_keep[i], list):
  cond_scale = [c * s for c, s in zip(controlnet_conditioning_scale, controlnet_keep[i])]
  else:
- cond_scale = controlnet_conditioning_scale * controlnet_keep[i]
+ controlnet_cond_scale = controlnet_conditioning_scale
+ if isinstance(controlnet_cond_scale, list):
+ controlnet_cond_scale = controlnet_cond_scale[0]
+ cond_scale = controlnet_cond_scale * controlnet_keep[i]
 
  added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
  down_block_res_samples, mid_block_res_sample = self.controlnet(
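Taken together, the hunks above let callers pass pre-computed SDXL embeddings, as long as the pooled tensors come along. A hedged sketch of that workflow, assuming the pipeline's `encode_prompt` helper (called internally above) can be invoked directly and returns `(prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds)`; `pipe` and `canny_image` are the objects from the docstring example at the top of this file:

```py
# Sketch: pre-compute SDXL embeddings once and reuse them across calls.
(
    prompt_embeds,
    negative_prompt_embeds,
    pooled_prompt_embeds,
    negative_pooled_prompt_embeds,
) = pipe.encode_prompt(
    prompt="aerial view, a futuristic research complex",
    negative_prompt="low quality",
    device=pipe.device,
    num_images_per_prompt=1,
    do_classifier_free_guidance=True,
)

# The new check_inputs requires pooled embeds whenever prompt_embeds are passed.
image = pipe(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_prompt_embeds,
    pooled_prompt_embeds=pooled_prompt_embeds,
    negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
    image=canny_image,  # conditioning image from the docstring example
).images[0]
```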
@@ -51,19 +51,11 @@ EXAMPLE_DOC_STRING = """
  >>> import jax.numpy as jnp
  >>> from flax.jax_utils import replicate
  >>> from flax.training.common_utils import shard
- >>> from diffusers.utils import load_image
+ >>> from diffusers.utils import load_image, make_image_grid
  >>> from PIL import Image
  >>> from diffusers import FlaxStableDiffusionControlNetPipeline, FlaxControlNetModel
 
 
- >>> def image_grid(imgs, rows, cols):
- ... w, h = imgs[0].size
- ... grid = Image.new("RGB", size=(cols * w, rows * h))
- ... for i, img in enumerate(imgs):
- ... grid.paste(img, box=(i % cols * w, i // cols * h))
- ... return grid
-
-
  >>> def create_key(seed=0):
  ... return jax.random.PRNGKey(seed)
 
@@ -110,7 +102,7 @@ EXAMPLE_DOC_STRING = """
  ... ).images
 
  >>> output_images = pipe.numpy_to_pil(np.asarray(output.reshape((num_samples,) + output.shape[-3:])))
- >>> output_images = image_grid(output_images, num_samples // 4, 4)
+ >>> output_images = make_image_grid(output_images, num_samples // 4, 4)
  >>> output_images.save("generated_image.png")
  ```
  """
@@ -662,7 +662,7 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
  cross_attention_kwargs (`dict`, *optional*):
  A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
  `self.processor` in
- [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
+ [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
 
  Examples:
 
@@ -783,7 +783,7 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
  cross_attention_kwargs (`dict`, *optional*):
  A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
  `self.processor` in
- [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
+ [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
 
  Examples:
 
@@ -865,7 +865,7 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
  cross_attention_kwargs (`dict`, *optional*):
  A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
  `self.processor` in
- [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
+ [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
  noise_level (`int`, *optional*, defaults to 250):
  The amount of noise to add to the upscaled image. Must be in the range `[0, 1000)`
  clean_caption (`bool`, *optional*, defaults to `True`):
@@ -883,7 +883,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
  cross_attention_kwargs (`dict`, *optional*):
  A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
  `self.processor` in
- [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
+ [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
 
  Examples:
 
@@ -961,7 +961,7 @@ class IFInpaintingSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
  cross_attention_kwargs (`dict`, *optional*):
  A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
  `self.processor` in
- [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
+ [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
  noise_level (`int`, *optional*, defaults to 0):
  The amount of noise to add to the upscaled image. Must be in the range `[0, 1000)`
  clean_caption (`bool`, *optional*, defaults to `True`):
@@ -730,7 +730,7 @@ class IFSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
  cross_attention_kwargs (`dict`, *optional*):
  A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
  `self.processor` in
- [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
+ [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
  noise_level (`int`, *optional*, defaults to 250):
  The amount of noise to add to the upscaled image. Must be in the range `[0, 1000)`
  clean_caption (`bool`, *optional*, defaults to `True`):
@@ -188,6 +188,9 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
  movq=movq,
  )
 
+ def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+ self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
  def enable_model_cpu_offload(self, gpu_id=0):
  r"""
  Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
@@ -198,6 +201,16 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
  self.prior_pipe.enable_model_cpu_offload()
  self.decoder_pipe.enable_model_cpu_offload()
 
+ def enable_sequential_cpu_offload(self, gpu_id=0):
+ r"""
+ Offloads all models (`unet`, `text_encoder`, `vae`, and `safety checker` state dicts) to CPU using 🤗
+ Accelerate, significantly reducing memory usage. Models are moved to a `torch.device('meta')` and loaded on a
+ GPU only when their specific submodule's `forward` method is called. Offloading happens on a submodule basis.
+ Memory savings are higher than using `enable_model_cpu_offload`, but performance is lower.
+ """
+ self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+ self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
  def progress_bar(self, iterable=None, total=None):
  self.prior_pipe.progress_bar(iterable=iterable, total=total)
  self.decoder_pipe.progress_bar(iterable=iterable, total=total)
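The combined Kandinsky pipelines now forward `enable_xformers_memory_efficient_attention` and `enable_sequential_cpu_offload` to their prior/decoder sub-pipelines, as this hunk and the matching hunks below show. A hedged usage sketch; the checkpoint id and the choice of loading through `AutoPipelineForText2Image` are illustrative, and the other combined pipelines in these hunks gain the same methods:

```py
import torch
from diffusers import AutoPipelineForText2Image

# Loads a KandinskyCombinedPipeline under the hood (checkpoint id assumed for illustration).
pipe = AutoPipelineForText2Image.from_pretrained(
    "kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16
)

# New in 0.20: these calls are forwarded to the sub-pipelines.
pipe.enable_sequential_cpu_offload()  # offloads prior and decoder submodule by submodule
pipe.enable_xformers_memory_efficient_attention()  # requires xformers; applied to the decoder pipeline

image = pipe("A lighthouse at dusk, oil painting").images[0]
```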
@@ -398,6 +411,9 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
  movq=movq,
  )
 
+ def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+ self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
  def enable_model_cpu_offload(self, gpu_id=0):
  r"""
  Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
@@ -408,6 +424,17 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
  self.prior_pipe.enable_model_cpu_offload()
  self.decoder_pipe.enable_model_cpu_offload()
 
+ def enable_sequential_cpu_offload(self, gpu_id=0):
+ r"""
+ Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
+ text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
+ `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
+ Note that offloading happens on a submodule basis. Memory savings are higher than with
+ `enable_model_cpu_offload`, but performance is lower.
+ """
+ self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+ self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
  def progress_bar(self, iterable=None, total=None):
  self.prior_pipe.progress_bar(iterable=iterable, total=total)
  self.decoder_pipe.progress_bar(iterable=iterable, total=total)
@@ -447,7 +474,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
  The prompt or prompts to guide the image generation.
  image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
  `Image`, or tensor representing an image batch, that will be used as the starting point for the
- process. Can also accpet image latents as `image`, if passing latents directly, it will not be encoded
+ process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
  again.
  negative_prompt (`str` or `List[str]`, *optional*):
  The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
@@ -630,6 +657,9 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
  movq=movq,
  )
 
+ def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+ self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
  def enable_model_cpu_offload(self, gpu_id=0):
  r"""
  Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
@@ -640,6 +670,17 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
  self.prior_pipe.enable_model_cpu_offload()
  self.decoder_pipe.enable_model_cpu_offload()
 
+ def enable_sequential_cpu_offload(self, gpu_id=0):
+ r"""
+ Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
+ text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
+ `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
+ Note that offloading happens on a submodule basis. Memory savings are higher than with
+ `enable_model_cpu_offload`, but performance is lower.
+ """
+ self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+ self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
  def progress_bar(self, iterable=None, total=None):
  self.prior_pipe.progress_bar(iterable=iterable, total=total)
  self.decoder_pipe.progress_bar(iterable=iterable, total=total)
@@ -679,7 +720,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
  The prompt or prompts to guide the image generation.
  image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
  `Image`, or tensor representing an image batch, that will be used as the starting point for the
- process. Can also accpet image latents as `image`, if passing latents directly, it will not be encoded
+ process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
  again.
  mask_image (`np.array`):
  Tensor representing an image batch, to mask `image`. White pixels in the mask will be repainted, while
@@ -177,6 +177,9 @@ class KandinskyV22CombinedPipeline(DiffusionPipeline):
  movq=movq,
  )
 
+ def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+ self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
  def enable_model_cpu_offload(self, gpu_id=0):
  r"""
  Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
@@ -187,6 +190,17 @@ class KandinskyV22CombinedPipeline(DiffusionPipeline):
  self.prior_pipe.enable_model_cpu_offload()
  self.decoder_pipe.enable_model_cpu_offload()
 
+ def enable_sequential_cpu_offload(self, gpu_id=0):
+ r"""
+ Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
+ text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
+ `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
+ Note that offloading happens on a submodule basis. Memory savings are higher than with
+ `enable_model_cpu_offload`, but performance is lower.
+ """
+ self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+ self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
  def progress_bar(self, iterable=None, total=None):
  self.prior_pipe.progress_bar(iterable=iterable, total=total)
  self.decoder_pipe.progress_bar(iterable=iterable, total=total)
@@ -378,6 +392,9 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
  movq=movq,
  )
 
+ def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+ self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
  def enable_model_cpu_offload(self, gpu_id=0):
  r"""
  Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
@@ -388,6 +405,17 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
  self.prior_pipe.enable_model_cpu_offload()
  self.decoder_pipe.enable_model_cpu_offload()
 
+ def enable_sequential_cpu_offload(self, gpu_id=0):
+ r"""
+ Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
+ text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
+ `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
+ Note that offloading happens on a submodule basis. Memory savings are higher than with
+ `enable_model_cpu_offload`, but performance is lower.
+ """
+ self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+ self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
  def progress_bar(self, iterable=None, total=None):
  self.prior_pipe.progress_bar(iterable=iterable, total=total)
  self.decoder_pipe.progress_bar(iterable=iterable, total=total)
@@ -427,7 +455,7 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
  The prompt or prompts to guide the image generation.
  image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
  `Image`, or tensor representing an image batch, that will be used as the starting point for the
- process. Can also accpet image latents as `image`, if passing latents directly, it will not be encoded
+ process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
  again.
  negative_prompt (`str` or `List[str]`, *optional*):
  The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
@@ -601,6 +629,9 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
  movq=movq,
  )
 
+ def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+ self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
  def enable_model_cpu_offload(self, gpu_id=0):
  r"""
  Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
@@ -611,6 +642,17 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
  self.prior_pipe.enable_model_cpu_offload()
  self.decoder_pipe.enable_model_cpu_offload()
 
+ def enable_sequential_cpu_offload(self, gpu_id=0):
+ r"""
+ Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
+ text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
+ `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
+ Note that offloading happens on a submodule basis. Memory savings are higher than with
+ `enable_model_cpu_offload`, but performance is lower.
+ """
+ self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+ self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
  def progress_bar(self, iterable=None, total=None):
  self.prior_pipe.progress_bar(iterable=iterable, total=total)
  self.decoder_pipe.progress_bar(iterable=iterable, total=total)
@@ -650,7 +692,7 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
  The prompt or prompts to guide the image generation.
  image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
  `Image`, or tensor representing an image batch, that will be used as the starting point for the
- process. Can also accpet image latents as `image`, if passing latents directly, it will not be encoded
+ process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
  again.
  mask_image (`np.array`):
  Tensor representing an image batch, to mask `image`. White pixels in the mask will be repainted, while
@@ -258,7 +258,7 @@ class KandinskyV22ControlnetImg2ImgPipeline(DiffusionPipeline):
  The clip image embeddings for text prompt, that will be used to condition the image generation.
  image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
  `Image`, or tensor representing an image batch, that will be used as the starting point for the
- process. Can also accpet image latents as `image`, if passing latents directly, it will not be encoded
+ process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
  again.
  strength (`float`, *optional*, defaults to 0.8):
  Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image`
@@ -230,7 +230,7 @@ class KandinskyV22Img2ImgPipeline(DiffusionPipeline):
  The clip image embeddings for text prompt, that will be used to condition the image generation.
  image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
  `Image`, or tensor representing an image batch, that will be used as the starting point for the
- process. Can also accpet image latents as `image`, if passing latents directly, it will not be encoded
+ process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
  again.
  strength (`float`, *optional*, defaults to 0.8):
  Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image`