diffusers-0.27.2-py3-none-any.whl → diffusers-0.28.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270):
  1. diffusers/__init__.py +18 -1
  2. diffusers/callbacks.py +156 -0
  3. diffusers/commands/env.py +110 -6
  4. diffusers/configuration_utils.py +16 -11
  5. diffusers/dependency_versions_table.py +2 -1
  6. diffusers/image_processor.py +158 -45
  7. diffusers/loaders/__init__.py +2 -5
  8. diffusers/loaders/autoencoder.py +4 -4
  9. diffusers/loaders/controlnet.py +4 -4
  10. diffusers/loaders/ip_adapter.py +80 -22
  11. diffusers/loaders/lora.py +134 -20
  12. diffusers/loaders/lora_conversion_utils.py +46 -43
  13. diffusers/loaders/peft.py +4 -3
  14. diffusers/loaders/single_file.py +401 -170
  15. diffusers/loaders/single_file_model.py +290 -0
  16. diffusers/loaders/single_file_utils.py +616 -672
  17. diffusers/loaders/textual_inversion.py +41 -20
  18. diffusers/loaders/unet.py +168 -115
  19. diffusers/loaders/unet_loader_utils.py +163 -0
  20. diffusers/models/__init__.py +2 -0
  21. diffusers/models/activations.py +11 -3
  22. diffusers/models/attention.py +10 -11
  23. diffusers/models/attention_processor.py +367 -148
  24. diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
  25. diffusers/models/autoencoders/autoencoder_kl.py +18 -19
  26. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
  27. diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
  28. diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
  29. diffusers/models/autoencoders/vae.py +23 -24
  30. diffusers/models/controlnet.py +12 -9
  31. diffusers/models/controlnet_flax.py +4 -4
  32. diffusers/models/controlnet_xs.py +1915 -0
  33. diffusers/models/downsampling.py +17 -18
  34. diffusers/models/embeddings.py +147 -24
  35. diffusers/models/model_loading_utils.py +149 -0
  36. diffusers/models/modeling_flax_pytorch_utils.py +2 -1
  37. diffusers/models/modeling_flax_utils.py +4 -4
  38. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  39. diffusers/models/modeling_utils.py +118 -98
  40. diffusers/models/resnet.py +18 -23
  41. diffusers/models/transformer_temporal.py +3 -3
  42. diffusers/models/transformers/dual_transformer_2d.py +4 -4
  43. diffusers/models/transformers/prior_transformer.py +7 -7
  44. diffusers/models/transformers/t5_film_transformer.py +17 -19
  45. diffusers/models/transformers/transformer_2d.py +272 -156
  46. diffusers/models/transformers/transformer_temporal.py +10 -10
  47. diffusers/models/unets/unet_1d.py +5 -5
  48. diffusers/models/unets/unet_1d_blocks.py +29 -29
  49. diffusers/models/unets/unet_2d.py +6 -6
  50. diffusers/models/unets/unet_2d_blocks.py +137 -128
  51. diffusers/models/unets/unet_2d_condition.py +19 -15
  52. diffusers/models/unets/unet_2d_condition_flax.py +6 -5
  53. diffusers/models/unets/unet_3d_blocks.py +79 -77
  54. diffusers/models/unets/unet_3d_condition.py +13 -9
  55. diffusers/models/unets/unet_i2vgen_xl.py +14 -13
  56. diffusers/models/unets/unet_kandinsky3.py +1 -1
  57. diffusers/models/unets/unet_motion_model.py +114 -14
  58. diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
  59. diffusers/models/unets/unet_stable_cascade.py +16 -13
  60. diffusers/models/upsampling.py +17 -20
  61. diffusers/models/vq_model.py +16 -15
  62. diffusers/pipelines/__init__.py +25 -3
  63. diffusers/pipelines/amused/pipeline_amused.py +12 -12
  64. diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
  65. diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
  66. diffusers/pipelines/animatediff/__init__.py +2 -0
  67. diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
  68. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
  69. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
  70. diffusers/pipelines/animatediff/pipeline_output.py +3 -2
  71. diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
  72. diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
  73. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
  74. diffusers/pipelines/auto_pipeline.py +21 -17
  75. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  76. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
  77. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
  78. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
  79. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
  80. diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
  81. diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
  82. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
  83. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
  84. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
  85. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
  86. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
  87. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
  88. diffusers/pipelines/controlnet_xs/__init__.py +68 -0
  89. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
  90. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
  91. diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
  92. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
  93. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
  94. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
  95. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
  96. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
  97. diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
  98. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
  99. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
  100. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
  101. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
  102. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
  103. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -18
  104. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
  105. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
  106. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
  107. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
  108. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
  109. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
  110. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
  111. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
  112. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
  113. diffusers/pipelines/dit/pipeline_dit.py +3 -0
  114. diffusers/pipelines/free_init_utils.py +39 -38
  115. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
  116. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
  117. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
  118. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
  119. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
  120. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
  121. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  122. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
  123. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
  124. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
  125. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
  126. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
  127. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
  128. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
  129. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
  130. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
  131. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
  132. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
  133. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
  134. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
  135. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
  136. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
  137. diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
  138. diffusers/pipelines/marigold/__init__.py +50 -0
  139. diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
  140. diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
  141. diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
  142. diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
  143. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
  144. diffusers/pipelines/pia/pipeline_pia.py +39 -125
  145. diffusers/pipelines/pipeline_flax_utils.py +4 -4
  146. diffusers/pipelines/pipeline_loading_utils.py +268 -23
  147. diffusers/pipelines/pipeline_utils.py +266 -37
  148. diffusers/pipelines/pixart_alpha/__init__.py +8 -1
  149. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
  150. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
  151. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
  152. diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
  153. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
  154. diffusers/pipelines/shap_e/renderer.py +1 -1
  155. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +18 -18
  156. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
  157. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
  158. diffusers/pipelines/stable_diffusion/__init__.py +0 -1
  159. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
  160. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  161. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
  162. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
  163. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
  164. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
  165. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
  166. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
  167. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
  168. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
  169. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
  170. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
  171. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
  172. diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
  173. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
  174. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -39
  175. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
  176. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
  177. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
  178. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
  179. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
  180. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
  181. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
  182. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  183. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
  184. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
  185. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
  186. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
  187. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
  188. diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
  189. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
  190. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
  191. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
  192. diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
  193. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
  194. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
  195. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
  196. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
  197. diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
  198. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
  199. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
  200. diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
  201. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
  202. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
  203. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
  204. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
  205. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
  206. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
  207. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
  208. diffusers/schedulers/__init__.py +2 -2
  209. diffusers/schedulers/deprecated/__init__.py +1 -1
  210. diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
  211. diffusers/schedulers/scheduling_amused.py +5 -5
  212. diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
  213. diffusers/schedulers/scheduling_consistency_models.py +20 -26
  214. diffusers/schedulers/scheduling_ddim.py +22 -24
  215. diffusers/schedulers/scheduling_ddim_flax.py +2 -1
  216. diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
  217. diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
  218. diffusers/schedulers/scheduling_ddpm.py +20 -22
  219. diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
  220. diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
  221. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
  222. diffusers/schedulers/scheduling_deis_multistep.py +42 -42
  223. diffusers/schedulers/scheduling_dpmsolver_multistep.py +103 -77
  224. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
  225. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
  226. diffusers/schedulers/scheduling_dpmsolver_sde.py +23 -23
  227. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +86 -65
  228. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +75 -54
  229. diffusers/schedulers/scheduling_edm_euler.py +50 -31
  230. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +23 -29
  231. diffusers/schedulers/scheduling_euler_discrete.py +160 -68
  232. diffusers/schedulers/scheduling_heun_discrete.py +57 -39
  233. diffusers/schedulers/scheduling_ipndm.py +8 -8
  234. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +19 -19
  235. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +19 -19
  236. diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
  237. diffusers/schedulers/scheduling_lcm.py +21 -23
  238. diffusers/schedulers/scheduling_lms_discrete.py +24 -26
  239. diffusers/schedulers/scheduling_pndm.py +20 -20
  240. diffusers/schedulers/scheduling_repaint.py +20 -20
  241. diffusers/schedulers/scheduling_sasolver.py +55 -54
  242. diffusers/schedulers/scheduling_sde_ve.py +19 -19
  243. diffusers/schedulers/scheduling_tcd.py +39 -30
  244. diffusers/schedulers/scheduling_unclip.py +15 -15
  245. diffusers/schedulers/scheduling_unipc_multistep.py +111 -41
  246. diffusers/schedulers/scheduling_utils.py +14 -5
  247. diffusers/schedulers/scheduling_utils_flax.py +3 -3
  248. diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
  249. diffusers/training_utils.py +56 -1
  250. diffusers/utils/__init__.py +7 -0
  251. diffusers/utils/doc_utils.py +1 -0
  252. diffusers/utils/dummy_pt_objects.py +30 -0
  253. diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
  254. diffusers/utils/dynamic_modules_utils.py +24 -11
  255. diffusers/utils/hub_utils.py +3 -2
  256. diffusers/utils/import_utils.py +91 -0
  257. diffusers/utils/loading_utils.py +2 -2
  258. diffusers/utils/logging.py +1 -1
  259. diffusers/utils/peft_utils.py +32 -5
  260. diffusers/utils/state_dict_utils.py +11 -2
  261. diffusers/utils/testing_utils.py +71 -6
  262. diffusers/utils/torch_utils.py +1 -0
  263. diffusers/video_processor.py +113 -0
  264. {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/METADATA +47 -47
  265. diffusers-0.28.0.dist-info/RECORD +414 -0
  266. {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/WHEEL +1 -1
  267. diffusers-0.27.2.dist-info/RECORD +0 -399
  268. {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
  269. {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
  270. {diffusers-0.27.2.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,6 @@ from ...models import UNet2DConditionModel
12
12
  from ...schedulers import DDPMScheduler
13
13
  from ...utils import (
14
14
  BACKENDS_MAPPING,
15
- is_accelerate_available,
16
15
  is_bs4_available,
17
16
  is_ftfy_available,
18
17
  logging,
@@ -115,6 +114,7 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
115
114
 
116
115
  _optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
117
116
  model_cpu_offload_seq = "text_encoder->unet"
117
+ _exclude_from_cpu_offload = ["watermarker"]
118
118
 
119
119
  def __init__(
120
120
  self,
@@ -156,20 +156,6 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
156
156
  )
157
157
  self.register_to_config(requires_safety_checker=requires_safety_checker)
158
158
 
159
- def remove_all_hooks(self):
160
- if is_accelerate_available():
161
- from accelerate.hooks import remove_hook_from_module
162
- else:
163
- raise ImportError("Please install accelerate via `pip install accelerate`")
164
-
165
- for model in [self.text_encoder, self.unet, self.safety_checker]:
166
- if model is not None:
167
- remove_hook_from_module(model, recurse=True)
168
-
169
- self.unet_offload_hook = None
170
- self.text_encoder_offload_hook = None
171
- self.final_offload_hook = None
172
-
173
159
  @torch.no_grad()
174
160
  def encode_prompt(
175
161
  self,
@@ -178,8 +164,8 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
178
164
  num_images_per_prompt: int = 1,
179
165
  device: Optional[torch.device] = None,
180
166
  negative_prompt: Optional[Union[str, List[str]]] = None,
181
- prompt_embeds: Optional[torch.FloatTensor] = None,
182
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
167
+ prompt_embeds: Optional[torch.Tensor] = None,
168
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
183
169
  clean_caption: bool = False,
184
170
  ):
185
171
  r"""
@@ -198,10 +184,10 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
198
184
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
199
185
  `negative_prompt_embeds`. instead. If not defined, one has to pass `negative_prompt_embeds`. instead.
200
186
  Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
201
- prompt_embeds (`torch.FloatTensor`, *optional*):
187
+ prompt_embeds (`torch.Tensor`, *optional*):
202
188
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
203
189
  provided, text embeddings will be generated from `prompt` input argument.
204
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
190
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
205
191
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
206
192
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
207
193
  argument.
@@ -335,9 +321,6 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
335
321
  nsfw_detected = None
336
322
  watermark_detected = None
337
323
 
338
- if hasattr(self, "unet_offload_hook") and self.unet_offload_hook is not None:
339
- self.unet_offload_hook.offload()
340
-
341
324
  return image, nsfw_detected, watermark_detected
342
325
 
343
326
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
@@ -566,11 +549,11 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
566
549
  width: Optional[int] = None,
567
550
  eta: float = 0.0,
568
551
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
569
- prompt_embeds: Optional[torch.FloatTensor] = None,
570
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
552
+ prompt_embeds: Optional[torch.Tensor] = None,
553
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
571
554
  output_type: Optional[str] = "pil",
572
555
  return_dict: bool = True,
573
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
556
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
574
557
  callback_steps: int = 1,
575
558
  clean_caption: bool = True,
576
559
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -610,10 +593,10 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
610
593
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
611
594
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
612
595
  to make generation deterministic.
613
- prompt_embeds (`torch.FloatTensor`, *optional*):
596
+ prompt_embeds (`torch.Tensor`, *optional*):
614
597
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
615
598
  provided, text embeddings will be generated from `prompt` input argument.
616
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
599
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
617
600
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
618
601
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
619
602
  argument.
@@ -624,7 +607,7 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
624
607
  Whether or not to return a [`~pipelines.stable_diffusion.IFPipelineOutput`] instead of a plain tuple.
625
608
  callback (`Callable`, *optional*):
626
609
  A function that will be called every `callback_steps` steps during inference. The function will be
627
- called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
610
+ called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
628
611
  callback_steps (`int`, *optional*, defaults to 1):
629
612
  The frequency at which the `callback` function will be called. If not specified, the callback will be
630
613
  called at every step.
@@ -691,6 +674,9 @@ class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
691
674
  self.scheduler.set_timesteps(num_inference_steps, device=device)
692
675
  timesteps = self.scheduler.timesteps
693
676
 
677
+ if hasattr(self.scheduler, "set_begin_index"):
678
+ self.scheduler.set_begin_index(0)
679
+
694
680
  # 5. Prepare intermediate images
695
681
  intermediate_images = self.prepare_intermediate_images(
696
682
  batch_size * num_images_per_prompt,
@@ -15,7 +15,6 @@ from ...schedulers import DDPMScheduler
15
15
  from ...utils import (
16
16
  BACKENDS_MAPPING,
17
17
  PIL_INTERPOLATION,
18
- is_accelerate_available,
19
18
  is_bs4_available,
20
19
  is_ftfy_available,
21
20
  logging,
@@ -139,6 +138,7 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
139
138
 
140
139
  _optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
141
140
  model_cpu_offload_seq = "text_encoder->unet"
141
+ _exclude_from_cpu_offload = ["watermarker"]
142
142
 
143
143
  def __init__(
144
144
  self,
@@ -180,21 +180,6 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
180
180
  )
181
181
  self.register_to_config(requires_safety_checker=requires_safety_checker)
182
182
 
183
- # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.remove_all_hooks
184
- def remove_all_hooks(self):
185
- if is_accelerate_available():
186
- from accelerate.hooks import remove_hook_from_module
187
- else:
188
- raise ImportError("Please install accelerate via `pip install accelerate`")
189
-
190
- for model in [self.text_encoder, self.unet, self.safety_checker]:
191
- if model is not None:
192
- remove_hook_from_module(model, recurse=True)
193
-
194
- self.unet_offload_hook = None
195
- self.text_encoder_offload_hook = None
196
- self.final_offload_hook = None
197
-
198
183
  @torch.no_grad()
199
184
  def encode_prompt(
200
185
  self,
@@ -203,8 +188,8 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
203
188
  num_images_per_prompt: int = 1,
204
189
  device: Optional[torch.device] = None,
205
190
  negative_prompt: Optional[Union[str, List[str]]] = None,
206
- prompt_embeds: Optional[torch.FloatTensor] = None,
207
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
191
+ prompt_embeds: Optional[torch.Tensor] = None,
192
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
208
193
  clean_caption: bool = False,
209
194
  ):
210
195
  r"""
@@ -223,10 +208,10 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
223
208
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
224
209
  `negative_prompt_embeds`. instead. If not defined, one has to pass `negative_prompt_embeds`. instead.
225
210
  Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
226
- prompt_embeds (`torch.FloatTensor`, *optional*):
211
+ prompt_embeds (`torch.Tensor`, *optional*):
227
212
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
228
213
  provided, text embeddings will be generated from `prompt` input argument.
229
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
214
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
230
215
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
231
216
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
232
217
  argument.
@@ -361,9 +346,6 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
361
346
  nsfw_detected = None
362
347
  watermark_detected = None
363
348
 
364
- if hasattr(self, "unet_offload_hook") and self.unet_offload_hook is not None:
365
- self.unet_offload_hook.offload()
366
-
367
349
  return image, nsfw_detected, watermark_detected
368
350
 
369
351
  # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.prepare_extra_step_kwargs
@@ -439,7 +421,7 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
439
421
  and not isinstance(check_image_type, np.ndarray)
440
422
  ):
441
423
  raise ValueError(
442
- "`image` has to be of type `torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
424
+ "`image` has to be of type `torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
443
425
  f" {type(check_image_type)}"
444
426
  )
445
427
 
@@ -613,7 +595,7 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
613
595
 
614
596
  for image_ in image:
615
597
  image_ = image_.convert("RGB")
616
- image_ = resize(image_, self.unet.sample_size)
598
+ image_ = resize(image_, self.unet.config.sample_size)
617
599
  image_ = np.array(image_)
618
600
  image_ = image_.astype(np.float32)
619
601
  image_ = image_ / 127.5 - 1
@@ -633,12 +615,15 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
633
615
 
634
616
  return image
635
617
 
618
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.get_timesteps
636
619
  def get_timesteps(self, num_inference_steps, strength):
637
620
  # get the original timestep using init_timestep
638
621
  init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
639
622
 
640
623
  t_start = max(num_inference_steps - init_timestep, 0)
641
- timesteps = self.scheduler.timesteps[t_start:]
624
+ timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
625
+ if hasattr(self.scheduler, "set_begin_index"):
626
+ self.scheduler.set_begin_index(t_start * self.scheduler.order)
642
627
 
643
628
  return timesteps, num_inference_steps - t_start
644
629
 
@@ -680,11 +665,11 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
680
665
  num_images_per_prompt: Optional[int] = 1,
681
666
  eta: float = 0.0,
682
667
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
683
- prompt_embeds: Optional[torch.FloatTensor] = None,
684
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
668
+ prompt_embeds: Optional[torch.Tensor] = None,
669
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
685
670
  output_type: Optional[str] = "pil",
686
671
  return_dict: bool = True,
687
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
672
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
688
673
  callback_steps: int = 1,
689
674
  clean_caption: bool = True,
690
675
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -696,7 +681,7 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
696
681
  prompt (`str` or `List[str]`, *optional*):
697
682
  The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
698
683
  instead.
699
- image (`torch.FloatTensor` or `PIL.Image.Image`):
684
+ image (`torch.Tensor` or `PIL.Image.Image`):
700
685
  `Image`, or tensor representing an image batch, that will be used as the starting point for the
701
686
  process.
702
687
  strength (`float`, *optional*, defaults to 0.7):
@@ -729,10 +714,10 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
729
714
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
730
715
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
731
716
  to make generation deterministic.
732
- prompt_embeds (`torch.FloatTensor`, *optional*):
717
+ prompt_embeds (`torch.Tensor`, *optional*):
733
718
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
734
719
  provided, text embeddings will be generated from `prompt` input argument.
735
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
720
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
736
721
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
737
722
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
738
723
  argument.
@@ -743,7 +728,7 @@ class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
743
728
  Whether or not to return a [`~pipelines.stable_diffusion.IFPipelineOutput`] instead of a plain tuple.
744
729
  callback (`Callable`, *optional*):
745
730
  A function that will be called every `callback_steps` steps during inference. The function will be
746
- called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
731
+ called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
747
732
  callback_steps (`int`, *optional*, defaults to 1):
748
733
  The frequency at which the `callback` function will be called. If not specified, the callback will be
749
734
  called at every step.
@@ -16,7 +16,6 @@ from ...schedulers import DDPMScheduler
16
16
  from ...utils import (
17
17
  BACKENDS_MAPPING,
18
18
  PIL_INTERPOLATION,
19
- is_accelerate_available,
20
19
  is_bs4_available,
21
20
  is_ftfy_available,
22
21
  logging,
@@ -143,6 +142,7 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
143
142
 
144
143
  _optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor"]
145
144
  model_cpu_offload_seq = "text_encoder->unet"
145
+ _exclude_from_cpu_offload = ["watermarker"]
146
146
 
147
147
  def __init__(
148
148
  self,
@@ -191,21 +191,6 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
191
191
  )
192
192
  self.register_to_config(requires_safety_checker=requires_safety_checker)
193
193
 
194
- # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.remove_all_hooks
195
- def remove_all_hooks(self):
196
- if is_accelerate_available():
197
- from accelerate.hooks import remove_hook_from_module
198
- else:
199
- raise ImportError("Please install accelerate via `pip install accelerate`")
200
-
201
- for model in [self.text_encoder, self.unet, self.safety_checker]:
202
- if model is not None:
203
- remove_hook_from_module(model, recurse=True)
204
-
205
- self.unet_offload_hook = None
206
- self.text_encoder_offload_hook = None
207
- self.final_offload_hook = None
208
-
209
194
  # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline._text_preprocessing
210
195
  def _text_preprocessing(self, text, clean_caption=False):
211
196
  if clean_caption and not is_bs4_available():
@@ -355,8 +340,8 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
355
340
  num_images_per_prompt: int = 1,
356
341
  device: Optional[torch.device] = None,
357
342
  negative_prompt: Optional[Union[str, List[str]]] = None,
358
- prompt_embeds: Optional[torch.FloatTensor] = None,
359
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
343
+ prompt_embeds: Optional[torch.Tensor] = None,
344
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
360
345
  clean_caption: bool = False,
361
346
  ):
362
347
  r"""
@@ -375,10 +360,10 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
375
360
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
376
361
  `negative_prompt_embeds`. instead. If not defined, one has to pass `negative_prompt_embeds`. instead.
377
362
  Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
378
- prompt_embeds (`torch.FloatTensor`, *optional*):
363
+ prompt_embeds (`torch.Tensor`, *optional*):
379
364
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
380
365
  provided, text embeddings will be generated from `prompt` input argument.
381
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
366
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
382
367
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
383
368
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
384
369
  argument.
@@ -513,9 +498,6 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
513
498
  nsfw_detected = None
514
499
  watermark_detected = None
515
500
 
516
- if hasattr(self, "unet_offload_hook") and self.unet_offload_hook is not None:
517
- self.unet_offload_hook.offload()
518
-
519
501
  return image, nsfw_detected, watermark_detected
520
502
 
521
503
  # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.prepare_extra_step_kwargs
@@ -594,7 +576,7 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
594
576
  and not isinstance(check_image_type, np.ndarray)
595
577
  ):
596
578
  raise ValueError(
597
- "`image` has to be of type `torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
579
+ "`image` has to be of type `torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
598
580
  f" {type(check_image_type)}"
599
581
  )
600
582
 
@@ -625,7 +607,7 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
625
607
  and not isinstance(check_image_type, np.ndarray)
626
608
  ):
627
609
  raise ValueError(
628
- "`original_image` has to be of type `torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
610
+ "`original_image` has to be of type `torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
629
611
  f" {type(check_image_type)}"
630
612
  )
631
613
 
@@ -662,7 +644,7 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
662
644
 
663
645
  for image_ in image:
664
646
  image_ = image_.convert("RGB")
665
- image_ = resize(image_, self.unet.sample_size)
647
+ image_ = resize(image_, self.unet.config.sample_size)
666
648
  image_ = np.array(image_)
667
649
  image_ = image_.astype(np.float32)
668
650
  image_ = image_ / 127.5 - 1
@@ -714,13 +696,15 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
714
696
 
715
697
  return image
716
698
 
717
- # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if_img2img.IFImg2ImgPipeline.get_timesteps
699
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.get_timesteps
718
700
  def get_timesteps(self, num_inference_steps, strength):
719
701
  # get the original timestep using init_timestep
720
702
  init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
721
703
 
722
704
  t_start = max(num_inference_steps - init_timestep, 0)
723
- timesteps = self.scheduler.timesteps[t_start:]
705
+ timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
706
+ if hasattr(self.scheduler, "set_begin_index"):
707
+ self.scheduler.set_begin_index(t_start * self.scheduler.order)
724
708
 
725
709
  return timesteps, num_inference_steps - t_start
726
710
 
@@ -751,7 +735,7 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
751
735
  @replace_example_docstring(EXAMPLE_DOC_STRING)
752
736
  def __call__(
753
737
  self,
754
- image: Union[PIL.Image.Image, np.ndarray, torch.FloatTensor],
738
+ image: Union[PIL.Image.Image, np.ndarray, torch.Tensor],
755
739
  original_image: Union[
756
740
  PIL.Image.Image, torch.Tensor, np.ndarray, List[PIL.Image.Image], List[torch.Tensor], List[np.ndarray]
757
741
  ] = None,
@@ -764,11 +748,11 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
764
748
  num_images_per_prompt: Optional[int] = 1,
765
749
  eta: float = 0.0,
766
750
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
767
- prompt_embeds: Optional[torch.FloatTensor] = None,
768
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
751
+ prompt_embeds: Optional[torch.Tensor] = None,
752
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
769
753
  output_type: Optional[str] = "pil",
770
754
  return_dict: bool = True,
771
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
755
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
772
756
  callback_steps: int = 1,
773
757
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
774
758
  noise_level: int = 250,
@@ -778,10 +762,10 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
778
762
  Function invoked when calling the pipeline for generation.
779
763
 
780
764
  Args:
781
- image (`torch.FloatTensor` or `PIL.Image.Image`):
765
+ image (`torch.Tensor` or `PIL.Image.Image`):
782
766
  `Image`, or tensor representing an image batch, that will be used as the starting point for the
783
767
  process.
784
- original_image (`torch.FloatTensor` or `PIL.Image.Image`):
768
+ original_image (`torch.Tensor` or `PIL.Image.Image`):
785
769
  The original image that `image` was varied from.
786
770
  strength (`float`, *optional*, defaults to 0.8):
787
771
  Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image`
@@ -816,10 +800,10 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
816
800
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
817
801
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
818
802
  to make generation deterministic.
819
- prompt_embeds (`torch.FloatTensor`, *optional*):
803
+ prompt_embeds (`torch.Tensor`, *optional*):
820
804
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
821
805
  provided, text embeddings will be generated from `prompt` input argument.
822
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
806
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
823
807
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
824
808
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
825
809
  argument.
@@ -830,7 +814,7 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
830
814
  Whether or not to return a [`~pipelines.stable_diffusion.IFPipelineOutput`] instead of a plain tuple.
831
815
  callback (`Callable`, *optional*):
832
816
  A function that will be called every `callback_steps` steps during inference. The function will be
833
- called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
817
+ called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
834
818
  callback_steps (`int`, *optional*, defaults to 1):
835
819
  The frequency at which the `callback` function will be called. If not specified, the callback will be
836
820
  called at every step.
@@ -1010,8 +994,6 @@ class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
1010
994
  nsfw_detected = None
1011
995
  watermark_detected = None
1012
996
 
1013
- if hasattr(self, "unet_offload_hook") and self.unet_offload_hook is not None:
1014
- self.unet_offload_hook.offload()
1015
997
  else:
1016
998
  # 10. Post-processing
1017
999
  image = (image / 2 + 0.5).clamp(0, 1)
@@ -15,7 +15,6 @@ from ...schedulers import DDPMScheduler
15
15
  from ...utils import (
16
16
  BACKENDS_MAPPING,
17
17
  PIL_INTERPOLATION,
18
- is_accelerate_available,
19
18
  is_bs4_available,
20
19
  is_ftfy_available,
21
20
  logging,
@@ -142,6 +141,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
142
141
 
143
142
  _optional_components = ["tokenizer", "text_encoder", "safety_checker", "feature_extractor", "watermarker"]
144
143
  model_cpu_offload_seq = "text_encoder->unet"
144
+ _exclude_from_cpu_offload = ["watermarker"]
145
145
 
146
146
  def __init__(
147
147
  self,
@@ -183,21 +183,6 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
183
183
  )
184
184
  self.register_to_config(requires_safety_checker=requires_safety_checker)
185
185
 
186
- # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.remove_all_hooks
187
- def remove_all_hooks(self):
188
- if is_accelerate_available():
189
- from accelerate.hooks import remove_hook_from_module
190
- else:
191
- raise ImportError("Please install accelerate via `pip install accelerate`")
192
-
193
- for model in [self.text_encoder, self.unet, self.safety_checker]:
194
- if model is not None:
195
- remove_hook_from_module(model, recurse=True)
196
-
197
- self.unet_offload_hook = None
198
- self.text_encoder_offload_hook = None
199
- self.final_offload_hook = None
200
-
201
186
  @torch.no_grad()
202
187
  # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.encode_prompt
203
188
  def encode_prompt(
@@ -207,8 +192,8 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
207
192
  num_images_per_prompt: int = 1,
208
193
  device: Optional[torch.device] = None,
209
194
  negative_prompt: Optional[Union[str, List[str]]] = None,
210
- prompt_embeds: Optional[torch.FloatTensor] = None,
211
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
195
+ prompt_embeds: Optional[torch.Tensor] = None,
196
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
212
197
  clean_caption: bool = False,
213
198
  ):
214
199
  r"""
@@ -227,10 +212,10 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
227
212
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
228
213
  `negative_prompt_embeds`. instead. If not defined, one has to pass `negative_prompt_embeds`. instead.
229
214
  Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
230
- prompt_embeds (`torch.FloatTensor`, *optional*):
215
+ prompt_embeds (`torch.Tensor`, *optional*):
231
216
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
232
217
  provided, text embeddings will be generated from `prompt` input argument.
233
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
218
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
234
219
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
235
220
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
236
221
  argument.
@@ -365,9 +350,6 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
365
350
  nsfw_detected = None
366
351
  watermark_detected = None
367
352
 
368
- if hasattr(self, "unet_offload_hook") and self.unet_offload_hook is not None:
369
- self.unet_offload_hook.offload()
370
-
371
353
  return image, nsfw_detected, watermark_detected
372
354
 
373
355
  # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline.prepare_extra_step_kwargs
@@ -446,7 +428,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
446
428
  and not isinstance(check_image_type, np.ndarray)
447
429
  ):
448
430
  raise ValueError(
449
- "`image` has to be of type `torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
431
+ "`image` has to be of type `torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
450
432
  f" {type(check_image_type)}"
451
433
  )
452
434
 
@@ -477,7 +459,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
477
459
  and not isinstance(check_image_type, np.ndarray)
478
460
  ):
479
461
  raise ValueError(
480
- "`mask_image` has to be of type `torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
462
+ "`mask_image` has to be of type `torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, or List[...] but is"
481
463
  f" {type(check_image_type)}"
482
464
  )
483
465
 
@@ -654,7 +636,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
654
636
 
655
637
  for image_ in image:
656
638
  image_ = image_.convert("RGB")
657
- image_ = resize(image_, self.unet.sample_size)
639
+ image_ = resize(image_, self.unet.config.sample_size)
658
640
  image_ = np.array(image_)
659
641
  image_ = image_.astype(np.float32)
660
642
  image_ = image_ / 127.5 - 1
@@ -701,7 +683,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
701
683
 
702
684
  for mask_image_ in mask_image:
703
685
  mask_image_ = mask_image_.convert("L")
704
- mask_image_ = resize(mask_image_, self.unet.sample_size)
686
+ mask_image_ = resize(mask_image_, self.unet.config.sample_size)
705
687
  mask_image_ = np.array(mask_image_)
706
688
  mask_image_ = mask_image_[None, None, :]
707
689
  new_mask_image.append(mask_image_)
@@ -723,13 +705,15 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
723
705
 
724
706
  return mask_image
725
707
 
726
- # Copied from diffusers.pipelines.deepfloyd_if.pipeline_if_img2img.IFImg2ImgPipeline.get_timesteps
708
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.get_timesteps
727
709
  def get_timesteps(self, num_inference_steps, strength):
728
710
  # get the original timestep using init_timestep
729
711
  init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
730
712
 
731
713
  t_start = max(num_inference_steps - init_timestep, 0)
732
- timesteps = self.scheduler.timesteps[t_start:]
714
+ timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
715
+ if hasattr(self.scheduler, "set_begin_index"):
716
+ self.scheduler.set_begin_index(t_start * self.scheduler.order)
733
717
 
734
718
  return timesteps, num_inference_steps - t_start
735
719
 
@@ -776,11 +760,11 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
776
760
  num_images_per_prompt: Optional[int] = 1,
777
761
  eta: float = 0.0,
778
762
  generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
779
- prompt_embeds: Optional[torch.FloatTensor] = None,
780
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
763
+ prompt_embeds: Optional[torch.Tensor] = None,
764
+ negative_prompt_embeds: Optional[torch.Tensor] = None,
781
765
  output_type: Optional[str] = "pil",
782
766
  return_dict: bool = True,
783
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
767
+ callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
784
768
  callback_steps: int = 1,
785
769
  clean_caption: bool = True,
786
770
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -792,7 +776,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
792
776
  prompt (`str` or `List[str]`, *optional*):
793
777
  The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
794
778
  instead.
795
- image (`torch.FloatTensor` or `PIL.Image.Image`):
779
+ image (`torch.Tensor` or `PIL.Image.Image`):
796
780
  `Image`, or tensor representing an image batch, that will be used as the starting point for the
797
781
  process.
798
782
  mask_image (`PIL.Image.Image`):
@@ -830,10 +814,10 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
830
814
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
831
815
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
832
816
  to make generation deterministic.
833
- prompt_embeds (`torch.FloatTensor`, *optional*):
817
+ prompt_embeds (`torch.Tensor`, *optional*):
834
818
  Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
835
819
  provided, text embeddings will be generated from `prompt` input argument.
836
- negative_prompt_embeds (`torch.FloatTensor`, *optional*):
820
+ negative_prompt_embeds (`torch.Tensor`, *optional*):
837
821
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
838
822
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
839
823
  argument.
@@ -844,7 +828,7 @@ class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
844
828
  Whether or not to return a [`~pipelines.stable_diffusion.IFPipelineOutput`] instead of a plain tuple.
845
829
  callback (`Callable`, *optional*):
846
830
  A function that will be called every `callback_steps` steps during inference. The function will be
847
- called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
831
+ called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
848
832
  callback_steps (`int`, *optional*, defaults to 1):
849
833
  The frequency at which the `callback` function will be called. If not specified, the callback will be
850
834
  called at every step.