diffusers 0.33.1__py3-none-any.whl → 0.35.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (551)
  1. diffusers/__init__.py +145 -1
  2. diffusers/callbacks.py +35 -0
  3. diffusers/commands/__init__.py +1 -1
  4. diffusers/commands/custom_blocks.py +134 -0
  5. diffusers/commands/diffusers_cli.py +3 -1
  6. diffusers/commands/env.py +1 -1
  7. diffusers/commands/fp16_safetensors.py +2 -2
  8. diffusers/configuration_utils.py +11 -2
  9. diffusers/dependency_versions_check.py +1 -1
  10. diffusers/dependency_versions_table.py +3 -3
  11. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  12. diffusers/guiders/__init__.py +41 -0
  13. diffusers/guiders/adaptive_projected_guidance.py +188 -0
  14. diffusers/guiders/auto_guidance.py +190 -0
  15. diffusers/guiders/classifier_free_guidance.py +141 -0
  16. diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
  17. diffusers/guiders/frequency_decoupled_guidance.py +327 -0
  18. diffusers/guiders/guider_utils.py +309 -0
  19. diffusers/guiders/perturbed_attention_guidance.py +271 -0
  20. diffusers/guiders/skip_layer_guidance.py +262 -0
  21. diffusers/guiders/smoothed_energy_guidance.py +251 -0
  22. diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
  23. diffusers/hooks/__init__.py +17 -0
  24. diffusers/hooks/_common.py +56 -0
  25. diffusers/hooks/_helpers.py +293 -0
  26. diffusers/hooks/faster_cache.py +9 -8
  27. diffusers/hooks/first_block_cache.py +259 -0
  28. diffusers/hooks/group_offloading.py +332 -227
  29. diffusers/hooks/hooks.py +58 -3
  30. diffusers/hooks/layer_skip.py +263 -0
  31. diffusers/hooks/layerwise_casting.py +5 -10
  32. diffusers/hooks/pyramid_attention_broadcast.py +15 -12
  33. diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
  34. diffusers/hooks/utils.py +43 -0
  35. diffusers/image_processor.py +7 -2
  36. diffusers/loaders/__init__.py +10 -0
  37. diffusers/loaders/ip_adapter.py +260 -18
  38. diffusers/loaders/lora_base.py +261 -127
  39. diffusers/loaders/lora_conversion_utils.py +657 -35
  40. diffusers/loaders/lora_pipeline.py +2778 -1246
  41. diffusers/loaders/peft.py +78 -112
  42. diffusers/loaders/single_file.py +2 -2
  43. diffusers/loaders/single_file_model.py +64 -15
  44. diffusers/loaders/single_file_utils.py +395 -7
  45. diffusers/loaders/textual_inversion.py +3 -2
  46. diffusers/loaders/transformer_flux.py +10 -11
  47. diffusers/loaders/transformer_sd3.py +8 -3
  48. diffusers/loaders/unet.py +24 -21
  49. diffusers/loaders/unet_loader_utils.py +6 -3
  50. diffusers/loaders/utils.py +1 -1
  51. diffusers/models/__init__.py +23 -1
  52. diffusers/models/activations.py +5 -5
  53. diffusers/models/adapter.py +2 -3
  54. diffusers/models/attention.py +488 -7
  55. diffusers/models/attention_dispatch.py +1218 -0
  56. diffusers/models/attention_flax.py +10 -10
  57. diffusers/models/attention_processor.py +113 -667
  58. diffusers/models/auto_model.py +49 -12
  59. diffusers/models/autoencoders/__init__.py +2 -0
  60. diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
  61. diffusers/models/autoencoders/autoencoder_dc.py +17 -4
  62. diffusers/models/autoencoders/autoencoder_kl.py +5 -5
  63. diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
  64. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
  65. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1110 -0
  66. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
  67. diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
  68. diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
  69. diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
  70. diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
  71. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
  72. diffusers/models/autoencoders/autoencoder_kl_wan.py +626 -62
  73. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
  74. diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
  75. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  76. diffusers/models/autoencoders/vae.py +13 -2
  77. diffusers/models/autoencoders/vq_model.py +2 -2
  78. diffusers/models/cache_utils.py +32 -10
  79. diffusers/models/controlnet.py +1 -1
  80. diffusers/models/controlnet_flux.py +1 -1
  81. diffusers/models/controlnet_sd3.py +1 -1
  82. diffusers/models/controlnet_sparsectrl.py +1 -1
  83. diffusers/models/controlnets/__init__.py +1 -0
  84. diffusers/models/controlnets/controlnet.py +3 -3
  85. diffusers/models/controlnets/controlnet_flax.py +1 -1
  86. diffusers/models/controlnets/controlnet_flux.py +21 -20
  87. diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
  88. diffusers/models/controlnets/controlnet_sana.py +290 -0
  89. diffusers/models/controlnets/controlnet_sd3.py +1 -1
  90. diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
  91. diffusers/models/controlnets/controlnet_union.py +5 -5
  92. diffusers/models/controlnets/controlnet_xs.py +7 -7
  93. diffusers/models/controlnets/multicontrolnet.py +4 -5
  94. diffusers/models/controlnets/multicontrolnet_union.py +5 -6
  95. diffusers/models/downsampling.py +2 -2
  96. diffusers/models/embeddings.py +36 -46
  97. diffusers/models/embeddings_flax.py +2 -2
  98. diffusers/models/lora.py +3 -3
  99. diffusers/models/model_loading_utils.py +233 -1
  100. diffusers/models/modeling_flax_utils.py +1 -2
  101. diffusers/models/modeling_utils.py +203 -108
  102. diffusers/models/normalization.py +4 -4
  103. diffusers/models/resnet.py +2 -2
  104. diffusers/models/resnet_flax.py +1 -1
  105. diffusers/models/transformers/__init__.py +7 -0
  106. diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
  107. diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
  108. diffusers/models/transformers/consisid_transformer_3d.py +1 -1
  109. diffusers/models/transformers/dit_transformer_2d.py +2 -2
  110. diffusers/models/transformers/dual_transformer_2d.py +1 -1
  111. diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
  112. diffusers/models/transformers/latte_transformer_3d.py +4 -5
  113. diffusers/models/transformers/lumina_nextdit2d.py +2 -2
  114. diffusers/models/transformers/pixart_transformer_2d.py +3 -3
  115. diffusers/models/transformers/prior_transformer.py +1 -1
  116. diffusers/models/transformers/sana_transformer.py +8 -3
  117. diffusers/models/transformers/stable_audio_transformer.py +5 -9
  118. diffusers/models/transformers/t5_film_transformer.py +3 -3
  119. diffusers/models/transformers/transformer_2d.py +1 -1
  120. diffusers/models/transformers/transformer_allegro.py +1 -1
  121. diffusers/models/transformers/transformer_chroma.py +641 -0
  122. diffusers/models/transformers/transformer_cogview3plus.py +5 -10
  123. diffusers/models/transformers/transformer_cogview4.py +353 -27
  124. diffusers/models/transformers/transformer_cosmos.py +586 -0
  125. diffusers/models/transformers/transformer_flux.py +376 -138
  126. diffusers/models/transformers/transformer_hidream_image.py +942 -0
  127. diffusers/models/transformers/transformer_hunyuan_video.py +12 -8
  128. diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
  129. diffusers/models/transformers/transformer_ltx.py +105 -24
  130. diffusers/models/transformers/transformer_lumina2.py +1 -1
  131. diffusers/models/transformers/transformer_mochi.py +1 -1
  132. diffusers/models/transformers/transformer_omnigen.py +2 -2
  133. diffusers/models/transformers/transformer_qwenimage.py +645 -0
  134. diffusers/models/transformers/transformer_sd3.py +7 -7
  135. diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
  136. diffusers/models/transformers/transformer_temporal.py +1 -1
  137. diffusers/models/transformers/transformer_wan.py +316 -87
  138. diffusers/models/transformers/transformer_wan_vace.py +387 -0
  139. diffusers/models/unets/unet_1d.py +1 -1
  140. diffusers/models/unets/unet_1d_blocks.py +1 -1
  141. diffusers/models/unets/unet_2d.py +1 -1
  142. diffusers/models/unets/unet_2d_blocks.py +1 -1
  143. diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
  144. diffusers/models/unets/unet_2d_condition.py +4 -3
  145. diffusers/models/unets/unet_2d_condition_flax.py +2 -2
  146. diffusers/models/unets/unet_3d_blocks.py +1 -1
  147. diffusers/models/unets/unet_3d_condition.py +3 -3
  148. diffusers/models/unets/unet_i2vgen_xl.py +3 -3
  149. diffusers/models/unets/unet_kandinsky3.py +1 -1
  150. diffusers/models/unets/unet_motion_model.py +2 -2
  151. diffusers/models/unets/unet_stable_cascade.py +1 -1
  152. diffusers/models/upsampling.py +2 -2
  153. diffusers/models/vae_flax.py +2 -2
  154. diffusers/models/vq_model.py +1 -1
  155. diffusers/modular_pipelines/__init__.py +83 -0
  156. diffusers/modular_pipelines/components_manager.py +1068 -0
  157. diffusers/modular_pipelines/flux/__init__.py +66 -0
  158. diffusers/modular_pipelines/flux/before_denoise.py +689 -0
  159. diffusers/modular_pipelines/flux/decoders.py +109 -0
  160. diffusers/modular_pipelines/flux/denoise.py +227 -0
  161. diffusers/modular_pipelines/flux/encoders.py +412 -0
  162. diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
  163. diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
  164. diffusers/modular_pipelines/modular_pipeline.py +2446 -0
  165. diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
  166. diffusers/modular_pipelines/node_utils.py +665 -0
  167. diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
  168. diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
  169. diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
  170. diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
  171. diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
  172. diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
  173. diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
  174. diffusers/modular_pipelines/wan/__init__.py +66 -0
  175. diffusers/modular_pipelines/wan/before_denoise.py +365 -0
  176. diffusers/modular_pipelines/wan/decoders.py +105 -0
  177. diffusers/modular_pipelines/wan/denoise.py +261 -0
  178. diffusers/modular_pipelines/wan/encoders.py +242 -0
  179. diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
  180. diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
  181. diffusers/pipelines/__init__.py +68 -6
  182. diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
  183. diffusers/pipelines/amused/pipeline_amused.py +7 -6
  184. diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
  185. diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
  186. diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
  187. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
  188. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
  189. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
  190. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
  191. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
  192. diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
  193. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  194. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +22 -13
  195. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
  196. diffusers/pipelines/auto_pipeline.py +23 -20
  197. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  198. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
  199. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
  200. diffusers/pipelines/chroma/__init__.py +49 -0
  201. diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
  202. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
  203. diffusers/pipelines/chroma/pipeline_output.py +21 -0
  204. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +17 -16
  205. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +17 -16
  206. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +18 -17
  207. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +17 -16
  208. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
  209. diffusers/pipelines/cogview4/pipeline_cogview4.py +23 -22
  210. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
  211. diffusers/pipelines/consisid/consisid_utils.py +2 -2
  212. diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
  213. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  214. diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
  215. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +11 -10
  216. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
  217. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
  218. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
  219. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
  220. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
  221. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +226 -107
  222. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +12 -8
  223. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +207 -105
  224. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  225. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
  226. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
  227. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  228. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
  229. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
  230. diffusers/pipelines/cosmos/__init__.py +54 -0
  231. diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
  232. diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
  233. diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
  234. diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
  235. diffusers/pipelines/cosmos/pipeline_output.py +40 -0
  236. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
  237. diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
  238. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
  239. diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
  240. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
  241. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
  242. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
  243. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
  244. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
  245. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
  246. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
  247. diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
  248. diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
  249. diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
  250. diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
  251. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
  252. diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
  253. diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
  254. diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
  255. diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
  256. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
  257. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +8 -8
  258. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
  259. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
  260. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
  261. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
  262. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
  263. diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
  264. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
  265. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
  266. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
  267. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
  268. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
  269. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
  270. diffusers/pipelines/dit/pipeline_dit.py +4 -2
  271. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
  272. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
  273. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
  274. diffusers/pipelines/flux/__init__.py +4 -0
  275. diffusers/pipelines/flux/modeling_flux.py +1 -1
  276. diffusers/pipelines/flux/pipeline_flux.py +37 -36
  277. diffusers/pipelines/flux/pipeline_flux_control.py +9 -9
  278. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +7 -7
  279. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +7 -7
  280. diffusers/pipelines/flux/pipeline_flux_controlnet.py +7 -7
  281. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +31 -23
  282. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +3 -2
  283. diffusers/pipelines/flux/pipeline_flux_fill.py +7 -7
  284. diffusers/pipelines/flux/pipeline_flux_img2img.py +40 -7
  285. diffusers/pipelines/flux/pipeline_flux_inpaint.py +12 -7
  286. diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
  287. diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
  288. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +2 -2
  289. diffusers/pipelines/flux/pipeline_output.py +6 -4
  290. diffusers/pipelines/free_init_utils.py +2 -2
  291. diffusers/pipelines/free_noise_utils.py +3 -3
  292. diffusers/pipelines/hidream_image/__init__.py +47 -0
  293. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
  294. diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
  295. diffusers/pipelines/hunyuan_video/__init__.py +2 -0
  296. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
  297. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +26 -25
  298. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
  299. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
  300. diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
  301. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
  302. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
  303. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
  304. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
  305. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
  306. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
  307. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
  308. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  309. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
  310. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
  311. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
  312. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
  313. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
  314. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
  315. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
  316. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
  317. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
  318. diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
  319. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
  320. diffusers/pipelines/kolors/text_encoder.py +3 -3
  321. diffusers/pipelines/kolors/tokenizer.py +1 -1
  322. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
  323. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
  324. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  325. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
  326. diffusers/pipelines/latte/pipeline_latte.py +12 -12
  327. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
  328. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
  329. diffusers/pipelines/ltx/__init__.py +4 -0
  330. diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
  331. diffusers/pipelines/ltx/pipeline_ltx.py +64 -18
  332. diffusers/pipelines/ltx/pipeline_ltx_condition.py +117 -38
  333. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +63 -18
  334. diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
  335. diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
  336. diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
  337. diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
  338. diffusers/pipelines/mochi/pipeline_mochi.py +15 -14
  339. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
  340. diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
  341. diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
  342. diffusers/pipelines/onnx_utils.py +15 -2
  343. diffusers/pipelines/pag/pag_utils.py +2 -2
  344. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
  345. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
  346. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
  347. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
  348. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
  349. diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
  350. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
  351. diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
  352. diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
  353. diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
  354. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
  355. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
  356. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
  357. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
  358. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
  359. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
  360. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
  361. diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
  362. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
  363. diffusers/pipelines/pia/pipeline_pia.py +8 -6
  364. diffusers/pipelines/pipeline_flax_utils.py +5 -6
  365. diffusers/pipelines/pipeline_loading_utils.py +113 -15
  366. diffusers/pipelines/pipeline_utils.py +127 -48
  367. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +14 -12
  368. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +31 -11
  369. diffusers/pipelines/qwenimage/__init__.py +55 -0
  370. diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
  371. diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
  372. diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
  373. diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
  374. diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
  375. diffusers/pipelines/sana/__init__.py +4 -0
  376. diffusers/pipelines/sana/pipeline_sana.py +23 -21
  377. diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
  378. diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
  379. diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
  380. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
  381. diffusers/pipelines/shap_e/camera.py +1 -1
  382. diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
  383. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
  384. diffusers/pipelines/shap_e/renderer.py +3 -3
  385. diffusers/pipelines/skyreels_v2/__init__.py +59 -0
  386. diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
  387. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
  388. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
  389. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
  390. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
  391. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
  392. diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
  393. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
  394. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
  395. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
  396. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
  397. diffusers/pipelines/stable_diffusion/__init__.py +0 -7
  398. diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
  399. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
  400. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  401. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
  402. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
  403. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +12 -11
  404. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
  405. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +11 -11
  406. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +10 -10
  407. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -9
  408. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
  409. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
  410. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
  411. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
  412. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
  413. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
  414. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
  415. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
  416. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
  417. diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
  418. diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
  419. diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
  420. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +13 -12
  421. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
  422. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
  423. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
  424. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
  425. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
  426. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
  427. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
  428. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
  429. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
  430. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
  431. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
  432. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  433. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
  434. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  435. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
  436. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
  437. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
  438. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
  439. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
  440. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
  441. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
  442. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
  443. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
  444. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
  445. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
  446. diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
  447. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
  448. diffusers/pipelines/unclip/text_proj.py +2 -2
  449. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
  450. diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
  451. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
  452. diffusers/pipelines/visualcloze/__init__.py +52 -0
  453. diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
  454. diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
  455. diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
  456. diffusers/pipelines/wan/__init__.py +2 -0
  457. diffusers/pipelines/wan/pipeline_wan.py +91 -30
  458. diffusers/pipelines/wan/pipeline_wan_i2v.py +145 -45
  459. diffusers/pipelines/wan/pipeline_wan_vace.py +975 -0
  460. diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
  461. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  462. diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
  463. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  464. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
  465. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
  466. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
  467. diffusers/quantizers/__init__.py +3 -1
  468. diffusers/quantizers/base.py +17 -1
  469. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
  470. diffusers/quantizers/bitsandbytes/utils.py +10 -7
  471. diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
  472. diffusers/quantizers/gguf/utils.py +108 -16
  473. diffusers/quantizers/pipe_quant_config.py +202 -0
  474. diffusers/quantizers/quantization_config.py +18 -16
  475. diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
  476. diffusers/quantizers/torchao/torchao_quantizer.py +31 -1
  477. diffusers/schedulers/__init__.py +3 -1
  478. diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
  479. diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
  480. diffusers/schedulers/scheduling_consistency_models.py +1 -1
  481. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
  482. diffusers/schedulers/scheduling_ddim.py +8 -8
  483. diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
  484. diffusers/schedulers/scheduling_ddim_flax.py +6 -6
  485. diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
  486. diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
  487. diffusers/schedulers/scheduling_ddpm.py +9 -9
  488. diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
  489. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
  490. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
  491. diffusers/schedulers/scheduling_deis_multistep.py +16 -9
  492. diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
  493. diffusers/schedulers/scheduling_dpmsolver_multistep.py +18 -12
  494. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
  495. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
  496. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
  497. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +19 -13
  498. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
  499. diffusers/schedulers/scheduling_edm_euler.py +20 -11
  500. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
  501. diffusers/schedulers/scheduling_euler_discrete.py +3 -3
  502. diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
  503. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
  504. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
  505. diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
  506. diffusers/schedulers/scheduling_heun_discrete.py +2 -2
  507. diffusers/schedulers/scheduling_ipndm.py +2 -2
  508. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
  509. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
  510. diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
  511. diffusers/schedulers/scheduling_lcm.py +3 -3
  512. diffusers/schedulers/scheduling_lms_discrete.py +2 -2
  513. diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
  514. diffusers/schedulers/scheduling_pndm.py +4 -4
  515. diffusers/schedulers/scheduling_pndm_flax.py +4 -4
  516. diffusers/schedulers/scheduling_repaint.py +9 -9
  517. diffusers/schedulers/scheduling_sasolver.py +15 -15
  518. diffusers/schedulers/scheduling_scm.py +1 -2
  519. diffusers/schedulers/scheduling_sde_ve.py +1 -1
  520. diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
  521. diffusers/schedulers/scheduling_tcd.py +3 -3
  522. diffusers/schedulers/scheduling_unclip.py +5 -5
  523. diffusers/schedulers/scheduling_unipc_multistep.py +21 -12
  524. diffusers/schedulers/scheduling_utils.py +3 -3
  525. diffusers/schedulers/scheduling_utils_flax.py +2 -2
  526. diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
  527. diffusers/training_utils.py +91 -5
  528. diffusers/utils/__init__.py +15 -0
  529. diffusers/utils/accelerate_utils.py +1 -1
  530. diffusers/utils/constants.py +4 -0
  531. diffusers/utils/doc_utils.py +1 -1
  532. diffusers/utils/dummy_pt_objects.py +432 -0
  533. diffusers/utils/dummy_torch_and_transformers_objects.py +480 -0
  534. diffusers/utils/dynamic_modules_utils.py +85 -8
  535. diffusers/utils/export_utils.py +1 -1
  536. diffusers/utils/hub_utils.py +33 -17
  537. diffusers/utils/import_utils.py +151 -18
  538. diffusers/utils/logging.py +1 -1
  539. diffusers/utils/outputs.py +2 -1
  540. diffusers/utils/peft_utils.py +96 -10
  541. diffusers/utils/state_dict_utils.py +20 -3
  542. diffusers/utils/testing_utils.py +195 -17
  543. diffusers/utils/torch_utils.py +43 -5
  544. diffusers/video_processor.py +2 -2
  545. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/METADATA +72 -57
  546. diffusers-0.35.0.dist-info/RECORD +703 -0
  547. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/WHEEL +1 -1
  548. diffusers-0.33.1.dist-info/RECORD +0 -608
  549. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
  550. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
  551. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
@@ -419,12 +419,7 @@ class WanVideoToVideoPipeline(DiffusionPipeline, WanLoraLoaderMixin):
         )

         if latents is None:
-            if isinstance(generator, list):
-                init_latents = [
-                    retrieve_latents(self.vae.encode(video[i].unsqueeze(0)), generator[i]) for i in range(batch_size)
-                ]
-            else:
-                init_latents = [retrieve_latents(self.vae.encode(vid.unsqueeze(0)), generator) for vid in video]
+            init_latents = [retrieve_latents(self.vae.encode(vid.unsqueeze(0)), sample_mode="argmax") for vid in video]

         init_latents = torch.cat(init_latents, dim=0).to(dtype)

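Note (not part of the diff): the replacement above can drop the per-sample `generator` because `sample_mode="argmax"` takes the deterministic mode of the VAE's latent distribution instead of drawing a sample from it. A minimal sketch of the `retrieve_latents` helper as it appears in other diffusers pipelines (the exact body may differ between releases):

    def retrieve_latents(encoder_output, generator=None, sample_mode="sample"):
        # "sample" draws from the posterior and therefore needs a generator;
        # "argmax" returns the distribution's mode, which is deterministic.
        if hasattr(encoder_output, "latent_dist") and sample_mode == "sample":
            return encoder_output.latent_dist.sample(generator)
        elif hasattr(encoder_output, "latent_dist") and sample_mode == "argmax":
            return encoder_output.latent_dist.mode()
        elif hasattr(encoder_output, "latents"):
            return encoder_output.latents
        else:
            raise AttributeError("Could not access latents of provided encoder_output")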
@@ -441,7 +436,7 @@ class WanVideoToVideoPipeline(DiffusionPipeline, WanLoraLoaderMixin):
             if hasattr(self.scheduler, "add_noise"):
                 latents = self.scheduler.add_noise(init_latents, noise, timestep)
             else:
-                latents = self.scheduelr.scale_noise(init_latents, timestep, noise)
+                latents = self.scheduler.scale_noise(init_latents, timestep, noise)
         else:
             latents = latents.to(device)

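One more observation on this hunk (beyond the `scheduelr` typo fix): the branch duck-types the scheduler, since DDPM-style schedulers expose `add_noise(original, noise, t)` while flow-matching schedulers expose `scale_noise(sample, t, noise)`, with a different argument order. A minimal sketch of that dispatch; the helper name is ours, not the library's:

    import torch

    def add_initial_noise(scheduler, latents: torch.Tensor, noise: torch.Tensor,
                          timestep: torch.Tensor) -> torch.Tensor:
        # e.g. UniPCMultistepScheduler exposes add_noise, while
        # FlowMatchEulerDiscreteScheduler exposes scale_noise - note the
        # mirrored argument order, as in the hunk above.
        if hasattr(scheduler, "add_noise"):
            return scheduler.add_noise(latents, noise, timestep)
        return scheduler.scale_noise(latents, timestep, noise)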
@@ -513,7 +508,7 @@ class WanVideoToVideoPipeline(DiffusionPipeline, WanLoraLoaderMixin):

         Args:
             prompt (`str` or `List[str]`, *optional*):
-                The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
+                The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
                 instead.
             height (`int`, defaults to `480`):
                 The height in pixels of the generated image.
@@ -525,11 +520,13 @@ class WanVideoToVideoPipeline(DiffusionPipeline, WanLoraLoaderMixin):
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                 expense of slower inference.
             guidance_scale (`float`, defaults to `5.0`):
-                Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
-                `guidance_scale` is defined as `w` of equation 2. of [Imagen
-                Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
-                1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
-                usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
+            strength (`float`, defaults to `0.8`):
+                Higher strength leads to more differences between original image and generated video.
             num_videos_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -542,7 +539,7 @@ class WanVideoToVideoPipeline(DiffusionPipeline, WanLoraLoaderMixin):
             prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                 provided, text embeddings are generated from the `prompt` input argument.
-            output_type (`str`, *optional*, defaults to `"pil"`):
+            output_type (`str`, *optional*, defaults to `"np"`):
                 The output format of the generated image. Choose between `PIL.Image` or `np.array`.
             return_dict (`bool`, *optional*, defaults to `True`):
                 Whether or not to return a [`WanPipelineOutput`] instead of a plain tuple.
@@ -559,8 +556,9 @@ class WanVideoToVideoPipeline(DiffusionPipeline, WanLoraLoaderMixin):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
                 `._callback_tensor_inputs` attribute of your pipeline class.
-            autocast_dtype (`torch.dtype`, *optional*, defaults to `torch.bfloat16`):
-                The dtype to use for the torch.amp.autocast.
+            max_sequence_length (`int`, defaults to `512`):
+                The maximum sequence length of the text encoder. If the prompt is longer than this, it will be
+                truncated. If the prompt is shorter, it will be padded to this length.

         Examples:

@@ -1,5 +1,5 @@
 # Copyright (c) 2022 Dominic Rampas MIT License
-# Copyright 2024 The HuggingFace Team. All rights reserved.
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -1,5 +1,5 @@
 # Copyright (c) 2023 Dominic Rampas MIT License
-# Copyright 2024 The HuggingFace Team. All rights reserved.
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -1,5 +1,5 @@
 # Copyright (c) 2023 Dominic Rampas MIT License
-# Copyright 2024 The HuggingFace Team. All rights reserved.
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
-# Copyright 2024 The HuggingFace Team. All rights reserved.
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,7 +21,7 @@ from transformers import CLIPTextModel, CLIPTokenizer
 from ...schedulers import DDPMWuerstchenScheduler
 from ...utils import deprecate, is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
 from .modeling_paella_vq_model import PaellaVQModel
 from .modeling_wuerstchen_diffnext import WuerstchenDiffNeXt

@@ -56,7 +56,7 @@ EXAMPLE_DOC_STRING = """
 """


-class WuerstchenDecoderPipeline(DiffusionPipeline):
+class WuerstchenDecoderPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
     """
     Pipeline for generating images from the Wuerstchen model.

@@ -247,11 +247,11 @@ class WuerstchenDecoderPipeline(DiffusionPipeline):
                 Custom timesteps to use for the denoising process. If not defined, equal spaced `num_inference_steps`
                 timesteps are used. Must be in descending order.
             guidance_scale (`float`, *optional*, defaults to 0.0):
-                Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
-                `decoder_guidance_scale` is defined as `w` of equation 2. of [Imagen
-                Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting
-                `decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely
-                linked to the text `prompt`, usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `decoder_guidance_scale` is defined as `w` of
+                equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by
+                setting `decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
+                closely linked to the text `prompt`, usually at the expense of lower image quality.
             negative_prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
                 if `decoder_guidance_scale` is less than `1`).
@@ -1,4 +1,4 @@
-# Copyright 2024 The HuggingFace Team. All rights reserved.
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -18,7 +18,7 @@ from transformers import CLIPTextModel, CLIPTokenizer

 from ...schedulers import DDPMWuerstchenScheduler
 from ...utils import deprecate, replace_example_docstring
-from ..pipeline_utils import DiffusionPipeline
+from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline
 from .modeling_paella_vq_model import PaellaVQModel
 from .modeling_wuerstchen_diffnext import WuerstchenDiffNeXt
 from .modeling_wuerstchen_prior import WuerstchenPrior
@@ -40,7 +40,7 @@ TEXT2IMAGE_EXAMPLE_DOC_STRING = """
 """


-class WuerstchenCombinedPipeline(DiffusionPipeline):
+class WuerstchenCombinedPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
     """
     Combined Pipeline for text-to-image generation using Wuerstchen

@@ -68,6 +68,7 @@ class WuerstchenCombinedPipeline(DiffusionPipeline):
         The scheduler to be used for prior pipeline.
     """

+    _last_supported_version = "0.33.1"
     _load_connected_pipes = True

     def __init__(
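Note (not part of the diff): `_last_supported_version` looks like the hook that `DeprecatedPipelineMixin` reads to mark the last release in which a frozen pipeline was maintained. A hypothetical opt-in sketch inferred from this hunk, not from documented API:

    from diffusers.pipelines.pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline

    # The mixin is listed first so its loading/warning hooks take precedence.
    class MyFrozenPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
        _last_supported_version = "0.33.1"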
@@ -112,7 +113,7 @@ class WuerstchenCombinedPipeline(DiffusionPipeline):
     def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
         self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)

-    def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
+    def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
         to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
@@ -122,7 +123,7 @@ class WuerstchenCombinedPipeline(DiffusionPipeline):
         self.prior_pipe.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
         self.decoder_pipe.enable_model_cpu_offload(gpu_id=gpu_id, device=device)

-    def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
+    def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
         r"""
         Offloads all models (`unet`, `text_encoder`, `vae`, and `safety checker` state dicts) to CPU using 🤗
         Accelerate, significantly reducing memory usage. Models are moved to a `torch.device('meta')` and loaded on a
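Note (not part of the diff): both offloading methods now default `device` to `None` rather than hardcoding `"cuda"`, presumably so diffusers can resolve whatever accelerator is present (CUDA, MPS, XPU, ...) at call time. Caller-side impact, sketched under that assumption:

    pipe.enable_model_cpu_offload()                  # let diffusers pick the accelerator
    pipe.enable_model_cpu_offload(device="cuda:1")   # an explicit device still works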
@@ -190,11 +191,11 @@ class WuerstchenCombinedPipeline(DiffusionPipeline):
            width (`int`, *optional*, defaults to 512):
                The width in pixels of the generated image.
            prior_guidance_scale (`float`, *optional*, defaults to 4.0):
-                Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
-                `prior_guidance_scale` is defined as `w` of equation 2. of [Imagen
-                Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting
-                `prior_guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked
-                to the text `prompt`, usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `prior_guidance_scale` is defined as `w` of
+                equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by
+                setting `prior_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
+                closely linked to the text `prompt`, usually at the expense of lower image quality.
            prior_num_inference_steps (`Union[int, Dict[float, int]]`, *optional*, defaults to 60):
                The number of prior denoising steps. More denoising steps usually lead to a higher quality image at the
                expense of slower inference. For more specific timestep spacing, you can pass customized
@@ -210,11 +211,11 @@ class WuerstchenCombinedPipeline(DiffusionPipeline):
                Custom timesteps to use for the denoising process for the decoder. If not defined, equal spaced
                `num_inference_steps` timesteps are used. Must be in descending order.
            decoder_guidance_scale (`float`, *optional*, defaults to 0.0):
-                Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
-                `guidance_scale` is defined as `w` of equation 2. of [Imagen
-                Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
-                1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
-                usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
            generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                to make generation deterministic.
@@ -1,4 +1,4 @@
-# Copyright 2024 The HuggingFace Team. All rights reserved.
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -325,11 +325,11 @@ class WuerstchenPriorPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin)
            Custom timesteps to use for the denoising process. If not defined, equal spaced `num_inference_steps`
            timesteps are used. Must be in descending order.
        guidance_scale (`float`, *optional*, defaults to 8.0):
-            Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
-            `decoder_guidance_scale` is defined as `w` of equation 2. of [Imagen
-            Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting
-            `decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely
-            linked to the text `prompt`, usually at the expense of lower image quality.
+            Guidance scale as defined in [Classifier-Free Diffusion
+            Guidance](https://huggingface.co/papers/2207.12598). `decoder_guidance_scale` is defined as `w` of
+            equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by
+            setting `decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
+            closely linked to the text `prompt`, usually at the expense of lower image quality.
        negative_prompt (`str` or `List[str]`, *optional*):
            The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
            if `decoder_guidance_scale` is less than `1`).
@@ -1,4 +1,4 @@
-# Copyright 2024 The HuggingFace Team. All rights reserved.
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,5 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+
 from .auto import DiffusersAutoQuantizer
 from .base import DiffusersQuantizer
+from .pipe_quant_config import PipelineQuantizationConfig
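The newly exported `PipelineQuantizationConfig` (implemented in `diffusers/quantizers/pipe_quant_config.py`, +202 lines in the file list above) enables quantizing selected pipeline components at load time. A hedged usage sketch following the API as documented for these releases; verify the argument names against the release notes before relying on them:

    import torch
    from diffusers import DiffusionPipeline
    from diffusers.quantizers import PipelineQuantizationConfig

    quant_config = PipelineQuantizationConfig(
        quant_backend="bitsandbytes_4bit",
        quant_kwargs={"load_in_4bit": True, "bnb_4bit_compute_dtype": torch.bfloat16},
        components_to_quantize=["transformer"],  # only quantize the denoiser
    )
    pipe = DiffusionPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-dev",
        quantization_config=quant_config,
        torch_dtype=torch.bfloat16,
    )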
@@ -199,7 +199,7 @@ class DiffusersQuantizer(ABC):

     def dequantize(self, model):
         """
-        Potentially dequantize the model to retrive the original model, with some loss in accuracy / performance. Note
+        Potentially dequantize the model to retrieve the original model, with some loss in accuracy / performance. Note
         not all quantization schemes support this.
         """
         model = self._dequantize(model)
@@ -209,6 +209,17 @@ class DiffusersQuantizer(ABC):

         return model

+    def get_cuda_warm_up_factor(self):
+        """
+        The factor to be used in `caching_allocator_warmup` to get the number of bytes to pre-allocate to warm up cuda.
+        A factor of 2 means we allocate all bytes in the empty model (since we allocate in fp16), a factor of 4 means
+        we allocate half the memory of the weights residing in the empty model, etc...
+        """
+        # By default we return 4, i.e. half the model size (this corresponds to the case where the model is not
+        # really pre-processed, i.e. we do not have the info that weights are going to be 8 bits before actual
+        # weight loading)
+        return 4
+
     def _dequantize(self, model):
         raise NotImplementedError(
             f"{self.quantization_config.quant_method} has no implementation of `dequantize`, please raise an issue on GitHub."
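To make the factor arithmetic in the docstring above concrete: one consistent reading is that the warm-up allocation divides the model's fp32 byte count by the factor, so 2 pre-allocates the full fp16 footprint of the weights and the default of 4 pre-allocates half of it. A sketch of that reading (names are ours, not the library's):

    def warmup_bytes(total_fp32_param_bytes: int, factor: int) -> int:
        # factor 2 -> total/2, the full fp16 footprint of the weights;
        # factor 4 -> total/4, half the fp16 footprint (the conservative default
        # when the final weight width is unknown before loading).
        return total_fp32_param_bytes // factor

    # e.g. 4 GB of fp32 weights: factor 2 warms up 2 GB, factor 4 warms up 1 GB.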
@@ -227,3 +238,8 @@ class DiffusersQuantizer(ABC):
     @property
     @abstractmethod
     def is_trainable(self): ...
+
+    @property
+    def is_compileable(self) -> bool:
+        """Flag indicating whether the quantized model can be compiled"""
+        return False
@@ -564,6 +564,10 @@ class BnB8BitDiffusersQuantizer(DiffusersQuantizer):
         # Because we're mandating `bitsandbytes` 0.43.3.
         return True

+    @property
+    def is_compileable(self) -> bool:
+        return True
+
     def _dequantize(self, model):
         from .utils import dequantize_and_replace

@@ -49,7 +49,7 @@ def _replace_with_bnb_linear(
     """
     Private method that wraps the recursion for module replacement.

-    Returns the converted model and a boolean that indicates if the conversion has been successfull or not.
+    Returns the converted model and a boolean that indicates if the conversion has been successful or not.
     """
     for name, module in model.named_children():
         if current_key_name is None:
@@ -121,8 +121,9 @@ def replace_with_bnb_linear(model, modules_to_not_convert=None, current_key_name

     References:
         * `bnb.nn.Linear8bit`: [LLM.int8(): 8-bit Matrix Multiplication for Transformers at
-          Scale](https://arxiv.org/abs/2208.07339)
-        * `bnb.nn.Linear4bit`: [QLoRA: Efficient Finetuning of Quantized LLMs](https://arxiv.org/abs/2305.14314)
+          Scale](https://huggingface.co/papers/2208.07339)
+        * `bnb.nn.Linear4bit`: [QLoRA: Efficient Finetuning of Quantized
+          LLMs](https://huggingface.co/papers/2305.14314)

     Parameters:
         model (`torch.nn.Module`):
@@ -171,9 +172,11 @@ def dequantize_bnb_weight(weight: "torch.nn.Parameter", state=None, dtype: "torc
 
     if cls_name == "Params4bit":
         output_tensor = bnb.functional.dequantize_4bit(weight.data, weight.quant_state)
-        logger.warning_once(
-            f"The model is going to be dequantized in {output_tensor.dtype} - if you want to upcast it to another dtype, make sure to pass the desired dtype when quantizing the model through `bnb_4bit_quant_type` argument of `BitsAndBytesConfig`"
-        )
+        msg = f"The model is going to be dequantized in {output_tensor.dtype} - if you want to upcast it to another dtype, make sure to pass the desired dtype when quantizing the model through `bnb_4bit_quant_type` argument of `BitsAndBytesConfig`"
+        if dtype:
+            msg = f"The model is going to be first dequantized in {output_tensor.dtype} and type-casted to {dtype}"
+            output_tensor = output_tensor.to(dtype)
+        logger.warning_once(msg)
         return output_tensor
 
     if state.SCB is None:
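
Note: `dequantize_bnb_weight` previously ignored its `dtype` argument on the 4-bit path; after this change an explicit `dtype` also casts the result and switches the logged message. A behavioral sketch, assuming `quantized_weight` is a bnb `Params4bit` already on an accelerator:

    import torch

    # dtype=None: returned in whatever dtype bnb dequantizes to (old warning).
    # dtype given: result is additionally cast, and the new message is logged.
    out = dequantize_bnb_weight(quantized_weight, state=None, dtype=torch.bfloat16)
    assert out.dtype == torch.bfloat16
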
@@ -221,7 +224,7 @@ def _dequantize_and_replace(
     performance drop compared to the original model before quantization - use it only for specific usecases such as
     QLoRA adapters merging.
 
-    Returns the converted model and a boolean that indicates if the conversion has been successfull or not.
+    Returns the converted model and a boolean that indicates if the conversion has been successful or not.
     """
     quant_method = quantization_config.quantization_method()
 
@@ -49,7 +49,7 @@ class GGUFQuantizer(DiffusersQuantizer):
     def validate_environment(self, *args, **kwargs):
         if not is_accelerate_available() or is_accelerate_version("<", "0.26.0"):
             raise ImportError(
-                "Loading GGUF Parameters requires `accelerate` installed in your enviroment: `pip install 'accelerate>=0.26.0'`"
+                "Loading GGUF Parameters requires `accelerate` installed in your environment: `pip install 'accelerate>=0.26.0'`"
             )
         if not is_gguf_available() or is_gguf_version("<", "0.10.0"):
             raise ImportError(
@@ -82,7 +82,7 @@ class GGUFQuantizer(DiffusersQuantizer):
         inferred_shape = _quant_shape_from_byte_shape(loaded_param_shape, type_size, block_size)
         if inferred_shape != current_param_shape:
             raise ValueError(
-                f"{param_name} has an expected quantized shape of: {inferred_shape}, but receieved shape: {loaded_param_shape}"
+                f"{param_name} has an expected quantized shape of: {inferred_shape}, but received shape: {loaded_param_shape}"
            )
 
         return True
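
Note: the check recovers the logical weight shape from the stored byte shape using the block layout of the quant type. A worked example with Q4_0, which packs block_size = 32 weights into type_size = 18 bytes:

    import gguf

    block_size, type_size = gguf.GGML_QUANT_SIZES[gguf.GGMLQuantizationType.Q4_0]  # (32, 18)
    loaded_byte_shape = (3072, 1728)  # the last dim counts bytes, not weights
    inferred_shape = (loaded_byte_shape[0], loaded_byte_shape[1] // type_size * block_size)
    assert inferred_shape == (3072, 3072)  # 1728 // 18 * 32 == 3072
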
@@ -146,13 +146,22 @@ class GGUFQuantizer(DiffusersQuantizer):
     def is_trainable(self) -> bool:
         return False
 
+    @property
+    def is_compileable(self) -> bool:
+        return True
+
     def _dequantize(self, model):
         is_model_on_cpu = model.device.type == "cpu"
         if is_model_on_cpu:
             logger.info(
-                "Model was found to be on CPU (could happen as a result of `enable_model_cpu_offload()`). So, moving it to GPU. After dequantization, will move the model back to CPU again to preserve the previous device."
+                "Model was found to be on CPU (could happen as a result of `enable_model_cpu_offload()`). So, moving it to accelerator. After dequantization, will move the model back to CPU again to preserve the previous device."
+            )
+            device = (
+                torch.accelerator.current_accelerator()
+                if hasattr(torch, "accelerator")
+                else torch.cuda.current_device()
             )
-            model.to(torch.cuda.current_device())
+            model.to(device)
 
         model = _dequantize_gguf_and_restore_linear(model, self.modules_to_not_convert)
         if is_model_on_cpu:
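
Note: the two branches above return different kinds of objects, both of which `model.to(...)` accepts. A sketch of the distinction (the printed values are examples):

    import torch

    if hasattr(torch, "accelerator"):  # present on recent PyTorch builds
        dev = torch.accelerator.current_accelerator()  # torch.device, e.g. device(type='cuda') or 'xpu'
    else:
        dev = torch.cuda.current_device()              # plain int index, e.g. 0
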
@@ -1,4 +1,4 @@
-# Copyright 2024 The HuggingFace Team and City96. All rights reserved.
+# Copyright 2025 The HuggingFace Team and City96. All rights reserved.
 # #
 # # Licensed under the Apache License, Version 2.0 (the "License");
 # # you may not use this file except in compliance with the License.
@@ -12,15 +12,15 @@
 # # See the License for the specific language governing permissions and
 # # limitations under the License.
 
-
 import inspect
+import os
 from contextlib import nullcontext
 
 import gguf
 import torch
 import torch.nn as nn
 
-from ...utils import is_accelerate_available
+from ...utils import is_accelerate_available, is_kernels_available
 
 
 if is_accelerate_available():
@@ -29,6 +29,82 @@ if is_accelerate_available():
     from accelerate.hooks import add_hook_to_module, remove_hook_from_module
 
 
+can_use_cuda_kernels = (
+    os.getenv("DIFFUSERS_GGUF_CUDA_KERNELS", "false").lower() in ["1", "true", "yes"]
+    and torch.cuda.is_available()
+    and torch.cuda.get_device_capability()[0] >= 7
+)
+if can_use_cuda_kernels and is_kernels_available():
+    from kernels import get_kernel
+
+    ops = get_kernel("Isotr0py/ggml")
+else:
+    ops = None
+
+UNQUANTIZED_TYPES = {gguf.GGMLQuantizationType.F32, gguf.GGMLQuantizationType.F16, gguf.GGMLQuantizationType.BF16}
+STANDARD_QUANT_TYPES = {
+    gguf.GGMLQuantizationType.Q4_0,
+    gguf.GGMLQuantizationType.Q4_1,
+    gguf.GGMLQuantizationType.Q5_0,
+    gguf.GGMLQuantizationType.Q5_1,
+    gguf.GGMLQuantizationType.Q8_0,
+    gguf.GGMLQuantizationType.Q8_1,
+}
+KQUANT_TYPES = {
+    gguf.GGMLQuantizationType.Q2_K,
+    gguf.GGMLQuantizationType.Q3_K,
+    gguf.GGMLQuantizationType.Q4_K,
+    gguf.GGMLQuantizationType.Q5_K,
+    gguf.GGMLQuantizationType.Q6_K,
+}
+IMATRIX_QUANT_TYPES = {
+    gguf.GGMLQuantizationType.IQ1_M,
+    gguf.GGMLQuantizationType.IQ1_S,
+    gguf.GGMLQuantizationType.IQ2_XXS,
+    gguf.GGMLQuantizationType.IQ2_XS,
+    gguf.GGMLQuantizationType.IQ2_S,
+    gguf.GGMLQuantizationType.IQ3_XXS,
+    gguf.GGMLQuantizationType.IQ3_S,
+    gguf.GGMLQuantizationType.IQ4_XS,
+    gguf.GGMLQuantizationType.IQ4_NL,
+}
+# TODO(Isotr0py): Currently, we don't have MMQ kernel for I-Matrix quantization.
+# Consolidate DEQUANT_TYPES, MMVQ_QUANT_TYPES and MMQ_QUANT_TYPES after we add
+# MMQ kernel for I-Matrix quantization.
+DEQUANT_TYPES = STANDARD_QUANT_TYPES | KQUANT_TYPES | IMATRIX_QUANT_TYPES
+MMVQ_QUANT_TYPES = STANDARD_QUANT_TYPES | KQUANT_TYPES | IMATRIX_QUANT_TYPES
+MMQ_QUANT_TYPES = STANDARD_QUANT_TYPES | KQUANT_TYPES
+
+
+def _fused_mul_mat_gguf(x: torch.Tensor, qweight: torch.Tensor, qweight_type: int) -> torch.Tensor:
+    # there is no need to call any kernel for fp16/bf16
+    if qweight_type in UNQUANTIZED_TYPES:
+        return x @ qweight.T
+
+    # TODO(Isotr0py): GGUF's MMQ and MMVQ implementation are designed for
+    # contiguous batching and inefficient with diffusers' batching,
+    # so we disabled it now.
+
+    # elif qweight_type in MMVQ_QUANT_TYPES:
+    #     y = ops.ggml_mul_mat_vec_a8(qweight, x, qweight_type, qweight.shape[0])
+    # elif qweight_type in MMQ_QUANT_TYPES:
+    #     y = ops.ggml_mul_mat_a8(qweight, x, qweight_type, qweight.shape[0])
+
+    # If there is no available MMQ kernel, fallback to dequantize
+    if qweight_type in DEQUANT_TYPES:
+        block_size, type_size = gguf.GGML_QUANT_SIZES[qweight_type]
+        shape = (qweight.shape[0], qweight.shape[1] // type_size * block_size)
+        weight = ops.ggml_dequantize(qweight, qweight_type, *shape)
+        y = x @ weight.to(x.dtype).T
+    else:
+        # Raise an error if the quantization type is not supported.
+        # Might be useful if llama.cpp adds a new quantization type.
+        # Wrap to GGMLQuantizationType IntEnum to make sure it's a valid type.
+        qweight_type = gguf.GGMLQuantizationType(qweight_type)
+        raise NotImplementedError(f"Unsupported GGUF quantization type: {qweight_type}")
+    return y.as_tensor()
+
+
 # Copied from diffusers.quantizers.bitsandbytes.utils._create_accelerate_new_hook
 def _create_accelerate_new_hook(old_hook):
     r"""
@@ -408,6 +484,18 @@ class GGUFParameter(torch.nn.Parameter):
     def as_tensor(self):
         return torch.Tensor._make_subclass(torch.Tensor, self, self.requires_grad)
 
+    @staticmethod
+    def _extract_quant_type(args):
+        # When converting from original format checkpoints we often use splits, cats etc on tensors
+        # this method ensures that the returned tensor type from those operations remains GGUFParameter
+        # so that we preserve quant_type information
+        for arg in args:
+            if isinstance(arg, list) and isinstance(arg[0], GGUFParameter):
+                return arg[0].quant_type
+            if isinstance(arg, GGUFParameter):
+                return arg.quant_type
+        return None
+
     @classmethod
     def __torch_function__(cls, func, types, args=(), kwargs=None):
         if kwargs is None:
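
Note: `_extract_quant_type` factors out the scan that `__torch_function__` previously did inline (next hunk): take `quant_type` from the first `GGUFParameter`, or first list of them, found among the args. The effect, sketched on a hypothetical `gguf_param`:

    import torch

    # Splits/concats used during checkpoint conversion keep the quant_type tag.
    chunks = torch.chunk(gguf_param, 2, dim=0)  # pieces come back as GGUFParameter
    merged = torch.cat(list(chunks), dim=0)     # list arg -> quant_type recovered
    assert merged.quant_type == gguf_param.quant_type
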
@@ -415,22 +503,13 @@ class GGUFParameter(torch.nn.Parameter):
 
         result = super().__torch_function__(func, types, args, kwargs)
 
-        # When converting from original format checkpoints we often use splits, cats etc on tensors
-        # this method ensures that the returned tensor type from those operations remains GGUFParameter
-        # so that we preserve quant_type information
-        quant_type = None
-        for arg in args:
-            if isinstance(arg, list) and isinstance(arg[0], GGUFParameter):
-                quant_type = arg[0].quant_type
-                break
-            if isinstance(arg, GGUFParameter):
-                quant_type = arg.quant_type
-                break
         if isinstance(result, torch.Tensor):
+            quant_type = cls._extract_quant_type(args)
             return cls(result, quant_type=quant_type)
         # Handle tuples and lists
-        elif isinstance(result, (tuple, list)):
+        elif type(result) in (list, tuple):
             # Preserve the original type (tuple or list)
+            quant_type = cls._extract_quant_type(args)
             wrapped = [cls(x, quant_type=quant_type) if isinstance(x, torch.Tensor) else x for x in result]
             return type(result)(wrapped)
         else:
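
Note: the switch from `isinstance(result, (tuple, list))` to `type(result) in (list, tuple)` is an exact-type check, so tuple subclasses such as torch's named return types now fall through to the `else` branch instead of being rebuilt as plain sequences:

    import torch

    res = torch.topk(torch.randn(4), k=2)  # torch.return_types.topk, a tuple subclass
    isinstance(res, tuple)                 # True  -> the old branch re-wrapped it
    type(res) in (list, tuple)             # False -> the new branch leaves it as-is
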
@@ -448,11 +527,24 @@ class GGUFLinear(nn.Linear):
     ) -> None:
         super().__init__(in_features, out_features, bias, device)
         self.compute_dtype = compute_dtype
+        self.device = device
+
+    def forward(self, inputs: torch.Tensor):
+        if ops is not None and self.weight.is_cuda and inputs.is_cuda:
+            return self.forward_cuda(inputs)
+        return self.forward_native(inputs)
 
-    def forward(self, inputs):
+    def forward_native(self, inputs: torch.Tensor):
         weight = dequantize_gguf_tensor(self.weight)
         weight = weight.to(self.compute_dtype)
         bias = self.bias.to(self.compute_dtype) if self.bias is not None else None
 
         output = torch.nn.functional.linear(inputs, weight, bias)
         return output
+
+    def forward_cuda(self, inputs: torch.Tensor):
+        quant_type = self.weight.quant_type
+        output = _fused_mul_mat_gguf(inputs.to(self.compute_dtype), self.weight, quant_type)
+        if self.bias is not None:
+            output += self.bias.to(self.compute_dtype)
+        return output
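
Note: `forward` now dispatches per call: the fused kernel path when `ops` loaded and both weight and input live on CUDA, the dequantize-then-`F.linear` path everywhere else. A rough consistency check one could run, assuming `layer` is a CUDA-resident `GGUFLinear` with kernels enabled:

    import torch

    x = torch.randn(2, layer.in_features, device="cuda", dtype=layer.compute_dtype)
    # Fused quantized matmul vs. dequantize + linear: equal up to quantization noise.
    torch.testing.assert_close(layer.forward_cuda(x), layer.forward_native(x), rtol=1e-2, atol=1e-2)
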