diffusers 0.30.2__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. diffusers/__init__.py +38 -2
  2. diffusers/configuration_utils.py +12 -0
  3. diffusers/dependency_versions_table.py +1 -1
  4. diffusers/image_processor.py +257 -54
  5. diffusers/loaders/__init__.py +2 -0
  6. diffusers/loaders/ip_adapter.py +5 -1
  7. diffusers/loaders/lora_base.py +14 -7
  8. diffusers/loaders/lora_conversion_utils.py +332 -0
  9. diffusers/loaders/lora_pipeline.py +707 -41
  10. diffusers/loaders/peft.py +1 -0
  11. diffusers/loaders/single_file_utils.py +81 -4
  12. diffusers/loaders/textual_inversion.py +2 -0
  13. diffusers/loaders/unet.py +39 -8
  14. diffusers/models/__init__.py +4 -0
  15. diffusers/models/adapter.py +53 -53
  16. diffusers/models/attention.py +86 -10
  17. diffusers/models/attention_processor.py +169 -133
  18. diffusers/models/autoencoders/autoencoder_kl.py +71 -11
  19. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +287 -85
  20. diffusers/models/controlnet_flux.py +536 -0
  21. diffusers/models/controlnet_sd3.py +7 -3
  22. diffusers/models/controlnet_sparsectrl.py +0 -1
  23. diffusers/models/embeddings.py +238 -61
  24. diffusers/models/embeddings_flax.py +23 -9
  25. diffusers/models/model_loading_utils.py +182 -14
  26. diffusers/models/modeling_utils.py +283 -46
  27. diffusers/models/normalization.py +79 -0
  28. diffusers/models/transformers/__init__.py +1 -0
  29. diffusers/models/transformers/auraflow_transformer_2d.py +1 -0
  30. diffusers/models/transformers/cogvideox_transformer_3d.py +58 -36
  31. diffusers/models/transformers/pixart_transformer_2d.py +9 -1
  32. diffusers/models/transformers/transformer_cogview3plus.py +386 -0
  33. diffusers/models/transformers/transformer_flux.py +161 -44
  34. diffusers/models/transformers/transformer_sd3.py +7 -1
  35. diffusers/models/unets/unet_2d_condition.py +8 -8
  36. diffusers/models/unets/unet_motion_model.py +41 -63
  37. diffusers/models/upsampling.py +6 -6
  38. diffusers/pipelines/__init__.py +40 -7
  39. diffusers/pipelines/animatediff/__init__.py +2 -0
  40. diffusers/pipelines/animatediff/pipeline_animatediff.py +45 -21
  41. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +44 -20
  42. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +18 -4
  43. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +2 -0
  44. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +104 -66
  45. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
  46. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +1 -1
  47. diffusers/pipelines/auto_pipeline.py +39 -8
  48. diffusers/pipelines/cogvideo/__init__.py +6 -0
  49. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +32 -34
  50. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +794 -0
  51. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +837 -0
  52. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +825 -0
  53. diffusers/pipelines/cogvideo/pipeline_output.py +20 -0
  54. diffusers/pipelines/cogview3/__init__.py +47 -0
  55. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
  56. diffusers/pipelines/cogview3/pipeline_output.py +21 -0
  57. diffusers/pipelines/controlnet/pipeline_controlnet.py +9 -1
  58. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +8 -0
  59. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +8 -0
  60. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +36 -13
  61. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +9 -1
  62. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +8 -1
  63. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +17 -3
  64. diffusers/pipelines/controlnet_sd3/__init__.py +4 -0
  65. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +3 -1
  66. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
  67. diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
  68. diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
  69. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +16 -4
  70. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +1 -1
  71. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +1 -1
  72. diffusers/pipelines/flux/__init__.py +10 -0
  73. diffusers/pipelines/flux/pipeline_flux.py +53 -20
  74. diffusers/pipelines/flux/pipeline_flux_controlnet.py +984 -0
  75. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +988 -0
  76. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1182 -0
  77. diffusers/pipelines/flux/pipeline_flux_img2img.py +850 -0
  78. diffusers/pipelines/flux/pipeline_flux_inpaint.py +1015 -0
  79. diffusers/pipelines/free_noise_utils.py +365 -5
  80. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +15 -3
  81. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +2 -2
  82. diffusers/pipelines/kolors/pipeline_kolors.py +1 -1
  83. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +14 -11
  84. diffusers/pipelines/kolors/tokenizer.py +4 -0
  85. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +1 -1
  86. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +1 -1
  87. diffusers/pipelines/latte/pipeline_latte.py +2 -2
  88. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +15 -3
  89. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +15 -3
  90. diffusers/pipelines/lumina/pipeline_lumina.py +2 -2
  91. diffusers/pipelines/pag/__init__.py +6 -0
  92. diffusers/pipelines/pag/pag_utils.py +8 -2
  93. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1 -1
  94. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1544 -0
  95. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +2 -2
  96. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1685 -0
  97. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +17 -5
  98. diffusers/pipelines/pag/pipeline_pag_kolors.py +1 -1
  99. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +1 -1
  100. diffusers/pipelines/pag/pipeline_pag_sd.py +18 -6
  101. diffusers/pipelines/pag/pipeline_pag_sd_3.py +12 -3
  102. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +5 -1
  103. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1091 -0
  104. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +18 -6
  105. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +31 -16
  106. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +42 -19
  107. diffusers/pipelines/pia/pipeline_pia.py +2 -0
  108. diffusers/pipelines/pipeline_loading_utils.py +225 -27
  109. diffusers/pipelines/pipeline_utils.py +123 -180
  110. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +1 -1
  111. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +1 -1
  112. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +35 -3
  113. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +2 -2
  114. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +28 -6
  115. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +1 -1
  116. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +1 -1
  117. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +241 -81
  118. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +12 -3
  119. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +20 -4
  120. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +3 -3
  121. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
  122. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +16 -4
  123. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +16 -4
  124. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -4
  125. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +29 -14
  126. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +29 -14
  127. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +1 -1
  128. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +1 -1
  129. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +16 -4
  130. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +15 -3
  131. diffusers/quantizers/__init__.py +16 -0
  132. diffusers/quantizers/auto.py +126 -0
  133. diffusers/quantizers/base.py +233 -0
  134. diffusers/quantizers/bitsandbytes/__init__.py +2 -0
  135. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +558 -0
  136. diffusers/quantizers/bitsandbytes/utils.py +306 -0
  137. diffusers/quantizers/quantization_config.py +391 -0
  138. diffusers/schedulers/scheduling_ddim.py +4 -1
  139. diffusers/schedulers/scheduling_ddim_cogvideox.py +4 -1
  140. diffusers/schedulers/scheduling_ddim_parallel.py +4 -1
  141. diffusers/schedulers/scheduling_ddpm.py +4 -1
  142. diffusers/schedulers/scheduling_ddpm_parallel.py +4 -1
  143. diffusers/schedulers/scheduling_deis_multistep.py +78 -1
  144. diffusers/schedulers/scheduling_dpmsolver_multistep.py +82 -1
  145. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +80 -1
  146. diffusers/schedulers/scheduling_dpmsolver_sde.py +125 -10
  147. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +82 -1
  148. diffusers/schedulers/scheduling_edm_euler.py +8 -6
  149. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +4 -1
  150. diffusers/schedulers/scheduling_euler_discrete.py +92 -7
  151. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +4 -5
  152. diffusers/schedulers/scheduling_heun_discrete.py +114 -8
  153. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +116 -11
  154. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +110 -8
  155. diffusers/schedulers/scheduling_lms_discrete.py +76 -1
  156. diffusers/schedulers/scheduling_sasolver.py +78 -1
  157. diffusers/schedulers/scheduling_unclip.py +4 -1
  158. diffusers/schedulers/scheduling_unipc_multistep.py +78 -1
  159. diffusers/training_utils.py +48 -18
  160. diffusers/utils/__init__.py +2 -1
  161. diffusers/utils/dummy_pt_objects.py +60 -0
  162. diffusers/utils/dummy_torch_and_transformers_objects.py +195 -0
  163. diffusers/utils/hub_utils.py +16 -4
  164. diffusers/utils/import_utils.py +31 -8
  165. diffusers/utils/loading_utils.py +28 -4
  166. diffusers/utils/peft_utils.py +3 -3
  167. diffusers/utils/testing_utils.py +59 -0
  168. {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/METADATA +7 -6
  169. {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/RECORD +173 -147
  170. {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/WHEEL +1 -1
  171. {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/LICENSE +0 -0
  172. {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/entry_points.txt +0 -0
  173. {diffusers-0.30.2.dist-info → diffusers-0.31.0.dist-info}/top_level.txt +0 -0
@@ -25,7 +25,7 @@ from transformers import (
 
 from ...callbacks import MultiPipelineCallbacks, PipelineCallback
 from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import SD3LoraLoaderMixin
+from ...loaders import FromSingleFileMixin, SD3LoraLoaderMixin
 from ...models.autoencoders import AutoencoderKL
 from ...models.transformers import SD3Transformer2DModel
 from ...schedulers import FlowMatchEulerDiscreteScheduler
@@ -97,7 +97,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
@@ -148,7 +148,7 @@ def retrieve_timesteps(
     return timesteps, num_inference_steps
 
 
-class StableDiffusion3InpaintPipeline(DiffusionPipeline):
+class StableDiffusion3InpaintPipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingleFileMixin):
     r"""
     Args:
         transformer ([`SD3Transformer2DModel`]):
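
With the hunks above, the inpainting pipeline now inherits `SD3LoraLoaderMixin` and `FromSingleFileMixin` (matching the added import), so it picks up the standard LoRA and single-file loading entry points. A minimal usage sketch, assuming the public SD3 medium checkpoint and a placeholder LoRA repository id:

import torch
from diffusers import StableDiffusion3InpaintPipeline

# Load the pipeline as usual; the new mixins only add loader methods on top.
pipe = StableDiffusion3InpaintPipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16
)

# From SD3LoraLoaderMixin: load LoRA weights into the transformer / text encoders.
pipe.load_lora_weights("your-username/sd3-inpaint-lora")  # placeholder repo id

# From FromSingleFileMixin: alternatively build the pipeline from one checkpoint file.
# pipe = StableDiffusion3InpaintPipeline.from_single_file("path/to/sd3_checkpoint.safetensors")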
@@ -602,9 +602,9 @@ class StableDiffusionKDiffusionPipeline(
             sigma_min: float = self.k_diffusion_model.sigmas[0].item()
             sigma_max: float = self.k_diffusion_model.sigmas[-1].item()
             sigmas = get_sigmas_karras(n=num_inference_steps, sigma_min=sigma_min, sigma_max=sigma_max)
-            sigmas = sigmas.to(device)
         else:
             sigmas = self.scheduler.sigmas
+        sigmas = sigmas.to(device)
         sigmas = sigmas.to(prompt_embeds.dtype)
 
         # 6. Prepare latent variables
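
Read after the patch, the surrounding block in StableDiffusionKDiffusionPipeline moves the device transfer out of the Karras branch so both code paths land on the execution device; a condensed sketch of the resulting flow (variable names follow the hunk above, `use_karras_sigmas` is the flag that selects the branch):

if use_karras_sigmas:
    sigma_min: float = self.k_diffusion_model.sigmas[0].item()
    sigma_max: float = self.k_diffusion_model.sigmas[-1].item()
    sigmas = get_sigmas_karras(n=num_inference_steps, sigma_min=sigma_min, sigma_max=sigma_max)
else:
    sigmas = self.scheduler.sigmas
# The transfer now happens once, after the branch, so scheduler-provided sigmas
# are also moved to `device` before the dtype cast.
sigmas = sigmas.to(device)
sigmas = sigmas.to(prompt_embeds.dtype)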
@@ -61,9 +61,21 @@ EXAMPLE_DOC_STRING = """
 
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
-    """
-    Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
-    Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
+    r"""
+    Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
+    Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
+    Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+
+    Args:
+        noise_cfg (`torch.Tensor`):
+            The predicted noise tensor for the guided diffusion process.
+        noise_pred_text (`torch.Tensor`):
+            The predicted noise tensor for the text-guided diffusion process.
+        guidance_rescale (`float`, *optional*, defaults to 0.0):
+            A rescale factor applied to the noise predictions.
+
+    Returns:
+        noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
     """
     std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
     std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
@@ -83,7 +95,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
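
For reference, the body following this docstring (its first two lines appear as context above) rescales the guided prediction by the ratio of standard deviations and then blends it back in. A sketch of the full computation the new Args/Returns section documents, assuming the rest of the function is unchanged from previous releases:

std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
# Rescale the guided prediction so its std matches the text-conditioned one (fixes overexposure).
noise_pred_rescaled = noise_cfg * (std_text / std_cfg)
# Mix with the original prediction to avoid overly plain-looking images.
noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg
return noise_cfg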
@@ -61,9 +61,21 @@ EXAMPLE_DOC_STRING = """
 
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
-    """
-    Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
-    Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
+    r"""
+    Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
+    Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
+    Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+
+    Args:
+        noise_cfg (`torch.Tensor`):
+            The predicted noise tensor for the guided diffusion process.
+        noise_pred_text (`torch.Tensor`):
+            The predicted noise tensor for the text-guided diffusion process.
+        guidance_rescale (`float`, *optional*, defaults to 0.0):
+            A rescale factor applied to the noise predictions.
+
+    Returns:
+        noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
     """
     std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
     std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
@@ -83,7 +95,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
@@ -87,9 +87,21 @@ EXAMPLE_DOC_STRING = """
 
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
-    """
-    Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
-    Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
+    r"""
+    Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
+    Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
+    Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+
+    Args:
+        noise_cfg (`torch.Tensor`):
+            The predicted noise tensor for the guided diffusion process.
+        noise_pred_text (`torch.Tensor`):
+            The predicted noise tensor for the text-guided diffusion process.
+        guidance_rescale (`float`, *optional*, defaults to 0.0):
+            A rescale factor applied to the noise predictions.
+
+    Returns:
+        noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
     """
     std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
     std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
@@ -109,7 +121,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
@@ -90,9 +90,21 @@ EXAMPLE_DOC_STRING = """
 
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
-    """
-    Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
-    Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
+    r"""
+    Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
+    Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
+    Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+
+    Args:
+        noise_cfg (`torch.Tensor`):
+            The predicted noise tensor for the guided diffusion process.
+        noise_pred_text (`torch.Tensor`):
+            The predicted noise tensor for the text-guided diffusion process.
+        guidance_rescale (`float`, *optional*, defaults to 0.0):
+            A rescale factor applied to the noise predictions.
+
+    Returns:
+        noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
     """
     std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
     std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
@@ -126,7 +138,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
@@ -640,14 +652,16 @@ class StableDiffusionXLImg2ImgPipeline(
         if denoising_start is None:
             init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
             t_start = max(num_inference_steps - init_timestep, 0)
-        else:
-            t_start = 0
 
-        timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start * self.scheduler.order)
 
-        # Strength is irrelevant if we directly request a timestep to start at;
-        # that is, strength is determined by the denoising_start instead.
-        if denoising_start is not None:
+            return timesteps, num_inference_steps - t_start
+
+        else:
+            # Strength is irrelevant if we directly request a timestep to start at;
+            # that is, strength is determined by the denoising_start instead.
             discrete_timestep_cutoff = int(
                 round(
                     self.scheduler.config.num_train_timesteps
@@ -655,7 +669,7 @@ class StableDiffusionXLImg2ImgPipeline(
                 )
             )
 
-            num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item()
+            num_inference_steps = (self.scheduler.timesteps < discrete_timestep_cutoff).sum().item()
             if self.scheduler.order == 2 and num_inference_steps % 2 == 0:
                 # if the scheduler is a 2nd order scheduler we might have to do +1
                 # because `num_inference_steps` might be even given that every timestep
@@ -666,11 +680,12 @@ class StableDiffusionXLImg2ImgPipeline(
                 num_inference_steps = num_inference_steps + 1
 
             # because t_n+1 >= t_n, we slice the timesteps starting from the end
-            timesteps = timesteps[-num_inference_steps:]
+            t_start = len(self.scheduler.timesteps) - num_inference_steps
+            timesteps = self.scheduler.timesteps[t_start:]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start)
             return timesteps, num_inference_steps
 
-        return timesteps, num_inference_steps - t_start
-
     def prepare_latents(
         self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None, add_noise=True
     ):
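
The reworked `get_timesteps` above keeps the strength-based path and the `denoising_start` path in separate branches and, where the scheduler supports it, calls `set_begin_index` so the scheduler knows which entry of its timestep table the loop starts from. `denoising_start` is what drives the SDXL base-to-refiner handoff; a hedged usage sketch with the standard SDXL checkpoints:

import torch
from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionXLPipeline

base = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")
refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16
).to("cuda")

prompt = "a photo of an astronaut riding a horse on mars"
# The base pipeline denoises the first 80% of the schedule and hands latents over ...
latents = base(prompt=prompt, denoising_end=0.8, output_type="latent").images
# ... and the refiner resumes at the matching point; strength is ignored on this path.
image = refiner(prompt=prompt, image=latents, denoising_start=0.8).images[0]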
@@ -101,9 +101,21 @@ EXAMPLE_DOC_STRING = """
 
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
-    """
-    Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
-    Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
+    r"""
+    Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
+    Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
+    Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+
+    Args:
+        noise_cfg (`torch.Tensor`):
+            The predicted noise tensor for the guided diffusion process.
+        noise_pred_text (`torch.Tensor`):
+            The predicted noise tensor for the text-guided diffusion process.
+        guidance_rescale (`float`, *optional*, defaults to 0.0):
+            A rescale factor applied to the noise predictions.
+
+    Returns:
+        noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
     """
     std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
     std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
@@ -153,7 +165,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
@@ -901,14 +913,16 @@ class StableDiffusionXLInpaintPipeline(
         if denoising_start is None:
             init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
             t_start = max(num_inference_steps - init_timestep, 0)
-        else:
-            t_start = 0
 
-        timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start * self.scheduler.order)
 
-        # Strength is irrelevant if we directly request a timestep to start at;
-        # that is, strength is determined by the denoising_start instead.
-        if denoising_start is not None:
+            return timesteps, num_inference_steps - t_start
+
+        else:
+            # Strength is irrelevant if we directly request a timestep to start at;
+            # that is, strength is determined by the denoising_start instead.
             discrete_timestep_cutoff = int(
                 round(
                     self.scheduler.config.num_train_timesteps
@@ -916,7 +930,7 @@ class StableDiffusionXLInpaintPipeline(
                 )
             )
 
-            num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item()
+            num_inference_steps = (self.scheduler.timesteps < discrete_timestep_cutoff).sum().item()
             if self.scheduler.order == 2 and num_inference_steps % 2 == 0:
                 # if the scheduler is a 2nd order scheduler we might have to do +1
                 # because `num_inference_steps` might be even given that every timestep
@@ -927,11 +941,12 @@ class StableDiffusionXLInpaintPipeline(
                 num_inference_steps = num_inference_steps + 1
 
             # because t_n+1 >= t_n, we slice the timesteps starting from the end
-            timesteps = timesteps[-num_inference_steps:]
+            t_start = len(self.scheduler.timesteps) - num_inference_steps
+            timesteps = self.scheduler.timesteps[t_start:]
+            if hasattr(self.scheduler, "set_begin_index"):
+                self.scheduler.set_begin_index(t_start)
             return timesteps, num_inference_steps
 
-        return timesteps, num_inference_steps - t_start
-
     # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_img2img.StableDiffusionXLImg2ImgPipeline._get_add_time_ids
     def _get_add_time_ids(
         self,
@@ -71,7 +71,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
@@ -127,7 +127,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
@@ -119,9 +119,21 @@ def _preprocess_adapter_image(image, height, width):
 
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
-    """
-    Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
-    Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
+    r"""
+    Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
+    Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
+    Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+
+    Args:
+        noise_cfg (`torch.Tensor`):
+            The predicted noise tensor for the guided diffusion process.
+        noise_pred_text (`torch.Tensor`):
+            The predicted noise tensor for the text-guided diffusion process.
+        guidance_rescale (`float`, *optional*, defaults to 0.0):
+            A rescale factor applied to the noise predictions.
+
+    Returns:
+        noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
     """
     std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
     std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
@@ -141,7 +153,7 @@ def retrieve_timesteps(
     sigmas: Optional[List[float]] = None,
     **kwargs,
 ):
-    """
+    r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
 
@@ -310,9 +310,21 @@ def create_motion_field_and_warp_latents(motion_field_strength_x, motion_field_s
 
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
-    """
-    Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
-    Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
+    r"""
+    Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
+    Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
+    Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+
+    Args:
+        noise_cfg (`torch.Tensor`):
+            The predicted noise tensor for the guided diffusion process.
+        noise_pred_text (`torch.Tensor`):
+            The predicted noise tensor for the text-guided diffusion process.
+        guidance_rescale (`float`, *optional*, defaults to 0.0):
+            A rescale factor applied to the noise predictions.
+
+    Returns:
+        noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
     """
     std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
     std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
@@ -0,0 +1,16 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .auto import DiffusersAutoQuantizer
+from .base import DiffusersQuantizer
@@ -0,0 +1,126 @@
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Adapted from
+https://github.com/huggingface/transformers/blob/c409cd81777fb27aadc043ed3d8339dbc020fb3b/src/transformers/quantizers/auto.py
+"""
+import warnings
+from typing import Dict, Optional, Union
+
+from .bitsandbytes import BnB4BitDiffusersQuantizer, BnB8BitDiffusersQuantizer
+from .quantization_config import BitsAndBytesConfig, QuantizationConfigMixin, QuantizationMethod
+
+
+AUTO_QUANTIZER_MAPPING = {
+    "bitsandbytes_4bit": BnB4BitDiffusersQuantizer,
+    "bitsandbytes_8bit": BnB8BitDiffusersQuantizer,
+}
+
+AUTO_QUANTIZATION_CONFIG_MAPPING = {
+    "bitsandbytes_4bit": BitsAndBytesConfig,
+    "bitsandbytes_8bit": BitsAndBytesConfig,
+}
+
+
+class DiffusersAutoQuantizer:
+    """
+    The auto diffusers quantizer class that takes care of automatically instantiating to the correct
+    `DiffusersQuantizer` given the `QuantizationConfig`.
+    """
+
+    @classmethod
+    def from_dict(cls, quantization_config_dict: Dict):
+        quant_method = quantization_config_dict.get("quant_method", None)
+        # We need a special care for bnb models to make sure everything is BC ..
+        if quantization_config_dict.get("load_in_8bit", False) or quantization_config_dict.get("load_in_4bit", False):
+            suffix = "_4bit" if quantization_config_dict.get("load_in_4bit", False) else "_8bit"
+            quant_method = QuantizationMethod.BITS_AND_BYTES + suffix
+        elif quant_method is None:
+            raise ValueError(
+                "The model's quantization config from the arguments has no `quant_method` attribute. Make sure that the model has been correctly quantized"
+            )
+
+        if quant_method not in AUTO_QUANTIZATION_CONFIG_MAPPING.keys():
+            raise ValueError(
+                f"Unknown quantization type, got {quant_method} - supported types are:"
+                f" {list(AUTO_QUANTIZER_MAPPING.keys())}"
+            )
+
+        target_cls = AUTO_QUANTIZATION_CONFIG_MAPPING[quant_method]
+        return target_cls.from_dict(quantization_config_dict)
+
+    @classmethod
+    def from_config(cls, quantization_config: Union[QuantizationConfigMixin, Dict], **kwargs):
+        # Convert it to a QuantizationConfig if the q_config is a dict
+        if isinstance(quantization_config, dict):
+            quantization_config = cls.from_dict(quantization_config)
+
+        quant_method = quantization_config.quant_method
+
+        # Again, we need a special care for bnb as we have a single quantization config
+        # class for both 4-bit and 8-bit quantization
+        if quant_method == QuantizationMethod.BITS_AND_BYTES:
+            if quantization_config.load_in_8bit:
+                quant_method += "_8bit"
+            else:
+                quant_method += "_4bit"
+
+        if quant_method not in AUTO_QUANTIZER_MAPPING.keys():
+            raise ValueError(
+                f"Unknown quantization type, got {quant_method} - supported types are:"
+                f" {list(AUTO_QUANTIZER_MAPPING.keys())}"
+            )
+
+        target_cls = AUTO_QUANTIZER_MAPPING[quant_method]
+        return target_cls(quantization_config, **kwargs)
+
+    @classmethod
+    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
+        model_config = cls.load_config(pretrained_model_name_or_path, **kwargs)
+        if getattr(model_config, "quantization_config", None) is None:
+            raise ValueError(
+                f"Did not found a `quantization_config` in {pretrained_model_name_or_path}. Make sure that the model is correctly quantized."
+            )
+        quantization_config_dict = model_config.quantization_config
+        quantization_config = cls.from_dict(quantization_config_dict)
+        # Update with potential kwargs that are passed through from_pretrained.
+        quantization_config.update(kwargs)
+
+        return cls.from_config(quantization_config)
+
+    @classmethod
+    def merge_quantization_configs(
+        cls,
+        quantization_config: Union[dict, QuantizationConfigMixin],
+        quantization_config_from_args: Optional[QuantizationConfigMixin],
+    ):
+        """
+        handles situations where both quantization_config from args and quantization_config from model config are
+        present.
+        """
+        if quantization_config_from_args is not None:
+            warning_msg = (
+                "You passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading"
+                " already has a `quantization_config` attribute. The `quantization_config` from the model will be used."
+            )
+        else:
+            warning_msg = ""
+
+        if isinstance(quantization_config, dict):
+            quantization_config = cls.from_dict(quantization_config)
+
+        if warning_msg != "":
+            warnings.warn(warning_msg)
+
+        return quantization_config
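
`DiffusersAutoQuantizer` is mostly internal plumbing; together with the new `quantization_config.py` it backs bitsandbytes loading on models. A hedged usage sketch, assuming 0.31.0 exposes `BitsAndBytesConfig` at the top level and that model `from_pretrained` accepts `quantization_config`, as the other new files in this diff suggest:

import torch
from diffusers import BitsAndBytesConfig, SD3Transformer2DModel
from diffusers.quantizers import DiffusersAutoQuantizer

# 4-bit NF4 config; the argument names mirror the transformers BitsAndBytesConfig.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Resolving the matching quantizer, the same way the loading code would.
quantizer = DiffusersAutoQuantizer.from_config(quant_config)

# Typical end-user path: pass the config straight to a model's from_pretrained.
transformer = SD3Transformer2DModel.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)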