diffusers 0.34.0__py3-none-any.whl → 0.35.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191)
  1. diffusers/__init__.py +98 -1
  2. diffusers/callbacks.py +35 -0
  3. diffusers/commands/custom_blocks.py +134 -0
  4. diffusers/commands/diffusers_cli.py +2 -0
  5. diffusers/commands/fp16_safetensors.py +1 -1
  6. diffusers/configuration_utils.py +11 -2
  7. diffusers/dependency_versions_table.py +3 -3
  8. diffusers/guiders/__init__.py +41 -0
  9. diffusers/guiders/adaptive_projected_guidance.py +188 -0
  10. diffusers/guiders/auto_guidance.py +190 -0
  11. diffusers/guiders/classifier_free_guidance.py +141 -0
  12. diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
  13. diffusers/guiders/frequency_decoupled_guidance.py +327 -0
  14. diffusers/guiders/guider_utils.py +309 -0
  15. diffusers/guiders/perturbed_attention_guidance.py +271 -0
  16. diffusers/guiders/skip_layer_guidance.py +262 -0
  17. diffusers/guiders/smoothed_energy_guidance.py +251 -0
  18. diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
  19. diffusers/hooks/__init__.py +17 -0
  20. diffusers/hooks/_common.py +56 -0
  21. diffusers/hooks/_helpers.py +293 -0
  22. diffusers/hooks/faster_cache.py +7 -6
  23. diffusers/hooks/first_block_cache.py +259 -0
  24. diffusers/hooks/group_offloading.py +292 -286
  25. diffusers/hooks/hooks.py +56 -1
  26. diffusers/hooks/layer_skip.py +263 -0
  27. diffusers/hooks/layerwise_casting.py +2 -7
  28. diffusers/hooks/pyramid_attention_broadcast.py +14 -11
  29. diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
  30. diffusers/hooks/utils.py +43 -0
  31. diffusers/loaders/__init__.py +6 -0
  32. diffusers/loaders/ip_adapter.py +255 -4
  33. diffusers/loaders/lora_base.py +63 -30
  34. diffusers/loaders/lora_conversion_utils.py +434 -53
  35. diffusers/loaders/lora_pipeline.py +834 -37
  36. diffusers/loaders/peft.py +28 -5
  37. diffusers/loaders/single_file_model.py +44 -11
  38. diffusers/loaders/single_file_utils.py +170 -2
  39. diffusers/loaders/transformer_flux.py +9 -10
  40. diffusers/loaders/transformer_sd3.py +6 -1
  41. diffusers/loaders/unet.py +22 -5
  42. diffusers/loaders/unet_loader_utils.py +5 -2
  43. diffusers/models/__init__.py +8 -0
  44. diffusers/models/attention.py +484 -3
  45. diffusers/models/attention_dispatch.py +1218 -0
  46. diffusers/models/attention_processor.py +105 -663
  47. diffusers/models/auto_model.py +2 -2
  48. diffusers/models/autoencoders/__init__.py +1 -0
  49. diffusers/models/autoencoders/autoencoder_dc.py +14 -1
  50. diffusers/models/autoencoders/autoencoder_kl.py +1 -1
  51. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +3 -1
  52. diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
  53. diffusers/models/autoencoders/autoencoder_kl_wan.py +370 -40
  54. diffusers/models/cache_utils.py +31 -9
  55. diffusers/models/controlnets/controlnet_flux.py +5 -5
  56. diffusers/models/controlnets/controlnet_union.py +4 -4
  57. diffusers/models/embeddings.py +26 -34
  58. diffusers/models/model_loading_utils.py +233 -1
  59. diffusers/models/modeling_flax_utils.py +1 -2
  60. diffusers/models/modeling_utils.py +159 -94
  61. diffusers/models/transformers/__init__.py +2 -0
  62. diffusers/models/transformers/transformer_chroma.py +16 -117
  63. diffusers/models/transformers/transformer_cogview4.py +36 -2
  64. diffusers/models/transformers/transformer_cosmos.py +11 -4
  65. diffusers/models/transformers/transformer_flux.py +372 -132
  66. diffusers/models/transformers/transformer_hunyuan_video.py +6 -0
  67. diffusers/models/transformers/transformer_ltx.py +104 -23
  68. diffusers/models/transformers/transformer_qwenimage.py +645 -0
  69. diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
  70. diffusers/models/transformers/transformer_wan.py +298 -85
  71. diffusers/models/transformers/transformer_wan_vace.py +15 -21
  72. diffusers/models/unets/unet_2d_condition.py +2 -1
  73. diffusers/modular_pipelines/__init__.py +83 -0
  74. diffusers/modular_pipelines/components_manager.py +1068 -0
  75. diffusers/modular_pipelines/flux/__init__.py +66 -0
  76. diffusers/modular_pipelines/flux/before_denoise.py +689 -0
  77. diffusers/modular_pipelines/flux/decoders.py +109 -0
  78. diffusers/modular_pipelines/flux/denoise.py +227 -0
  79. diffusers/modular_pipelines/flux/encoders.py +412 -0
  80. diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
  81. diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
  82. diffusers/modular_pipelines/modular_pipeline.py +2446 -0
  83. diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
  84. diffusers/modular_pipelines/node_utils.py +665 -0
  85. diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
  86. diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
  87. diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
  88. diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
  89. diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
  90. diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
  91. diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
  92. diffusers/modular_pipelines/wan/__init__.py +66 -0
  93. diffusers/modular_pipelines/wan/before_denoise.py +365 -0
  94. diffusers/modular_pipelines/wan/decoders.py +105 -0
  95. diffusers/modular_pipelines/wan/denoise.py +261 -0
  96. diffusers/modular_pipelines/wan/encoders.py +242 -0
  97. diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
  98. diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
  99. diffusers/pipelines/__init__.py +31 -0
  100. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +2 -3
  101. diffusers/pipelines/auto_pipeline.py +17 -13
  102. diffusers/pipelines/chroma/pipeline_chroma.py +5 -5
  103. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +5 -5
  104. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +9 -8
  105. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +9 -8
  106. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +10 -9
  107. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +9 -8
  108. diffusers/pipelines/cogview4/pipeline_cogview4.py +16 -15
  109. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +3 -2
  110. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +212 -93
  111. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +7 -3
  112. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +194 -92
  113. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +1 -1
  114. diffusers/pipelines/dit/pipeline_dit.py +3 -1
  115. diffusers/pipelines/flux/__init__.py +4 -0
  116. diffusers/pipelines/flux/pipeline_flux.py +34 -26
  117. diffusers/pipelines/flux/pipeline_flux_control.py +8 -8
  118. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +1 -1
  119. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1 -1
  120. diffusers/pipelines/flux/pipeline_flux_controlnet.py +1 -1
  121. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +1 -1
  122. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1 -1
  123. diffusers/pipelines/flux/pipeline_flux_fill.py +1 -1
  124. diffusers/pipelines/flux/pipeline_flux_img2img.py +1 -1
  125. diffusers/pipelines/flux/pipeline_flux_inpaint.py +1 -1
  126. diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
  127. diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
  128. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
  129. diffusers/pipelines/flux/pipeline_output.py +6 -4
  130. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +5 -5
  131. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +25 -24
  132. diffusers/pipelines/ltx/pipeline_ltx.py +13 -12
  133. diffusers/pipelines/ltx/pipeline_ltx_condition.py +10 -9
  134. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +13 -12
  135. diffusers/pipelines/mochi/pipeline_mochi.py +9 -8
  136. diffusers/pipelines/pipeline_flax_utils.py +2 -2
  137. diffusers/pipelines/pipeline_loading_utils.py +24 -2
  138. diffusers/pipelines/pipeline_utils.py +22 -15
  139. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +3 -1
  140. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +20 -0
  141. diffusers/pipelines/qwenimage/__init__.py +55 -0
  142. diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
  143. diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
  144. diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +849 -0
  145. diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
  146. diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
  147. diffusers/pipelines/sana/pipeline_sana_sprint.py +5 -5
  148. diffusers/pipelines/skyreels_v2/__init__.py +59 -0
  149. diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
  150. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
  151. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
  152. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
  153. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
  154. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
  155. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -1
  156. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +1 -1
  157. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +1 -1
  158. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +2 -1
  159. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +6 -5
  160. diffusers/pipelines/wan/pipeline_wan.py +78 -20
  161. diffusers/pipelines/wan/pipeline_wan_i2v.py +112 -32
  162. diffusers/pipelines/wan/pipeline_wan_vace.py +1 -2
  163. diffusers/quantizers/__init__.py +1 -177
  164. diffusers/quantizers/base.py +11 -0
  165. diffusers/quantizers/gguf/utils.py +92 -3
  166. diffusers/quantizers/pipe_quant_config.py +202 -0
  167. diffusers/quantizers/torchao/torchao_quantizer.py +26 -0
  168. diffusers/schedulers/scheduling_deis_multistep.py +8 -1
  169. diffusers/schedulers/scheduling_dpmsolver_multistep.py +6 -0
  170. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +6 -0
  171. diffusers/schedulers/scheduling_scm.py +0 -1
  172. diffusers/schedulers/scheduling_unipc_multistep.py +10 -1
  173. diffusers/schedulers/scheduling_utils.py +2 -2
  174. diffusers/schedulers/scheduling_utils_flax.py +1 -1
  175. diffusers/training_utils.py +78 -0
  176. diffusers/utils/__init__.py +10 -0
  177. diffusers/utils/constants.py +4 -0
  178. diffusers/utils/dummy_pt_objects.py +312 -0
  179. diffusers/utils/dummy_torch_and_transformers_objects.py +255 -0
  180. diffusers/utils/dynamic_modules_utils.py +84 -25
  181. diffusers/utils/hub_utils.py +33 -17
  182. diffusers/utils/import_utils.py +70 -0
  183. diffusers/utils/peft_utils.py +11 -8
  184. diffusers/utils/testing_utils.py +136 -10
  185. diffusers/utils/torch_utils.py +18 -0
  186. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/METADATA +6 -6
  187. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/RECORD +191 -127
  188. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/LICENSE +0 -0
  189. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/WHEEL +0 -0
  190. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/entry_points.txt +0 -0
  191. {diffusers-0.34.0.dist-info → diffusers-0.35.1.dist-info}/top_level.txt +0 -0
diffusers/guiders/adaptive_projected_guidance.py
@@ -0,0 +1,188 @@
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import math
+ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
+
+ import torch
+
+ from ..configuration_utils import register_to_config
+ from .guider_utils import BaseGuidance, rescale_noise_cfg
+
+
+ if TYPE_CHECKING:
+     from ..modular_pipelines.modular_pipeline import BlockState
+
+
+ class AdaptiveProjectedGuidance(BaseGuidance):
+     """
+     Adaptive Projected Guidance (APG): https://huggingface.co/papers/2410.02416
+
+     Args:
+         guidance_scale (`float`, defaults to `7.5`):
+             The scale parameter for classifier-free guidance. Higher values result in stronger conditioning on the text
+             prompt, while lower values allow for more freedom in generation. Higher values may lead to saturation and
+             deterioration of image quality.
+         adaptive_projected_guidance_momentum (`float`, defaults to `None`):
+             The momentum parameter for the adaptive projected guidance. Disabled if set to `None`.
+         adaptive_projected_guidance_rescale (`float`, defaults to `15.0`):
+             The rescale factor applied to the noise predictions. This is used to improve image quality and fix
+             overexposure.
+         guidance_rescale (`float`, defaults to `0.0`):
+             The rescale factor applied to the noise predictions. This is used to improve image quality and fix
+             overexposure. Based on Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
+             Flawed](https://huggingface.co/papers/2305.08891).
+         use_original_formulation (`bool`, defaults to `False`):
+             Whether to use the original formulation of classifier-free guidance as proposed in the paper. By default,
+             we use the diffusers-native implementation that has been in the codebase for a long time. See
+             [~guiders.classifier_free_guidance.ClassifierFreeGuidance] for more details.
+         start (`float`, defaults to `0.0`):
+             The fraction of the total number of denoising steps after which guidance starts.
+         stop (`float`, defaults to `1.0`):
+             The fraction of the total number of denoising steps after which guidance stops.
+     """
+
+     _input_predictions = ["pred_cond", "pred_uncond"]
+
+     @register_to_config
+     def __init__(
+         self,
+         guidance_scale: float = 7.5,
+         adaptive_projected_guidance_momentum: Optional[float] = None,
+         adaptive_projected_guidance_rescale: float = 15.0,
+         eta: float = 1.0,
+         guidance_rescale: float = 0.0,
+         use_original_formulation: bool = False,
+         start: float = 0.0,
+         stop: float = 1.0,
+     ):
+         super().__init__(start, stop)
+
+         self.guidance_scale = guidance_scale
+         self.adaptive_projected_guidance_momentum = adaptive_projected_guidance_momentum
+         self.adaptive_projected_guidance_rescale = adaptive_projected_guidance_rescale
+         self.eta = eta
+         self.guidance_rescale = guidance_rescale
+         self.use_original_formulation = use_original_formulation
+         self.momentum_buffer = None
+
+     def prepare_inputs(
+         self, data: "BlockState", input_fields: Optional[Dict[str, Union[str, Tuple[str, str]]]] = None
+     ) -> List["BlockState"]:
+         if input_fields is None:
+             input_fields = self._input_fields
+
+         if self._step == 0:
+             if self.adaptive_projected_guidance_momentum is not None:
+                 self.momentum_buffer = MomentumBuffer(self.adaptive_projected_guidance_momentum)
+         tuple_indices = [0] if self.num_conditions == 1 else [0, 1]
+         data_batches = []
+         for i in range(self.num_conditions):
+             data_batch = self._prepare_batch(input_fields, data, tuple_indices[i], self._input_predictions[i])
+             data_batches.append(data_batch)
+         return data_batches
+
+     def forward(self, pred_cond: torch.Tensor, pred_uncond: Optional[torch.Tensor] = None) -> torch.Tensor:
+         pred = None
+
+         if not self._is_apg_enabled():
+             pred = pred_cond
+         else:
+             pred = normalized_guidance(
+                 pred_cond,
+                 pred_uncond,
+                 self.guidance_scale,
+                 self.momentum_buffer,
+                 self.eta,
+                 self.adaptive_projected_guidance_rescale,
+                 self.use_original_formulation,
+             )
+
+         if self.guidance_rescale > 0.0:
+             pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale)
+
+         return pred, {}
+
+     @property
+     def is_conditional(self) -> bool:
+         return self._count_prepared == 1
+
+     @property
+     def num_conditions(self) -> int:
+         num_conditions = 1
+         if self._is_apg_enabled():
+             num_conditions += 1
+         return num_conditions
+
+     def _is_apg_enabled(self) -> bool:
+         if not self._enabled:
+             return False
+
+         is_within_range = True
+         if self._num_inference_steps is not None:
+             skip_start_step = int(self._start * self._num_inference_steps)
+             skip_stop_step = int(self._stop * self._num_inference_steps)
+             is_within_range = skip_start_step <= self._step < skip_stop_step
+
+         is_close = False
+         if self.use_original_formulation:
+             is_close = math.isclose(self.guidance_scale, 0.0)
+         else:
+             is_close = math.isclose(self.guidance_scale, 1.0)
+
+         return is_within_range and not is_close
+
+
+ class MomentumBuffer:
+     def __init__(self, momentum: float):
+         self.momentum = momentum
+         self.running_average = 0
+
+     def update(self, update_value: torch.Tensor):
+         new_average = self.momentum * self.running_average
+         self.running_average = update_value + new_average
+
+
+ def normalized_guidance(
+     pred_cond: torch.Tensor,
+     pred_uncond: torch.Tensor,
+     guidance_scale: float,
+     momentum_buffer: Optional[MomentumBuffer] = None,
+     eta: float = 1.0,
+     norm_threshold: float = 0.0,
+     use_original_formulation: bool = False,
+ ):
+     diff = pred_cond - pred_uncond
+     dim = [-i for i in range(1, len(diff.shape))]
+
+     if momentum_buffer is not None:
+         momentum_buffer.update(diff)
+         diff = momentum_buffer.running_average
+
+     if norm_threshold > 0:
+         ones = torch.ones_like(diff)
+         diff_norm = diff.norm(p=2, dim=dim, keepdim=True)
+         scale_factor = torch.minimum(ones, norm_threshold / diff_norm)
+         diff = diff * scale_factor
+
+     v0, v1 = diff.double(), pred_cond.double()
+     v1 = torch.nn.functional.normalize(v1, dim=dim)
+     v0_parallel = (v0 * v1).sum(dim=dim, keepdim=True) * v1
+     v0_orthogonal = v0 - v0_parallel
+     diff_parallel, diff_orthogonal = v0_parallel.type_as(diff), v0_orthogonal.type_as(diff)
+     normalized_update = diff_orthogonal + eta * diff_parallel
+
+     pred = pred_cond if use_original_formulation else pred_uncond
+     pred = pred + guidance_scale * normalized_update
+
+     return pred
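
For reference, here is a minimal standalone sketch of the APG update implemented by `normalized_guidance` above, run on dummy tensors (shapes and values are illustrative, not taken from any pipeline): the guidance direction is optionally norm-clipped, split into components parallel and orthogonal to the conditional prediction, and the parallel part is down-weighted by `eta` before the usual scale-and-shift.

import torch

# Dummy noise predictions shaped like a latent batch: (batch, channels, height, width).
pred_cond = torch.randn(2, 4, 64, 64)
pred_uncond = torch.randn(2, 4, 64, 64)

guidance_scale, eta, norm_threshold = 7.5, 1.0, 15.0

diff = pred_cond - pred_uncond
dim = [-i for i in range(1, diff.ndim)]  # all non-batch dimensions

# Clip the guidance direction so its per-sample norm never exceeds `norm_threshold`.
diff_norm = diff.norm(p=2, dim=dim, keepdim=True)
diff = diff * torch.minimum(torch.ones_like(diff), norm_threshold / diff_norm)

# Project the direction onto the (normalized) conditional prediction.
v1 = torch.nn.functional.normalize(pred_cond.double(), dim=dim)
v0 = diff.double()
parallel = (v0 * v1).sum(dim=dim, keepdim=True) * v1
orthogonal = v0 - parallel

# Down-weight the parallel component by `eta`, then apply the usual CFG-style update.
update = (orthogonal + eta * parallel).type_as(diff)
pred = pred_uncond + guidance_scale * update
print(pred.shape)  # torch.Size([2, 4, 64, 64])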
diffusers/guiders/auto_guidance.py
@@ -0,0 +1,190 @@
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import math
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
+
+ import torch
+
+ from ..configuration_utils import register_to_config
+ from ..hooks import HookRegistry, LayerSkipConfig
+ from ..hooks.layer_skip import _apply_layer_skip_hook
+ from .guider_utils import BaseGuidance, rescale_noise_cfg
+
+
+ if TYPE_CHECKING:
+     from ..modular_pipelines.modular_pipeline import BlockState
+
+
+ class AutoGuidance(BaseGuidance):
+     """
+     AutoGuidance: https://huggingface.co/papers/2406.02507
+
+     Args:
+         guidance_scale (`float`, defaults to `7.5`):
+             The scale parameter for classifier-free guidance. Higher values result in stronger conditioning on the text
+             prompt, while lower values allow for more freedom in generation. Higher values may lead to saturation and
+             deterioration of image quality.
+         auto_guidance_layers (`int` or `List[int]`, *optional*):
+             The layer indices to apply skip layer guidance to. Can be a single integer or a list of integers. If not
+             provided, `auto_guidance_config` must be provided.
+         auto_guidance_config (`LayerSkipConfig` or `List[LayerSkipConfig]`, *optional*):
+             The configuration for the skip layer guidance. Can be a single `LayerSkipConfig` or a list of
+             `LayerSkipConfig`. If not provided, `auto_guidance_layers` must be provided.
+         dropout (`float`, *optional*):
+             The dropout probability for autoguidance on the enabled skip layers (either with `auto_guidance_layers` or
+             `auto_guidance_config`). If not provided, the dropout probability will be set to 1.0.
+         guidance_rescale (`float`, defaults to `0.0`):
+             The rescale factor applied to the noise predictions. This is used to improve image quality and fix
+             overexposure. Based on Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
+             Flawed](https://huggingface.co/papers/2305.08891).
+         use_original_formulation (`bool`, defaults to `False`):
+             Whether to use the original formulation of classifier-free guidance as proposed in the paper. By default,
+             we use the diffusers-native implementation that has been in the codebase for a long time. See
+             [~guiders.classifier_free_guidance.ClassifierFreeGuidance] for more details.
+         start (`float`, defaults to `0.0`):
+             The fraction of the total number of denoising steps after which guidance starts.
+         stop (`float`, defaults to `1.0`):
+             The fraction of the total number of denoising steps after which guidance stops.
+     """
+
+     _input_predictions = ["pred_cond", "pred_uncond"]
+
+     @register_to_config
+     def __init__(
+         self,
+         guidance_scale: float = 7.5,
+         auto_guidance_layers: Optional[Union[int, List[int]]] = None,
+         auto_guidance_config: Union[LayerSkipConfig, List[LayerSkipConfig], Dict[str, Any]] = None,
+         dropout: Optional[float] = None,
+         guidance_rescale: float = 0.0,
+         use_original_formulation: bool = False,
+         start: float = 0.0,
+         stop: float = 1.0,
+     ):
+         super().__init__(start, stop)
+
+         self.guidance_scale = guidance_scale
+         self.auto_guidance_layers = auto_guidance_layers
+         self.auto_guidance_config = auto_guidance_config
+         self.dropout = dropout
+         self.guidance_rescale = guidance_rescale
+         self.use_original_formulation = use_original_formulation
+
+         if auto_guidance_layers is None and auto_guidance_config is None:
+             raise ValueError(
+                 "Either `auto_guidance_layers` or `auto_guidance_config` must be provided to enable AutoGuidance."
+             )
+         if auto_guidance_layers is not None and auto_guidance_config is not None:
+             raise ValueError("Only one of `auto_guidance_layers` or `auto_guidance_config` can be provided.")
+         if (dropout is None and auto_guidance_layers is not None) or (
+             dropout is not None and auto_guidance_layers is None
+         ):
+             raise ValueError("`dropout` must be provided if `auto_guidance_layers` is provided.")
+
+         if auto_guidance_layers is not None:
+             if isinstance(auto_guidance_layers, int):
+                 auto_guidance_layers = [auto_guidance_layers]
+             if not isinstance(auto_guidance_layers, list):
+                 raise ValueError(
+                     f"Expected `auto_guidance_layers` to be an int or a list of ints, but got {type(auto_guidance_layers)}."
+                 )
+             auto_guidance_config = [
+                 LayerSkipConfig(layer, fqn="auto", dropout=dropout) for layer in auto_guidance_layers
+             ]
+
+         if isinstance(auto_guidance_config, dict):
+             auto_guidance_config = LayerSkipConfig.from_dict(auto_guidance_config)
+
+         if isinstance(auto_guidance_config, LayerSkipConfig):
+             auto_guidance_config = [auto_guidance_config]
+
+         if not isinstance(auto_guidance_config, list):
+             raise ValueError(
+                 f"Expected `auto_guidance_config` to be a LayerSkipConfig or a list of LayerSkipConfig, but got {type(auto_guidance_config)}."
+             )
+         elif isinstance(next(iter(auto_guidance_config), None), dict):
+             auto_guidance_config = [LayerSkipConfig.from_dict(config) for config in auto_guidance_config]
+
+         self.auto_guidance_config = auto_guidance_config
+         self._auto_guidance_hook_names = [f"AutoGuidance_{i}" for i in range(len(self.auto_guidance_config))]
+
+     def prepare_models(self, denoiser: torch.nn.Module) -> None:
+         self._count_prepared += 1
+         if self._is_ag_enabled() and self.is_unconditional:
+             for name, config in zip(self._auto_guidance_hook_names, self.auto_guidance_config):
+                 _apply_layer_skip_hook(denoiser, config, name=name)
+
+     def cleanup_models(self, denoiser: torch.nn.Module) -> None:
+         if self._is_ag_enabled() and self.is_unconditional:
+             for name in self._auto_guidance_hook_names:
+                 registry = HookRegistry.check_if_exists_or_initialize(denoiser)
+                 registry.remove_hook(name, recurse=True)
+
+     def prepare_inputs(
+         self, data: "BlockState", input_fields: Optional[Dict[str, Union[str, Tuple[str, str]]]] = None
+     ) -> List["BlockState"]:
+         if input_fields is None:
+             input_fields = self._input_fields
+
+         tuple_indices = [0] if self.num_conditions == 1 else [0, 1]
+         data_batches = []
+         for i in range(self.num_conditions):
+             data_batch = self._prepare_batch(input_fields, data, tuple_indices[i], self._input_predictions[i])
+             data_batches.append(data_batch)
+         return data_batches
+
+     def forward(self, pred_cond: torch.Tensor, pred_uncond: Optional[torch.Tensor] = None) -> torch.Tensor:
+         pred = None
+
+         if not self._is_ag_enabled():
+             pred = pred_cond
+         else:
+             shift = pred_cond - pred_uncond
+             pred = pred_cond if self.use_original_formulation else pred_uncond
+             pred = pred + self.guidance_scale * shift
+
+         if self.guidance_rescale > 0.0:
+             pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale)
+
+         return pred, {}
+
+     @property
+     def is_conditional(self) -> bool:
+         return self._count_prepared == 1
+
+     @property
+     def num_conditions(self) -> int:
+         num_conditions = 1
+         if self._is_ag_enabled():
+             num_conditions += 1
+         return num_conditions
+
+     def _is_ag_enabled(self) -> bool:
+         if not self._enabled:
+             return False
+
+         is_within_range = True
+         if self._num_inference_steps is not None:
+             skip_start_step = int(self._start * self._num_inference_steps)
+             skip_stop_step = int(self._stop * self._num_inference_steps)
+             is_within_range = skip_start_step <= self._step < skip_stop_step
+
+         is_close = False
+         if self.use_original_formulation:
+             is_close = math.isclose(self.guidance_scale, 0.0)
+         else:
+             is_close = math.isclose(self.guidance_scale, 1.0)
+
+         return is_within_range and not is_close
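
A construction sketch for the new guider follows, assuming `AutoGuidance` is re-exported from `diffusers.guiders` (the new `diffusers/guiders/__init__.py` in this release suggests this, but the exact import path is an assumption); the layer indices and dropout value are illustrative only.

# Hypothetical construction sketch -- assumes `AutoGuidance` is importable from
# `diffusers.guiders` (see the new diffusers/guiders/__init__.py in this release).
from diffusers.guiders import AutoGuidance
from diffusers.hooks import LayerSkipConfig

# Either give layer indices plus a dropout probability for the "bad" (skipped) model...
guider = AutoGuidance(guidance_scale=5.0, auto_guidance_layers=[7, 8, 9], dropout=0.75)

# ...or pass explicit LayerSkipConfig objects (then `dropout` must stay None).
configs = [LayerSkipConfig(7, fqn="auto", dropout=0.75)]
guider = AutoGuidance(guidance_scale=5.0, auto_guidance_config=configs)

Per the validation logic above, `dropout` is required together with `auto_guidance_layers` and must be omitted when explicit `LayerSkipConfig` objects are passed.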
diffusers/guiders/classifier_free_guidance.py
@@ -0,0 +1,141 @@
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import math
+ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
+
+ import torch
+
+ from ..configuration_utils import register_to_config
+ from .guider_utils import BaseGuidance, rescale_noise_cfg
+
+
+ if TYPE_CHECKING:
+     from ..modular_pipelines.modular_pipeline import BlockState
+
+
+ class ClassifierFreeGuidance(BaseGuidance):
+     """
+     Classifier-free guidance (CFG): https://huggingface.co/papers/2207.12598
+
+     CFG is a technique used to improve generation quality and condition-following in diffusion models. It works by
+     jointly training a model on both conditional and unconditional data, and using a weighted sum of the two during
+     inference. This allows the model to tradeoff between generation quality and sample diversity. The original paper
+     proposes scaling and shifting the conditional distribution based on the difference between conditional and
+     unconditional predictions. [x_pred = x_cond + scale * (x_cond - x_uncond)]
+
+     Diffusers implemented the scaling and shifting on the unconditional prediction instead based on the [Imagen
+     paper](https://huggingface.co/papers/2205.11487), which is equivalent to what the original paper proposed in
+     theory. [x_pred = x_uncond + scale * (x_cond - x_uncond)]
+
+     The intuition behind the original formulation can be thought of as moving the conditional distribution estimates
+     further away from the unconditional distribution estimates, while the diffusers-native implementation can be
+     thought of as moving the unconditional distribution towards the conditional distribution estimates to get rid of
+     the unconditional predictions (usually negative features like "bad quality, bad anatomy, watermarks", etc.)
+
+     The `use_original_formulation` argument can be set to `True` to use the original CFG formulation mentioned in the
+     paper. By default, we use the diffusers-native implementation that has been in the codebase for a long time.
+
+     Args:
+         guidance_scale (`float`, defaults to `7.5`):
+             The scale parameter for classifier-free guidance. Higher values result in stronger conditioning on the text
+             prompt, while lower values allow for more freedom in generation. Higher values may lead to saturation and
+             deterioration of image quality.
+         guidance_rescale (`float`, defaults to `0.0`):
+             The rescale factor applied to the noise predictions. This is used to improve image quality and fix
+             overexposure. Based on Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
+             Flawed](https://huggingface.co/papers/2305.08891).
+         use_original_formulation (`bool`, defaults to `False`):
+             Whether to use the original formulation of classifier-free guidance as proposed in the paper. By default,
+             we use the diffusers-native implementation that has been in the codebase for a long time. See
+             [~guiders.classifier_free_guidance.ClassifierFreeGuidance] for more details.
+         start (`float`, defaults to `0.0`):
+             The fraction of the total number of denoising steps after which guidance starts.
+         stop (`float`, defaults to `1.0`):
+             The fraction of the total number of denoising steps after which guidance stops.
+     """
+
+     _input_predictions = ["pred_cond", "pred_uncond"]
+
+     @register_to_config
+     def __init__(
+         self,
+         guidance_scale: float = 7.5,
+         guidance_rescale: float = 0.0,
+         use_original_formulation: bool = False,
+         start: float = 0.0,
+         stop: float = 1.0,
+     ):
+         super().__init__(start, stop)
+
+         self.guidance_scale = guidance_scale
+         self.guidance_rescale = guidance_rescale
+         self.use_original_formulation = use_original_formulation
+
+     def prepare_inputs(
+         self, data: "BlockState", input_fields: Optional[Dict[str, Union[str, Tuple[str, str]]]] = None
+     ) -> List["BlockState"]:
+         if input_fields is None:
+             input_fields = self._input_fields
+
+         tuple_indices = [0] if self.num_conditions == 1 else [0, 1]
+         data_batches = []
+         for i in range(self.num_conditions):
+             data_batch = self._prepare_batch(input_fields, data, tuple_indices[i], self._input_predictions[i])
+             data_batches.append(data_batch)
+         return data_batches
+
+     def forward(self, pred_cond: torch.Tensor, pred_uncond: Optional[torch.Tensor] = None) -> torch.Tensor:
+         pred = None
+
+         if not self._is_cfg_enabled():
+             pred = pred_cond
+         else:
+             shift = pred_cond - pred_uncond
+             pred = pred_cond if self.use_original_formulation else pred_uncond
+             pred = pred + self.guidance_scale * shift
+
+         if self.guidance_rescale > 0.0:
+             pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale)
+
+         return pred, {}
+
+     @property
+     def is_conditional(self) -> bool:
+         return self._count_prepared == 1
+
+     @property
+     def num_conditions(self) -> int:
+         num_conditions = 1
+         if self._is_cfg_enabled():
+             num_conditions += 1
+         return num_conditions
+
+     def _is_cfg_enabled(self) -> bool:
+         if not self._enabled:
+             return False
+
+         is_within_range = True
+         if self._num_inference_steps is not None:
+             skip_start_step = int(self._start * self._num_inference_steps)
+             skip_stop_step = int(self._stop * self._num_inference_steps)
+             is_within_range = skip_start_step <= self._step < skip_stop_step
+
+         is_close = False
+         if self.use_original_formulation:
+             is_close = math.isclose(self.guidance_scale, 0.0)
+         else:
+             is_close = math.isclose(self.guidance_scale, 1.0)
+
+         return is_within_range and not is_close
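
To make the two formulations in the docstring concrete, the sketch below (dummy tensors, illustrative scale) checks that the diffusers-native update with scale w matches the original-paper update with scale w - 1, which is also why `_is_cfg_enabled` treats a guidance scale of about 1.0 (native) or about 0.0 (original) as "guidance off".

import torch

torch.manual_seed(0)
x_cond = torch.randn(1, 4, 8, 8)
x_uncond = torch.randn(1, 4, 8, 8)

w = 7.5  # guidance_scale in the diffusers-native formulation

# Diffusers-native: shift the *unconditional* prediction toward the conditional one.
native = x_uncond + w * (x_cond - x_uncond)

# Original paper: shift the *conditional* prediction away from the unconditional one.
# With scale w - 1 it reproduces the native result exactly.
original = x_cond + (w - 1) * (x_cond - x_uncond)

print(torch.allclose(native, original, atol=1e-6))  # True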
diffusers/guiders/classifier_free_zero_star_guidance.py
@@ -0,0 +1,152 @@
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import math
+ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
+
+ import torch
+
+ from ..configuration_utils import register_to_config
+ from .guider_utils import BaseGuidance, rescale_noise_cfg
+
+
+ if TYPE_CHECKING:
+     from ..modular_pipelines.modular_pipeline import BlockState
+
+
+ class ClassifierFreeZeroStarGuidance(BaseGuidance):
+     """
+     Classifier-free Zero* (CFG-Zero*): https://huggingface.co/papers/2503.18886
+
+     This is an implementation of the Classifier-Free Zero* guidance technique, which is a variant of classifier-free
+     guidance. It proposes zero initialization of the noise predictions for the first few steps of the diffusion
+     process, and also introduces an optimal rescaling factor for the noise predictions, which can help improve the
+     quality of generated images.
+
+     The authors of the paper suggest setting zero initialization in the first 4% of the inference steps.
+
+     Args:
+         guidance_scale (`float`, defaults to `7.5`):
+             The scale parameter for classifier-free guidance. Higher values result in stronger conditioning on the text
+             prompt, while lower values allow for more freedom in generation. Higher values may lead to saturation and
+             deterioration of image quality.
+         zero_init_steps (`int`, defaults to `1`):
+             The number of inference steps for which the noise predictions are zeroed out (see Section 4.2).
+         guidance_rescale (`float`, defaults to `0.0`):
+             The rescale factor applied to the noise predictions. This is used to improve image quality and fix
+             overexposure. Based on Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
+             Flawed](https://huggingface.co/papers/2305.08891).
+         use_original_formulation (`bool`, defaults to `False`):
+             Whether to use the original formulation of classifier-free guidance as proposed in the paper. By default,
+             we use the diffusers-native implementation that has been in the codebase for a long time. See
+             [~guiders.classifier_free_guidance.ClassifierFreeGuidance] for more details.
+         start (`float`, defaults to `0.0`):
+             The fraction of the total number of denoising steps after which guidance starts.
+         stop (`float`, defaults to `1.0`):
+             The fraction of the total number of denoising steps after which guidance stops.
+     """
+
+     _input_predictions = ["pred_cond", "pred_uncond"]
+
+     @register_to_config
+     def __init__(
+         self,
+         guidance_scale: float = 7.5,
+         zero_init_steps: int = 1,
+         guidance_rescale: float = 0.0,
+         use_original_formulation: bool = False,
+         start: float = 0.0,
+         stop: float = 1.0,
+     ):
+         super().__init__(start, stop)
+
+         self.guidance_scale = guidance_scale
+         self.zero_init_steps = zero_init_steps
+         self.guidance_rescale = guidance_rescale
+         self.use_original_formulation = use_original_formulation
+
+     def prepare_inputs(
+         self, data: "BlockState", input_fields: Optional[Dict[str, Union[str, Tuple[str, str]]]] = None
+     ) -> List["BlockState"]:
+         if input_fields is None:
+             input_fields = self._input_fields
+
+         tuple_indices = [0] if self.num_conditions == 1 else [0, 1]
+         data_batches = []
+         for i in range(self.num_conditions):
+             data_batch = self._prepare_batch(input_fields, data, tuple_indices[i], self._input_predictions[i])
+             data_batches.append(data_batch)
+         return data_batches
+
+     def forward(self, pred_cond: torch.Tensor, pred_uncond: Optional[torch.Tensor] = None) -> torch.Tensor:
+         pred = None
+
+         if self._step < self.zero_init_steps:
+             pred = torch.zeros_like(pred_cond)
+         elif not self._is_cfg_enabled():
+             pred = pred_cond
+         else:
+             pred_cond_flat = pred_cond.flatten(1)
+             pred_uncond_flat = pred_uncond.flatten(1)
+             alpha = cfg_zero_star_scale(pred_cond_flat, pred_uncond_flat)
+             alpha = alpha.view(-1, *(1,) * (len(pred_cond.shape) - 1))
+             pred_uncond = pred_uncond * alpha
+             shift = pred_cond - pred_uncond
+             pred = pred_cond if self.use_original_formulation else pred_uncond
+             pred = pred + self.guidance_scale * shift
+
+         if self.guidance_rescale > 0.0:
+             pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale)
+
+         return pred, {}
+
+     @property
+     def is_conditional(self) -> bool:
+         return self._count_prepared == 1
+
+     @property
+     def num_conditions(self) -> int:
+         num_conditions = 1
+         if self._is_cfg_enabled():
+             num_conditions += 1
+         return num_conditions
+
+     def _is_cfg_enabled(self) -> bool:
+         if not self._enabled:
+             return False
+
+         is_within_range = True
+         if self._num_inference_steps is not None:
+             skip_start_step = int(self._start * self._num_inference_steps)
+             skip_stop_step = int(self._stop * self._num_inference_steps)
+             is_within_range = skip_start_step <= self._step < skip_stop_step
+
+         is_close = False
+         if self.use_original_formulation:
+             is_close = math.isclose(self.guidance_scale, 0.0)
+         else:
+             is_close = math.isclose(self.guidance_scale, 1.0)
+
+         return is_within_range and not is_close
+
+
+ def cfg_zero_star_scale(cond: torch.Tensor, uncond: torch.Tensor, eps: float = 1e-8) -> torch.Tensor:
+     cond_dtype = cond.dtype
+     cond = cond.float()
+     uncond = uncond.float()
+     dot_product = torch.sum(cond * uncond, dim=1, keepdim=True)
+     squared_norm = torch.sum(uncond**2, dim=1, keepdim=True) + eps
+     # st_star = v_cond^T * v_uncond / ||v_uncond||^2
+     scale = dot_product / squared_norm
+     return scale.to(dtype=cond_dtype)
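
A minimal standalone sketch of the CFG-Zero* rescaling on dummy tensors, mirroring `forward` and `cfg_zero_star_scale` above (shapes are illustrative): the per-sample scale alpha is computed on flattened predictions, applied to the unconditional branch, and the standard CFG combination follows.

import torch

def cfg_zero_star_scale(cond, uncond, eps=1e-8):
    # st_star = <v_cond, v_uncond> / ||v_uncond||^2, computed per batch element
    # (mirrors the module-level helper above).
    cond, uncond = cond.float(), uncond.float()
    dot = torch.sum(cond * uncond, dim=1, keepdim=True)
    sq_norm = torch.sum(uncond**2, dim=1, keepdim=True) + eps
    return dot / sq_norm

pred_cond = torch.randn(2, 4, 32, 32)
pred_uncond = torch.randn(2, 4, 32, 32)
guidance_scale = 7.5

# Flatten all non-batch dims and compute the per-sample rescale factor alpha.
alpha = cfg_zero_star_scale(pred_cond.flatten(1), pred_uncond.flatten(1))
alpha = alpha.view(-1, 1, 1, 1)

# Rescale the unconditional branch, then apply the standard CFG combination.
pred_uncond = pred_uncond * alpha
pred = pred_uncond + guidance_scale * (pred_cond - pred_uncond)
print(pred.shape)  # torch.Size([2, 4, 32, 32])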