cache-dit 0.2.31__py3-none-any.whl → 0.2.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cache-dit might be problematic. Click here for more details.

cache_dit/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.2.31'
32
- __version_tuple__ = version_tuple = (0, 2, 31)
31
+ __version__ = version = '0.2.32'
32
+ __version_tuple__ = version_tuple = (0, 2, 32)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -254,7 +254,7 @@ def skyreelsv2_adapter(pipe, **kwargs) -> BlockAdapter:
254
254
  )
255
255
 
256
256
 
257
- @BlockAdapterRegistry.register("SD3")
257
+ @BlockAdapterRegistry.register("StableDiffusion3")
258
258
  def sd3_adapter(pipe, **kwargs) -> BlockAdapter:
259
259
  from diffusers import SD3Transformer2DModel
260
260
 
@@ -114,7 +114,7 @@ class CachedAdapter:
114
114
  **cache_context_kwargs,
115
115
  ):
116
116
  # Check cache_context_kwargs
117
- if not cache_context_kwargs["enable_spearate_cfg"]:
117
+ if cache_context_kwargs["enable_spearate_cfg"] is None:
118
118
  # Check cfg for some specific case if users don't set it explicitly (i.e. it is None)
119
119
  if BlockAdapterRegistry.has_separate_cfg(block_adapter):
120
120
  cache_context_kwargs["enable_spearate_cfg"] = True
@@ -131,6 +131,12 @@ class CachedAdapter:
131
131
  f"register: {cache_context_kwargs['enable_spearate_cfg']}, "
132
132
  f"Pipeline: {block_adapter.pipe.__class__.__name__}."
133
133
  )
134
+ else:
135
+ logger.info(
136
+ f"Use custom 'enable_spearate_cfg' from cache context "
137
+ f"kwargs: {cache_context_kwargs['enable_spearate_cfg']}. "
138
+ f"Pipeline: {block_adapter.pipe.__class__.__name__}."
139
+ )
134
140
 
135
141
  if (
136
142
  cache_type := cache_context_kwargs.pop("cache_type", None)
@@ -345,12 +345,19 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
345
345
 
346
346
  # compute hidden_states residual
347
347
  hidden_states = hidden_states.contiguous()
348
- encoder_hidden_states = encoder_hidden_states.contiguous()
349
348
 
350
349
  hidden_states_residual = hidden_states - original_hidden_states
351
- encoder_hidden_states_residual = (
352
- encoder_hidden_states - original_encoder_hidden_states
353
- )
350
+
351
+ if (
352
+ encoder_hidden_states is not None
353
+ and original_encoder_hidden_states is not None
354
+ ):
355
+ encoder_hidden_states = encoder_hidden_states.contiguous()
356
+ encoder_hidden_states_residual = (
357
+ encoder_hidden_states - original_encoder_hidden_states
358
+ )
359
+ else:
360
+ encoder_hidden_states_residual = None
354
361
 
355
362
  return (
356
363
  hidden_states,
@@ -400,9 +407,16 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
400
407
  Bn_i_hidden_states_residual = (
401
408
  hidden_states - Bn_i_original_hidden_states
402
409
  )
403
- Bn_i_encoder_hidden_states_residual = (
404
- encoder_hidden_states - Bn_i_original_encoder_hidden_states
405
- )
410
+ if (
411
+ encoder_hidden_states is not None
412
+ and Bn_i_original_encoder_hidden_states is not None
413
+ ):
414
+ Bn_i_encoder_hidden_states_residual = (
415
+ encoder_hidden_states
416
+ - Bn_i_original_encoder_hidden_states
417
+ )
418
+ else:
419
+ Bn_i_encoder_hidden_states_residual = None
406
420
 
407
421
  # Save original_hidden_states for diff calculation.
408
422
  self.cache_manager.set_Bn_buffer(
@@ -24,7 +24,7 @@ def enable_cache(
24
24
  max_continuous_cached_steps: int = -1,
25
25
  residual_diff_threshold: float = 0.08,
26
26
  # Cache CFG or not
27
- enable_spearate_cfg: bool = False,
27
+ enable_spearate_cfg: bool | None = None,
28
28
  cfg_compute_first: bool = False,
29
29
  cfg_diff_compute_separate: bool = True,
30
30
  # Hybrid TaylorSeer
@@ -70,7 +70,7 @@ def enable_cache(
70
70
  residual_diff_threshold (`float`, *required*, defaults to 0.08):
71
71
  The value of residual diff threshold, a higher value leads to faster performance at the
72
72
  cost of lower precision.
73
- enable_spearate_cfg (`bool`, *required*, defaults to False):
73
+ enable_spearate_cfg (`bool`, *required*, defaults to None):
74
74
  Whether to do separate cfg or not, such as Wan 2.1, Qwen-Image. For model that fused CFG
75
75
  and non-CFG into single forward step, should set enable_spearate_cfg as False, for example:
76
76
  CogVideoX, HunyuanVideo, Mochi, etc.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cache_dit
3
- Version: 0.2.31
3
+ Version: 0.2.32
4
4
  Summary: 🤗 A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers
5
5
  Author: DefTruth, vipshop.com, etc.
6
6
  Maintainer: DefTruth, vipshop.com, etc
@@ -59,32 +59,37 @@ Dynamic: requires-python
59
59
  🔥<b><a href="#unified">Unified Cache APIs</a> | <a href="#dbcache">DBCache</a> | <a href="#taylorseer">Hybrid TaylorSeer</a> | <a href="#cfg">Hybrid Cache CFG</a></b>🔥
60
60
  </p>
61
61
  <p align="center">
62
- 🎉Now, <b>cache-dit</b> covers <b>most</b> mainstream Diffusers' <b>DiT</b> Pipelines🎉<br>
63
- 🔥<a href="#supported">Qwen-Image</a> | <a href="#supported">FLUX.1</a> | <a href="#supported">Wan 2.1</a> | <a href="#supported"> Wan 2.2 </a> | <a href="#supported">HunyuanVideo</a>🔥<br>
64
- 🔥<a href="#supported">HunyuanDiT</a> | <a href="#supported">HiDream</a> | <a href="#supported">Mochi</a> | <a href="#supported"> CogVideoX </a> | <a href="#supported">CogVideoX1.5</a>🔥<br>
65
- 🔥<a href="#supported">CogView3Plus</a> | <a href="#supported">CogView4</a> | <a href="#supported">Chroma</a> | <a href="#supported"> LTXVideo </a> | <a href="#supported">PixArt</a>🔥<br>
66
- 🔥<a href="#supported">Cosmos</a> | <a href="#supported">SkyReelsV2</a> | <a href="#supported">VisualCloze</a> | <a href="#supported"> ... </a> | <a href="#supported">Lumina2</a>🔥
62
+ 🎉Now, <b>cache-dit</b> covers <b>most</b> mainstream Diffusers' <b>DiT</b> Pipelines🎉<br>
63
+ 🔥<a href="#supported">Qwen-Image</a> | <a href="#supported">FLUX.1</a> | <a href="#supported">Qwen-Image-Lightning</a> | <a href="#supported"> Wan 2.1/2.2 </a>🔥<br>
64
+ 🔥<a href="#supported">HunyuanVideo</a> | <a href="#supported">HunyuanDiT</a> | <a href="#supported">HiDream</a> | <a href="#supported">Mochi</a> | <a href="#supported">CogVideoX 1/1.5</a>🔥<br>
65
+ 🔥<a href="#supported">CogView3Plus</a> | <a href="#supported">CogView4</a> | <a href="#supported">Chroma</a> | <a href="#supported"> LTXVideo </a> | <a href="#supported">PixArt</a>🔥<br>
66
+ 🔥<a href="#supported">Cosmos</a> | <a href="#supported">SkyReelsV2</a> | <a href="#supported">VisualCloze</a> | <a href="#supported"> OmniGen </a> | <a href="#supported">Lumina 1/2</a>🔥<br>
67
+ 🔥<a href="#supported">Allegro</a> | <a href="#supported">EasyAnimate</a> | <a href="#supported">SD 3/3.5</a> | <a href="#supported"> ... </a> | <a href="#supported">DiT-XL</a>🔥
67
68
  </p>
68
69
  </div>
69
70
  <div align='center'>
70
71
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/gifs/wan2.2.C0_Q0_NONE.gif width=160px>
71
72
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/gifs/wan2.2.C1_Q0_DBCACHE_F1B0_W2M8MC2_T1O2_R0.08.gif width=160px>
72
73
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/gifs/wan2.2.C1_Q1_fp8_w8a8_dq_DBCACHE_F1B0_W2M8MC2_T1O2_R0.08.gif width=160px>
73
- <p><b>🔥Wan2.2 MoE</b> Baseline | <b><a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:~2.0x↑🎉</b> | +FP8 DQ:<b>~2.4x↑🎉</b></p>
74
+ <p><b>🔥Wan2.2 MoE</b> | <b><a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:~2.0x↑🎉</b> | +FP8 DQ:<b>~2.4x↑🎉</b></p>
74
75
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/qwen-image.C0_Q0_NONE.png width=160px>
75
76
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/qwen-image.C1_Q0_DBCACHE_F8B0_W8M0MC0_T1O4_R0.12_S23.png width=160px>
76
77
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/qwen-image.C1_Q1_fp8_w8a8_dq_DBCACHE_F8B0_W8M0MC0_T1O4_R0.12_S18.png width=160px>
77
- <p><b>🔥Qwen-Image</b> Baseline | <b><a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:~1.8x↑🎉</b> | +FP8 DQ:<b>~2.2x↑🎉</b><br>♥️ Please consider to leave a <b>⭐️ Star</b> to support us ~ ♥️</p>
78
- </p>
78
+ <p><b>🔥Qwen-Image</b> | <b><a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:~1.8x↑🎉</b> | +FP8 DQ:<b>~2.2x↑🎉</b></p>
79
+ <img src=./assets/qwen-image-lightning.4steps.C0_L1_Q0_NONE.png width=200px>
80
+ <img src=./assets/qwen-image-lightning.4steps.C0_L1_Q0_DBCACHE_F16B16_W2M1MC1_T0O2_R0.9_S1.png width=200px>
81
+ <p><b>🔥Qwen-Image-Lightning</b> 4 steps | <b><a href="https://github.com/vipshop/cache-dit">+cache-dit</a></b> 3.5 steps:<b>~1.14x↑🎉</b>
82
+ <br>♥️ Please consider leaving a <b>⭐️ Star</b> to support us ~ ♥️</p>
79
83
  </div>
80
84
 
81
85
  ## 🔥News
82
86
 
87
+ - [2025-09-08] 🔥[**Qwen-Image-Lightning**](./examples/pipeline/run_qwen_image_lightning.py) **7.1/3.5 steps🎉** inference with **[DBCache: F16B16](https://github.com/vipshop/cache-dit)**.
83
88
  - [2025-09-03] 🎉[**Wan2.2-MoE**](https://github.com/Wan-Video) **2.4x↑🎉** speedup! Please refer to [run_wan_2.2.py](./examples/pipeline/run_wan_2.2.py) as an example.
84
89
  - [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x↑🎉** speedup! Check the example: [run_qwen_image_edit.py](./examples/pipeline/run_qwen_image_edit.py).
85
90
  - [2025-08-12] 🎉First caching mechanism in [QwenLM/Qwen-Image](https://github.com/QwenLM/Qwen-Image) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/QwenLM/Qwen-Image/pull/61).
86
91
  - [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x↑🎉** speedup! Please refer to [run_qwen_image.py](./examples/pipeline/run_qwen_image.py) as an example.
87
- - [2025-07-13] 🎉[**FLUX.1-Dev**](https://github.com/xlite-dev/flux-faster) **3.3x↑🎉** speedup! NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)** + `compile + FP8 DQ`.
92
+ - [2025-07-13] 🎉[**FLUX.1-dev**](https://github.com/xlite-dev/flux-faster) **3.3x↑🎉** speedup! NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)** + **compile + FP8 DQ**.
88
93
 
89
94
  <details>
90
95
  <summary> Previous News </summary>
@@ -134,6 +139,7 @@ pip3 install git+https://github.com/vipshop/cache-dit.git
134
139
 
135
140
  Currently, **cache-dit** library supports almost **Any** Diffusion Transformers (with **Transformer Blocks** that match the specific Input and Output **patterns**). Please check [🎉Unified Cache APIs](#unified) for more details. Here are just some of the tested models listed:
136
141
 
142
+ - [🚀Qwen-Image-Lightning](https://github.com/vipshop/cache-dit/raw/main/examples)
137
143
  - [🚀Qwen-Image-Edit](https://github.com/vipshop/cache-dit/raw/main/examples)
138
144
  - [🚀Qwen-Image](https://github.com/vipshop/cache-dit/raw/main/examples)
139
145
  - [🚀FLUX.1-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
@@ -147,7 +153,10 @@ Currently, **cache-dit** library supports almost **Any** Diffusion Transformers
147
153
  - [🚀mochi-1-preview](https://github.com/vipshop/cache-dit/raw/main/examples)
148
154
  - [🚀HunyuanVideo](https://github.com/vipshop/cache-dit/raw/main/examples)
149
155
  - [🚀HunyuanDiT](https://github.com/vipshop/cache-dit/raw/main/examples)
150
- - [🚀HiDream](https://github.com/vipshop/cache-dit/raw/main/examples)
156
+ - [🚀HiDream-I1-Full](https://github.com/vipshop/cache-dit/raw/main/examples)
157
+ - [🚀PixArt-Alpha](https://github.com/vipshop/cache-dit/raw/main/examples)
158
+ - [🚀PixArt-Sigma](https://github.com/vipshop/cache-dit/raw/main/examples)
159
+ - [🚀SD-3/3.5](https://github.com/vipshop/cache-dit/raw/main/examples)
151
160
 
152
161
  </details>
153
162
 
@@ -1,21 +1,21 @@
1
1
  cache_dit/__init__.py,sha256=kX9V-FegZG4c8LMwI4PTmMqH794MEW0pzDArdhC0cJw,1241
2
- cache_dit/_version.py,sha256=cMx3p02rk8iaGjj6X7bw0aOcGW7d-iY_EBO9S_9o-b4,706
2
+ cache_dit/_version.py,sha256=J0YTFDgdG9rY1Xk5pUbWWGgbT2rbSasvUHcntxayVtA,706
3
3
  cache_dit/logger.py,sha256=0zsu42hN-3-rgGC_C29ms1IvVpV4_b4_SwJCKSenxBE,4304
4
4
  cache_dit/utils.py,sha256=WK7eqgH6gCYNHXNLmWyxBDU0XSHTPg7CfOcyXlGXBqE,10510
5
5
  cache_dit/cache_factory/.gitignore,sha256=5Cb-qT9wsTUoMJ7vACDF7ZcLpAXhi5v-xdcWSRit988,23
6
6
  cache_dit/cache_factory/__init__.py,sha256=Iw6-iJLFbdzCsIDZXXOw371L-HPmoeZO_P9a3sDjP5s,1103
7
- cache_dit/cache_factory/cache_adapters.py,sha256=6YbBSfKEGdWi9oY1ceuxi-MpHcaDYoQ-t6NTaLZITR4,17938
8
- cache_dit/cache_factory/cache_interface.py,sha256=y1nY6R3MucRmAnG2UJRI_tIKrRk27FktGWLbfckf3zE,8543
7
+ cache_dit/cache_factory/cache_adapters.py,sha256=dmNX68nBD52HtQvHnNAuSn1zjDWrQdycD0qXy-w-mwc,18212
8
+ cache_dit/cache_factory/cache_interface.py,sha256=LpyCy-tQ_GcTRAYLpMMf9hFVIktABHI6CObn5Ll8bMw,8548
9
9
  cache_dit/cache_factory/cache_types.py,sha256=ooukxQRG55uTLmaZ0SKw6gIeY6SQHhMxkbv55uj2Sqk,991
10
10
  cache_dit/cache_factory/forward_pattern.py,sha256=FumlCuZ-TSmSYH0hGBHctSJ-oGLCftdZjLygqhsmdR4,2258
11
11
  cache_dit/cache_factory/utils.py,sha256=XkVM9AXcB9zYq8-S8QKAsGz80r3tA6U3lBNGDGeHOe4,1871
12
- cache_dit/cache_factory/block_adapters/__init__.py,sha256=x2ivShzOy2z3p1WUArzoChR4jaLHhNXkXMSk-RPzR3g,17534
12
+ cache_dit/cache_factory/block_adapters/__init__.py,sha256=OZM5vJwmQIkoIwVmMxKXiHqKvs31NyAva1Z91C_ko3w,17547
13
13
  cache_dit/cache_factory/block_adapters/block_adapters.py,sha256=EQBiJYyoInKU1ND69wTm7M0n5Ja4I8QW01SgRpBjSn8,21671
14
14
  cache_dit/cache_factory/block_adapters/block_registers.py,sha256=ZeN2wGPmuf2u3puSsBx8x-rl3wRo8-cWcuWNcrssVfA,2553
15
15
  cache_dit/cache_factory/cache_blocks/__init__.py,sha256=08Ox7kD05lkRKCOsVTdEZeKAWBheqpxfrAT1Nz7eclI,2916
16
16
  cache_dit/cache_factory/cache_blocks/pattern_0_1_2.py,sha256=ElMps6_7uI74tSF9GDR_dEI0bZEhdzcepM29xFWnYo8,428
17
17
  cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py,sha256=nf2f5wdxp6tfq9AhFyMyBeKiZfxh63WG1g8q-c2BBSg,10182
18
- cache_dit/cache_factory/cache_blocks/pattern_base.py,sha256=_sajtb-Cz8yrCRBRSiJREzFG7h6265K9pXeAz5i1meY,20814
18
+ cache_dit/cache_factory/cache_blocks/pattern_base.py,sha256=f1ojREQcDoBtDG3dzl8t1g_Vru8140LVDRPWlY-kAXw,21311
19
19
  cache_dit/cache_factory/cache_blocks/utils.py,sha256=dGOC1tMMOvcbvEgx44eTESKn_jsv-0RZ3tRHPa3wmQ4,1315
20
20
  cache_dit/cache_factory/cache_contexts/__init__.py,sha256=rqnJ5__zqnpVHK5A1OqWILpNh5Ss-0ZDTGgtxZMKGGo,250
21
21
  cache_dit/cache_factory/cache_contexts/cache_context.py,sha256=N88WLdd4KE9DuMWmpX8URcF55E2zWNwcKMxgVYkxMJY,13691
@@ -41,9 +41,9 @@ cache_dit/quantize/__init__.py,sha256=kWYoMAyZgBXu9BJlZjTQ0dRffW9GqeeY9_iTkXrb70
41
41
  cache_dit/quantize/quantize_ao.py,sha256=mGspqYgQtenl3QnKPtsSYsSD7LbVX93f1M940bhXKLU,6066
42
42
  cache_dit/quantize/quantize_interface.py,sha256=2s_R7xPSKuJeFpEGeLwRxnq_CqJcBG3a3lzyW5wh-UM,1241
43
43
  cache_dit/quantize/quantize_svdq.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- cache_dit-0.2.31.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
45
- cache_dit-0.2.31.dist-info/METADATA,sha256=MrRvt7HL8pNm0ZsBxKO25pBcCJhHPG7HddwjT_euy_I,23198
46
- cache_dit-0.2.31.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
- cache_dit-0.2.31.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
48
- cache_dit-0.2.31.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
49
- cache_dit-0.2.31.dist-info/RECORD,,
44
+ cache_dit-0.2.32.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
45
+ cache_dit-0.2.32.dist-info/METADATA,sha256=WQ9GP-Om05j3NBvtifkmbz5t20XBU_-KJQptrK7jQBs,24222
46
+ cache_dit-0.2.32.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
+ cache_dit-0.2.32.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
48
+ cache_dit-0.2.32.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
49
+ cache_dit-0.2.32.dist-info/RECORD,,