cache-dit 0.2.31__py3-none-any.whl → 0.2.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cache-dit might be problematic. Click here for more details.
- cache_dit/_version.py +2 -2
- cache_dit/cache_factory/block_adapters/__init__.py +1 -1
- cache_dit/cache_factory/cache_adapters.py +7 -1
- cache_dit/cache_factory/cache_blocks/pattern_base.py +21 -7
- cache_dit/cache_factory/cache_interface.py +2 -2
- {cache_dit-0.2.31.dist-info → cache_dit-0.2.32.dist-info}/METADATA +20 -11
- {cache_dit-0.2.31.dist-info → cache_dit-0.2.32.dist-info}/RECORD +11 -11
- {cache_dit-0.2.31.dist-info → cache_dit-0.2.32.dist-info}/WHEEL +0 -0
- {cache_dit-0.2.31.dist-info → cache_dit-0.2.32.dist-info}/entry_points.txt +0 -0
- {cache_dit-0.2.31.dist-info → cache_dit-0.2.32.dist-info}/licenses/LICENSE +0 -0
- {cache_dit-0.2.31.dist-info → cache_dit-0.2.32.dist-info}/top_level.txt +0 -0
cache_dit/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.2.31'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 2, 31)
|
|
31
|
+
__version__ = version = '0.2.32'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 2, 32)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -254,7 +254,7 @@ def skyreelsv2_adapter(pipe, **kwargs) -> BlockAdapter:
|
|
|
254
254
|
)
|
|
255
255
|
|
|
256
256
|
|
|
257
|
-
@BlockAdapterRegistry.register("
|
|
257
|
+
@BlockAdapterRegistry.register("StableDiffusion3")
|
|
258
258
|
def sd3_adapter(pipe, **kwargs) -> BlockAdapter:
|
|
259
259
|
from diffusers import SD3Transformer2DModel
|
|
260
260
|
|
|
@@ -114,7 +114,7 @@ class CachedAdapter:
|
|
|
114
114
|
**cache_context_kwargs,
|
|
115
115
|
):
|
|
116
116
|
# Check cache_context_kwargs
|
|
117
|
-
if
|
|
117
|
+
if cache_context_kwargs["enable_spearate_cfg"] is None:
|
|
118
118
|
# Check cfg for some specific case if users don't set it as True
|
|
119
119
|
if BlockAdapterRegistry.has_separate_cfg(block_adapter):
|
|
120
120
|
cache_context_kwargs["enable_spearate_cfg"] = True
|
|
@@ -131,6 +131,12 @@ class CachedAdapter:
|
|
|
131
131
|
f"register: {cache_context_kwargs['enable_spearate_cfg']}, "
|
|
132
132
|
f"Pipeline: {block_adapter.pipe.__class__.__name__}."
|
|
133
133
|
)
|
|
134
|
+
else:
|
|
135
|
+
logger.info(
|
|
136
|
+
f"Use custom 'enable_spearate_cfg' from cache context "
|
|
137
|
+
f"kwargs: {cache_context_kwargs['enable_spearate_cfg']}. "
|
|
138
|
+
f"Pipeline: {block_adapter.pipe.__class__.__name__}."
|
|
139
|
+
)
|
|
134
140
|
|
|
135
141
|
if (
|
|
136
142
|
cache_type := cache_context_kwargs.pop("cache_type", None)
|
|
@@ -345,12 +345,19 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
|
|
|
345
345
|
|
|
346
346
|
# compute hidden_states residual
|
|
347
347
|
hidden_states = hidden_states.contiguous()
|
|
348
|
-
encoder_hidden_states = encoder_hidden_states.contiguous()
|
|
349
348
|
|
|
350
349
|
hidden_states_residual = hidden_states - original_hidden_states
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
350
|
+
|
|
351
|
+
if (
|
|
352
|
+
encoder_hidden_states is not None
|
|
353
|
+
and original_encoder_hidden_states is not None
|
|
354
|
+
):
|
|
355
|
+
encoder_hidden_states = encoder_hidden_states.contiguous()
|
|
356
|
+
encoder_hidden_states_residual = (
|
|
357
|
+
encoder_hidden_states - original_encoder_hidden_states
|
|
358
|
+
)
|
|
359
|
+
else:
|
|
360
|
+
encoder_hidden_states_residual = None
|
|
354
361
|
|
|
355
362
|
return (
|
|
356
363
|
hidden_states,
|
|
@@ -400,9 +407,16 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
|
|
|
400
407
|
Bn_i_hidden_states_residual = (
|
|
401
408
|
hidden_states - Bn_i_original_hidden_states
|
|
402
409
|
)
|
|
403
|
-
|
|
404
|
-
encoder_hidden_states
|
|
405
|
-
|
|
410
|
+
if (
|
|
411
|
+
encoder_hidden_states is not None
|
|
412
|
+
and Bn_i_original_encoder_hidden_states is not None
|
|
413
|
+
):
|
|
414
|
+
Bn_i_encoder_hidden_states_residual = (
|
|
415
|
+
encoder_hidden_states
|
|
416
|
+
- Bn_i_original_encoder_hidden_states
|
|
417
|
+
)
|
|
418
|
+
else:
|
|
419
|
+
Bn_i_encoder_hidden_states_residual = None
|
|
406
420
|
|
|
407
421
|
# Save original_hidden_states for diff calculation.
|
|
408
422
|
self.cache_manager.set_Bn_buffer(
|
|
@@ -24,7 +24,7 @@ def enable_cache(
|
|
|
24
24
|
max_continuous_cached_steps: int = -1,
|
|
25
25
|
residual_diff_threshold: float = 0.08,
|
|
26
26
|
# Cache CFG or not
|
|
27
|
-
enable_spearate_cfg: bool =
|
|
27
|
+
enable_spearate_cfg: bool | None = None,
|
|
28
28
|
cfg_compute_first: bool = False,
|
|
29
29
|
cfg_diff_compute_separate: bool = True,
|
|
30
30
|
# Hybird TaylorSeer
|
|
@@ -70,7 +70,7 @@ def enable_cache(
|
|
|
70
70
|
residual_diff_threshold (`float`, *required*, defaults to 0.08):
|
|
71
71
|
he value of residual diff threshold, a higher value leads to faster performance at the
|
|
72
72
|
cost of lower precision.
|
|
73
|
-
enable_spearate_cfg (`bool`, *required*, defaults to
|
|
73
|
+
enable_spearate_cfg (`bool`, *required*, defaults to None):
|
|
74
74
|
Whether to do separate cfg or not, such as Wan 2.1, Qwen-Image. For model that fused CFG
|
|
75
75
|
and non-CFG into single forward step, should set enable_spearate_cfg as False, for example:
|
|
76
76
|
CogVideoX, HunyuanVideo, Mochi, etc.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cache_dit
|
|
3
|
-
Version: 0.2.31
|
|
3
|
+
Version: 0.2.32
|
|
4
4
|
Summary: 🤗 A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers
|
|
5
5
|
Author: DefTruth, vipshop.com, etc.
|
|
6
6
|
Maintainer: DefTruth, vipshop.com, etc
|
|
@@ -59,32 +59,37 @@ Dynamic: requires-python
|
|
|
59
59
|
🔥<b><a href="#unified">Unified Cache APIs</a> | <a href="#dbcache">DBCache</a> | <a href="#taylorseer">Hybrid TaylorSeer</a> | <a href="#cfg">Hybrid Cache CFG</a></b>🔥
|
|
60
60
|
</p>
|
|
61
61
|
<p align="center">
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
62
|
+
🎉Now, <b>cache-dit</b> covers <b>most</b> mainstream Diffusers' <b>DiT</b> Pipelines🎉<br>
|
|
63
|
+
🔥<a href="#supported">Qwen-Image</a> | <a href="#supported">FLUX.1</a> | <a href="#supported">Qwen-Image-Lightning</a> | <a href="#supported"> Wan 2.1/2.2 </a>🔥<br>
|
|
64
|
+
🔥<a href="#supported">HunyuanVideo</a> | <a href="#supported">HunyuanDiT</a> | <a href="#supported">HiDream</a> | <a href="#supported">Mochi</a> | <a href="#supported">CogVideoX 1/1.5</a>🔥<br>
|
|
65
|
+
🔥<a href="#supported">CogView3Plus</a> | <a href="#supported">CogView4</a> | <a href="#supported">Chroma</a> | <a href="#supported"> LTXVideo </a> | <a href="#supported">PixArt</a>🔥<br>
|
|
66
|
+
🔥<a href="#supported">Cosmos</a> | <a href="#supported">SkyReelsV2</a> | <a href="#supported">VisualCloze</a> | <a href="#supported"> OmniGen </a> | <a href="#supported">Lumina 1/2</a>🔥<br>
|
|
67
|
+
🔥<a href="#supported">Allegro</a> | <a href="#supported">EasyAnimate</a> | <a href="#supported">SD 3/3.5</a> | <a href="#supported"> ... </a> | <a href="#supported">DiT-XL</a>🔥
|
|
67
68
|
</p>
|
|
68
69
|
</div>
|
|
69
70
|
<div align='center'>
|
|
70
71
|
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/gifs/wan2.2.C0_Q0_NONE.gif width=160px>
|
|
71
72
|
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/gifs/wan2.2.C1_Q0_DBCACHE_F1B0_W2M8MC2_T1O2_R0.08.gif width=160px>
|
|
72
73
|
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/gifs/wan2.2.C1_Q1_fp8_w8a8_dq_DBCACHE_F1B0_W2M8MC2_T1O2_R0.08.gif width=160px>
|
|
73
|
-
<p><b>🔥Wan2.2 MoE</b>
|
|
74
|
+
<p><b>🔥Wan2.2 MoE</b> | <b><a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:~2.0x↑🎉</b> | +FP8 DQ:<b>~2.4x↑🎉</b></p>
|
|
74
75
|
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/qwen-image.C0_Q0_NONE.png width=160px>
|
|
75
76
|
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/qwen-image.C1_Q0_DBCACHE_F8B0_W8M0MC0_T1O4_R0.12_S23.png width=160px>
|
|
76
77
|
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/qwen-image.C1_Q1_fp8_w8a8_dq_DBCACHE_F8B0_W8M0MC0_T1O4_R0.12_S18.png width=160px>
|
|
77
|
-
<p><b>🔥Qwen-Image</b>
|
|
78
|
-
|
|
78
|
+
<p><b>🔥Qwen-Image</b> | <b><a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:~1.8x↑🎉</b> | +FP8 DQ:<b>~2.2x↑🎉</b></p>
|
|
79
|
+
<img src=./assets/qwen-image-lightning.4steps.C0_L1_Q0_NONE.png width=200px>
|
|
80
|
+
<img src=./assets/qwen-image-lightning.4steps.C0_L1_Q0_DBCACHE_F16B16_W2M1MC1_T0O2_R0.9_S1.png width=200px>
|
|
81
|
+
<p><b>🔥Qwen-Image-Lightning</b> 4 steps | <b><a href="https://github.com/vipshop/cache-dit">+cache-dit</a></b> 3.5 steps:<b>~1.14x↑🎉</b>
|
|
82
|
+
<br>♥️ Please consider to leave a <b>⭐️ Star</b> to support us ~ ♥️</p>
|
|
79
83
|
</div>
|
|
80
84
|
|
|
81
85
|
## 🔥News
|
|
82
86
|
|
|
87
|
+
- [2025-09-08] 🔥[**Qwen-Image-Lightning**](./examples/pipeline/run_qwen_image_lightning.py) **7.1/3.5 steps🎉** inference with **[DBCache: F16B16](https://github.com/vipshop/cache-dit)**.
|
|
83
88
|
- [2025-09-03] 🎉[**Wan2.2-MoE**](https://github.com/Wan-Video) **2.4x↑🎉** speedup! Please refer to [run_wan_2.2.py](./examples/pipeline/run_wan_2.2.py) as an example.
|
|
84
89
|
- [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x↑🎉** speedup! Check the example: [run_qwen_image_edit.py](./examples/pipeline/run_qwen_image_edit.py).
|
|
85
90
|
- [2025-08-12] 🎉First caching mechanism in [QwenLM/Qwen-Image](https://github.com/QwenLM/Qwen-Image) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/QwenLM/Qwen-Image/pull/61).
|
|
86
91
|
- [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x↑🎉** speedup! Please refer to [run_qwen_image.py](./examples/pipeline/run_qwen_image.py) as an example.
|
|
87
|
-
- [2025-07-13] 🎉[**FLUX.1-
|
|
92
|
+
- [2025-07-13] 🎉[**FLUX.1-dev**](https://github.com/xlite-dev/flux-faster) **3.3x↑🎉** speedup! NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)** + **compile + FP8 DQ**.
|
|
88
93
|
|
|
89
94
|
<details>
|
|
90
95
|
<summary> Previous News </summary>
|
|
@@ -134,6 +139,7 @@ pip3 install git+https://github.com/vipshop/cache-dit.git
|
|
|
134
139
|
|
|
135
140
|
Currently, **cache-dit** library supports almost **Any** Diffusion Transformers (with **Transformer Blocks** that match the specific Input and Output **patterns**). Please check [🎉Unified Cache APIs](#unified) for more details. Here are just some of the tested models listed:
|
|
136
141
|
|
|
142
|
+
- [🚀Qwen-Image-Lightning](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
137
143
|
- [🚀Qwen-Image-Edit](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
138
144
|
- [🚀Qwen-Image](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
139
145
|
- [🚀FLUX.1-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
@@ -147,7 +153,10 @@ Currently, **cache-dit** library supports almost **Any** Diffusion Transformers
|
|
|
147
153
|
- [🚀mochi-1-preview](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
148
154
|
- [🚀HunyuanVideo](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
149
155
|
- [🚀HunyuanDiT](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
150
|
-
- [🚀HiDream](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
156
|
+
- [🚀HiDream-I1-Full](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
157
|
+
- [🚀PixArt-Alpha](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
158
|
+
- [🚀PixArt-Sigma](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
159
|
+
- [🚀SD-3/3.5](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
151
160
|
|
|
152
161
|
</details>
|
|
153
162
|
|
|
@@ -1,21 +1,21 @@
|
|
|
1
1
|
cache_dit/__init__.py,sha256=kX9V-FegZG4c8LMwI4PTmMqH794MEW0pzDArdhC0cJw,1241
|
|
2
|
-
cache_dit/_version.py,sha256=
|
|
2
|
+
cache_dit/_version.py,sha256=J0YTFDgdG9rY1Xk5pUbWWGgbT2rbSasvUHcntxayVtA,706
|
|
3
3
|
cache_dit/logger.py,sha256=0zsu42hN-3-rgGC_C29ms1IvVpV4_b4_SwJCKSenxBE,4304
|
|
4
4
|
cache_dit/utils.py,sha256=WK7eqgH6gCYNHXNLmWyxBDU0XSHTPg7CfOcyXlGXBqE,10510
|
|
5
5
|
cache_dit/cache_factory/.gitignore,sha256=5Cb-qT9wsTUoMJ7vACDF7ZcLpAXhi5v-xdcWSRit988,23
|
|
6
6
|
cache_dit/cache_factory/__init__.py,sha256=Iw6-iJLFbdzCsIDZXXOw371L-HPmoeZO_P9a3sDjP5s,1103
|
|
7
|
-
cache_dit/cache_factory/cache_adapters.py,sha256=
|
|
8
|
-
cache_dit/cache_factory/cache_interface.py,sha256=
|
|
7
|
+
cache_dit/cache_factory/cache_adapters.py,sha256=dmNX68nBD52HtQvHnNAuSn1zjDWrQdycD0qXy-w-mwc,18212
|
|
8
|
+
cache_dit/cache_factory/cache_interface.py,sha256=LpyCy-tQ_GcTRAYLpMMf9hFVIktABHI6CObn5Ll8bMw,8548
|
|
9
9
|
cache_dit/cache_factory/cache_types.py,sha256=ooukxQRG55uTLmaZ0SKw6gIeY6SQHhMxkbv55uj2Sqk,991
|
|
10
10
|
cache_dit/cache_factory/forward_pattern.py,sha256=FumlCuZ-TSmSYH0hGBHctSJ-oGLCftdZjLygqhsmdR4,2258
|
|
11
11
|
cache_dit/cache_factory/utils.py,sha256=XkVM9AXcB9zYq8-S8QKAsGz80r3tA6U3lBNGDGeHOe4,1871
|
|
12
|
-
cache_dit/cache_factory/block_adapters/__init__.py,sha256=
|
|
12
|
+
cache_dit/cache_factory/block_adapters/__init__.py,sha256=OZM5vJwmQIkoIwVmMxKXiHqKvs31NyAva1Z91C_ko3w,17547
|
|
13
13
|
cache_dit/cache_factory/block_adapters/block_adapters.py,sha256=EQBiJYyoInKU1ND69wTm7M0n5Ja4I8QW01SgRpBjSn8,21671
|
|
14
14
|
cache_dit/cache_factory/block_adapters/block_registers.py,sha256=ZeN2wGPmuf2u3puSsBx8x-rl3wRo8-cWcuWNcrssVfA,2553
|
|
15
15
|
cache_dit/cache_factory/cache_blocks/__init__.py,sha256=08Ox7kD05lkRKCOsVTdEZeKAWBheqpxfrAT1Nz7eclI,2916
|
|
16
16
|
cache_dit/cache_factory/cache_blocks/pattern_0_1_2.py,sha256=ElMps6_7uI74tSF9GDR_dEI0bZEhdzcepM29xFWnYo8,428
|
|
17
17
|
cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py,sha256=nf2f5wdxp6tfq9AhFyMyBeKiZfxh63WG1g8q-c2BBSg,10182
|
|
18
|
-
cache_dit/cache_factory/cache_blocks/pattern_base.py,sha256=
|
|
18
|
+
cache_dit/cache_factory/cache_blocks/pattern_base.py,sha256=f1ojREQcDoBtDG3dzl8t1g_Vru8140LVDRPWlY-kAXw,21311
|
|
19
19
|
cache_dit/cache_factory/cache_blocks/utils.py,sha256=dGOC1tMMOvcbvEgx44eTESKn_jsv-0RZ3tRHPa3wmQ4,1315
|
|
20
20
|
cache_dit/cache_factory/cache_contexts/__init__.py,sha256=rqnJ5__zqnpVHK5A1OqWILpNh5Ss-0ZDTGgtxZMKGGo,250
|
|
21
21
|
cache_dit/cache_factory/cache_contexts/cache_context.py,sha256=N88WLdd4KE9DuMWmpX8URcF55E2zWNwcKMxgVYkxMJY,13691
|
|
@@ -41,9 +41,9 @@ cache_dit/quantize/__init__.py,sha256=kWYoMAyZgBXu9BJlZjTQ0dRffW9GqeeY9_iTkXrb70
|
|
|
41
41
|
cache_dit/quantize/quantize_ao.py,sha256=mGspqYgQtenl3QnKPtsSYsSD7LbVX93f1M940bhXKLU,6066
|
|
42
42
|
cache_dit/quantize/quantize_interface.py,sha256=2s_R7xPSKuJeFpEGeLwRxnq_CqJcBG3a3lzyW5wh-UM,1241
|
|
43
43
|
cache_dit/quantize/quantize_svdq.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
|
-
cache_dit-0.2.
|
|
45
|
-
cache_dit-0.2.
|
|
46
|
-
cache_dit-0.2.
|
|
47
|
-
cache_dit-0.2.
|
|
48
|
-
cache_dit-0.2.
|
|
49
|
-
cache_dit-0.2.
|
|
44
|
+
cache_dit-0.2.32.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
|
|
45
|
+
cache_dit-0.2.32.dist-info/METADATA,sha256=WQ9GP-Om05j3NBvtifkmbz5t20XBU_-KJQptrK7jQBs,24222
|
|
46
|
+
cache_dit-0.2.32.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
47
|
+
cache_dit-0.2.32.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
|
|
48
|
+
cache_dit-0.2.32.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
|
|
49
|
+
cache_dit-0.2.32.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|