cache-dit 0.2.26__py3-none-any.whl → 0.2.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cache_dit/__init__.py +8 -6
- cache_dit/_version.py +2 -2
- cache_dit/cache_factory/__init__.py +17 -4
- cache_dit/cache_factory/block_adapters/__init__.py +555 -0
- cache_dit/cache_factory/block_adapters/block_adapters.py +538 -0
- cache_dit/cache_factory/block_adapters/block_registers.py +77 -0
- cache_dit/cache_factory/cache_adapters.py +262 -938
- cache_dit/cache_factory/cache_blocks/__init__.py +60 -11
- cache_dit/cache_factory/cache_blocks/pattern_0_1_2.py +2 -2
- cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py +45 -41
- cache_dit/cache_factory/cache_blocks/pattern_base.py +106 -80
- cache_dit/cache_factory/cache_blocks/utils.py +16 -10
- cache_dit/cache_factory/cache_contexts/__init__.py +5 -0
- cache_dit/cache_factory/cache_contexts/cache_context.py +327 -0
- cache_dit/cache_factory/cache_contexts/cache_manager.py +833 -0
- cache_dit/cache_factory/cache_interface.py +31 -31
- cache_dit/cache_factory/patch_functors/functor_chroma.py +3 -0
- cache_dit/cache_factory/patch_functors/functor_flux.py +4 -0
- cache_dit/quantize/quantize_ao.py +1 -0
- cache_dit/utils.py +26 -26
- {cache_dit-0.2.26.dist-info → cache_dit-0.2.28.dist-info}/METADATA +59 -23
- cache_dit-0.2.28.dist-info/RECORD +47 -0
- cache_dit/cache_factory/cache_context.py +0 -1155
- cache_dit-0.2.26.dist-info/RECORD +0 -42
- /cache_dit/cache_factory/{taylorseer.py → cache_contexts/taylorseer.py} +0 -0
- {cache_dit-0.2.26.dist-info → cache_dit-0.2.28.dist-info}/WHEEL +0 -0
- {cache_dit-0.2.26.dist-info → cache_dit-0.2.28.dist-info}/entry_points.txt +0 -0
- {cache_dit-0.2.26.dist-info → cache_dit-0.2.28.dist-info}/licenses/LICENSE +0 -0
- {cache_dit-0.2.26.dist-info → cache_dit-0.2.28.dist-info}/top_level.txt +0 -0
cache_dit/cache_factory/cache_interface.py
CHANGED

@@ -1,23 +1,18 @@
 from typing import Any, Tuple, List
 from diffusers import DiffusionPipeline
-from cache_dit.cache_factory.forward_pattern import ForwardPattern
 from cache_dit.cache_factory.cache_types import CacheType
-from cache_dit.cache_factory.
-from cache_dit.cache_factory.
+from cache_dit.cache_factory.block_adapters import BlockAdapter
+from cache_dit.cache_factory.block_adapters import BlockAdapterRegistry
+from cache_dit.cache_factory.cache_adapters import CachedAdapter

 from cache_dit.logger import init_logger

 logger = init_logger(__name__)


-def supported_pipelines() -> Tuple[int, List[str]]:
-    return UnifiedCacheAdapter.supported_pipelines()
-
-
 def enable_cache(
-    #
+    # DiffusionPipeline or BlockAdapter
     pipe_or_adapter: DiffusionPipeline | BlockAdapter | Any,
-    forward_pattern: ForwardPattern = ForwardPattern.Pattern_0,
     # Cache context kwargs
     Fn_compute_blocks: int = 8,
     Bn_compute_blocks: int = 0,
@@ -26,7 +21,7 @@ def enable_cache(
     max_continuous_cached_steps: int = -1,
     residual_diff_threshold: float = 0.08,
     # Cache CFG or not
-
+    enable_spearate_cfg: bool = False,
     cfg_compute_first: bool = False,
     cfg_diff_compute_separate: bool = True,
     # Hybird TaylorSeer
@@ -34,7 +29,7 @@ def enable_cache(
     enable_encoder_taylorseer: bool = False,
     taylorseer_cache_type: str = "residual",
     taylorseer_order: int = 2,
-    **
+    **other_cache_context_kwargs,
 ) -> DiffusionPipeline | Any:
     r"""
     Unified Cache API for almost Any Diffusion Transformers (with Transformer Blocks
@@ -48,9 +43,6 @@ def enable_cache(
             The standard Diffusion Pipeline or custom BlockAdapter (from cache-dit or user-defined).
             For example: cache_dit.enable_cache(FluxPipeline(...)). Please check https://github.com/vipshop/cache-dit/blob/main/docs/BlockAdapter.md
             for the usgae of BlockAdapter.
-        forward_pattern (`ForwardPattern`, *required*, defaults to `ForwardPattern.Pattern_0`):
-            The forward pattern of Transformer block, please check https://github.com/vipshop/cache-dit/tree/main?tab=readme-ov-file#forward-pattern-matching
-            for more details.
         Fn_compute_blocks (`int`, *required*, defaults to 8):
             Specifies that `DBCache` uses the **first n** Transformer blocks to fit the information
             at time step t, enabling the calculation of a more stable L1 diff and delivering more
@@ -72,9 +64,9 @@ def enable_cache(
         residual_diff_threshold (`float`, *required*, defaults to 0.08):
             he value of residual diff threshold, a higher value leads to faster performance at the
             cost of lower precision.
-
+        enable_spearate_cfg (`bool`, *required*, defaults to False):
             Whether to do separate cfg or not, such as Wan 2.1, Qwen-Image. For model that fused CFG
-            and non-CFG into single forward step, should set
+            and non-CFG into single forward step, should set enable_spearate_cfg as False, for example:
             CogVideoX, HunyuanVideo, Mochi, etc.
         cfg_compute_first (`bool`, *required*, defaults to False):
             Compute cfg forward first or not, default False, namely, 0, 2, 4, ..., -> non-CFG step;
@@ -97,7 +89,7 @@ def enable_cache(
             The order of taylorseer, higher values of n_derivatives will lead to longer computation time,
             but may improve precision significantly.
         other_cache_kwargs: (`dict`, *optional*, defaults to {})
-            Other cache context kwargs, please check https://github.com/vipshop/cache-dit/blob/main/src/cache_dit/cache_factory/cache_context.py
+            Other cache context kwargs, please check https://github.com/vipshop/cache-dit/blob/main/src/cache_dit/cache_factory/cache_contexts/cache_context.py
             for more details.

     Examples:
@@ -111,7 +103,11 @@ def enable_cache(
     """

     # Collect cache context kwargs
-    cache_context_kwargs =
+    cache_context_kwargs = other_cache_context_kwargs.copy()
+    if cache_type := cache_context_kwargs.get("cache_type", None):
+        if cache_type == CacheType.NONE:
+            return pipe_or_adapter
+
     cache_context_kwargs["cache_type"] = CacheType.DBCache
     cache_context_kwargs["Fn_compute_blocks"] = Fn_compute_blocks
     cache_context_kwargs["Bn_compute_blocks"] = Bn_compute_blocks
@@ -121,7 +117,7 @@ def enable_cache(
         max_continuous_cached_steps
     )
     cache_context_kwargs["residual_diff_threshold"] = residual_diff_threshold
-    cache_context_kwargs["
+    cache_context_kwargs["enable_spearate_cfg"] = enable_spearate_cfg
     cache_context_kwargs["cfg_compute_first"] = cfg_compute_first
     cache_context_kwargs["cfg_diff_compute_separate"] = (
         cfg_diff_compute_separate
@@ -131,31 +127,35 @@ def enable_cache(
         enable_encoder_taylorseer
     )
     cache_context_kwargs["taylorseer_cache_type"] = taylorseer_cache_type
-
-        cache_context_kwargs["taylorseer_kwargs"][
-            "n_derivatives"
-        ] = taylorseer_order
-    else:
-        cache_context_kwargs["taylorseer_kwargs"] = {
-            "n_derivatives": taylorseer_order
-        }
+    cache_context_kwargs["taylorseer_order"] = taylorseer_order

     if isinstance(pipe_or_adapter, BlockAdapter):
-        return
+        return CachedAdapter.apply(
             pipe=None,
             block_adapter=pipe_or_adapter,
-            forward_pattern=forward_pattern,
             **cache_context_kwargs,
         )
     elif isinstance(pipe_or_adapter, DiffusionPipeline):
-        return
+        return CachedAdapter.apply(
             pipe=pipe_or_adapter,
             block_adapter=None,
-            forward_pattern=forward_pattern,
             **cache_context_kwargs,
         )
     else:
         raise ValueError(
+            f"type: {type(pipe_or_adapter)} is not valid, "
             "Please pass DiffusionPipeline or BlockAdapter"
             "for the 1's position param: pipe_or_adapter"
         )
+
+
+def supported_pipelines(
+    **kwargs,
+) -> Tuple[int, List[str]]:
+    return BlockAdapterRegistry.supported_pipelines(**kwargs)
+
+
+def get_adapter(
+    pipe: DiffusionPipeline | str | Any,
+) -> BlockAdapter:
+    return BlockAdapterRegistry.get_adapter(pipe)
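The net effect of this file: `enable_cache` no longer takes a top-level `forward_pattern` argument (pattern selection now belongs to `BlockAdapter` and the new `BlockAdapterRegistry`), the nested `taylorseer_kwargs={"n_derivatives": ...}` dict is flattened into a plain `taylorseer_order` kwarg, and both pipelines and adapters are routed through `CachedAdapter.apply(...)`. A minimal migration sketch, assuming `enable_cache`, `supported_pipelines`, and `get_adapter` are re-exported at the package top level as the README examples elsewhere in this diff suggest; the model id is illustrative only:

```python
import cache_dit
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image")  # illustrative model id

# 0.2.26 (old): forward_pattern was a top-level kwarg, and the TaylorSeer
# order was nested, e.g. taylorseer_kwargs={"n_derivatives": 2}.
# 0.2.28 (new): patterns are resolved via the BlockAdapter registry, and
# the order is a flat kwarg:
cache_dit.enable_cache(
    pipe,
    Fn_compute_blocks=8,
    Bn_compute_blocks=0,
    residual_diff_threshold=0.08,
    taylorseer_order=2,  # replaces taylorseer_kwargs={"n_derivatives": 2}
)

# New helpers added at the bottom of the hunk:
count, names = cache_dit.supported_pipelines()  # now backed by BlockAdapterRegistry
adapter = cache_dit.get_adapter(pipe)           # look up the matching BlockAdapter
```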
cache_dit/cache_factory/patch_functors/functor_chroma.py
CHANGED

@@ -30,6 +30,9 @@ class ChromaPatchFunctor(PatchFunctor):
         blocks: torch.nn.ModuleList = None,
         **kwargs,
     ) -> ChromaTransformer2DModel:
+        if getattr(transformer, "_is_patched", False):
+            return transformer
+
         if blocks is None:
             blocks = transformer.single_transformer_blocks

cache_dit/cache_factory/patch_functors/functor_flux.py
CHANGED

@@ -30,6 +30,10 @@ class FluxPatchFunctor(PatchFunctor):
         blocks: torch.nn.ModuleList = None,
         **kwargs,
     ) -> FluxTransformer2DModel:
+
+        if getattr(transformer, "_is_patched", False):
+            return transformer
+
         if blocks is None:
             blocks = transformer.single_transformer_blocks

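Both patch functors gain the same early-exit guard, which makes patching idempotent: a transformer that was already rewritten is returned untouched instead of being patched a second time. The general shape of the pattern, as a standalone sketch (the `_rewrite_blocks` helper is hypothetical, and the real functors presumably set `_is_patched` once their own rewriting succeeds):

```python
import torch

def apply_patch(transformer: torch.nn.Module) -> torch.nn.Module:
    # Early-exit guard, as added in both hunks above: a second call
    # sees the marker attribute and becomes a no-op.
    if getattr(transformer, "_is_patched", False):
        return transformer

    _rewrite_blocks(transformer)     # hypothetical: the functor-specific work
    transformer._is_patched = True   # mark so repeated calls return early
    return transformer

def _rewrite_blocks(transformer: torch.nn.Module) -> None:
    # Placeholder for the actual block rewriting logic.
    pass
```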
cache_dit/quantize/quantize_ao.py
CHANGED

@@ -179,6 +179,7 @@ def quantize_ao(
     force_empty_cache()

     logger.info(
+        f"Quantized Method: {quant_type:>5}\n"
         f"Quantized Linear Layers: {num_quant_linear:>5}\n"
         f"Skipped Linear Layers: {num_skip_linear:>5}\n"
         f"Total Linear Layers: {num_linear_layers:>5}\n"
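The new log line reuses the `:>5` right-alignment of the existing counters, so the method name lines up with the layer counts. A quick illustration with made-up values:

```python
quant_type = "fp8"  # illustrative values only
num_quant_linear, num_skip_linear, num_linear_layers = 320, 12, 332
print(
    f"Quantized Method: {quant_type:>5}\n"
    f"Quantized Linear Layers: {num_quant_linear:>5}\n"
    f"Skipped Linear Layers: {num_skip_linear:>5}\n"
    f"Total Linear Layers: {num_linear_layers:>5}\n"
)
```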
cache_dit/utils.py
CHANGED
@@ -7,7 +7,6 @@ from diffusers import DiffusionPipeline

 from typing import Dict, Any
 from cache_dit.logger import init_logger
-from cache_dit.cache_factory import CacheType


 logger = init_logger(__name__)
@@ -30,27 +29,32 @@ class CacheStats:


 def summary(
-
+    pipe_or_module: DiffusionPipeline | torch.nn.Module | Any,
     details: bool = False,
     logging: bool = True,
 ) -> CacheStats:
     cache_stats = CacheStats()
-
-    if not isinstance(
-        assert hasattr(
-
+
+    if not isinstance(pipe_or_module, torch.nn.Module):
+        assert hasattr(pipe_or_module, "transformer")
+        module = pipe_or_module.transformer
+        cls_name = module.__class__.__name__
     else:
-
+        module = pipe_or_module
+
+    cls_name = module.__class__.__name__
+    if isinstance(module, torch.nn.ModuleList):
+        cls_name = module[0].__class__.__name__

-    if hasattr(
-        cache_options =
+    if hasattr(module, "_cache_context_kwargs"):
+        cache_options = module._cache_context_kwargs
         cache_stats.cache_options = cache_options
         if logging:
             print(f"\n🤗Cache Options: {cls_name}\n\n{cache_options}")

-    if hasattr(
-        cached_steps: list[int] =
-        residual_diffs: dict[str, float] = dict(
+    if hasattr(module, "_cached_steps"):
+        cached_steps: list[int] = module._cached_steps
+        residual_diffs: dict[str, float] = dict(module._residual_diffs)
         cache_stats.cached_steps = cached_steps
         cache_stats.residual_diffs = residual_diffs

@@ -91,11 +95,9 @@ def summary(
         compact=True,
     )

-    if hasattr(
-        cfg_cached_steps: list[int] =
-        cfg_residual_diffs: dict[str, float] = dict(
-            transformer._cfg_residual_diffs
-        )
+    if hasattr(module, "_cfg_cached_steps"):
+        cfg_cached_steps: list[int] = module._cfg_cached_steps
+        cfg_residual_diffs: dict[str, float] = dict(module._cfg_residual_diffs)
         cache_stats.cfg_cached_steps = cfg_cached_steps
         cache_stats.cfg_residual_diffs = cfg_residual_diffs

@@ -153,9 +155,15 @@ def strify(
         cache_options = stats.cache_options
         cached_steps = len(stats.cached_steps)
     elif isinstance(pipe_or_stats, dict):
+        from cache_dit.cache_factory import CacheType
+
         # Assume cache_context_kwargs
         cache_options = pipe_or_stats
         cached_steps = None
+        cache_type = cache_options.get("cache_type", CacheType.NONE)
+
+        if cache_type == CacheType.NONE:
+            return "NONE"
     else:
         raise ValueError(
             "Please set pipe_or_stats param as one of: "
@@ -165,17 +173,9 @@ def strify(
     if not cache_options:
         return "NONE"

-    if cache_options.get("cache_type", None) != CacheType.DBCache:
-        return "NONE"
-
     def get_taylorseer_order():
         taylorseer_order = 0
-        if "
-            if "n_derivatives" in cache_options["taylorseer_kwargs"]:
-                taylorseer_order = cache_options["taylorseer_kwargs"][
-                    "n_derivatives"
-                ]
-        elif "taylorseer_order" in cache_options:
+        if "taylorseer_order" in cache_options:
             taylorseer_order = cache_options["taylorseer_order"]
         return taylorseer_order

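`summary()` now takes a `pipe_or_module` that may be a pipeline, a bare `torch.nn.Module`, or a `ModuleList` of blocks, and all stats are read from the resolved `module` instead of a hard-coded `transformer` attribute; `strify()` likewise accepts a raw cache-options dict and short-circuits to `"NONE"` for `CacheType.NONE`. A usage sketch, assuming `summary` and `strify` are exported at the package top level as the README's `cache_dit.summary()` reference suggests; the model id is illustrative:

```python
import cache_dit
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image")  # illustrative
cache_dit.enable_cache(pipe)
# ... run at least one pipe(...) inference so stats are recorded ...

stats = cache_dit.summary(pipe, details=True)  # pipeline: resolves pipe.transformer
stats = cache_dit.summary(pipe.transformer)    # or pass the module directly
print(cache_dit.strify(stats))                 # compact tag; "NONE" when uncached
```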
{cache_dit-0.2.26.dist-info → cache_dit-0.2.28.dist-info}/METADATA
CHANGED

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: cache_dit
-Version: 0.2.
-Summary: 🤗
+Version: 0.2.28
+Summary: 🤗 A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers
 Author: DefTruth, vipshop.com, etc.
 Maintainer: DefTruth, vipshop.com, etc
 Project-URL: Repository, https://github.com/vipshop/cache-dit.git
@@ -44,7 +44,7 @@ Dynamic: requires-python
 <img src=https://github.com/vipshop/cache-dit/raw/main/assets/cache-dit-logo.png height="120">

 <p align="center">
-
+  A <b>Unified</b> and Training-free <b>Cache Acceleration</b> Toolbox for <b>Diffusion Transformers</b> <br>
 ♥️ <b>Cache Acceleration</b> with <b>One-line</b> Code ~ ♥️
 </p>
 <div align='center'>
@@ -59,23 +59,24 @@ Dynamic: requires-python
 🔥<b><a href="#unified">Unified Cache APIs</a> | <a href="#dbcache">DBCache</a> | <a href="#taylorseer">Hybrid TaylorSeer</a> | <a href="#cfg">Hybrid Cache CFG</a></b>🔥
 </p>
 <p align="center">
-🎉Now, <b>cache-dit</b> covers <b>
+🎉Now, <b>cache-dit</b> covers <b>100%</b> Diffusers' <b>DiT-based</b> Pipelines🎉<br>
 🔥<b><a href="#supported">Qwen-Image</a> | <a href="#supported">FLUX.1</a> | <a href="#supported">Wan 2.1/2.2</a> | <a href="#supported"> ... </a> | <a href="#supported">CogVideoX</a></b>🔥
 </p>
 </div>

 ## 🔥News

-- [2025-
-- [2025-08-
-- [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x⚡️** speedup! Check the example at [run_qwen_image_edit.py](./examples/run_qwen_image_edit.py).
+- [2025-09-03] 🎉[**Wan2.2-MoE**](https://github.com/Wan-Video) **2.4x⚡️** speedup! Please refer to [run_wan_2.2.py](./examples/pipeline/run_wan_2.2.py) as an example.
+- [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x⚡️** speedup! Check the example: [run_qwen_image_edit.py](./examples/pipeline/run_qwen_image_edit.py).
 - [2025-08-12] 🎉First caching mechanism in [QwenLM/Qwen-Image](https://github.com/QwenLM/Qwen-Image) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/QwenLM/Qwen-Image/pull/61).
-- [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x⚡️** speedup! Please refer [run_qwen_image.py](./examples/run_qwen_image.py) as an example.
+- [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x⚡️** speedup! Please refer to [run_qwen_image.py](./examples/pipeline/run_qwen_image.py) as an example.

 <details>
 <summary> Previous News </summary>
-
-- [2025-
+
+- [2025-09-01] 📚[**Hybird Forward Pattern**](#unified) is supported! Please check [FLUX.1-dev](./examples/run_flux_adapter.py) as an example.
+- [2025-08-29] 🔥</b>Covers <b>100%</b> Diffusers' <b>DiT-based</b> Pipelines: **[BlockAdapter](#unified) + [Pattern Matching](#unified).**
+- [2025-08-10] 🔥[**FLUX.1-Kontext-dev**](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) is supported! Please refer [run_flux_kontext.py](./examples/pipeline/run_flux_kontext.py) as an example.
 - [2025-07-18] 🎉First caching mechanism in [🤗huggingface/flux-fast](https://github.com/huggingface/flux-fast) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check the [PR](https://github.com/huggingface/flux-fast/pull/13).
 - [2025-07-13] **[🤗flux-faster](https://github.com/xlite-dev/flux-faster)** is released! **3.3x** speedup for FLUX.1 on NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)**.

@@ -88,6 +89,11 @@ Dynamic: requires-python
 - [⚙️Installation](#️installation)
 - [🔥Supported Models](#supported)
 - [🎉Unified Cache APIs](#unified)
+  - [📚Forward Pattern Matching](#unified)
+  - [🎉Cache with One-line Code](#unified)
+  - [🔥Automatic Block Adapter](#unified)
+  - [📚Hybird Forward Pattern](#unified)
+  - [🤖Cache Acceleration Stats](#unified)
 - [⚡️Dual Block Cache](#dbcache)
 - [🔥Hybrid TaylorSeer](#taylorseer)
 - [⚡️Hybrid Cache CFG](#cfg)
@@ -176,11 +182,11 @@ Currently, **cache-dit** library supports almost **Any** Diffusion Transformers

 Currently, for any **Diffusion** models with **Transformer Blocks** that match the specific **Input/Output patterns**, we can use the **Unified Cache APIs** from **cache-dit**, namely, the `cache_dit.enable_cache(...)` API. The **Unified Cache APIs** are currently in the experimental phase; please stay tuned for updates. The supported patterns are listed as follows:

-
+

 ### ♥️Cache Acceleration with One-line Code

-In most cases, you only need to call **one-line** of code, that is `cache_dit.enable_cache(...)`. After this API is called, you just need to call the pipe as normal. The `pipe` param can be **any** Diffusion Pipeline. Please refer to [Qwen-Image](./examples/run_qwen_image.py) as an example.
+In most cases, you only need to call **one-line** of code, that is `cache_dit.enable_cache(...)`. After this API is called, you just need to call the pipe as normal. The `pipe` param can be **any** Diffusion Pipeline. Please refer to [Qwen-Image](./examples/pipeline/run_qwen_image.py) as an example.

 ```python
 import cache_dit
@@ -198,17 +204,20 @@ output = pipe(...)

 ### 🔥Automatic Block Adapter

-But in some cases, you may have a **modified** Diffusion Pipeline or Transformer that is not located in the diffusers library or not officially supported by **cache-dit** at this time. The **BlockAdapter** can help you solve this problems. Please refer to [Qwen-Image w/ BlockAdapter](./examples/run_qwen_image_adapter.py) as an example.
+But in some cases, you may have a **modified** Diffusion Pipeline or Transformer that is not located in the diffusers library or not officially supported by **cache-dit** at this time. The **BlockAdapter** can help you solve this problems. Please refer to [🔥Qwen-Image w/ BlockAdapter](./examples/adapter/run_qwen_image_adapter.py) as an example.

 ```python
 from cache_dit import ForwardPattern, BlockAdapter

-# Use BlockAdapter with `auto` mode.
+# Use 🔥BlockAdapter with `auto` mode.
 cache_dit.enable_cache(
-    BlockAdapter(
-
-
-
+    BlockAdapter(
+        # Any DiffusionPipeline, Qwen-Image, etc.
+        pipe=pipe, auto=True,
+        # Check `📚Forward Pattern Matching` documentation and hack the code of
+        # of Qwen-Image, you will find that it has satisfied `FORWARD_PATTERN_1`.
+        forward_pattern=ForwardPattern.Pattern_1,
+    ),
 )

 # Or, manually setup transformer configurations.
@@ -218,12 +227,39 @@ cache_dit.enable_cache(
         transformer=pipe.transformer,
         blocks=pipe.transformer.transformer_blocks,
         blocks_name="transformer_blocks",
+        forward_pattern=ForwardPattern.Pattern_1,
     ),
-    forward_pattern=ForwardPattern.Pattern_1,
 )
 ```
 For such situations, **BlockAdapter** can help you quickly apply various cache acceleration features to your own Diffusion Pipelines and Transformers. Please check the [📚BlockAdapter.md](./docs/BlockAdapter.md) for more details.

+### 📚Hybird Forward Pattern
+
+Sometimes, a Transformer class will contain more than one transformer `blocks`. For example, **FLUX.1** (HiDream, Chroma, etc) contains transformer_blocks and single_transformer_blocks (with different forward patterns). The **BlockAdapter** can also help you solve this problem. Please refer to [📚FLUX.1](./examples/adapter/run_flux_adapter.py) as an example.
+
+```python
+# For diffusers <= 0.34.0, FLUX.1 transformer_blocks and
+# single_transformer_blocks have different forward patterns.
+cache_dit.enable_cache(
+    BlockAdapter(
+        pipe=pipe, # FLUX.1, etc.
+        transformer=pipe.transformer,
+        blocks=[
+            pipe.transformer.transformer_blocks,
+            pipe.transformer.single_transformer_blocks,
+        ],
+        blocks_name=[
+            "transformer_blocks",
+            "single_transformer_blocks",
+        ],
+        forward_pattern=[
+            ForwardPattern.Pattern_1,
+            ForwardPattern.Pattern_3,
+        ],
+    ),
+)
+```
+
 ### 🤖Cache Acceleration Stats Summary

 After finishing each inference of `pipe(...)`, you can call the `cache_dit.summary()` API on pipe to get the details of the **Cache Acceleration Stats** for the current inference.
@@ -347,7 +383,7 @@ cache_dit.enable_cache(

 <div id="cfg"></div>

-cache-dit supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `
+cache-dit supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `enable_spearate_cfg` param to **False (default)**. Otherwise, set it to True. For examples:

 ```python
 cache_dit.enable_cache(
@@ -355,10 +391,10 @@ cache_dit.enable_cache(
     ...,
     # CFG: classifier free guidance or not
     # For model that fused CFG and non-CFG into single forward step,
-    # should set
+    # should set enable_spearate_cfg as False. For example, set it as True
     # for Wan 2.1/Qwen-Image and set it as False for FLUX.1, HunyuanVideo,
     # CogVideoX, Mochi, LTXVideo, Allegro, CogView3Plus, EasyAnimate, SD3, etc.
-
+    enable_spearate_cfg=True, # Wan 2.1, Qwen-Image, CogView4, Cosmos, SkyReelsV2, etc.
     # Compute cfg forward first or not, default False, namely,
     # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
     cfg_compute_first=False,
@@ -433,7 +469,7 @@ The **cache-dit** codebase is adapted from FBCache. Special thanks to their exce

 ```BibTeX
 @misc{cache-dit@2025,
-  title={cache-dit:
+  title={cache-dit: A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers},
   url={https://github.com/vipshop/cache-dit.git},
   note={Open-source software available at https://github.com/vipshop/cache-dit.git},
   author={vipshop.com},
cache_dit-0.2.28.dist-info/RECORD
ADDED

@@ -0,0 +1,47 @@
+cache_dit/__init__.py,sha256=V4jCkTic4XvWojCUqYcjlvxiNM2DjGQbOLk6R-tAx2A,1191
+cache_dit/_version.py,sha256=hCl1MKhh249NDbigjeJY-mrKYvjPFbJ7oklAepBQrto,706
+cache_dit/logger.py,sha256=0zsu42hN-3-rgGC_C29ms1IvVpV4_b4_SwJCKSenxBE,4304
+cache_dit/utils.py,sha256=pb5298XKmaZDoHwyteYRhixAG_0DGWrvvaObeShIhOM,7146
+cache_dit/cache_factory/.gitignore,sha256=5Cb-qT9wsTUoMJ7vACDF7ZcLpAXhi5v-xdcWSRit988,23
+cache_dit/cache_factory/__init__.py,sha256=M8q9furJOq2AZcLHRuCXZCjR9fNSELoEYdsCofIjMAo,1037
+cache_dit/cache_factory/cache_adapters.py,sha256=q7MxY44qw90h449Gr8W5iJjSwXPJR-YIyRmu_KJnQo0,13284
+cache_dit/cache_factory/cache_interface.py,sha256=2jcuTZ4D_P0M5pSw0z3BMPalobYen3YO1yKvRjaQjdQ,8332
+cache_dit/cache_factory/cache_types.py,sha256=FIFa6ZBfvvSMMHyBBhvarvgg2Y2wbRgITcG_uGylGe0,991
+cache_dit/cache_factory/forward_pattern.py,sha256=FumlCuZ-TSmSYH0hGBHctSJ-oGLCftdZjLygqhsmdR4,2258
+cache_dit/cache_factory/utils.py,sha256=XkVM9AXcB9zYq8-S8QKAsGz80r3tA6U3lBNGDGeHOe4,1871
+cache_dit/cache_factory/block_adapters/__init__.py,sha256=UFuHxNR7Y0RZoCl97wn0u2d_2rj8PzNsWfzgda5AoKM,17395
+cache_dit/cache_factory/block_adapters/block_adapters.py,sha256=WsqGUDSDU_5-pIXwDqAK_k4a-4jgbFGoLCoF6kAjLt8,19198
+cache_dit/cache_factory/block_adapters/block_registers.py,sha256=99ouWioxldlZJYQWhcUkOu94f8vO9V9QGzVNhKWtyO4,2005
+cache_dit/cache_factory/cache_blocks/__init__.py,sha256=OWjnpJxA8EJVoRzuyb5miuiRphUFj831-bbtWsTDjnM,2750
+cache_dit/cache_factory/cache_blocks/pattern_0_1_2.py,sha256=ElMps6_7uI74tSF9GDR_dEI0bZEhdzcepM29xFWnYo8,428
+cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py,sha256=CtBr6nvtAW8SAeEwPwiwWtPgrmwyb5ukb-j3IwFULJU,9953
+cache_dit/cache_factory/cache_blocks/pattern_base.py,sha256=XSDy3hsaKbAZPGZY92YgGA0qLgjQyIX8irQkb2R5T2c,20331
+cache_dit/cache_factory/cache_blocks/utils.py,sha256=wfreGvtK22hDnXuw0z0hUw-9ywu91FnExfPkP8ZzlkA,891
+cache_dit/cache_factory/cache_contexts/__init__.py,sha256=rqnJ5__zqnpVHK5A1OqWILpNh5Ss-0ZDTGgtxZMKGGo,250
+cache_dit/cache_factory/cache_contexts/cache_context.py,sha256=N88WLdd4KE9DuMWmpX8URcF55E2zWNwcKMxgVYkxMJY,13691
+cache_dit/cache_factory/cache_contexts/cache_manager.py,sha256=wSghuX93KmCxE4pFEVKuyrO0Jt5STu_x4CxypS2EdxI,34276
+cache_dit/cache_factory/cache_contexts/taylorseer.py,sha256=etSUIZzDvqW3ScKCbccTPcFaSmxV1T-xAXdk-p3e3wk,3802
+cache_dit/cache_factory/patch_functors/__init__.py,sha256=yK05iONMGILsTZ83ynrUUJtiJKJ_FDjxmVIzRLy416s,252
+cache_dit/cache_factory/patch_functors/functor_base.py,sha256=Ahk0fTfrHgNdEl-9JSkACvfyyv9G-Ei5OSz7XBIlX5o,357
+cache_dit/cache_factory/patch_functors/functor_chroma.py,sha256=IFCuFU8HCnohM9Qpij7oU_UG1T8Ok8uTI3B9Nw7BHfY,10029
+cache_dit/cache_factory/patch_functors/functor_flux.py,sha256=3Q8x-PEeDiUtuv-FnQ2qEzo4qbpsYOPB9uf7YridE88,9538
+cache_dit/compile/__init__.py,sha256=FcTVzCeyypl-mxlc59_ehHL3lBNiDAFsXuRoJ-5Cfi0,56
+cache_dit/compile/utils.py,sha256=nN2OIrSdwRR5zGxJinKDqb07pXpvTNTF3g_OgLkeeBU,3858
+cache_dit/custom_ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cache_dit/custom_ops/triton_taylorseer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cache_dit/metrics/__init__.py,sha256=RaUhl5dieF40RqnizGzR30qoJJ9dyMUEADwgwMaMQrE,575
+cache_dit/metrics/config.py,sha256=ieOgD9ayz722RjVzk24bSIqS2D6o7TZjGk8KeXV-OLQ,551
+cache_dit/metrics/fid.py,sha256=9Ivtazl6mW0Bon2VXa-Ia5Xj2ewxRD3V1Qkd69zYM3Y,17066
+cache_dit/metrics/inception.py,sha256=pBVe2X6ylLPIXTG4-GWDM9DWnCviMJbJ45R3ulhktR0,12759
+cache_dit/metrics/lpips.py,sha256=I2qCNi6qJh5TRsaIsdxO0WoRX1DN7U_H3zS0oCSahYM,1032
+cache_dit/metrics/metrics.py,sha256=8jvM1sF-nDxUuwCRy44QEoo4dYVLCQVh1QyAMs4eaQY,27840
+cache_dit/quantize/__init__.py,sha256=kWYoMAyZgBXu9BJlZjTQ0dRffW9GqeeY9_iTkXrb70A,59
+cache_dit/quantize/quantize_ao.py,sha256=mGspqYgQtenl3QnKPtsSYsSD7LbVX93f1M940bhXKLU,6066
+cache_dit/quantize/quantize_interface.py,sha256=2s_R7xPSKuJeFpEGeLwRxnq_CqJcBG3a3lzyW5wh-UM,1241
+cache_dit/quantize/quantize_svdq.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cache_dit-0.2.28.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
+cache_dit-0.2.28.dist-info/METADATA,sha256=03FPh4nIDfjSFMfkDz-sWr2g3l30UsQek8VjQ6TPn8g,23204
+cache_dit-0.2.28.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cache_dit-0.2.28.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
+cache_dit-0.2.28.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
+cache_dit-0.2.28.dist-info/RECORD,,
|