cache-dit 0.2.27__py3-none-any.whl → 0.2.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cache_dit/__init__.py +2 -0
- cache_dit/_version.py +2 -2
- cache_dit/cache_factory/__init__.py +3 -0
- cache_dit/cache_factory/block_adapters/__init__.py +105 -111
- cache_dit/cache_factory/block_adapters/block_adapters.py +314 -41
- cache_dit/cache_factory/block_adapters/block_registers.py +15 -6
- cache_dit/cache_factory/cache_adapters.py +244 -116
- cache_dit/cache_factory/cache_blocks/__init__.py +55 -4
- cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py +36 -37
- cache_dit/cache_factory/cache_blocks/pattern_base.py +83 -76
- cache_dit/cache_factory/cache_blocks/utils.py +26 -8
- cache_dit/cache_factory/cache_contexts/__init__.py +4 -1
- cache_dit/cache_factory/cache_contexts/cache_context.py +14 -876
- cache_dit/cache_factory/cache_contexts/cache_manager.py +847 -0
- cache_dit/cache_factory/cache_interface.py +91 -24
- cache_dit/cache_factory/patch_functors/functor_chroma.py +1 -1
- cache_dit/cache_factory/patch_functors/functor_flux.py +1 -1
- cache_dit/utils.py +164 -58
- {cache_dit-0.2.27.dist-info → cache_dit-0.2.29.dist-info}/METADATA +59 -34
- {cache_dit-0.2.27.dist-info → cache_dit-0.2.29.dist-info}/RECORD +24 -24
- {cache_dit-0.2.27.dist-info → cache_dit-0.2.29.dist-info}/WHEEL +0 -0
- {cache_dit-0.2.27.dist-info → cache_dit-0.2.29.dist-info}/entry_points.txt +0 -0
- {cache_dit-0.2.27.dist-info → cache_dit-0.2.29.dist-info}/licenses/LICENSE +0 -0
- {cache_dit-0.2.27.dist-info → cache_dit-0.2.29.dist-info}/top_level.txt +0 -0
cache_dit/cache_factory/cache_interface.py
CHANGED

@@ -1,9 +1,11 @@
+import torch
 from typing import Any, Tuple, List
 from diffusers import DiffusionPipeline
 from cache_dit.cache_factory.cache_types import CacheType
 from cache_dit.cache_factory.block_adapters import BlockAdapter
 from cache_dit.cache_factory.block_adapters import BlockAdapterRegistry
 from cache_dit.cache_factory.cache_adapters import CachedAdapter
+from cache_dit.cache_factory.cache_contexts import CachedContextManager
 
 from cache_dit.logger import init_logger
 
@@ -12,7 +14,7 @@ logger = init_logger(__name__)
 
 def enable_cache(
     # DiffusionPipeline or BlockAdapter
-    pipe_or_adapter: DiffusionPipeline | BlockAdapter
+    pipe_or_adapter: DiffusionPipeline | BlockAdapter,
     # Cache context kwargs
     Fn_compute_blocks: int = 8,
     Bn_compute_blocks: int = 0,
@@ -21,7 +23,7 @@ def enable_cache(
     max_continuous_cached_steps: int = -1,
     residual_diff_threshold: float = 0.08,
     # Cache CFG or not
-
+    enable_spearate_cfg: bool = False,
     cfg_compute_first: bool = False,
     cfg_diff_compute_separate: bool = True,
     # Hybird TaylorSeer
@@ -30,7 +32,7 @@ def enable_cache(
     taylorseer_cache_type: str = "residual",
     taylorseer_order: int = 2,
     **other_cache_context_kwargs,
-) ->
+) -> BlockAdapter:
     r"""
     Unified Cache API for almost Any Diffusion Transformers (with Transformer Blocks
     that match the specific Input and Output patterns).
@@ -64,9 +66,9 @@ def enable_cache(
         residual_diff_threshold (`float`, *required*, defaults to 0.08):
             he value of residual diff threshold, a higher value leads to faster performance at the
             cost of lower precision.
-
+        enable_spearate_cfg (`bool`, *required*, defaults to False):
             Whether to do separate cfg or not, such as Wan 2.1, Qwen-Image. For model that fused CFG
-            and non-CFG into single forward step, should set
+            and non-CFG into single forward step, should set enable_spearate_cfg as False, for example:
             CogVideoX, HunyuanVideo, Mochi, etc.
         cfg_compute_first (`bool`, *required*, defaults to False):
             Compute cfg forward first or not, default False, namely, 0, 2, 4, ..., -> non-CFG step;
@@ -89,7 +91,7 @@ def enable_cache(
            The order of taylorseer, higher values of n_derivatives will lead to longer computation time,
            but may improve precision significantly.
        other_cache_kwargs: (`dict`, *optional*, defaults to {})
-           Other cache context kwargs, please check https://github.com/vipshop/cache-dit/blob/main/src/cache_dit/cache_factory/cache_context.py
+           Other cache context kwargs, please check https://github.com/vipshop/cache-dit/blob/main/src/cache_dit/cache_factory/cache_contexts/cache_context.py
            for more details.
 
     Examples:
@@ -104,6 +106,10 @@ def enable_cache(
 
     # Collect cache context kwargs
     cache_context_kwargs = other_cache_context_kwargs.copy()
+    if cache_type := cache_context_kwargs.get("cache_type", None):
+        if cache_type == CacheType.NONE:
+            return pipe_or_adapter
+
     cache_context_kwargs["cache_type"] = CacheType.DBCache
     cache_context_kwargs["Fn_compute_blocks"] = Fn_compute_blocks
     cache_context_kwargs["Bn_compute_blocks"] = Bn_compute_blocks
@@ -113,7 +119,7 @@ def enable_cache(
         max_continuous_cached_steps
     )
     cache_context_kwargs["residual_diff_threshold"] = residual_diff_threshold
-    cache_context_kwargs["
+    cache_context_kwargs["enable_spearate_cfg"] = enable_spearate_cfg
     cache_context_kwargs["cfg_compute_first"] = cfg_compute_first
     cache_context_kwargs["cfg_diff_compute_separate"] = (
         cfg_diff_compute_separate
@@ -123,25 +129,11 @@ def enable_cache(
         enable_encoder_taylorseer
     )
     cache_context_kwargs["taylorseer_cache_type"] = taylorseer_cache_type
-
-        cache_context_kwargs["taylorseer_kwargs"][
-            "n_derivatives"
-        ] = taylorseer_order
-    else:
-        cache_context_kwargs["taylorseer_kwargs"] = {
-            "n_derivatives": taylorseer_order
-        }
+    cache_context_kwargs["taylorseer_order"] = taylorseer_order
 
-    if isinstance(pipe_or_adapter, BlockAdapter):
-        return CachedAdapter.apply(
-            pipe=None,
-            block_adapter=pipe_or_adapter,
-            **cache_context_kwargs,
-        )
-    elif isinstance(pipe_or_adapter, DiffusionPipeline):
+    if isinstance(pipe_or_adapter, (DiffusionPipeline, BlockAdapter)):
         return CachedAdapter.apply(
-
-            block_adapter=None,
+            pipe_or_adapter,
             **cache_context_kwargs,
         )
     else:
@@ -152,6 +144,81 @@ def enable_cache(
         )
 
 
+def disable_cache(
+    # DiffusionPipeline or BlockAdapter
+    pipe_or_adapter: DiffusionPipeline | BlockAdapter,
+):
+    from cache_dit.cache_factory.cache_blocks.utils import (
+        remove_cached_stats,
+    )
+
+    def _disable_blocks(blocks: torch.nn.ModuleList):
+        if blocks is None:
+            return
+        if hasattr(blocks, "_forward_pattern"):
+            del blocks._forward_pattern
+        if hasattr(blocks, "_cache_context_kwargs"):
+            del blocks._cache_context_kwargs
+        remove_cached_stats(blocks)
+
+    def _disable_transformer(transformer: torch.nn.Module):
+        if transformer is None or not BlockAdapter.is_cached(transformer):
+            return
+        if original_forward := getattr(transformer, "_original_forward"):
+            transformer.forward = original_forward.__get__(transformer)
+            del transformer._original_forward
+        if hasattr(transformer, "_is_cached"):
+            del transformer._is_cached
+        if hasattr(transformer, "_forward_pattern"):
+            del transformer._forward_pattern
+        if hasattr(transformer, "_has_separate_cfg"):
+            del transformer._has_separate_cfg
+        if hasattr(transformer, "_cache_context_kwargs"):
+            del transformer._cache_context_kwargs
+        remove_cached_stats(transformer)
+        for blocks in BlockAdapter.find_blocks(transformer):
+            _disable_blocks(blocks)
+
+    def _disable_pipe(pipe: DiffusionPipeline):
+        if pipe is None or not BlockAdapter.is_cached(pipe):
+            return
+        if original_call := getattr(pipe, "_original_call"):
+            pipe.__class__.__call__ = original_call
+            del pipe.__class__._original_call
+        if cache_manager := getattr(pipe, "_cache_manager"):
+            assert isinstance(cache_manager, CachedContextManager)
+            cache_manager.clear_contexts()
+            del pipe._cache_manager
+        if hasattr(pipe, "_is_cached"):
+            del pipe.__class__._is_cached
+        if hasattr(pipe, "_cache_context_kwargs"):
+            del pipe._cache_context_kwargs
+        remove_cached_stats(pipe)
+
+    if isinstance(pipe_or_adapter, DiffusionPipeline):
+        pipe = pipe_or_adapter
+        _disable_pipe(pipe)
+        if hasattr(pipe, "transformer"):
+            _disable_transformer(pipe.transformer)
+        if hasattr(pipe, "transformer_2"):  # Wan 2.2
+            _disable_transformer(pipe.transformer_2)
+        pipe_cls_name = pipe.__class__.__name__
+        logger.warning(f"Cache Acceleration is disabled for: {pipe_cls_name}")
+    elif isinstance(pipe_or_adapter, BlockAdapter):
+        # BlockAdapter
+        adapter = pipe_or_adapter
+        BlockAdapter.assert_normalized(adapter)
+        _disable_pipe(adapter.pipe)
+        for transformer in BlockAdapter.flatten(adapter.transformer):
+            _disable_transformer(transformer)
+        for blocks in BlockAdapter.flatten(adapter.blocks):
+            _disable_blocks(blocks)
+        pipe_cls_name = adapter.pipe.__class__.__name__
+        logger.warning(f"Cache Acceleration is disabled for: {pipe_cls_name}")
+    else:
+        pass  # do nothing
+
+
 def supported_pipelines(
     **kwargs,
 ) -> Tuple[int, List[str]]:
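Taken together, these `cache_interface.py` changes make `enable_cache` accept either a `DiffusionPipeline` or a `BlockAdapter` through a single positional `CachedAdapter.apply(...)` call, return early when `cache_type` is `CacheType.NONE`, and gain a `disable_cache` counterpart that unwinds the hooks (note `original_forward.__get__(transformer)`, which re-binds the saved function as a method). A minimal round-trip sketch; the checkpoint name and prompt are illustrative placeholders, not taken from the diff:

```python
import cache_dit
from diffusers import DiffusionPipeline

# Illustrative checkpoint; any supported DiT-based pipeline should work.
pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image")

# One-line cache acceleration (DBCache). With cache_type=CacheType.NONE
# in the context kwargs, 0.2.29 now returns the pipe untouched instead.
cache_dit.enable_cache(pipe, residual_diff_threshold=0.08)
image = pipe("a cat sitting on a chair")  # cached forward

# New in 0.2.29: restore the original forward/__call__ bindings.
cache_dit.disable_cache(pipe)
image = pipe("a cat sitting on a chair")  # original, uncached forward
```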
cache_dit/cache_factory/patch_functors/functor_chroma.py
CHANGED

@@ -30,7 +30,7 @@ class ChromaPatchFunctor(PatchFunctor):
         blocks: torch.nn.ModuleList = None,
         **kwargs,
     ) -> ChromaTransformer2DModel:
-        if
+        if hasattr(transformer, "_is_patched"):
             return transformer
 
         if blocks is None:
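This change completes a previously truncated guard so that patching is idempotent: a second `apply` returns the transformer as-is once it carries the `_is_patched` flag (the file list shows the same one-line fix in `functor_flux.py`). A minimal sketch of the guard pattern; apart from the `_is_patched` flag shown in the diff, all names here are illustrative stand-ins for the real functor:

```python
import torch

def apply_patch(transformer: torch.nn.Module) -> torch.nn.Module:
    # Idempotency guard: if a previous call already patched this module,
    # hand it back unchanged rather than wrapping forward() twice.
    if hasattr(transformer, "_is_patched"):
        return transformer

    original_forward = transformer.forward

    def patched_forward(*args, **kwargs):
        # The real functor rewrites the block loop here; this stub
        # simply delegates to the saved original forward.
        return original_forward(*args, **kwargs)

    transformer.forward = patched_forward
    transformer._is_patched = True  # mark so re-entry becomes a no-op
    return transformer
```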
cache_dit/utils.py
CHANGED

@@ -5,9 +5,9 @@ import numpy as np
 from pprint import pprint
 from diffusers import DiffusionPipeline
 
-from typing import Dict, Any
+from typing import Dict, Any, List, Union
+from cache_dit.cache_factory import BlockAdapter
 from cache_dit.logger import init_logger
-from cache_dit.cache_factory import CacheType
 
 
 logger = init_logger(__name__)
@@ -30,9 +30,168 @@ class CacheStats:
 
 
 def summary(
-
+    adapter_or_others: Union[
+        BlockAdapter,
+        DiffusionPipeline,
+        torch.nn.Module,
+    ],
     details: bool = False,
     logging: bool = True,
+    **kwargs,
+) -> List[CacheStats]:
+    if adapter_or_others is None:
+        return [CacheStats()]
+
+    if not isinstance(adapter_or_others, BlockAdapter):
+        if not isinstance(adapter_or_others, DiffusionPipeline):
+            transformer = adapter_or_others
+            transformer_2 = None
+        else:
+            transformer = adapter_or_others.transformer
+            transformer_2 = None
+            if hasattr(adapter_or_others, "transformer_2"):
+                transformer_2 = adapter_or_others.transformer_2
+
+        blocks_stats: List[CacheStats] = []
+        for blocks in BlockAdapter.find_blocks(transformer):
+            blocks_stats.append(
+                _summary(
+                    blocks,
+                    details=details,
+                    logging=logging,
+                    **kwargs,
+                )
+            )
+
+        if transformer_2 is not None:
+            for blocks in BlockAdapter.find_blocks(transformer_2):
+                blocks_stats.append(
+                    _summary(
+                        blocks,
+                        details=details,
+                        logging=logging,
+                        **kwargs,
+                    )
+                )
+
+        blocks_stats.append(
+            _summary(
+                transformer,
+                details=details,
+                logging=logging,
+                **kwargs,
+            )
+        )
+        if transformer_2 is not None:
+            blocks_stats.append(
+                _summary(
+                    transformer_2,
+                    details=details,
+                    logging=logging,
+                    **kwargs,
+                )
+            )
+
+        blocks_stats = [stats for stats in blocks_stats if stats.cache_options]
+
+        return blocks_stats if len(blocks_stats) else [CacheStats()]
+
+    adapter = adapter_or_others
+    if not BlockAdapter.check_block_adapter(adapter):
+        return [CacheStats()]
+
+    blocks_stats = []
+    flatten_blocks = BlockAdapter.flatten(adapter.blocks)
+    for blocks in flatten_blocks:
+        blocks_stats.append(
+            _summary(
+                blocks,
+                details=details,
+                logging=logging,
+                **kwargs,
+            )
+        )
+
+    blocks_stats = [stats for stats in blocks_stats if stats.cache_options]
+
+    return blocks_stats if len(blocks_stats) else [CacheStats()]
+
+
+def strify(
+    adapter_or_others: Union[
+        BlockAdapter,
+        DiffusionPipeline,
+        CacheStats,
+        List[CacheStats],
+        Dict[str, Any],
+    ],
+) -> str:
+    if isinstance(adapter_or_others, BlockAdapter):
+        stats = summary(adapter_or_others, logging=False)[-1]
+        cache_options = stats.cache_options
+        cached_steps = len(stats.cached_steps)
+    elif isinstance(adapter_or_others, DiffusionPipeline):
+        stats = summary(adapter_or_others, logging=False)[-1]
+        cache_options = stats.cache_options
+        cached_steps = len(stats.cached_steps)
+    elif isinstance(adapter_or_others, CacheStats):
+        stats = adapter_or_others
+        cache_options = stats.cache_options
+        cached_steps = len(stats.cached_steps)
+    elif isinstance(adapter_or_others, list):
+        stats = adapter_or_others[0]
+        cache_options = stats.cache_options
+        cached_steps = len(stats.cached_steps)
+    elif isinstance(adapter_or_others, dict):
+        from cache_dit.cache_factory import CacheType
+
+        # Assume cache_context_kwargs
+        cache_options = adapter_or_others
+        cached_steps = None
+        cache_type = cache_options.get("cache_type", CacheType.NONE)
+
+        if cache_type == CacheType.NONE:
+            return "NONE"
+    else:
+        raise ValueError(
+            "Please set pipe_or_stats param as one of: "
+            "DiffusionPipeline | CacheStats | Dict[str, Any]"
+        )
+
+    if not cache_options:
+        return "NONE"
+
+    def get_taylorseer_order():
+        taylorseer_order = 0
+        if "taylorseer_order" in cache_options:
+            taylorseer_order = cache_options["taylorseer_order"]
+        return taylorseer_order
+
+    cache_type_str = (
+        f"DBCACHE_F{cache_options.get('Fn_compute_blocks', 1)}"
+        f"B{cache_options.get('Bn_compute_blocks', 0)}_"
+        f"W{cache_options.get('max_warmup_steps', 0)}"
+        f"M{max(0, cache_options.get('max_cached_steps', -1))}"
+        f"MC{max(0, cache_options.get('max_continuous_cached_steps', -1))}_"
+        f"T{int(cache_options.get('enable_taylorseer', False))}"
+        f"O{get_taylorseer_order()}_"
+        f"R{cache_options.get('residual_diff_threshold', 0.08)}"
+    )
+
+    if cached_steps:
+        cache_type_str += f"_S{cached_steps}"
+
+    return cache_type_str
+
+
+def _summary(
+    pipe_or_module: Union[
+        DiffusionPipeline,
+        torch.nn.Module,
+    ],
+    details: bool = False,
+    logging: bool = True,
+    **kwargs,
 ) -> CacheStats:
     cache_stats = CacheStats()
 
@@ -52,6 +211,8 @@ def summary(
         cache_stats.cache_options = cache_options
         if logging:
             print(f"\n🤗Cache Options: {cls_name}\n\n{cache_options}")
+    else:
+        logger.warning(f"Can't find Cache Options for: {cls_name}")
 
     if hasattr(module, "_cached_steps"):
         cached_steps: list[int] = module._cached_steps
@@ -142,58 +303,3 @@ def summary(
     )
 
     return cache_stats
-
-
-def strify(
-    pipe_or_stats: DiffusionPipeline | CacheStats | Dict[str, Any],
-) -> str:
-    if isinstance(pipe_or_stats, DiffusionPipeline):
-        stats = summary(pipe_or_stats, logging=False)
-        cache_options = stats.cache_options
-        cached_steps = len(stats.cached_steps)
-    elif isinstance(pipe_or_stats, CacheStats):
-        stats = pipe_or_stats
-        cache_options = stats.cache_options
-        cached_steps = len(stats.cached_steps)
-    elif isinstance(pipe_or_stats, dict):
-        # Assume cache_context_kwargs
-        cache_options = pipe_or_stats
-        cached_steps = None
-    else:
-        raise ValueError(
-            "Please set pipe_or_stats param as one of: "
-            "DiffusionPipeline | CacheStats | Dict[str, Any]"
-        )
-
-    if not cache_options:
-        return "NONE"
-
-    if cache_options.get("cache_type", None) != CacheType.DBCache:
-        return "NONE"
-
-    def get_taylorseer_order():
-        taylorseer_order = 0
-        if "taylorseer_kwargs" in cache_options:
-            if "n_derivatives" in cache_options["taylorseer_kwargs"]:
-                taylorseer_order = cache_options["taylorseer_kwargs"][
-                    "n_derivatives"
-                ]
-        elif "taylorseer_order" in cache_options:
-            taylorseer_order = cache_options["taylorseer_order"]
-        return taylorseer_order
-
-    cache_type_str = (
-        f"DBCACHE_F{cache_options.get('Fn_compute_blocks', 1)}"
-        f"B{cache_options.get('Bn_compute_blocks', 0)}_"
-        f"W{cache_options.get('max_warmup_steps', 0)}"
-        f"M{max(0, cache_options.get('max_cached_steps', -1))}"
-        f"MC{max(0, cache_options.get('max_continuous_cached_steps', -1))}_"
-        f"T{int(cache_options.get('enable_taylorseer', False))}"
-        f"O{get_taylorseer_order()}_"
-        f"R{cache_options.get('residual_diff_threshold', 0.08)}"
-    )
-
-    if cached_steps:
-        cache_type_str += f"_S{cached_steps}"
-
-    return cache_type_str
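After this rewrite, `summary()` collects one `CacheStats` per cached blocks/transformer it finds (including Wan 2.2's `transformer_2`) and returns a `List[CacheStats]`, while `strify()` additionally accepts a `BlockAdapter`, a `List[CacheStats]`, or a raw options dict. A usage sketch, assuming both helpers are re-exported at the package root as in the README examples; the checkpoint and the printed tag are illustrative:

```python
import cache_dit
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image")  # placeholder
cache_dit.enable_cache(pipe)
pipe("a cat sitting on a chair")

# One CacheStats per cached blocks/transformer, in a list.
stats = cache_dit.summary(pipe, details=False, logging=True)

# strify() encodes the options as DBCACHE_F{Fn}B{Bn}_W{warmup}M{max_cached}
# MC{max_continuous}_T{taylorseer}O{order}_R{threshold}[_S{cached_steps}].
print(cache_dit.strify(stats))  # e.g. "DBCACHE_F8B0_W0M0MC0_T0O2_R0.08_S23"
```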
{cache_dit-0.2.27.dist-info → cache_dit-0.2.29.dist-info}/METADATA
CHANGED

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: cache_dit
-Version: 0.2.
-Summary: 🤗
+Version: 0.2.29
+Summary: 🤗 A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers
 Author: DefTruth, vipshop.com, etc.
 Maintainer: DefTruth, vipshop.com, etc
 Project-URL: Repository, https://github.com/vipshop/cache-dit.git
@@ -43,8 +43,8 @@ Dynamic: requires-python
 <div align="center">
   <img src=https://github.com/vipshop/cache-dit/raw/main/assets/cache-dit-logo.png height="120">
 
-
-
+  <p align="center">
+    A <b>Unified</b> and Training-free <b>Cache Acceleration</b> Toolbox for <b>Diffusion Transformers</b> <br>
     ♥️ <b>Cache Acceleration</b> with <b>One-line</b> Code ~ ♥️
   </p>
   <div align='center'>
@@ -59,32 +59,39 @@ Dynamic: requires-python
   🔥<b><a href="#unified">Unified Cache APIs</a> | <a href="#dbcache">DBCache</a> | <a href="#taylorseer">Hybrid TaylorSeer</a> | <a href="#cfg">Hybrid Cache CFG</a></b>🔥
   </p>
   <p align="center">
-    🎉Now, <b>cache-dit</b> covers <b>
+    🎉Now, <b>cache-dit</b> covers <b>100%</b> Diffusers' <b>DiT-based</b> Pipelines🎉<br>
     🔥<b><a href="#supported">Qwen-Image</a> | <a href="#supported">FLUX.1</a> | <a href="#supported">Wan 2.1/2.2</a> | <a href="#supported"> ... </a> | <a href="#supported">CogVideoX</a></b>🔥
   </p>
 </div>
+<div align='center'>
+  <img src=./assets/gifs/wan2.2.C0_Q0_NONE.gif width=160px>
+  <img src=./assets/gifs/wan2.2.C1_Q0_DBCACHE_F1B0_W2M8MC2_T1O2_R0.08.gif width=160px>
+  <img src=./assets/gifs/wan2.2.C1_Q1_fp8_w8a8_dq_DBCACHE_F1B0_W2M8MC2_T1O2_R0.08.gif width=160px>
+  <p><b>🔥Wan2.2 MoE</b> Baseline | <b><a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:~2.0x↑🎉</b> | +FP8 DQ:<b>~2.4x↑🎉</b></p>
+  <img src=./assets/qwen-image.C0_Q0_NONE.png width=160px>
+  <img src=./assets/qwen-image.C1_Q0_DBCACHE_F8B0_W8M0MC0_T1O4_R0.12_S23.png width=160px>
+  <img src=./assets/qwen-image.C1_Q1_fp8_w8a8_dq_DBCACHE_F8B0_W8M0MC0_T1O4_R0.12_S18.png width=160px>
+  <p><b>🔥Qwen-Image</b> Baseline | <b><a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:~1.8x↑🎉</b> | +FP8 DQ:<b>~2.2x↑🎉</b><br>♥️ Please consider to leave a <b>⭐️ Star</b> to support us ~ ♥️</p>
+</p>
+</div>
 
 ## 🔥News
 
-- [2025-09-
-- [2025-08-
-- [2025-08-26] 🎉[**Wan2.2**](https://github.com/Wan-Video) **1.8x⚡️** speedup with `cache-dit + compile`! Please check the [example](./examples/run_wan_2.2.py).
-- [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x⚡️** speedup! Check the example at [run_qwen_image_edit.py](./examples/run_qwen_image_edit.py).
+- [2025-09-03] 🎉[**Wan2.2-MoE**](https://github.com/Wan-Video) **2.4x↑🎉** speedup! Please refer to [run_wan_2.2.py](./examples/pipeline/run_wan_2.2.py) as an example.
+- [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x↑🎉** speedup! Check the example: [run_qwen_image_edit.py](./examples/pipeline/run_qwen_image_edit.py).
 - [2025-08-12] 🎉First caching mechanism in [QwenLM/Qwen-Image](https://github.com/QwenLM/Qwen-Image) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/QwenLM/Qwen-Image/pull/61).
-- [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x
-- [2025-
-- [2025-07-18] 🎉First caching mechanism in [🤗huggingface/flux-fast](https://github.com/huggingface/flux-fast) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check the [PR](https://github.com/huggingface/flux-fast/pull/13).
-- [2025-07-13] **[🤗flux-faster](https://github.com/xlite-dev/flux-faster)** is released! **3.3x** speedup for FLUX.1 on NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)**.
+- [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x↑🎉** speedup! Please refer to [run_qwen_image.py](./examples/pipeline/run_qwen_image.py) as an example.
+- [2025-07-13] 🎉[**FLUX.1-Dev**](https://github.com/xlite-dev/flux-faster) **3.3x↑🎉** speedup! NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)** + `compile + FP8 DQ`.
 
-<!--
 <details>
 <summary> Previous News </summary>
 
+- [2025-09-01] 📚[**Hybird Forward Pattern**](#unified) is supported! Please check [FLUX.1-dev](./examples/run_flux_adapter.py) as an example.
+- [2025-08-29] 🔥</b>Covers <b>100%</b> Diffusers' <b>DiT-based</b> Pipelines: **[BlockAdapter](#unified) + [Pattern Matching](#unified).**
+- [2025-08-10] 🔥[**FLUX.1-Kontext-dev**](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) is supported! Please refer [run_flux_kontext.py](./examples/pipeline/run_flux_kontext.py) as an example.
 - [2025-07-18] 🎉First caching mechanism in [🤗huggingface/flux-fast](https://github.com/huggingface/flux-fast) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check the [PR](https://github.com/huggingface/flux-fast/pull/13).
-- [2025-07-13] **[🤗flux-faster](https://github.com/xlite-dev/flux-faster)** is released! **3.3x** speedup for FLUX.1 on NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)**.
 
 </details>
--->
 
 ## 📖Contents
 
@@ -93,6 +100,11 @@ Dynamic: requires-python
 - [⚙️Installation](#️installation)
 - [🔥Supported Models](#supported)
 - [🎉Unified Cache APIs](#unified)
+  - [📚Forward Pattern Matching](#unified)
+  - [🎉Cache with One-line Code](#unified)
+  - [🔥Automatic Block Adapter](#unified)
+  - [📚Hybird Forward Pattern](#unified)
+  - [🤖Cache Acceleration Stats](#unified)
 - [⚡️Dual Block Cache](#dbcache)
 - [🔥Hybrid TaylorSeer](#taylorseer)
 - [⚡️Hybrid Cache CFG](#cfg)
@@ -185,7 +197,7 @@ Currently, for any **Diffusion** models with **Transformer Blocks** that match t
 
 ### ♥️Cache Acceleration with One-line Code
 
-In most cases, you only need to call **one-line** of code, that is `cache_dit.enable_cache(...)`. After this API is called, you just need to call the pipe as normal. The `pipe` param can be **any** Diffusion Pipeline. Please refer to [Qwen-Image](./examples/run_qwen_image.py) as an example.
+In most cases, you only need to call **one-line** of code, that is `cache_dit.enable_cache(...)`. After this API is called, you just need to call the pipe as normal. The `pipe` param can be **any** Diffusion Pipeline. Please refer to [Qwen-Image](./examples/pipeline/run_qwen_image.py) as an example.
 
 ```python
 import cache_dit
@@ -199,11 +211,14 @@ cache_dit.enable_cache(pipe)
 
 # Just call the pipe as normal.
 output = pipe(...)
+
+# Disable cache and run original pipe.
+cache_dit.disable_cache(pipe)
 ```
 
-### 🔥Automatic Block Adapter
+### 🔥Automatic Block Adapter
 
-But in some cases, you may have a **modified** Diffusion Pipeline or Transformer that is not located in the diffusers library or not officially supported by **cache-dit** at this time. The **BlockAdapter** can help you solve this problems. Please refer to [🔥Qwen-Image w/ BlockAdapter](./examples/run_qwen_image_adapter.py)
+But in some cases, you may have a **modified** Diffusion Pipeline or Transformer that is not located in the diffusers library or not officially supported by **cache-dit** at this time. The **BlockAdapter** can help you solve this problems. Please refer to [🔥Qwen-Image w/ BlockAdapter](./examples/adapter/run_qwen_image_adapter.py) as an example.
 
 ```python
 from cache_dit import ForwardPattern, BlockAdapter
@@ -225,14 +240,19 @@ cache_dit.enable_cache(
         pipe=pipe, # Qwen-Image, etc.
         transformer=pipe.transformer,
         blocks=pipe.transformer.transformer_blocks,
-        blocks_name="transformer_blocks",
         forward_pattern=ForwardPattern.Pattern_1,
     ),
 )
+```
+For such situations, **BlockAdapter** can help you quickly apply various cache acceleration features to your own Diffusion Pipelines and Transformers. Please check the [📚BlockAdapter.md](./docs/BlockAdapter.md) for more details.
+
+### 📚Hybird Forward Pattern
 
-
+Sometimes, a Transformer class will contain more than one transformer `blocks`. For example, **FLUX.1** (HiDream, Chroma, etc) contains transformer_blocks and single_transformer_blocks (with different forward patterns). The **BlockAdapter** can also help you solve this problem. Please refer to [📚FLUX.1](./examples/adapter/run_flux_adapter.py) as an example.
+
+```python
 # For diffusers <= 0.34.0, FLUX.1 transformer_blocks and
-# single_transformer_blocks
+# single_transformer_blocks have different forward patterns.
 cache_dit.enable_cache(
     BlockAdapter(
         pipe=pipe, # FLUX.1, etc.
@@ -241,10 +261,6 @@ cache_dit.enable_cache(
             pipe.transformer.transformer_blocks,
             pipe.transformer.single_transformer_blocks,
         ],
-        blocks_name=[
-            "transformer_blocks",
-            "single_transformer_blocks",
-        ],
         forward_pattern=[
             ForwardPattern.Pattern_1,
             ForwardPattern.Pattern_3,
@@ -252,7 +268,6 @@ cache_dit.enable_cache(
     ),
 )
 ```
-For such situations, **BlockAdapter** can help you quickly apply various cache acceleration features to your own Diffusion Pipelines and Transformers. Please check the [📚BlockAdapter.md](./docs/BlockAdapter.md) for more details.
 
 ### 🤖Cache Acceleration Stats Summary
 
@@ -377,7 +392,7 @@ cache_dit.enable_cache(
 
 <div id="cfg"></div>
 
-cache-dit supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `
+cache-dit supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `enable_spearate_cfg` param to **False (default)**. Otherwise, set it to True. For examples:
 
 ```python
 cache_dit.enable_cache(
@@ -385,10 +400,10 @@ cache_dit.enable_cache(
     ...,
     # CFG: classifier free guidance or not
     # For model that fused CFG and non-CFG into single forward step,
-    # should set
+    # should set enable_spearate_cfg as False. For example, set it as True
     # for Wan 2.1/Qwen-Image and set it as False for FLUX.1, HunyuanVideo,
     # CogVideoX, Mochi, LTXVideo, Allegro, CogView3Plus, EasyAnimate, SD3, etc.
-
+    enable_spearate_cfg=True, # Wan 2.1, Qwen-Image, CogView4, Cosmos, SkyReelsV2, etc.
     # Compute cfg forward first or not, default False, namely,
     # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
     cfg_compute_first=False,
@@ -451,11 +466,21 @@ cache-dit-metrics-cli all -i1 true_dir -i2 test_dir # image dir
 
 How to contribute? Star ⭐️ this repo to support us or check [CONTRIBUTE.md](./CONTRIBUTE.md).
 
-
+<div align='center'>
+<a href="https://star-history.com/#vipshop/cache-dit&Date">
+  <picture align='center'>
+    <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=vipshop/cache-dit&type=Date&theme=dark" />
+    <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=vipshop/cache-dit&type=Date" />
+    <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=vipshop/cache-dit&type=Date" width=400px />
+  </picture>
+</a>
+</div>
+
+## ©️Acknowledgements
 
-<div id="
+<div id="Acknowledgements"></div>
 
-The **cache-dit** codebase is adapted from FBCache.
+The **cache-dit** codebase is adapted from FBCache. Over time its codebase diverged a lot, and **cache-dit** API is no longer compatible with FBCache.
 
 ## ©️Citations
 
@@ -463,7 +488,7 @@ The **cache-dit** codebase is adapted from FBCache. Special thanks to their exce
 
 ```BibTeX
 @misc{cache-dit@2025,
-  title={cache-dit:
+  title={cache-dit: A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers},
   url={https://github.com/vipshop/cache-dit.git},
   note={Open-source software available at https://github.com/vipshop/cache-dit.git},
   author={vipshop.com},