cache-dit 0.2.27__py3-none-any.whl → 0.2.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cache-dit might be problematic.

@@ -1,9 +1,11 @@
+ import torch
  from typing import Any, Tuple, List
  from diffusers import DiffusionPipeline
  from cache_dit.cache_factory.cache_types import CacheType
  from cache_dit.cache_factory.block_adapters import BlockAdapter
  from cache_dit.cache_factory.block_adapters import BlockAdapterRegistry
  from cache_dit.cache_factory.cache_adapters import CachedAdapter
+ from cache_dit.cache_factory.cache_contexts import CachedContextManager

  from cache_dit.logger import init_logger

@@ -12,7 +14,7 @@ logger = init_logger(__name__)

  def enable_cache(
      # DiffusionPipeline or BlockAdapter
-     pipe_or_adapter: DiffusionPipeline | BlockAdapter | Any,
+     pipe_or_adapter: DiffusionPipeline | BlockAdapter,
      # Cache context kwargs
      Fn_compute_blocks: int = 8,
      Bn_compute_blocks: int = 0,
@@ -21,7 +23,7 @@ def enable_cache(
      max_continuous_cached_steps: int = -1,
      residual_diff_threshold: float = 0.08,
      # Cache CFG or not
-     do_separate_cfg: bool = False,
+     enable_spearate_cfg: bool = False,
      cfg_compute_first: bool = False,
      cfg_diff_compute_separate: bool = True,
      # Hybird TaylorSeer
@@ -30,7 +32,7 @@ def enable_cache(
      taylorseer_cache_type: str = "residual",
      taylorseer_order: int = 2,
      **other_cache_context_kwargs,
- ) -> DiffusionPipeline | Any:
+ ) -> BlockAdapter:
      r"""
      Unified Cache API for almost Any Diffusion Transformers (with Transformer Blocks
      that match the specific Input and Output patterns).
@@ -64,9 +66,9 @@ def enable_cache(
          residual_diff_threshold (`float`, *required*, defaults to 0.08):
              he value of residual diff threshold, a higher value leads to faster performance at the
              cost of lower precision.
-         do_separate_cfg (`bool`, *required*, defaults to False):
+         enable_spearate_cfg (`bool`, *required*, defaults to False):
              Whether to do separate cfg or not, such as Wan 2.1, Qwen-Image. For model that fused CFG
-             and non-CFG into single forward step, should set do_separate_cfg as False, for example:
+             and non-CFG into single forward step, should set enable_spearate_cfg as False, for example:
              CogVideoX, HunyuanVideo, Mochi, etc.
          cfg_compute_first (`bool`, *required*, defaults to False):
              Compute cfg forward first or not, default False, namely, 0, 2, 4, ..., -> non-CFG step;
@@ -89,7 +91,7 @@ def enable_cache(
              The order of taylorseer, higher values of n_derivatives will lead to longer computation time,
              but may improve precision significantly.
          other_cache_kwargs: (`dict`, *optional*, defaults to {})
-             Other cache context kwargs, please check https://github.com/vipshop/cache-dit/blob/main/src/cache_dit/cache_factory/cache_context.py
+             Other cache context kwargs, please check https://github.com/vipshop/cache-dit/blob/main/src/cache_dit/cache_factory/cache_contexts/cache_context.py
          for more details.

      Examples:
@@ -104,6 +106,10 @@ def enable_cache(

      # Collect cache context kwargs
      cache_context_kwargs = other_cache_context_kwargs.copy()
+     if cache_type := cache_context_kwargs.get("cache_type", None):
+         if cache_type == CacheType.NONE:
+             return pipe_or_adapter
+
      cache_context_kwargs["cache_type"] = CacheType.DBCache
      cache_context_kwargs["Fn_compute_blocks"] = Fn_compute_blocks
      cache_context_kwargs["Bn_compute_blocks"] = Bn_compute_blocks
@@ -113,7 +119,7 @@ def enable_cache(
          max_continuous_cached_steps
      )
      cache_context_kwargs["residual_diff_threshold"] = residual_diff_threshold
-     cache_context_kwargs["do_separate_cfg"] = do_separate_cfg
+     cache_context_kwargs["enable_spearate_cfg"] = enable_spearate_cfg
      cache_context_kwargs["cfg_compute_first"] = cfg_compute_first
      cache_context_kwargs["cfg_diff_compute_separate"] = (
          cfg_diff_compute_separate
@@ -123,25 +129,11 @@ def enable_cache(
          enable_encoder_taylorseer
      )
      cache_context_kwargs["taylorseer_cache_type"] = taylorseer_cache_type
-     if "taylorseer_kwargs" in cache_context_kwargs:
-         cache_context_kwargs["taylorseer_kwargs"][
-             "n_derivatives"
-         ] = taylorseer_order
-     else:
-         cache_context_kwargs["taylorseer_kwargs"] = {
-             "n_derivatives": taylorseer_order
-         }
+     cache_context_kwargs["taylorseer_order"] = taylorseer_order

-     if isinstance(pipe_or_adapter, BlockAdapter):
-         return CachedAdapter.apply(
-             pipe=None,
-             block_adapter=pipe_or_adapter,
-             **cache_context_kwargs,
-         )
-     elif isinstance(pipe_or_adapter, DiffusionPipeline):
+     if isinstance(pipe_or_adapter, (DiffusionPipeline, BlockAdapter)):
          return CachedAdapter.apply(
-             pipe=pipe_or_adapter,
-             block_adapter=None,
+             pipe_or_adapter,
              **cache_context_kwargs,
          )
      else:
@@ -152,6 +144,81 @@ def enable_cache(
          )


+ def disable_cache(
+     # DiffusionPipeline or BlockAdapter
+     pipe_or_adapter: DiffusionPipeline | BlockAdapter,
+ ):
+     from cache_dit.cache_factory.cache_blocks.utils import (
+         remove_cached_stats,
+     )
+
+     def _disable_blocks(blocks: torch.nn.ModuleList):
+         if blocks is None:
+             return
+         if hasattr(blocks, "_forward_pattern"):
+             del blocks._forward_pattern
+         if hasattr(blocks, "_cache_context_kwargs"):
+             del blocks._cache_context_kwargs
+         remove_cached_stats(blocks)
+
+     def _disable_transformer(transformer: torch.nn.Module):
+         if transformer is None or not BlockAdapter.is_cached(transformer):
+             return
+         if original_forward := getattr(transformer, "_original_forward"):
+             transformer.forward = original_forward.__get__(transformer)
+             del transformer._original_forward
+         if hasattr(transformer, "_is_cached"):
+             del transformer._is_cached
+         if hasattr(transformer, "_forward_pattern"):
+             del transformer._forward_pattern
+         if hasattr(transformer, "_has_separate_cfg"):
+             del transformer._has_separate_cfg
+         if hasattr(transformer, "_cache_context_kwargs"):
+             del transformer._cache_context_kwargs
+         remove_cached_stats(transformer)
+         for blocks in BlockAdapter.find_blocks(transformer):
+             _disable_blocks(blocks)
+
+     def _disable_pipe(pipe: DiffusionPipeline):
+         if pipe is None or not BlockAdapter.is_cached(pipe):
+             return
+         if original_call := getattr(pipe, "_original_call"):
+             pipe.__class__.__call__ = original_call
+             del pipe.__class__._original_call
+         if cache_manager := getattr(pipe, "_cache_manager"):
+             assert isinstance(cache_manager, CachedContextManager)
+             cache_manager.clear_contexts()
+             del pipe._cache_manager
+         if hasattr(pipe, "_is_cached"):
+             del pipe.__class__._is_cached
+         if hasattr(pipe, "_cache_context_kwargs"):
+             del pipe._cache_context_kwargs
+         remove_cached_stats(pipe)
+
+     if isinstance(pipe_or_adapter, DiffusionPipeline):
+         pipe = pipe_or_adapter
+         _disable_pipe(pipe)
+         if hasattr(pipe, "transformer"):
+             _disable_transformer(pipe.transformer)
+         if hasattr(pipe, "transformer_2"):  # Wan 2.2
+             _disable_transformer(pipe.transformer_2)
+         pipe_cls_name = pipe.__class__.__name__
+         logger.warning(f"Cache Acceleration is disabled for: {pipe_cls_name}")
+     elif isinstance(pipe_or_adapter, BlockAdapter):
+         # BlockAdapter
+         adapter = pipe_or_adapter
+         BlockAdapter.assert_normalized(adapter)
+         _disable_pipe(adapter.pipe)
+         for transformer in BlockAdapter.flatten(adapter.transformer):
+             _disable_transformer(transformer)
+         for blocks in BlockAdapter.flatten(adapter.blocks):
+             _disable_blocks(blocks)
+         pipe_cls_name = adapter.pipe.__class__.__name__
+         logger.warning(f"Cache Acceleration is disabled for: {pipe_cls_name}")
+     else:
+         pass  # do nothing
+
+
  def supported_pipelines(
      **kwargs,
  ) -> Tuple[int, List[str]]:
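Taken together, the hunks above rename `do_separate_cfg` to `enable_spearate_cfg` (the spelling is as released), flatten `taylorseer_kwargs={"n_derivatives": ...}` into a plain `taylorseer_order` kwarg, short-circuit on `CacheType.NONE`, and add a `disable_cache` entry point. A minimal sketch of the 0.2.29-style calls, assuming the top-level `cache_dit` namespace re-exports `enable_cache`/`disable_cache` as the README shows, with a placeholder model id:

```python
import cache_dit
from diffusers import DiffusionPipeline

# Placeholder model id; any DiT-based Diffusers pipeline is expected to work.
pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image")

# 0.2.29-style call: the CFG flag is `enable_spearate_cfg` (was `do_separate_cfg`)
# and TaylorSeer order is a plain kwarg instead of a nested taylorseer_kwargs dict.
cache_dit.enable_cache(
    pipe,
    Fn_compute_blocks=8,
    Bn_compute_blocks=0,
    residual_diff_threshold=0.08,
    enable_spearate_cfg=True,  # Qwen-Image runs CFG as a separate forward step
    taylorseer_order=2,
)

image = pipe("a cup of coffee on a wooden table").images[0]

# New in this release: restore the original forward()/__call__ and clear cached contexts.
cache_dit.disable_cache(pipe)
```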
@@ -30,7 +30,7 @@ class ChromaPatchFunctor(PatchFunctor):
          blocks: torch.nn.ModuleList = None,
          **kwargs,
      ) -> ChromaTransformer2DModel:
-         if getattr(transformer, "_is_patched", False):
+         if hasattr(transformer, "_is_patched"):
              return transformer

          if blocks is None:
@@ -31,7 +31,7 @@ class FluxPatchFunctor(PatchFunctor):
          **kwargs,
      ) -> FluxTransformer2DModel:

-         if getattr(transformer, "_is_patched", False):
+         if hasattr(transformer, "_is_patched"):
              return transformer

          if blocks is None:
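Both patch functors now guard on attribute existence rather than attribute value. The two checks only differ when `_is_patched` has been set to something falsy; a small standalone sketch (plain Python, no cache-dit imports) of that difference:

```python
class FakeTransformer:
    """Stand-in object; not a real ChromaTransformer2DModel."""


t = FakeTransformer()
t._is_patched = False  # attribute exists, but its value is falsy

# Old guard: value check -> False, so the functor would patch again.
already_patched_old = getattr(t, "_is_patched", False)

# New guard: existence check -> True, so the functor returns early.
already_patched_new = hasattr(t, "_is_patched")

print(already_patched_old, already_patched_new)  # False True
```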
cache_dit/utils.py CHANGED
@@ -5,9 +5,9 @@ import numpy as np
  from pprint import pprint
  from diffusers import DiffusionPipeline

- from typing import Dict, Any
+ from typing import Dict, Any, List, Union
+ from cache_dit.cache_factory import BlockAdapter
  from cache_dit.logger import init_logger
- from cache_dit.cache_factory import CacheType


  logger = init_logger(__name__)
@@ -30,9 +30,168 @@ class CacheStats:


  def summary(
-     pipe_or_module: DiffusionPipeline | torch.nn.Module | Any,
+     adapter_or_others: Union[
+         BlockAdapter,
+         DiffusionPipeline,
+         torch.nn.Module,
+     ],
      details: bool = False,
      logging: bool = True,
+     **kwargs,
+ ) -> List[CacheStats]:
+     if adapter_or_others is None:
+         return [CacheStats()]
+
+     if not isinstance(adapter_or_others, BlockAdapter):
+         if not isinstance(adapter_or_others, DiffusionPipeline):
+             transformer = adapter_or_others
+             transformer_2 = None
+         else:
+             transformer = adapter_or_others.transformer
+             transformer_2 = None
+             if hasattr(adapter_or_others, "transformer_2"):
+                 transformer_2 = adapter_or_others.transformer_2
+
+         blocks_stats: List[CacheStats] = []
+         for blocks in BlockAdapter.find_blocks(transformer):
+             blocks_stats.append(
+                 _summary(
+                     blocks,
+                     details=details,
+                     logging=logging,
+                     **kwargs,
+                 )
+             )
+
+         if transformer_2 is not None:
+             for blocks in BlockAdapter.find_blocks(transformer_2):
+                 blocks_stats.append(
+                     _summary(
+                         blocks,
+                         details=details,
+                         logging=logging,
+                         **kwargs,
+                     )
+                 )
+
+         blocks_stats.append(
+             _summary(
+                 transformer,
+                 details=details,
+                 logging=logging,
+                 **kwargs,
+             )
+         )
+         if transformer_2 is not None:
+             blocks_stats.append(
+                 _summary(
+                     transformer_2,
+                     details=details,
+                     logging=logging,
+                     **kwargs,
+                 )
+             )
+
+         blocks_stats = [stats for stats in blocks_stats if stats.cache_options]
+
+         return blocks_stats if len(blocks_stats) else [CacheStats()]
+
+     adapter = adapter_or_others
+     if not BlockAdapter.check_block_adapter(adapter):
+         return [CacheStats()]
+
+     blocks_stats = []
+     flatten_blocks = BlockAdapter.flatten(adapter.blocks)
+     for blocks in flatten_blocks:
+         blocks_stats.append(
+             _summary(
+                 blocks,
+                 details=details,
+                 logging=logging,
+                 **kwargs,
+             )
+         )
+
+     blocks_stats = [stats for stats in blocks_stats if stats.cache_options]
+
+     return blocks_stats if len(blocks_stats) else [CacheStats()]
+
+
+ def strify(
+     adapter_or_others: Union[
+         BlockAdapter,
+         DiffusionPipeline,
+         CacheStats,
+         List[CacheStats],
+         Dict[str, Any],
+     ],
+ ) -> str:
+     if isinstance(adapter_or_others, BlockAdapter):
+         stats = summary(adapter_or_others, logging=False)[-1]
+         cache_options = stats.cache_options
+         cached_steps = len(stats.cached_steps)
+     elif isinstance(adapter_or_others, DiffusionPipeline):
+         stats = summary(adapter_or_others, logging=False)[-1]
+         cache_options = stats.cache_options
+         cached_steps = len(stats.cached_steps)
+     elif isinstance(adapter_or_others, CacheStats):
+         stats = adapter_or_others
+         cache_options = stats.cache_options
+         cached_steps = len(stats.cached_steps)
+     elif isinstance(adapter_or_others, list):
+         stats = adapter_or_others[0]
+         cache_options = stats.cache_options
+         cached_steps = len(stats.cached_steps)
+     elif isinstance(adapter_or_others, dict):
+         from cache_dit.cache_factory import CacheType
+
+         # Assume cache_context_kwargs
+         cache_options = adapter_or_others
+         cached_steps = None
+         cache_type = cache_options.get("cache_type", CacheType.NONE)
+
+         if cache_type == CacheType.NONE:
+             return "NONE"
+     else:
+         raise ValueError(
+             "Please set pipe_or_stats param as one of: "
+             "DiffusionPipeline | CacheStats | Dict[str, Any]"
+         )
+
+     if not cache_options:
+         return "NONE"
+
+     def get_taylorseer_order():
+         taylorseer_order = 0
+         if "taylorseer_order" in cache_options:
+             taylorseer_order = cache_options["taylorseer_order"]
+         return taylorseer_order
+
+     cache_type_str = (
+         f"DBCACHE_F{cache_options.get('Fn_compute_blocks', 1)}"
+         f"B{cache_options.get('Bn_compute_blocks', 0)}_"
+         f"W{cache_options.get('max_warmup_steps', 0)}"
+         f"M{max(0, cache_options.get('max_cached_steps', -1))}"
+         f"MC{max(0, cache_options.get('max_continuous_cached_steps', -1))}_"
+         f"T{int(cache_options.get('enable_taylorseer', False))}"
+         f"O{get_taylorseer_order()}_"
+         f"R{cache_options.get('residual_diff_threshold', 0.08)}"
+     )
+
+     if cached_steps:
+         cache_type_str += f"_S{cached_steps}"
+
+     return cache_type_str
+
+
+ def _summary(
+     pipe_or_module: Union[
+         DiffusionPipeline,
+         torch.nn.Module,
+     ],
+     details: bool = False,
+     logging: bool = True,
+     **kwargs,
  ) -> CacheStats:
      cache_stats = CacheStats()

@@ -52,6 +211,8 @@ def summary(
          cache_stats.cache_options = cache_options
          if logging:
              print(f"\n🤗Cache Options: {cls_name}\n\n{cache_options}")
+     else:
+         logger.warning(f"Can't find Cache Options for: {cls_name}")

      if hasattr(module, "_cached_steps"):
          cached_steps: list[int] = module._cached_steps
@@ -142,58 +303,3 @@ def summary(
          )

      return cache_stats
-
-
- def strify(
-     pipe_or_stats: DiffusionPipeline | CacheStats | Dict[str, Any],
- ) -> str:
-     if isinstance(pipe_or_stats, DiffusionPipeline):
-         stats = summary(pipe_or_stats, logging=False)
-         cache_options = stats.cache_options
-         cached_steps = len(stats.cached_steps)
-     elif isinstance(pipe_or_stats, CacheStats):
-         stats = pipe_or_stats
-         cache_options = stats.cache_options
-         cached_steps = len(stats.cached_steps)
-     elif isinstance(pipe_or_stats, dict):
-         # Assume cache_context_kwargs
-         cache_options = pipe_or_stats
-         cached_steps = None
-     else:
-         raise ValueError(
-             "Please set pipe_or_stats param as one of: "
-             "DiffusionPipeline | CacheStats | Dict[str, Any]"
-         )
-
-     if not cache_options:
-         return "NONE"
-
-     if cache_options.get("cache_type", None) != CacheType.DBCache:
-         return "NONE"
-
-     def get_taylorseer_order():
-         taylorseer_order = 0
-         if "taylorseer_kwargs" in cache_options:
-             if "n_derivatives" in cache_options["taylorseer_kwargs"]:
-                 taylorseer_order = cache_options["taylorseer_kwargs"][
-                     "n_derivatives"
-                 ]
-         elif "taylorseer_order" in cache_options:
-             taylorseer_order = cache_options["taylorseer_order"]
-         return taylorseer_order
-
-     cache_type_str = (
-         f"DBCACHE_F{cache_options.get('Fn_compute_blocks', 1)}"
-         f"B{cache_options.get('Bn_compute_blocks', 0)}_"
-         f"W{cache_options.get('max_warmup_steps', 0)}"
-         f"M{max(0, cache_options.get('max_cached_steps', -1))}"
-         f"MC{max(0, cache_options.get('max_continuous_cached_steps', -1))}_"
-         f"T{int(cache_options.get('enable_taylorseer', False))}"
-         f"O{get_taylorseer_order()}_"
-         f"R{cache_options.get('residual_diff_threshold', 0.08)}"
-     )
-
-     if cached_steps:
-         cache_type_str += f"_S{cached_steps}"
-
-     return cache_type_str
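The `summary`/`strify` rework above makes `summary` return a `List[CacheStats]` (one per cached blocks or transformer, which matters for two-transformer pipelines such as Wan 2.2) and lets `strify` accept an adapter, a pipeline, a stats object, a stats list, or a raw kwargs dict. A rough usage sketch, assuming both helpers remain re-exported from the top-level `cache_dit` package:

```python
import cache_dit
from diffusers import DiffusionPipeline


def report_cache_stats(pipe: DiffusionPipeline) -> str:
    # summary() now returns a list of CacheStats instead of a single object.
    stats_list = cache_dit.summary(pipe, details=False, logging=False)
    for stats in stats_list:
        print(f"{len(stats.cached_steps)} cached steps")
    # strify() renders a tag such as "DBCACHE_F8B0_W8M0MC0_T1O2_R0.08_S23"
    # from a pipeline, a CacheStats, a list of CacheStats, or a kwargs dict.
    return cache_dit.strify(stats_list)
```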
@@ -1,7 +1,7 @@
  Metadata-Version: 2.4
  Name: cache_dit
- Version: 0.2.27
- Summary: 🤗 CacheDiT: An Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers
+ Version: 0.2.29
+ Summary: 🤗 A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers
  Author: DefTruth, vipshop.com, etc.
  Maintainer: DefTruth, vipshop.com, etc
  Project-URL: Repository, https://github.com/vipshop/cache-dit.git
@@ -43,8 +43,8 @@ Dynamic: requires-python
  <div align="center">
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/cache-dit-logo.png height="120">

- <p align="center">
- An <b>Unified</b> and Training-free <b>Cache Acceleration</b> Toolbox for <b>Diffusion Transformers</b> <br>
+ <p align="center">
+ A <b>Unified</b> and Training-free <b>Cache Acceleration</b> Toolbox for <b>Diffusion Transformers</b> <br>
  ♥️ <b>Cache Acceleration</b> with <b>One-line</b> Code ~ ♥️
  </p>
  <div align='center'>
@@ -59,32 +59,39 @@ Dynamic: requires-python
  🔥<b><a href="#unified">Unified Cache APIs</a> | <a href="#dbcache">DBCache</a> | <a href="#taylorseer">Hybrid TaylorSeer</a> | <a href="#cfg">Hybrid Cache CFG</a></b>🔥
  </p>
  <p align="center">
- 🎉Now, <b>cache-dit</b> covers <b>All</b> mainstream <b>DiT-based</b> Diffusers' Pipelines</b>🎉<br>
+ 🎉Now, <b>cache-dit</b> covers <b>100%</b> Diffusers' <b>DiT-based</b> Pipelines🎉<br>
  🔥<b><a href="#supported">Qwen-Image</a> | <a href="#supported">FLUX.1</a> | <a href="#supported">Wan 2.1/2.2</a> | <a href="#supported"> ... </a> | <a href="#supported">CogVideoX</a></b>🔥
  </p>
  </div>
+ <div align='center'>
+ <img src=./assets/gifs/wan2.2.C0_Q0_NONE.gif width=160px>
+ <img src=./assets/gifs/wan2.2.C1_Q0_DBCACHE_F1B0_W2M8MC2_T1O2_R0.08.gif width=160px>
+ <img src=./assets/gifs/wan2.2.C1_Q1_fp8_w8a8_dq_DBCACHE_F1B0_W2M8MC2_T1O2_R0.08.gif width=160px>
+ <p><b>🔥Wan2.2 MoE</b> Baseline | <b><a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:~2.0x↑🎉</b> | +FP8 DQ:<b>~2.4x↑🎉</b></p>
+ <img src=./assets/qwen-image.C0_Q0_NONE.png width=160px>
+ <img src=./assets/qwen-image.C1_Q0_DBCACHE_F8B0_W8M0MC0_T1O4_R0.12_S23.png width=160px>
+ <img src=./assets/qwen-image.C1_Q1_fp8_w8a8_dq_DBCACHE_F8B0_W8M0MC0_T1O4_R0.12_S18.png width=160px>
+ <p><b>🔥Qwen-Image</b> Baseline | <b><a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:~1.8x↑🎉</b> | +FP8 DQ:<b>~2.2x↑🎉</b><br>♥️ Please consider to leave a <b>⭐️ Star</b> to support us ~ ♥️</p>
+ </p>
+ </div>

  ## 🔥News

- - [2025-09-01] 📚[**Hybird Forward Pattern**](#unified) is supported! Please check [FLUX.1-dev](./examples/run_flux_adapter.py) as an example.
- - [2025-08-29] 🔥</b>Covers <b>All</b> Diffusers' <b>DiT-based</b> Pipelines via **[BlockAdapter](#unified) + [Pattern Matching](#unified).**
- - [2025-08-26] 🎉[**Wan2.2**](https://github.com/Wan-Video) **1.8x⚡️** speedup with `cache-dit + compile`! Please check the [example](./examples/run_wan_2.2.py).
- - [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x⚡️** speedup! Check the example at [run_qwen_image_edit.py](./examples/run_qwen_image_edit.py).
+ - [2025-09-03] 🎉[**Wan2.2-MoE**](https://github.com/Wan-Video) **2.4x↑🎉** speedup! Please refer to [run_wan_2.2.py](./examples/pipeline/run_wan_2.2.py) as an example.
+ - [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x↑🎉** speedup! Check the example: [run_qwen_image_edit.py](./examples/pipeline/run_qwen_image_edit.py).
  - [2025-08-12] 🎉First caching mechanism in [QwenLM/Qwen-Image](https://github.com/QwenLM/Qwen-Image) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/QwenLM/Qwen-Image/pull/61).
- - [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x⚡️** speedup! Please refer [run_qwen_image.py](./examples/run_qwen_image.py) as an example.
- - [2025-08-10] 🔥[**FLUX.1-Kontext-dev**](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) is supported! Please refer [run_flux_kontext.py](./examples/run_flux_kontext.py) as an example.
- - [2025-07-18] 🎉First caching mechanism in [🤗huggingface/flux-fast](https://github.com/huggingface/flux-fast) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check the [PR](https://github.com/huggingface/flux-fast/pull/13).
- - [2025-07-13] **[🤗flux-faster](https://github.com/xlite-dev/flux-faster)** is released! **3.3x** speedup for FLUX.1 on NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)**.
+ - [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x↑🎉** speedup! Please refer to [run_qwen_image.py](./examples/pipeline/run_qwen_image.py) as an example.
+ - [2025-07-13] 🎉[**FLUX.1-Dev**](https://github.com/xlite-dev/flux-faster) **3.3x↑🎉** speedup! NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)** + `compile + FP8 DQ`.

- <!--
  <details>
  <summary> Previous News </summary>

+ - [2025-09-01] 📚[**Hybird Forward Pattern**](#unified) is supported! Please check [FLUX.1-dev](./examples/run_flux_adapter.py) as an example.
+ - [2025-08-29] 🔥</b>Covers <b>100%</b> Diffusers' <b>DiT-based</b> Pipelines: **[BlockAdapter](#unified) + [Pattern Matching](#unified).**
+ - [2025-08-10] 🔥[**FLUX.1-Kontext-dev**](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) is supported! Please refer [run_flux_kontext.py](./examples/pipeline/run_flux_kontext.py) as an example.
  - [2025-07-18] 🎉First caching mechanism in [🤗huggingface/flux-fast](https://github.com/huggingface/flux-fast) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check the [PR](https://github.com/huggingface/flux-fast/pull/13).
- - [2025-07-13] **[🤗flux-faster](https://github.com/xlite-dev/flux-faster)** is released! **3.3x** speedup for FLUX.1 on NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)**.

  </details>
- -->

  ## 📖Contents

@@ -93,6 +100,11 @@ Dynamic: requires-python
  - [⚙️Installation](#️installation)
  - [🔥Supported Models](#supported)
  - [🎉Unified Cache APIs](#unified)
+ - [📚Forward Pattern Matching](#unified)
+ - [🎉Cache with One-line Code](#unified)
+ - [🔥Automatic Block Adapter](#unified)
+ - [📚Hybird Forward Pattern](#unified)
+ - [🤖Cache Acceleration Stats](#unified)
  - [⚡️Dual Block Cache](#dbcache)
  - [🔥Hybrid TaylorSeer](#taylorseer)
  - [⚡️Hybrid Cache CFG](#cfg)
@@ -185,7 +197,7 @@ Currently, for any **Diffusion** models with **Transformer Blocks** that match t

  ### ♥️Cache Acceleration with One-line Code

- In most cases, you only need to call **one-line** of code, that is `cache_dit.enable_cache(...)`. After this API is called, you just need to call the pipe as normal. The `pipe` param can be **any** Diffusion Pipeline. Please refer to [Qwen-Image](./examples/run_qwen_image.py) as an example.
+ In most cases, you only need to call **one-line** of code, that is `cache_dit.enable_cache(...)`. After this API is called, you just need to call the pipe as normal. The `pipe` param can be **any** Diffusion Pipeline. Please refer to [Qwen-Image](./examples/pipeline/run_qwen_image.py) as an example.

  ```python
  import cache_dit
@@ -199,11 +211,14 @@ cache_dit.enable_cache(pipe)

  # Just call the pipe as normal.
  output = pipe(...)
+
+ # Disable cache and run original pipe.
+ cache_dit.disable_cache(pipe)
  ```

- ### 🔥Automatic Block Adapter + 📚Hybird Forward Pattern
+ ### 🔥Automatic Block Adapter

- But in some cases, you may have a **modified** Diffusion Pipeline or Transformer that is not located in the diffusers library or not officially supported by **cache-dit** at this time. The **BlockAdapter** can help you solve this problems. Please refer to [🔥Qwen-Image w/ BlockAdapter](./examples/run_qwen_image_adapter.py) and [📚FLUX.1 w/ Hybird Forward Pattern](./examples/run_flux_adapter.py) for more details.
+ But in some cases, you may have a **modified** Diffusion Pipeline or Transformer that is not located in the diffusers library or not officially supported by **cache-dit** at this time. The **BlockAdapter** can help you solve this problems. Please refer to [🔥Qwen-Image w/ BlockAdapter](./examples/adapter/run_qwen_image_adapter.py) as an example.

  ```python
  from cache_dit import ForwardPattern, BlockAdapter
@@ -225,14 +240,19 @@ cache_dit.enable_cache(
          pipe=pipe, # Qwen-Image, etc.
          transformer=pipe.transformer,
          blocks=pipe.transformer.transformer_blocks,
-         blocks_name="transformer_blocks",
          forward_pattern=ForwardPattern.Pattern_1,
      ),
  )
+ ```
+ For such situations, **BlockAdapter** can help you quickly apply various cache acceleration features to your own Diffusion Pipelines and Transformers. Please check the [📚BlockAdapter.md](./docs/BlockAdapter.md) for more details.
+
+ ### 📚Hybird Forward Pattern

- # cache-dit supported 📚Hybird Forward Pattern, for example:
+ Sometimes, a Transformer class will contain more than one transformer `blocks`. For example, **FLUX.1** (HiDream, Chroma, etc) contains transformer_blocks and single_transformer_blocks (with different forward patterns). The **BlockAdapter** can also help you solve this problem. Please refer to [📚FLUX.1](./examples/adapter/run_flux_adapter.py) as an example.
+
+ ```python
  # For diffusers <= 0.34.0, FLUX.1 transformer_blocks and
- # single_transformer_blocks has different forward pattern.
+ # single_transformer_blocks have different forward patterns.
  cache_dit.enable_cache(
      BlockAdapter(
          pipe=pipe, # FLUX.1, etc.
@@ -241,10 +261,6 @@ cache_dit.enable_cache(
              pipe.transformer.transformer_blocks,
              pipe.transformer.single_transformer_blocks,
          ],
-         blocks_name=[
-             "transformer_blocks",
-             "single_transformer_blocks",
-         ],
          forward_pattern=[
              ForwardPattern.Pattern_1,
              ForwardPattern.Pattern_3,
@@ -252,7 +268,6 @@ cache_dit.enable_cache(
      ),
  )
  ```
- For such situations, **BlockAdapter** can help you quickly apply various cache acceleration features to your own Diffusion Pipelines and Transformers. Please check the [📚BlockAdapter.md](./docs/BlockAdapter.md) for more details.

  ### 🤖Cache Acceleration Stats Summary

@@ -377,7 +392,7 @@

  <div id="cfg"></div>

- cache-dit supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `do_separate_cfg` param to **False (default)**. Otherwise, set it to True. For examples:
+ cache-dit supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `enable_spearate_cfg` param to **False (default)**. Otherwise, set it to True. For examples:

  ```python
  cache_dit.enable_cache(
@@ -385,10 +400,10 @@ cache_dit.enable_cache(
      ...,
      # CFG: classifier free guidance or not
      # For model that fused CFG and non-CFG into single forward step,
-     # should set do_separate_cfg as False. For example, set it as True
+     # should set enable_spearate_cfg as False. For example, set it as True
      # for Wan 2.1/Qwen-Image and set it as False for FLUX.1, HunyuanVideo,
      # CogVideoX, Mochi, LTXVideo, Allegro, CogView3Plus, EasyAnimate, SD3, etc.
-     do_separate_cfg=True, # Wan 2.1, Qwen-Image, CogView4, Cosmos, SkyReelsV2, etc.
+     enable_spearate_cfg=True, # Wan 2.1, Qwen-Image, CogView4, Cosmos, SkyReelsV2, etc.
      # Compute cfg forward first or not, default False, namely,
      # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
      cfg_compute_first=False,
@@ -451,11 +466,21 @@ cache-dit-metrics-cli all -i1 true_dir -i2 test_dir # image dir

  How to contribute? Star ⭐️ this repo to support us or check [CONTRIBUTE.md](./CONTRIBUTE.md).

- ## ©️License
+ <div align='center'>
+ <a href="https://star-history.com/#vipshop/cache-dit&Date">
+ <picture align='center'>
+ <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=vipshop/cache-dit&type=Date&theme=dark" />
+ <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=vipshop/cache-dit&type=Date" />
+ <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=vipshop/cache-dit&type=Date" width=400px />
+ </picture>
+ </a>
+ </div>
+
+ ## ©️Acknowledgements

- <div id="license"></div>
+ <div id="Acknowledgements"></div>

- The **cache-dit** codebase is adapted from FBCache. Special thanks to their excellent work! We have followed the original License from FBCache, please check [LICENSE](./LICENSE) for more details.
+ The **cache-dit** codebase is adapted from FBCache. Over time its codebase diverged a lot, and **cache-dit** API is no longer compatible with FBCache.

  ## ©️Citations

@@ -463,7 +488,7 @@

  ```BibTeX
  @misc{cache-dit@2025,
- title={cache-dit: An Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers},
+ title={cache-dit: A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers},
  url={https://github.com/vipshop/cache-dit.git},
  note={Open-source software available at https://github.com/vipshop/cache-dit.git},
  author={vipshop.com},