cache-dit 0.2.26__py3-none-any.whl → 0.2.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. cache_dit/__init__.py +8 -6
  2. cache_dit/_version.py +2 -2
  3. cache_dit/cache_factory/__init__.py +17 -4
  4. cache_dit/cache_factory/block_adapters/__init__.py +555 -0
  5. cache_dit/cache_factory/block_adapters/block_adapters.py +538 -0
  6. cache_dit/cache_factory/block_adapters/block_registers.py +77 -0
  7. cache_dit/cache_factory/cache_adapters.py +262 -938
  8. cache_dit/cache_factory/cache_blocks/__init__.py +60 -11
  9. cache_dit/cache_factory/cache_blocks/pattern_0_1_2.py +2 -2
  10. cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py +45 -41
  11. cache_dit/cache_factory/cache_blocks/pattern_base.py +106 -80
  12. cache_dit/cache_factory/cache_blocks/utils.py +16 -10
  13. cache_dit/cache_factory/cache_contexts/__init__.py +5 -0
  14. cache_dit/cache_factory/cache_contexts/cache_context.py +327 -0
  15. cache_dit/cache_factory/cache_contexts/cache_manager.py +833 -0
  16. cache_dit/cache_factory/cache_interface.py +31 -31
  17. cache_dit/cache_factory/patch_functors/functor_chroma.py +3 -0
  18. cache_dit/cache_factory/patch_functors/functor_flux.py +4 -0
  19. cache_dit/quantize/quantize_ao.py +1 -0
  20. cache_dit/utils.py +26 -26
  21. {cache_dit-0.2.26.dist-info → cache_dit-0.2.28.dist-info}/METADATA +59 -23
  22. cache_dit-0.2.28.dist-info/RECORD +47 -0
  23. cache_dit/cache_factory/cache_context.py +0 -1155
  24. cache_dit-0.2.26.dist-info/RECORD +0 -42
  25. /cache_dit/cache_factory/{taylorseer.py → cache_contexts/taylorseer.py} +0 -0
  26. {cache_dit-0.2.26.dist-info → cache_dit-0.2.28.dist-info}/WHEEL +0 -0
  27. {cache_dit-0.2.26.dist-info → cache_dit-0.2.28.dist-info}/entry_points.txt +0 -0
  28. {cache_dit-0.2.26.dist-info → cache_dit-0.2.28.dist-info}/licenses/LICENSE +0 -0
  29. {cache_dit-0.2.26.dist-info → cache_dit-0.2.28.dist-info}/top_level.txt +0 -0
cache_dit/cache_factory/cache_interface.py CHANGED
@@ -1,23 +1,18 @@
  from typing import Any, Tuple, List
  from diffusers import DiffusionPipeline
- from cache_dit.cache_factory.forward_pattern import ForwardPattern
  from cache_dit.cache_factory.cache_types import CacheType
- from cache_dit.cache_factory.cache_adapters import BlockAdapter
- from cache_dit.cache_factory.cache_adapters import UnifiedCacheAdapter
+ from cache_dit.cache_factory.block_adapters import BlockAdapter
+ from cache_dit.cache_factory.block_adapters import BlockAdapterRegistry
+ from cache_dit.cache_factory.cache_adapters import CachedAdapter

  from cache_dit.logger import init_logger

  logger = init_logger(__name__)


- def supported_pipelines() -> Tuple[int, List[str]]:
-     return UnifiedCacheAdapter.supported_pipelines()
-
-
  def enable_cache(
-     # BlockAdapter & forward pattern
+     # DiffusionPipeline or BlockAdapter
      pipe_or_adapter: DiffusionPipeline | BlockAdapter | Any,
-     forward_pattern: ForwardPattern = ForwardPattern.Pattern_0,
      # Cache context kwargs
      Fn_compute_blocks: int = 8,
      Bn_compute_blocks: int = 0,
@@ -26,7 +21,7 @@ def enable_cache(
      max_continuous_cached_steps: int = -1,
      residual_diff_threshold: float = 0.08,
      # Cache CFG or not
-     do_separate_cfg: bool = False,
+     enable_spearate_cfg: bool = False,
      cfg_compute_first: bool = False,
      cfg_diff_compute_separate: bool = True,
      # Hybird TaylorSeer
@@ -34,7 +29,7 @@ def enable_cache(
      enable_encoder_taylorseer: bool = False,
      taylorseer_cache_type: str = "residual",
      taylorseer_order: int = 2,
-     **other_cache_kwargs,
+     **other_cache_context_kwargs,
  ) -> DiffusionPipeline | Any:
      r"""
      Unified Cache API for almost Any Diffusion Transformers (with Transformer Blocks
@@ -48,9 +43,6 @@ def enable_cache(
              The standard Diffusion Pipeline or custom BlockAdapter (from cache-dit or user-defined).
              For example: cache_dit.enable_cache(FluxPipeline(...)). Please check https://github.com/vipshop/cache-dit/blob/main/docs/BlockAdapter.md
              for the usgae of BlockAdapter.
-         forward_pattern (`ForwardPattern`, *required*, defaults to `ForwardPattern.Pattern_0`):
-             The forward pattern of Transformer block, please check https://github.com/vipshop/cache-dit/tree/main?tab=readme-ov-file#forward-pattern-matching
-             for more details.
          Fn_compute_blocks (`int`, *required*, defaults to 8):
              Specifies that `DBCache` uses the **first n** Transformer blocks to fit the information
              at time step t, enabling the calculation of a more stable L1 diff and delivering more
@@ -72,9 +64,9 @@ def enable_cache(
          residual_diff_threshold (`float`, *required*, defaults to 0.08):
              he value of residual diff threshold, a higher value leads to faster performance at the
              cost of lower precision.
-         do_separate_cfg (`bool`, *required*, defaults to False):
+         enable_spearate_cfg (`bool`, *required*, defaults to False):
              Whether to do separate cfg or not, such as Wan 2.1, Qwen-Image. For model that fused CFG
-             and non-CFG into single forward step, should set do_separate_cfg as False, for example:
+             and non-CFG into single forward step, should set enable_spearate_cfg as False, for example:
              CogVideoX, HunyuanVideo, Mochi, etc.
          cfg_compute_first (`bool`, *required*, defaults to False):
              Compute cfg forward first or not, default False, namely, 0, 2, 4, ..., -> non-CFG step;
@@ -97,7 +89,7 @@ def enable_cache(
              The order of taylorseer, higher values of n_derivatives will lead to longer computation time,
              but may improve precision significantly.
          other_cache_kwargs: (`dict`, *optional*, defaults to {})
-             Other cache context kwargs, please check https://github.com/vipshop/cache-dit/blob/main/src/cache_dit/cache_factory/cache_context.py
+             Other cache context kwargs, please check https://github.com/vipshop/cache-dit/blob/main/src/cache_dit/cache_factory/cache_contexts/cache_context.py
              for more details.

      Examples:
@@ -111,7 +103,11 @@
      """

      # Collect cache context kwargs
-     cache_context_kwargs = other_cache_kwargs.copy()
+     cache_context_kwargs = other_cache_context_kwargs.copy()
+     if cache_type := cache_context_kwargs.get("cache_type", None):
+         if cache_type == CacheType.NONE:
+             return pipe_or_adapter
+
      cache_context_kwargs["cache_type"] = CacheType.DBCache
      cache_context_kwargs["Fn_compute_blocks"] = Fn_compute_blocks
      cache_context_kwargs["Bn_compute_blocks"] = Bn_compute_blocks
@@ -121,7 +117,7 @@ def enable_cache(
          max_continuous_cached_steps
      )
      cache_context_kwargs["residual_diff_threshold"] = residual_diff_threshold
-     cache_context_kwargs["do_separate_cfg"] = do_separate_cfg
+     cache_context_kwargs["enable_spearate_cfg"] = enable_spearate_cfg
      cache_context_kwargs["cfg_compute_first"] = cfg_compute_first
      cache_context_kwargs["cfg_diff_compute_separate"] = (
          cfg_diff_compute_separate
@@ -131,31 +127,35 @@ def enable_cache(
          enable_encoder_taylorseer
      )
      cache_context_kwargs["taylorseer_cache_type"] = taylorseer_cache_type
-     if "taylorseer_kwargs" in cache_context_kwargs:
-         cache_context_kwargs["taylorseer_kwargs"][
-             "n_derivatives"
-         ] = taylorseer_order
-     else:
-         cache_context_kwargs["taylorseer_kwargs"] = {
-             "n_derivatives": taylorseer_order
-         }
+     cache_context_kwargs["taylorseer_order"] = taylorseer_order

      if isinstance(pipe_or_adapter, BlockAdapter):
-         return UnifiedCacheAdapter.apply(
+         return CachedAdapter.apply(
              pipe=None,
              block_adapter=pipe_or_adapter,
-             forward_pattern=forward_pattern,
              **cache_context_kwargs,
          )
      elif isinstance(pipe_or_adapter, DiffusionPipeline):
-         return UnifiedCacheAdapter.apply(
+         return CachedAdapter.apply(
              pipe=pipe_or_adapter,
              block_adapter=None,
-             forward_pattern=forward_pattern,
              **cache_context_kwargs,
          )
      else:
          raise ValueError(
+             f"type: {type(pipe_or_adapter)} is not valid, "
              "Please pass DiffusionPipeline or BlockAdapter"
              "for the 1's position param: pipe_or_adapter"
          )
+
+
+ def supported_pipelines(
+     **kwargs,
+ ) -> Tuple[int, List[str]]:
+     return BlockAdapterRegistry.supported_pipelines(**kwargs)
+
+
+ def get_adapter(
+     pipe: DiffusionPipeline | str | Any,
+ ) -> BlockAdapter:
+     return BlockAdapterRegistry.get_adapter(pipe)
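For callers, the upshot of this diff is that `forward_pattern` is no longer an argument of `enable_cache(...)` (it now lives on `BlockAdapter`), the kwargs bucket is renamed to `other_cache_context_kwargs`, and the CFG switch is spelled `enable_spearate_cfg`. A minimal calling sketch against the new signature; the checkpoint id and dtype are illustrative, and `supported_pipelines` is assumed to be re-exported at the package top level:

```python
import torch
import cache_dit
from diffusers import DiffusionPipeline

# Illustrative checkpoint; any DiT-based Diffusers pipeline should work.
pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image", torch_dtype=torch.bfloat16)

cache_dit.enable_cache(
    pipe,                        # DiffusionPipeline or BlockAdapter
    Fn_compute_blocks=8,         # first-n blocks used to estimate the residual diff
    Bn_compute_blocks=0,
    residual_diff_threshold=0.08,
    enable_spearate_cfg=True,    # Qwen-Image/Wan 2.1 run CFG as a separate forward step
    taylorseer_order=2,          # replaces the old taylorseer_kwargs={"n_derivatives": ...}
)

# New helpers appended at the end of the module (assumed re-exported here):
count, names = cache_dit.supported_pipelines()
print(count, names[:5])
```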
cache_dit/cache_factory/patch_functors/functor_chroma.py CHANGED
@@ -30,6 +30,9 @@ class ChromaPatchFunctor(PatchFunctor):
          blocks: torch.nn.ModuleList = None,
          **kwargs,
      ) -> ChromaTransformer2DModel:
+         if getattr(transformer, "_is_patched", False):
+             return transformer
+
          if blocks is None:
              blocks = transformer.single_transformer_blocks

cache_dit/cache_factory/patch_functors/functor_flux.py CHANGED
@@ -30,6 +30,10 @@ class FluxPatchFunctor(PatchFunctor):
          blocks: torch.nn.ModuleList = None,
          **kwargs,
      ) -> FluxTransformer2DModel:
+
+         if getattr(transformer, "_is_patched", False):
+             return transformer
+
          if blocks is None:
              blocks = transformer.single_transformer_blocks

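Both patch functors now short-circuit when the transformer has already been patched, which makes `apply(...)` idempotent. A minimal sketch of that guard pattern in isolation; the class name `MyPatchFunctor` and the `_rewrite_blocks` helper are hypothetical, not taken from the package:

```python
import torch

class MyPatchFunctor:
    """Illustrative only: patch a transformer in place, at most once."""

    def apply(self, transformer: torch.nn.Module, **kwargs) -> torch.nn.Module:
        # Applying the patch twice would wrap the block forwards twice,
        # so bail out early if a previous call already marked the module.
        if getattr(transformer, "_is_patched", False):
            return transformer

        self._rewrite_blocks(transformer)   # hypothetical patch logic
        transformer._is_patched = True      # mark so later calls become no-ops
        return transformer

    def _rewrite_blocks(self, transformer: torch.nn.Module) -> None:
        ...  # e.g. swap block.forward for a cached variant
```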
cache_dit/quantize/quantize_ao.py CHANGED
@@ -179,6 +179,7 @@ def quantize_ao(
      force_empty_cache()

      logger.info(
+         f"Quantized Method: {quant_type:>5}\n"
          f"Quantized Linear Layers: {num_quant_linear:>5}\n"
          f"Skipped Linear Layers: {num_skip_linear:>5}\n"
          f"Total Linear Layers: {num_linear_layers:>5}\n"
cache_dit/utils.py CHANGED
@@ -7,7 +7,6 @@ from diffusers import DiffusionPipeline

  from typing import Dict, Any
  from cache_dit.logger import init_logger
- from cache_dit.cache_factory import CacheType


  logger = init_logger(__name__)
@@ -30,27 +29,32 @@ class CacheStats:


  def summary(
-     pipe_or_transformer: DiffusionPipeline | torch.nn.Module | Any,
+     pipe_or_module: DiffusionPipeline | torch.nn.Module | Any,
      details: bool = False,
      logging: bool = True,
  ) -> CacheStats:
      cache_stats = CacheStats()
-     cls_name = pipe_or_transformer.__class__.__name__
-     if not isinstance(pipe_or_transformer, torch.nn.Module):
-         assert hasattr(pipe_or_transformer, "transformer")
-         transformer = pipe_or_transformer.transformer
+
+     if not isinstance(pipe_or_module, torch.nn.Module):
+         assert hasattr(pipe_or_module, "transformer")
+         module = pipe_or_module.transformer
+         cls_name = module.__class__.__name__
      else:
-         transformer = pipe_or_transformer
+         module = pipe_or_module
+
+     cls_name = module.__class__.__name__
+     if isinstance(module, torch.nn.ModuleList):
+         cls_name = module[0].__class__.__name__

-     if hasattr(transformer, "_cache_context_kwargs"):
-         cache_options = transformer._cache_context_kwargs
+     if hasattr(module, "_cache_context_kwargs"):
+         cache_options = module._cache_context_kwargs
          cache_stats.cache_options = cache_options
          if logging:
              print(f"\n🤗Cache Options: {cls_name}\n\n{cache_options}")

-     if hasattr(transformer, "_cached_steps"):
-         cached_steps: list[int] = transformer._cached_steps
-         residual_diffs: dict[str, float] = dict(transformer._residual_diffs)
+     if hasattr(module, "_cached_steps"):
+         cached_steps: list[int] = module._cached_steps
+         residual_diffs: dict[str, float] = dict(module._residual_diffs)
          cache_stats.cached_steps = cached_steps
          cache_stats.residual_diffs = residual_diffs

@@ -91,11 +95,9 @@ def summary(
              compact=True,
          )

-     if hasattr(transformer, "_cfg_cached_steps"):
-         cfg_cached_steps: list[int] = transformer._cfg_cached_steps
-         cfg_residual_diffs: dict[str, float] = dict(
-             transformer._cfg_residual_diffs
-         )
+     if hasattr(module, "_cfg_cached_steps"):
+         cfg_cached_steps: list[int] = module._cfg_cached_steps
+         cfg_residual_diffs: dict[str, float] = dict(module._cfg_residual_diffs)
          cache_stats.cfg_cached_steps = cfg_cached_steps
          cache_stats.cfg_residual_diffs = cfg_residual_diffs

@@ -153,9 +155,15 @@ def strify(
          cache_options = stats.cache_options
          cached_steps = len(stats.cached_steps)
      elif isinstance(pipe_or_stats, dict):
+         from cache_dit.cache_factory import CacheType
+
          # Assume cache_context_kwargs
          cache_options = pipe_or_stats
          cached_steps = None
+         cache_type = cache_options.get("cache_type", CacheType.NONE)
+
+         if cache_type == CacheType.NONE:
+             return "NONE"
      else:
          raise ValueError(
              "Please set pipe_or_stats param as one of: "
@@ -165,17 +173,9 @@ def strify(
      if not cache_options:
          return "NONE"

-     if cache_options.get("cache_type", None) != CacheType.DBCache:
-         return "NONE"
-
      def get_taylorseer_order():
          taylorseer_order = 0
-         if "taylorseer_kwargs" in cache_options:
-             if "n_derivatives" in cache_options["taylorseer_kwargs"]:
-                 taylorseer_order = cache_options["taylorseer_kwargs"][
-                     "n_derivatives"
-                 ]
-         elif "taylorseer_order" in cache_options:
+         if "taylorseer_order" in cache_options:
              taylorseer_order = cache_options["taylorseer_order"]
          return taylorseer_order

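Following the rename from `pipe_or_transformer` to `pipe_or_module`, a small usage sketch of the stats helpers, assuming a pipeline on which cache-dit has been enabled and run at least once; importing `summary`/`strify` from `cache_dit.utils` and `CacheType` from `cache_dit.cache_factory` mirrors the paths visible in this diff:

```python
from diffusers import DiffusionPipeline

from cache_dit.utils import summary, strify
from cache_dit.cache_factory import CacheType


def report(pipe: DiffusionPipeline) -> None:
    # `pipe` should already have cache_dit.enable_cache(...) applied and
    # at least one pipe(...) call behind it, otherwise the stats are empty.
    stats = summary(pipe, details=False, logging=True)  # accepts a pipeline or a torch.nn.Module
    print(stats.cache_options)
    print(stats.cached_steps, stats.residual_diffs)


# strify() also accepts a plain cache-options dict; a cache_type of NONE
# now short-circuits to the string "NONE".
print(strify({"cache_type": CacheType.NONE}))
```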
{cache_dit-0.2.26.dist-info → cache_dit-0.2.28.dist-info}/METADATA CHANGED
@@ -1,7 +1,7 @@
  Metadata-Version: 2.4
  Name: cache_dit
- Version: 0.2.26
- Summary: 🤗 CacheDiT: An Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers
+ Version: 0.2.28
+ Summary: 🤗 A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers
  Author: DefTruth, vipshop.com, etc.
  Maintainer: DefTruth, vipshop.com, etc
  Project-URL: Repository, https://github.com/vipshop/cache-dit.git
@@ -44,7 +44,7 @@ Dynamic: requires-python
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/cache-dit-logo.png height="120">

  <p align="center">
- An <b>Unified</b> and Training-free <b>Cache Acceleration</b> Toolbox for <b>Diffusion Transformers</b> <br>
+ A <b>Unified</b> and Training-free <b>Cache Acceleration</b> Toolbox for <b>Diffusion Transformers</b> <br>
  ♥️ <b>Cache Acceleration</b> with <b>One-line</b> Code ~ ♥️
  </p>
  <div align='center'>
@@ -59,23 +59,24 @@ Dynamic: requires-python
  🔥<b><a href="#unified">Unified Cache APIs</a> | <a href="#dbcache">DBCache</a> | <a href="#taylorseer">Hybrid TaylorSeer</a> | <a href="#cfg">Hybrid Cache CFG</a></b>🔥
  </p>
  <p align="center">
- 🎉Now, <b>cache-dit</b> covers <b>All</b> mainstream <b>DiT-based</b> Diffusers' Pipelines</b>🎉<br>
+ 🎉Now, <b>cache-dit</b> covers <b>100%</b> Diffusers' <b>DiT-based</b> Pipelines🎉<br>
  🔥<b><a href="#supported">Qwen-Image</a> | <a href="#supported">FLUX.1</a> | <a href="#supported">Wan 2.1/2.2</a> | <a href="#supported"> ... </a> | <a href="#supported">CogVideoX</a></b>🔥
  </p>
  </div>

  ## 🔥News

- - [2025-08-29] 🔥</b>Covers <b>All</b> Diffusers' <b>DiT-based</b> Pipelines via **[BlockAdapter](#unified) + [Pattern Matching](#unified).**
- - [2025-08-26] 🎉[**Wan2.2**](https://github.com/Wan-Video) **1.8x⚡️** speedup with `cache-dit + compile`! Please check the [example](./examples/run_wan_2.2.py).
- - [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x⚡️** speedup! Check the example at [run_qwen_image_edit.py](./examples/run_qwen_image_edit.py).
+ - [2025-09-03] 🎉[**Wan2.2-MoE**](https://github.com/Wan-Video) **2.4x⚡️** speedup! Please refer to [run_wan_2.2.py](./examples/pipeline/run_wan_2.2.py) as an example.
+ - [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x⚡️** speedup! Check the example: [run_qwen_image_edit.py](./examples/pipeline/run_qwen_image_edit.py).
  - [2025-08-12] 🎉First caching mechanism in [QwenLM/Qwen-Image](https://github.com/QwenLM/Qwen-Image) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/QwenLM/Qwen-Image/pull/61).
- - [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x⚡️** speedup! Please refer [run_qwen_image.py](./examples/run_qwen_image.py) as an example.
+ - [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x⚡️** speedup! Please refer to [run_qwen_image.py](./examples/pipeline/run_qwen_image.py) as an example.

  <details>
  <summary> Previous News </summary>
-
- - [2025-08-10] 🔥[**FLUX.1-Kontext-dev**](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) is supported! Please refer [run_flux_kontext.py](./examples/run_flux_kontext.py) as an example.
+
+ - [2025-09-01] 📚[**Hybird Forward Pattern**](#unified) is supported! Please check [FLUX.1-dev](./examples/run_flux_adapter.py) as an example.
+ - [2025-08-29] 🔥</b>Covers <b>100%</b> Diffusers' <b>DiT-based</b> Pipelines: **[BlockAdapter](#unified) + [Pattern Matching](#unified).**
+ - [2025-08-10] 🔥[**FLUX.1-Kontext-dev**](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) is supported! Please refer [run_flux_kontext.py](./examples/pipeline/run_flux_kontext.py) as an example.
  - [2025-07-18] 🎉First caching mechanism in [🤗huggingface/flux-fast](https://github.com/huggingface/flux-fast) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check the [PR](https://github.com/huggingface/flux-fast/pull/13).
  - [2025-07-13] **[🤗flux-faster](https://github.com/xlite-dev/flux-faster)** is released! **3.3x** speedup for FLUX.1 on NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)**.

@@ -88,6 +89,11 @@ Dynamic: requires-python
  - [⚙️Installation](#️installation)
  - [🔥Supported Models](#supported)
  - [🎉Unified Cache APIs](#unified)
+ - [📚Forward Pattern Matching](#unified)
+ - [🎉Cache with One-line Code](#unified)
+ - [🔥Automatic Block Adapter](#unified)
+ - [📚Hybird Forward Pattern](#unified)
+ - [🤖Cache Acceleration Stats](#unified)
  - [⚡️Dual Block Cache](#dbcache)
  - [🔥Hybrid TaylorSeer](#taylorseer)
  - [⚡️Hybrid Cache CFG](#cfg)
@@ -176,11 +182,11 @@ Currently, **cache-dit** library supports almost **Any** Diffusion Transformers

  Currently, for any **Diffusion** models with **Transformer Blocks** that match the specific **Input/Output patterns**, we can use the **Unified Cache APIs** from **cache-dit**, namely, the `cache_dit.enable_cache(...)` API. The **Unified Cache APIs** are currently in the experimental phase; please stay tuned for updates. The supported patterns are listed as follows:

- ![](https://github.com/vipshop/cache-dit/raw/main/assets/patterns.png)
+ ![](https://github.com/vipshop/cache-dit/raw/main/assets/patterns-v1.png)

  ### ♥️Cache Acceleration with One-line Code

- In most cases, you only need to call **one-line** of code, that is `cache_dit.enable_cache(...)`. After this API is called, you just need to call the pipe as normal. The `pipe` param can be **any** Diffusion Pipeline. Please refer to [Qwen-Image](./examples/run_qwen_image.py) as an example.
+ In most cases, you only need to call **one-line** of code, that is `cache_dit.enable_cache(...)`. After this API is called, you just need to call the pipe as normal. The `pipe` param can be **any** Diffusion Pipeline. Please refer to [Qwen-Image](./examples/pipeline/run_qwen_image.py) as an example.

  ```python
  import cache_dit
@@ -198,17 +204,20 @@ output = pipe(...)

  ### 🔥Automatic Block Adapter

- But in some cases, you may have a **modified** Diffusion Pipeline or Transformer that is not located in the diffusers library or not officially supported by **cache-dit** at this time. The **BlockAdapter** can help you solve this problems. Please refer to [Qwen-Image w/ BlockAdapter](./examples/run_qwen_image_adapter.py) as an example.
+ But in some cases, you may have a **modified** Diffusion Pipeline or Transformer that is not located in the diffusers library or not officially supported by **cache-dit** at this time. The **BlockAdapter** can help you solve this problems. Please refer to [🔥Qwen-Image w/ BlockAdapter](./examples/adapter/run_qwen_image_adapter.py) as an example.

  ```python
  from cache_dit import ForwardPattern, BlockAdapter

- # Use BlockAdapter with `auto` mode.
+ # Use 🔥BlockAdapter with `auto` mode.
  cache_dit.enable_cache(
-     BlockAdapter(pipe=pipe, auto=True), # Qwen-Image, etc.
-     # Check `📚Forward Pattern Matching` documentation and hack the code of
-     # of Qwen-Image, you will find that it has satisfied `FORWARD_PATTERN_1`.
-     forward_pattern=ForwardPattern.Pattern_1,
+     BlockAdapter(
+         # Any DiffusionPipeline, Qwen-Image, etc.
+         pipe=pipe, auto=True,
+         # Check `📚Forward Pattern Matching` documentation and hack the code of
+         # of Qwen-Image, you will find that it has satisfied `FORWARD_PATTERN_1`.
+         forward_pattern=ForwardPattern.Pattern_1,
+     ),
  )

  # Or, manually setup transformer configurations.
@@ -218,12 +227,39 @@ cache_dit.enable_cache(
          transformer=pipe.transformer,
          blocks=pipe.transformer.transformer_blocks,
          blocks_name="transformer_blocks",
+         forward_pattern=ForwardPattern.Pattern_1,
      ),
-     forward_pattern=ForwardPattern.Pattern_1,
  )
  ```
  For such situations, **BlockAdapter** can help you quickly apply various cache acceleration features to your own Diffusion Pipelines and Transformers. Please check the [📚BlockAdapter.md](./docs/BlockAdapter.md) for more details.

+ ### 📚Hybird Forward Pattern
+
+ Sometimes, a Transformer class will contain more than one transformer `blocks`. For example, **FLUX.1** (HiDream, Chroma, etc) contains transformer_blocks and single_transformer_blocks (with different forward patterns). The **BlockAdapter** can also help you solve this problem. Please refer to [📚FLUX.1](./examples/adapter/run_flux_adapter.py) as an example.
+
+ ```python
+ # For diffusers <= 0.34.0, FLUX.1 transformer_blocks and
+ # single_transformer_blocks have different forward patterns.
+ cache_dit.enable_cache(
+     BlockAdapter(
+         pipe=pipe, # FLUX.1, etc.
+         transformer=pipe.transformer,
+         blocks=[
+             pipe.transformer.transformer_blocks,
+             pipe.transformer.single_transformer_blocks,
+         ],
+         blocks_name=[
+             "transformer_blocks",
+             "single_transformer_blocks",
+         ],
+         forward_pattern=[
+             ForwardPattern.Pattern_1,
+             ForwardPattern.Pattern_3,
+         ],
+     ),
+ )
+ ```
+
  ### 🤖Cache Acceleration Stats Summary

  After finishing each inference of `pipe(...)`, you can call the `cache_dit.summary()` API on pipe to get the details of the **Cache Acceleration Stats** for the current inference.
@@ -347,7 +383,7 @@ cache_dit.enable_cache(

  <div id="cfg"></div>

- cache-dit supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `do_separate_cfg` param to **False (default)**. Otherwise, set it to True. For examples:
+ cache-dit supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `enable_spearate_cfg` param to **False (default)**. Otherwise, set it to True. For examples:

  ```python
  cache_dit.enable_cache(
@@ -355,10 +391,10 @@ cache_dit.enable_cache(
      ...,
      # CFG: classifier free guidance or not
      # For model that fused CFG and non-CFG into single forward step,
-     # should set do_separate_cfg as False. For example, set it as True
+     # should set enable_spearate_cfg as False. For example, set it as True
      # for Wan 2.1/Qwen-Image and set it as False for FLUX.1, HunyuanVideo,
      # CogVideoX, Mochi, LTXVideo, Allegro, CogView3Plus, EasyAnimate, SD3, etc.
-     do_separate_cfg=True, # Wan 2.1, Qwen-Image, CogView4, Cosmos, SkyReelsV2, etc.
+     enable_spearate_cfg=True, # Wan 2.1, Qwen-Image, CogView4, Cosmos, SkyReelsV2, etc.
      # Compute cfg forward first or not, default False, namely,
      # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
      cfg_compute_first=False,
@@ -433,7 +469,7 @@ The **cache-dit** codebase is adapted from FBCache. Special thanks to their exce

  ```BibTeX
  @misc{cache-dit@2025,
- title={cache-dit: An Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers},
+ title={cache-dit: A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers},
  url={https://github.com/vipshop/cache-dit.git},
  note={Open-source software available at https://github.com/vipshop/cache-dit.git},
  author={vipshop.com},
cache_dit-0.2.28.dist-info/RECORD ADDED
@@ -0,0 +1,47 @@
+ cache_dit/__init__.py,sha256=V4jCkTic4XvWojCUqYcjlvxiNM2DjGQbOLk6R-tAx2A,1191
+ cache_dit/_version.py,sha256=hCl1MKhh249NDbigjeJY-mrKYvjPFbJ7oklAepBQrto,706
+ cache_dit/logger.py,sha256=0zsu42hN-3-rgGC_C29ms1IvVpV4_b4_SwJCKSenxBE,4304
+ cache_dit/utils.py,sha256=pb5298XKmaZDoHwyteYRhixAG_0DGWrvvaObeShIhOM,7146
+ cache_dit/cache_factory/.gitignore,sha256=5Cb-qT9wsTUoMJ7vACDF7ZcLpAXhi5v-xdcWSRit988,23
+ cache_dit/cache_factory/__init__.py,sha256=M8q9furJOq2AZcLHRuCXZCjR9fNSELoEYdsCofIjMAo,1037
+ cache_dit/cache_factory/cache_adapters.py,sha256=q7MxY44qw90h449Gr8W5iJjSwXPJR-YIyRmu_KJnQo0,13284
+ cache_dit/cache_factory/cache_interface.py,sha256=2jcuTZ4D_P0M5pSw0z3BMPalobYen3YO1yKvRjaQjdQ,8332
+ cache_dit/cache_factory/cache_types.py,sha256=FIFa6ZBfvvSMMHyBBhvarvgg2Y2wbRgITcG_uGylGe0,991
+ cache_dit/cache_factory/forward_pattern.py,sha256=FumlCuZ-TSmSYH0hGBHctSJ-oGLCftdZjLygqhsmdR4,2258
+ cache_dit/cache_factory/utils.py,sha256=XkVM9AXcB9zYq8-S8QKAsGz80r3tA6U3lBNGDGeHOe4,1871
+ cache_dit/cache_factory/block_adapters/__init__.py,sha256=UFuHxNR7Y0RZoCl97wn0u2d_2rj8PzNsWfzgda5AoKM,17395
+ cache_dit/cache_factory/block_adapters/block_adapters.py,sha256=WsqGUDSDU_5-pIXwDqAK_k4a-4jgbFGoLCoF6kAjLt8,19198
+ cache_dit/cache_factory/block_adapters/block_registers.py,sha256=99ouWioxldlZJYQWhcUkOu94f8vO9V9QGzVNhKWtyO4,2005
+ cache_dit/cache_factory/cache_blocks/__init__.py,sha256=OWjnpJxA8EJVoRzuyb5miuiRphUFj831-bbtWsTDjnM,2750
+ cache_dit/cache_factory/cache_blocks/pattern_0_1_2.py,sha256=ElMps6_7uI74tSF9GDR_dEI0bZEhdzcepM29xFWnYo8,428
+ cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py,sha256=CtBr6nvtAW8SAeEwPwiwWtPgrmwyb5ukb-j3IwFULJU,9953
+ cache_dit/cache_factory/cache_blocks/pattern_base.py,sha256=XSDy3hsaKbAZPGZY92YgGA0qLgjQyIX8irQkb2R5T2c,20331
+ cache_dit/cache_factory/cache_blocks/utils.py,sha256=wfreGvtK22hDnXuw0z0hUw-9ywu91FnExfPkP8ZzlkA,891
+ cache_dit/cache_factory/cache_contexts/__init__.py,sha256=rqnJ5__zqnpVHK5A1OqWILpNh5Ss-0ZDTGgtxZMKGGo,250
+ cache_dit/cache_factory/cache_contexts/cache_context.py,sha256=N88WLdd4KE9DuMWmpX8URcF55E2zWNwcKMxgVYkxMJY,13691
+ cache_dit/cache_factory/cache_contexts/cache_manager.py,sha256=wSghuX93KmCxE4pFEVKuyrO0Jt5STu_x4CxypS2EdxI,34276
+ cache_dit/cache_factory/cache_contexts/taylorseer.py,sha256=etSUIZzDvqW3ScKCbccTPcFaSmxV1T-xAXdk-p3e3wk,3802
+ cache_dit/cache_factory/patch_functors/__init__.py,sha256=yK05iONMGILsTZ83ynrUUJtiJKJ_FDjxmVIzRLy416s,252
+ cache_dit/cache_factory/patch_functors/functor_base.py,sha256=Ahk0fTfrHgNdEl-9JSkACvfyyv9G-Ei5OSz7XBIlX5o,357
+ cache_dit/cache_factory/patch_functors/functor_chroma.py,sha256=IFCuFU8HCnohM9Qpij7oU_UG1T8Ok8uTI3B9Nw7BHfY,10029
+ cache_dit/cache_factory/patch_functors/functor_flux.py,sha256=3Q8x-PEeDiUtuv-FnQ2qEzo4qbpsYOPB9uf7YridE88,9538
+ cache_dit/compile/__init__.py,sha256=FcTVzCeyypl-mxlc59_ehHL3lBNiDAFsXuRoJ-5Cfi0,56
+ cache_dit/compile/utils.py,sha256=nN2OIrSdwRR5zGxJinKDqb07pXpvTNTF3g_OgLkeeBU,3858
+ cache_dit/custom_ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ cache_dit/custom_ops/triton_taylorseer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ cache_dit/metrics/__init__.py,sha256=RaUhl5dieF40RqnizGzR30qoJJ9dyMUEADwgwMaMQrE,575
+ cache_dit/metrics/config.py,sha256=ieOgD9ayz722RjVzk24bSIqS2D6o7TZjGk8KeXV-OLQ,551
+ cache_dit/metrics/fid.py,sha256=9Ivtazl6mW0Bon2VXa-Ia5Xj2ewxRD3V1Qkd69zYM3Y,17066
+ cache_dit/metrics/inception.py,sha256=pBVe2X6ylLPIXTG4-GWDM9DWnCviMJbJ45R3ulhktR0,12759
+ cache_dit/metrics/lpips.py,sha256=I2qCNi6qJh5TRsaIsdxO0WoRX1DN7U_H3zS0oCSahYM,1032
+ cache_dit/metrics/metrics.py,sha256=8jvM1sF-nDxUuwCRy44QEoo4dYVLCQVh1QyAMs4eaQY,27840
+ cache_dit/quantize/__init__.py,sha256=kWYoMAyZgBXu9BJlZjTQ0dRffW9GqeeY9_iTkXrb70A,59
+ cache_dit/quantize/quantize_ao.py,sha256=mGspqYgQtenl3QnKPtsSYsSD7LbVX93f1M940bhXKLU,6066
+ cache_dit/quantize/quantize_interface.py,sha256=2s_R7xPSKuJeFpEGeLwRxnq_CqJcBG3a3lzyW5wh-UM,1241
+ cache_dit/quantize/quantize_svdq.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ cache_dit-0.2.28.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
+ cache_dit-0.2.28.dist-info/METADATA,sha256=03FPh4nIDfjSFMfkDz-sWr2g3l30UsQek8VjQ6TPn8g,23204
+ cache_dit-0.2.28.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ cache_dit-0.2.28.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
+ cache_dit-0.2.28.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
+ cache_dit-0.2.28.dist-info/RECORD,,