cache-dit 0.3.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of cache-dit was flagged as possibly problematic; see its registry listing for details.

cache_dit/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
  commit_id: COMMIT_ID
  __commit_id__: COMMIT_ID
 
- __version__ = version = '0.3.2'
- __version_tuple__ = version_tuple = (0, 3, 2)
+ __version__ = version = '1.0.0'
+ __version_tuple__ = version_tuple = (1, 0, 0)
 
  __commit_id__ = commit_id = None
cache_dit/cache_factory/block_adapters/block_adapters.py CHANGED
@@ -113,6 +113,19 @@ class BlockAdapter:
          if any((self.pipe is not None, self.transformer is not None)):
              self.maybe_fill_attrs()
              self.maybe_patchify()
+             self.maybe_skip_checks()
+
+     def maybe_skip_checks(self):
+         if getattr(self.transformer, "_hf_hook", None) is not None:
+             logger.warning("_hf_hook is not None, force skip pattern check!")
+             self.check_forward_pattern = False
+             self.check_num_outputs = False
+         elif getattr(self.transformer, "_diffusers_hook", None) is not None:
+             logger.warning(
+                 "_diffusers_hook is not None, force skip pattern check!"
+             )
+             self.check_forward_pattern = False
+             self.check_num_outputs = False
 
      def maybe_fill_attrs(self):
          # NOTE: This func should be call before normalize.
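For context, `maybe_skip_checks` relies on the fact that accelerate attaches an `_hf_hook` attribute (model/sequential cpu offload) and diffusers attaches `_diffusers_hook` (group offloading) to hooked modules. A minimal, self-contained sketch of the same probe (the `DummyTransformer` class below is hypothetical, for illustration only):

```python
import torch

def has_offload_hooks(module: torch.nn.Module) -> bool:
    # Same getattr probes as maybe_skip_checks() above: either hook means
    # the module's forward has been re-wired by an offloader, so strict
    # forward-pattern checks would produce false negatives.
    return (
        getattr(module, "_hf_hook", None) is not None
        or getattr(module, "_diffusers_hook", None) is not None
    )

class DummyTransformer(torch.nn.Module):  # hypothetical stand-in
    pass

m = DummyTransformer()
assert not has_offload_hooks(m)
m._hf_hook = object()  # simulate accelerate attaching a cpu-offload hook
assert has_offload_hooks(m)
```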
cache_dit/cache_factory/cache_adapters/cache_adapter.py CHANGED
@@ -1,10 +1,8 @@
  import torch
-
  import unittest
  import functools
-
  from contextlib import ExitStack
- from typing import Dict, List, Tuple, Any, Union, Callable
+ from typing import Dict, List, Tuple, Any, Union, Callable, Optional
 
  from diffusers import DiffusionPipeline
 
@@ -16,7 +14,7 @@ from cache_dit.cache_factory.cache_contexts import CachedContextManager
  from cache_dit.cache_factory.cache_contexts import BasicCacheConfig
  from cache_dit.cache_factory.cache_contexts import CalibratorConfig
  from cache_dit.cache_factory.cache_blocks import CachedBlocks
- from cache_dit.cache_factory.cache_blocks.utils import (
+ from cache_dit.cache_factory.cache_blocks import (
      patch_cached_stats,
      remove_cached_stats,
  )
@@ -330,7 +328,26 @@ class CachedAdapter:
 
          assert isinstance(dummy_blocks_names, list)
 
-         @functools.wraps(original_forward)
+         from accelerate import hooks
+
+         _hf_hook: Optional[hooks.ModelHook] = None
+
+         if getattr(transformer, "_hf_hook", None) is not None:
+             _hf_hook = transformer._hf_hook  # hooks from accelerate.hooks
+             if hasattr(transformer, "_old_forward"):
+                 logger.warning(
+                     "_hf_hook is not None, so we have to re-direct transformer's "
+                     f"original_forward({id(original_forward)}) to transformer's "
+                     f"_old_forward({id(transformer._old_forward)})"
+                 )
+                 original_forward = transformer._old_forward
+
+         # TODO: remove group offload hooks, then re-apply them after cache is applied.
+         # hooks = _diffusers_hook.hooks.copy(); _diffusers_hook.hooks.clear()
+         # re-apply hooks to transformer after cache applied.
+         # from diffusers.hooks.hooks import HookFunctionReference, HookRegistry
+         # from diffusers.hooks.group_offloading import apply_group_offloading
+
          def new_forward(self, *args, **kwargs):
              with ExitStack() as stack:
                  for name, context_name in zip(
@@ -348,9 +365,27 @@ class CachedAdapter:
                              self, dummy_name, dummy_blocks
                          )
                      )
-             return original_forward(*args, **kwargs)
+             outputs = original_forward(*args, **kwargs)
+             return outputs
+
+         def new_forward_with_hf_hook(self, *args, **kwargs):
+             # Compatible with model cpu offload
+             if _hf_hook is not None and hasattr(_hf_hook, "pre_forward"):
+                 args, kwargs = _hf_hook.pre_forward(self, *args, **kwargs)
+
+             outputs = new_forward(self, *args, **kwargs)
+
+             if _hf_hook is not None and hasattr(_hf_hook, "post_forward"):
+                 outputs = _hf_hook.post_forward(self, outputs)
+
+             return outputs
+
+         # NOTE: Still not fully compatible with group offloading.
+         transformer.forward = functools.update_wrapper(
+             functools.partial(new_forward_with_hf_hook, transformer),
+             new_forward_with_hf_hook,
+         )
 
-         transformer.forward = new_forward.__get__(transformer)
          transformer._original_forward = original_forward
          transformer._is_cached = True
 
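The wrapping above follows accelerate's hook protocol: `pre_forward` may move inputs to the execution device, and `post_forward` may move outputs back or offload weights. A minimal sketch of that contract with a stub hook (the names here are illustrative, not cache-dit API):

```python
import functools
import torch

class StubHook:
    # Mimics the pre_forward/post_forward protocol of
    # accelerate.hooks.ModelHook, with no real device movement.
    def pre_forward(self, module, *args, **kwargs):
        return args, kwargs

    def post_forward(self, module, output):
        return output

def wrap_forward(module: torch.nn.Module, hook: StubHook) -> None:
    inner_forward = module.forward  # capture the original bound method

    def forward_with_hook(mod, *args, **kwargs):
        args, kwargs = hook.pre_forward(mod, *args, **kwargs)
        output = inner_forward(*args, **kwargs)
        return hook.post_forward(mod, output)

    # Same binding trick as in the hunk above: partial() pins the module as
    # the first argument; update_wrapper() preserves the wrapper's metadata.
    module.forward = functools.update_wrapper(
        functools.partial(forward_with_hook, module), forward_with_hook
    )

layer = torch.nn.Linear(4, 4)
wrap_forward(layer, StubHook())
_ = layer.forward(torch.randn(1, 4))
```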
cache_dit/cache_factory/cache_blocks/__init__.py CHANGED
@@ -12,6 +12,10 @@ from cache_dit.cache_factory.cache_blocks.pattern_0_1_2 import (
  from cache_dit.cache_factory.cache_blocks.pattern_3_4_5 import (
      CachedBlocks_Pattern_3_4_5,
  )
+ from cache_dit.cache_factory.cache_blocks.pattern_utils import (
+     patch_cached_stats,
+     remove_cached_stats,
+ )
 
  from cache_dit.logger import init_logger
 
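With this re-export in place, the import-site change in the cache_adapter.py hunk above resolves: the helpers now live in pattern_utils.py (renamed from utils.py, per the RECORD diff below) but remain importable from the package level. A sketch of the import that 1.0.0 expects, assuming cache-dit >= 1.0.0 is installed:

```python
from cache_dit.cache_factory.cache_blocks import (
    patch_cached_stats,
    remove_cached_stats,
)
```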
cache_dit/cache_factory/cache_blocks/offload_utils.py ADDED
@@ -0,0 +1,115 @@
+ import torch
+ import asyncio
+ import logging
+ from contextlib import contextmanager
+ from typing import Generator, Optional, List
+ from diffusers.hooks.group_offloading import _is_group_offload_enabled
+ from cache_dit.logger import init_logger
+
+ logger = init_logger(__name__)
+
+
+ @torch.compiler.disable
+ @contextmanager
+ def maybe_onload(
+     block: torch.nn.Module,
+     reference_tensor: torch.Tensor,
+     pending_tasks: List[asyncio.Task] = [],
+ ) -> Generator:
+
+     if not _is_group_offload_enabled(block):
+         yield block
+         return
+
+     original_devices: Optional[List[torch.device]] = None
+     if hasattr(block, "parameters"):
+         params = list(block.parameters())
+         if params:
+             original_devices = [param.data.device for param in params]
+
+     target_device: torch.device = reference_tensor.device
+     move_task: Optional[asyncio.Task] = None
+     need_restore: bool = False
+
+     try:
+         if original_devices is not None:
+             unique_devices = list(set(original_devices))
+             if len(unique_devices) > 1 or unique_devices[0] != target_device:
+                 if logger.isEnabledFor(logging.DEBUG):
+                     logger.debug(
+                         f"Onloading from {unique_devices} to {target_device}"
+                     )
+
+                 has_meta_params = any(
+                     dev.type == "meta" for dev in original_devices
+                 )
+                 if has_meta_params:  # compatible with sequential cpu offload
+                     block = block.to_empty(device=target_device)
+                 else:
+                     block = block.to(target_device, non_blocking=False)
+                 need_restore = True
+         yield block
+     finally:
+         if need_restore and original_devices:
+
+             async def restore_device():
+                 for param, original_device in zip(
+                     block.parameters(), original_devices
+                 ):
+                     param.data = await asyncio.to_thread(
+                         lambda p, d: p.to(d, non_blocking=True),
+                         param.data,  # type: torch.Tensor
+                         original_device,  # type: torch.device
+                     )  # type: ignore[assignment]
+
+             loop = get_event_loop()
+             move_task = loop.create_task(restore_device())
+             if move_task:
+                 pending_tasks.append(move_task)
+
+
+ def get_event_loop() -> asyncio.AbstractEventLoop:
+     try:
+         loop = asyncio.get_running_loop()
+     except RuntimeError:
+         try:
+             loop = asyncio.get_event_loop()
+         except RuntimeError:
+             loop = asyncio.new_event_loop()
+             asyncio.set_event_loop(loop)
+
+     if not loop.is_running():
+
+         def run_loop() -> None:
+             asyncio.set_event_loop(loop)
+             loop.run_forever()
+
+         import threading
+
+         if not any(t.name == "_my_loop" for t in threading.enumerate()):
+             threading.Thread(
+                 target=run_loop, name="_my_loop", daemon=True
+             ).start()
+
+     return loop
+
+
+ @torch.compiler.disable
+ def maybe_offload(
+     pending_tasks: List[asyncio.Task],
+ ) -> None:
+     if not pending_tasks:
+         return
+
+     loop = get_event_loop()
+
+     async def gather_tasks():
+         return await asyncio.gather(*pending_tasks)
+
+     future = asyncio.run_coroutine_threadsafe(gather_tasks(), loop)
+     try:
+         future.result(timeout=30.0)
+     except Exception as e:
+         logger.error(f"Maybe Offload Error: {e}")
+
+     pending_tasks.clear()
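A minimal usage sketch of these helpers (assuming cache-dit 1.0.0 and a recent diffusers are installed; `blocks` and `hidden_states` are illustrative names). Without group offloading enabled, `maybe_onload` simply yields the block unchanged; with it enabled, weights are moved to the reference tensor's device and restored asynchronously, and `maybe_offload` blocks until those restore tasks finish. Callers should pass an explicit `pending_tasks` list rather than relying on the mutable default argument:

```python
import torch
from cache_dit.cache_factory.cache_blocks.offload_utils import (
    maybe_offload,
    maybe_onload,
)

blocks = torch.nn.ModuleList(torch.nn.Linear(8, 8) for _ in range(3))
hidden_states = torch.randn(2, 8)

pending_tasks = []  # filled with async restore-device tasks by maybe_onload()
for block in blocks:
    with maybe_onload(block, hidden_states, pending_tasks) as b:
        hidden_states = b(hidden_states)

maybe_offload(pending_tasks)  # wait for background moves, then clear the list
```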
cache_dit/cache_factory/cache_blocks/pattern_base.py CHANGED
@@ -1,7 +1,9 @@
  import inspect
+ import asyncio
  import torch
  import torch.distributed as dist
 
+ from typing import List
  from cache_dit.cache_factory.cache_contexts.cache_context import CachedContext
  from cache_dit.cache_factory.cache_contexts.cache_manager import (
      CachedContextManager,
@@ -45,6 +47,7 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
          self.cache_prefix = cache_prefix
          self.cache_context = cache_context
          self.cache_manager = cache_manager
+         self.pending_tasks: List[asyncio.Task] = []
 
          self._check_forward_pattern()
          logger.info(
cache_dit-0.3.2.dist-info/METADATA → cache_dit-1.0.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: cache_dit
- Version: 0.3.2
+ Version: 1.0.0
  Summary: A Unified, Flexible and Training-free Cache Acceleration Framework for 🤗Diffusers.
  Author: DefTruth, vipshop.com, etc.
  Maintainer: DefTruth, vipshop.com, etc
@@ -48,23 +48,31 @@ Dynamic: requires-python
  <div align="center">
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/cache-dit-logo.png height="120">
 
- <p align="center">
+ <p align="center">
  A <b>Unified</b>, Flexible and Training-free <b>Cache Acceleration</b> Framework for <b>🤗Diffusers</b> <br>
  ♥️ Cache Acceleration with <b>One-line</b> Code ~ ♥️
  </p>
  <div align='center'>
+ <img src="./assets/image-reward-bench.png" width=580px >
+ </div>
+ <div align='center'>
+ <a href="https://huggingface.co/docs/diffusers/main/en/optimization/cache_dit"><img src=https://img.shields.io/badge/🤗Diffusers-ecosystem-yellow.svg ></a>
  <img src=https://img.shields.io/badge/Language-Python-brightgreen.svg >
- <img src=https://img.shields.io/badge/PRs-welcome-9cf.svg >
+ <img src=https://img.shields.io/badge/PRs-welcome-blue.svg >
  <img src=https://img.shields.io/badge/PyPI-pass-brightgreen.svg >
  <img src=https://static.pepy.tech/badge/cache-dit >
- <img src=https://img.shields.io/badge/Python-3.10|3.11|3.12-9cf.svg >
- <img src=https://img.shields.io/badge/Release-v0.3-brightgreen.svg >
- </div>
+ <img src=https://img.shields.io/github/stars/vipshop/cache-dit.svg?style=dark >
+ </div>
+ <div align='center'>
+ <a href="./README.md">📚English</a> | <a href="./README_CN.md">📚中文阅读 </a> | <a href="./docs/User_Guide.md#api-documentation"> 📚API Documentation </a> | <a href="https://huggingface.co/docs/diffusers/main/en/optimization/cache_dit">🤗Diffusers' Documentation</a>
+ </div>
+ <!--
  <p align="center">
- <b><a href="#unified">📚Unified Cache APIs</a></b> | <a href="#forward-pattern-matching">📚Forward Pattern Matching</a> | <a href="#automatic-block-adapter">📚Automatic Block Adapter</a><br>
- <a href="#hybird-forward-pattern">📚Hybrid Forward Pattern</a> | <a href="#dbcache">📚DBCache</a> | <a href="#taylorseer">📚TaylorSeer Calibrator</a> | <a href="#cfg">📚Cache CFG</a><br>
+ <b><a href="#unified">📚Unified Cache APIs</a></b> | <a href="#forward-pattern-matching">📚Forward Pattern Matching</a> | <a href="./docs/User_Guide.md">📚Automatic Block Adapter</a><br>
+ <a href="./docs/User_Guide.md">📚Hybrid Forward Pattern</a> | <a href="#dbcache">📚DBCache</a> | <a href="./docs/User_Guide.md">📚TaylorSeer Calibrator</a> | <a href="./docs/User_Guide.md">📚Cache CFG</a><br>
  <a href="#benchmarks">📚Text2Image DrawBench</a> | <a href="#benchmarks">📚Text2Image Distillation DrawBench</a>
  </p>
+ -->
  <p align="center">
  🎉Now, <b>cache-dit</b> covers almost <b>All</b> Diffusers' <b>DiT</b> Pipelines🎉<br>
  🔥<a href="#supported">Qwen-Image</a> | <a href="#supported">FLUX.1</a> | <a href="#supported">Qwen-Image-Lightning</a> | <a href="#supported"> Wan 2.1 </a> | <a href="#supported"> Wan 2.2 </a>🔥<br>
@@ -74,6 +82,8 @@ Dynamic: requires-python
  🔥<a href="#supported">Chroma</a> | <a href="#supported">Sana</a> | <a href="#supported">Allegro</a> | <a href="#supported">Mochi</a> | <a href="#supported">SD 3/3.5</a> | <a href="#supported">Amused</a> | <a href="#supported"> ... </a> | <a href="#supported">DiT-XL</a>🔥
  </p>
  </div>
+
+
  <div align='center'>
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/gifs/wan2.2.C0_Q0_NONE.gif width=124px>
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/gifs/wan2.2.C1_Q0_DBCACHE_F1B0_W2M8MC2_T1O2_R0.08.gif width=124px>
@@ -85,12 +95,6 @@ Dynamic: requires-python
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/flux.C0_Q0_NONE_T23.69s.png width=90px>
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/flux.C0_Q0_DBCACHE_F1B0_W4M0MC0_T1O2_R0.15_S16_T11.39s.png width=90px>
  <p><b>🔥Qwen-Image</b> | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:1.8x↑🎉 | <b>FLUX.1-dev</b> | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:2.1x↑🎉</p>
- <img src=https://github.com/vipshop/cache-dit/raw/main/assets/flux-kontext-cat.C0_L0_Q0_NONE.png width=100px>
- <img src=https://github.com/vipshop/cache-dit/raw/main/assets/flux-kontext.C0_L0_Q0_NONE.png width=100px>
- <img src=https://github.com/vipshop/cache-dit/raw/main/assets/flux-kontext.C0_L0_Q0_DBCACHE_F8B0_W8M0MC0_T0O2_R0.08_S10.png width=100px>
- <img src=https://github.com/vipshop/cache-dit/raw/main/assets/flux-kontext.C0_L0_Q0_DBCACHE_F1B0_W8M0MC2_T0O2_R0.12_S12.png width=100px>
- <img src=https://github.com/vipshop/cache-dit/raw/main/assets/flux-kontext.C0_L0_Q0_DBCACHE_F1B0_W2M0MC2_T0O2_R0.15_S15.png width=100px>
- <p><b>🔥FLUX-Kontext-dev</b> | Baseline | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:1.3x↑🎉 | 1.7x↑🎉 | 2.0x↑ 🎉</p>
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/qwen-image-lightning.4steps.C0_L1_Q0_NONE.png width=160px>
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/qwen-image-lightning.4steps.C0_L1_Q0_DBCACHE_F16B16_W2M1MC1_T0O2_R0.9_S1.png width=160px>
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/hunyuan-image-2.1.C0_L0_Q1_fp8_w8a16_wo_NONE.png width=90px>
@@ -100,7 +104,22 @@ Dynamic: requires-python
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/qwen-image-edit.C0_L0_Q0_NONE.png width=125px>
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/qwen-image-edit.C0_L0_Q0_DBCACHE_F8B0_W8M0MC0_T0O2_R0.08_S18.png width=125px>
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/qwen-image-edit.C0_L0_Q0_DBCACHE_F1B0_W8M0MC2_T0O2_R0.12_S24.png width=125px>
- <p><b>🔥Qwen-Image-Edit</b> | Input w/o Edit | Baseline | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:1.6x↑🎉 | 1.9x↑🎉 </p>
+ <p><b>🔥Qwen-Image-Edit</b> | Input w/o Edit | Baseline | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:1.6x↑🎉 | 1.9x↑🎉
+ <br>♥️ Please consider leaving a <b>⭐️ Star</b> to support us ~ ♥️
+ </p>
+ </div>
+
+ <details align='center'>
+
+ <summary>Click here to show more Image/Video cases</summary>
+
+ <div align='center'>
+ <img src=https://github.com/vipshop/cache-dit/raw/main/assets/flux-kontext-cat.C0_L0_Q0_NONE.png width=100px>
+ <img src=https://github.com/vipshop/cache-dit/raw/main/assets/flux-kontext.C0_L0_Q0_NONE.png width=100px>
+ <img src=https://github.com/vipshop/cache-dit/raw/main/assets/flux-kontext.C0_L0_Q0_DBCACHE_F8B0_W8M0MC0_T0O2_R0.08_S10.png width=100px>
+ <img src=https://github.com/vipshop/cache-dit/raw/main/assets/flux-kontext.C0_L0_Q0_DBCACHE_F1B0_W8M0MC2_T0O2_R0.12_S12.png width=100px>
+ <img src=https://github.com/vipshop/cache-dit/raw/main/assets/flux-kontext.C0_L0_Q0_DBCACHE_F1B0_W2M0MC2_T0O2_R0.15_S15.png width=100px>
+ <p><b>🔥FLUX-Kontext-dev</b> | Baseline | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:1.3x↑🎉 | 1.7x↑🎉 | 2.0x↑ 🎉</p>
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/hidream.C0_L0_Q0_NONE.png width=100px>
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/hidream.C0_L0_Q0_DBCACHE_F1B0_W8M0MC0_T0O2_R0.08_S24.png width=100px>
  <img src=https://github.com/vipshop/cache-dit/raw/main/assets/cogview4.C0_L0_Q0_NONE.png width=100px>
@@ -160,24 +179,26 @@ Dynamic: requires-python
  <p><b>🔥Amused</b> | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:1.1x↑🎉 | 1.2x↑🎉 | <b>DiT-XL-256</b> | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:1.8x↑🎉
  <br>♥️ Please consider leaving a <b>⭐️ Star</b> to support us ~ ♥️</p>
  </div>
+ </details>
 
  ## 🔥News
 
- - [2025-09-10] 🎉Day 1 support [**HunyuanImage-2.1**](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1) with **1.7x↑🎉** speedup! Check this [example](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline/run_hunyuan_image_2.1.py).
- - [2025-09-08] 🔥[**Qwen-Image-Lightning**](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline/run_qwen_image_lightning.py) **7.1/3.5 steps🎉** inference with **[DBCache: F16B16](https://github.com/vipshop/cache-dit)**.
- - [2025-09-03] 🎉[**Wan2.2-MoE**](https://github.com/Wan-Video) **2.4x↑🎉** speedup! Please refer to [run_wan_2.2.py](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline/run_wan_2.2.py) as an example.
- - [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x↑🎉** speedup! Check the example: [run_qwen_image_edit.py](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline/run_qwen_image_edit.py).
- - [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x↑🎉** speedup! Please refer to [run_qwen_image.py](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline/run_qwen_image.py) as an example.
- - [2025-07-13] 🎉[**FLUX.1-dev**](https://github.com/xlite-dev/flux-faster) **3.3x↑🎉** speedup! NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)** + **compile + FP8 DQ**.
+ - [2025-09-24] 🔥**cache-dit** has now joined the 🤗 Diffusers community ecosystem as the **first** cache acceleration framework for DiTs! Check out the documentation here: **[Diffusers Docs](https://huggingface.co/docs/diffusers/main/en/optimization/cache_dit)**. <a href="https://huggingface.co/docs/diffusers/main/en/optimization/cache_dit"><img src=https://img.shields.io/badge/🤗Diffusers-ecosystem-yellow.svg ></a>
+ - [2025-09-10] 🎉Day 1 support [**HunyuanImage-2.1**](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1) with **1.7x↑🎉** speedup! Check this [example](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_hunyuan_image_2.1.py).
+ - [2025-09-08] 🔥[**Qwen-Image-Lightning**](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_qwen_image_lightning.py) **7.1/3.5 steps🎉** inference with **[DBCache: F16B16](https://github.com/vipshop/cache-dit)**.
+ - [2025-09-03] 🎉[**Wan2.2-MoE**](https://github.com/Wan-Video) **2.4x↑🎉** speedup! Please refer to [run_wan_2.2.py](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_wan_2.2.py) as an example.
+ - [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x↑🎉** speedup! Check the example: [run_qwen_image_edit.py](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_qwen_image_edit.py).
+ - [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x↑🎉** speedup! Please refer to [run_qwen_image.py](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_qwen_image.py) as an example.
 
  <details>
  <summary> Previous News </summary>
 
+ - [2025-07-13] 🎉[**FLUX.1-dev**](https://github.com/xlite-dev/flux-faster) **3.3x↑🎉** speedup! NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)** + **compile + FP8 DQ**.
  - [2025-09-08] 🎉First caching mechanism in [Qwen-Image-Lightning](https://github.com/ModelTC/Qwen-Image-Lightning) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/ModelTC/Qwen-Image-Lightning/pull/35).
  - [2025-09-08] 🎉First caching mechanism in [Wan2.2](https://github.com/Wan-Video/Wan2.2) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/Wan-Video/Wan2.2/pull/127) for more details.
  - [2025-08-12] 🎉First caching mechanism in [QwenLM/Qwen-Image](https://github.com/QwenLM/Qwen-Image) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/QwenLM/Qwen-Image/pull/61).
- - [2025-09-01] 📚[**Hybird Forward Pattern**](#unified) is supported! Please check [FLUX.1-dev](https://github.com/vipshop/cache-dit/raw/main/examples/run_flux_adapter.py) as an example.
- - [2025-08-10] 🔥[**FLUX.1-Kontext-dev**](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) is supported! Please refer [run_flux_kontext.py](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline/run_flux_kontext.py) as an example.
+ - [2025-09-01] 📚[**Hybrid Forward Pattern**](#unified) is supported! Please check [FLUX.1-dev](https://github.com/vipshop/cache-dit/blob/main/examples/run_flux_adapter.py) as an example.
+ - [2025-08-10] 🔥[**FLUX.1-Kontext-dev**](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) is supported! Please refer to [run_flux_kontext.py](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_flux_kontext.py) as an example.
  - [2025-07-18] 🎉First caching mechanism in [🤗huggingface/flux-fast](https://github.com/huggingface/flux-fast) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check the [PR](https://github.com/huggingface/flux-fast/pull/13).
 
  </details>
@@ -188,19 +209,13 @@ Dynamic: requires-python
 
  - [⚙️Installation](#️installation)
  - [🔥Benchmarks](#benchmarks)
- - [🔥Supported Pipelines](#supported)
- - [🎉Unified Cache APIs](#unified)
- - [📚Forward Pattern Matching](#forward-pattern-matching)
- - [♥️Cache with One-line Code](#%EF%B8%8Fcache-acceleration-with-one-line-code)
- - [🔥Automatic Block Adapter](#automatic-block-adapter)
- - [📚Hybird Forward Pattern](#automatic-block-adapter)
- - [📚Implement Patch Functor](#implement-patch-functor)
- - [🤖Cache Acceleration Stats](#cache-acceleration-stats-summary)
+ - [🔥Quick Start](#quick-start)
+ - [📚Pattern Matching](#forward-pattern-matching)
  - [⚡️Dual Block Cache](#dbcache)
  - [🔥TaylorSeer Calibrator](#taylorseer)
- - [⚡️Hybrid Cache CFG](#cfg)
- - [⚙️Torch Compile](#compile)
- - [🛠Metrics CLI](#metrics)
+ - [📚Hybrid Cache CFG](#cfg)
+ - [🎉User Guide](#user-guide)
+ - [©️Citations](#citations)
 
  ## ⚙️Installation
 
@@ -217,82 +232,13 @@ Or you can install the latest develop version from GitHub:
  pip3 install git+https://github.com/vipshop/cache-dit.git
  ```
 
- ## 🔥Supported Pipelines
-
- <div id="supported"></div>
-
- Currently, **cache-dit** library supports almost **Any** Diffusion Transformers (with **Transformer Blocks** that match the specific Input and Output **patterns**). Please check [🎉Examples](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline) for more details. Here are just some of the tested models listed.
-
- ```python
- >>> import cache_dit
- >>> cache_dit.supported_pipelines()
- (30, ['Flux*', 'Mochi*', 'CogVideoX*', 'Wan*', 'HunyuanVideo*', 'QwenImage*', 'LTX*', 'Allegro*',
- 'CogView3Plus*', 'CogView4*', 'Cosmos*', 'EasyAnimate*', 'SkyReelsV2*', 'StableDiffusion3*',
- 'ConsisID*', 'DiT*', 'Amused*', 'Bria*', 'Lumina*', 'OmniGen*', 'PixArt*', 'Sana*', 'StableAudio*',
- 'VisualCloze*', 'AuraFlow*', 'Chroma*', 'ShapE*', 'HiDream*', 'HunyuanDiT*', 'HunyuanDiTPAG*'])
- ```
-
- <details>
- <summary> Show all pipelines </summary>
-
- - [🚀HunyuanImage-2.1](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀Qwen-Image-Lightning](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀Qwen-Image-Edit](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀Qwen-Image](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀FLUX.1-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀FLUX.1-Fill-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀FLUX.1-Kontext-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀CogView4](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀Wan2.2-T2V](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀HunyuanVideo](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀HiDream-I1-Full](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀HunyuanDiT](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀Wan2.1-T2V](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀Wan2.1-FLF2V](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀SkyReelsV2](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀Chroma1-HD](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀CogVideoX1.5](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀CogView3-Plus](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀CogVideoX](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀VisualCloze](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀LTXVideo](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀OmniGen](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀Lumina2](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀mochi-1-preview](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀AuraFlow-v0.3](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀PixArt-Alpha](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀PixArt-Sigma](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀NVIDIA Sana](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀SD-3/3.5](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀ConsisID](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀Allegro](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀Amused](https://github.com/vipshop/cache-dit/raw/main/examples)
- - [🚀DiT-XL](https://github.com/vipshop/cache-dit/raw/main/examples)
- - ...
-
- </details>
-
  ## 🔥Benchmarks
 
  <div id="benchmarks"></div>
 
- cache-dit will support more mainstream Cache acceleration algorithms in the future. More benchmarks will be released, please stay tuned for update. Here, only the results of some precision and performance benchmarks are presented. The test dataset is **DrawBench**. For a complete benchmark, please refer to [📚Benchmarks](https://github.com/vipshop/cache-dit/raw/main/bench/).
+ ![image-reward-bench](https://github.com/vipshop/cache-dit/raw/main/assets/image-reward-bench.png)
 
- ### 📚Text2Image DrawBench: FLUX.1-dev
-
- Comparisons between different FnBn compute block configurations show that **more compute blocks result in higher precision**. For example, the F8B0_W8MC0 configuration achieves the best Clip Score (33.007) and ImageReward (1.0333). **Device**: NVIDIA L20. **F**: Fn_compute_blocks, **B**: Bn_compute_blocks, 50 steps.
-
-
- | Config | Clip Score(↑) | ImageReward(↑) | PSNR(↑) | TFLOPs(↓) | SpeedUp(↑) |
- | --- | --- | --- | --- | --- | --- |
- | [**FLUX.1**-dev]: 50 steps | 32.9217 | 1.0412 | INF | 3726.87 | 1.00x |
- | F8B0_W4MC0_R0.08 | 32.9871 | 1.0370 | 33.8317 | 2064.81 | 1.80x |
- | F8B0_W4MC2_R0.12 | 32.9535 | 1.0185 | 32.7346 | 1935.73 | 1.93x |
- | F8B0_W4MC3_R0.12 | 32.9234 | 1.0085 | 32.5385 | 1816.58 | 2.05x |
- | F4B0_W4MC3_R0.12 | 32.8981 | 1.0130 | 31.8031 | 1507.83 | 2.47x |
- | F4B0_W4MC4_R0.12 | 32.8384 | 1.0065 | 31.5292 | 1400.08 | 2.66x |
-
- The comparison between **cache-dit: DBCache** and algorithms such as Δ-DiT, Chipmunk, FORA, DuCa, TaylorSeer and FoCa is as follows. Now, in the comparison with a speedup ratio less than **3x**, cache-dit achieved the best accuracy. Please check [📚How to Reproduce?](https://github.com/vipshop/cache-dit/raw/main/bench/) for more details.
+ The comparison between **cache-dit: DBCache** and algorithms such as Δ-DiT, Chipmunk, FORA, DuCa, TaylorSeer and FoCa is as follows. Now, in the comparison with a speedup ratio less than **3x**, cache-dit achieved the best accuracy. Surprisingly, cache-dit: DBCache still works in the extremely few-step distill model. For a complete benchmark, please refer to [📚Benchmarks](https://github.com/vipshop/cache-dit/raw/main/bench/).
 
  | Method | TFLOPs(↓) | SpeedUp(↑) | ImageReward(↑) | Clip Score(↑) |
  | --- | --- | --- | --- | --- |
@@ -314,6 +260,8 @@ The comparison between **cache-dit: DBCache** and algorithms such as Δ-DiT, Chi
  <details>
  <summary> Show all comparison </summary>
 
+ ![clip-score-bench](https://github.com/vipshop/cache-dit/raw/main/assets/clip-score-bench.png)
+
  | Method | TFLOPs(↓) | SpeedUp(↑) | ImageReward(↑) | Clip Score(↑) |
  | --- | --- | --- | --- | --- |
  | [**FLUX.1**-dev]: 50 steps | 3726.87 | 1.00× | 0.9898 | 32.404 |
@@ -350,192 +298,84 @@ NOTE: Except for DBCache, other performance data are referenced from the paper [
 
  </details>
 
- ### 📚Text2Image Distillation DrawBench: Qwen-Image-Lightning
-
- Surprisingly, cache-dit: DBCache still works in the extremely few-step distill model. For example, **Qwen-Image-Lightning w/ 4 steps**, with the F16B16 configuration, the PSNR is 34.8163, the Clip Score is 35.6109, and the ImageReward is 1.2614. It maintained a relatively high precision.
-
- | Config | PSNR(↑) | Clip Score(↑) | ImageReward(↑) | TFLOPs(↓) | SpeedUp(↑) |
- |----------------------------|-----------|------------|--------------|----------|------------|
- | [**Lightning**]: 4 steps | INF | 35.5797 | 1.2630 | 274.33 | 1.00x |
- | F24B24_W2MC1_R0.8 | 36.3242 | 35.6224 | 1.2630 | 264.74 | 1.04x |
- | F16B16_W2MC1_R0.8 | 34.8163 | 35.6109 | 1.2614 | 244.25 | 1.12x |
- | F12B12_W2MC1_R0.8 | 33.8953 | 35.6535 | 1.2549 | 234.63 | 1.17x |
- | F8B8_W2MC1_R0.8 | 33.1374 | 35.7284 | 1.2517 | 224.29 | 1.22x |
- | F1B0_W2MC1_R0.8 | 31.8317 | 35.6651 | 1.2397 | 206.90 | 1.33x |
-
- ## 🎉Unified Cache APIs
+ ## 🔥Quick Start
 
  <div id="unified"></div>
 
- ### 📚Forward Pattern Matching
-
- Currently, for any **Diffusion** models with **Transformer Blocks** that match the specific **Input/Output patterns**, we can use the **Unified Cache APIs** from **cache-dit**, namely, the `cache_dit.enable_cache(...)` API. The **Unified Cache APIs** are currently in the experimental phase; please stay tuned for updates. The supported patterns are listed as follows:
-
- ![](https://github.com/vipshop/cache-dit/raw/main/assets/patterns-v1.png)
-
- ### ♥️Cache Acceleration with One-line Code
-
- In most cases, you only need to call **one-line** of code, that is `cache_dit.enable_cache(...)`. After this API is called, you just need to call the pipe as normal. The `pipe` param can be **any** Diffusion Pipeline. Please refer to [Qwen-Image](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline/run_qwen_image.py) as an example.
-
- ```python
- import cache_dit
- from diffusers import DiffusionPipeline
-
- # Can be any diffusion pipeline
- pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image")
-
- # One-line code with default cache options.
- cache_dit.enable_cache(pipe)
-
- # Just call the pipe as normal.
- output = pipe(...)
-
- # Disable cache and run original pipe.
- cache_dit.disable_cache(pipe)
- ```
-
- ### 🔥Automatic Block Adapter
-
- But in some cases, you may have a **modified** Diffusion Pipeline or Transformer that is not located in the diffusers library or not officially supported by **cache-dit** at this time. The **BlockAdapter** can help you solve this problems. Please refer to [🔥Qwen-Image w/ BlockAdapter](https://github.com/vipshop/cache-dit/raw/main/examples/adapter/run_qwen_image_adapter.py) as an example.
-
- ```python
- from cache_dit import ForwardPattern, BlockAdapter
-
- # Use 🔥BlockAdapter with `auto` mode.
- cache_dit.enable_cache(
-     BlockAdapter(
-         # Any DiffusionPipeline, Qwen-Image, etc.
-         pipe=pipe, auto=True,
-         # Check `📚Forward Pattern Matching` documentation and hack the code of
-         # of Qwen-Image, you will find that it has satisfied `FORWARD_PATTERN_1`.
-         forward_pattern=ForwardPattern.Pattern_1,
-     ),
- )
-
- # Or, manually setup transformer configurations.
- cache_dit.enable_cache(
-     BlockAdapter(
-         pipe=pipe, # Qwen-Image, etc.
-         transformer=pipe.transformer,
-         blocks=pipe.transformer.transformer_blocks,
-         forward_pattern=ForwardPattern.Pattern_1,
-     ),
- )
- ```
- For such situations, **BlockAdapter** can help you quickly apply various cache acceleration features to your own Diffusion Pipelines and Transformers. Please check the [📚BlockAdapter.md](https://github.com/vipshop/cache-dit/raw/main/docs/BlockAdapter.md) for more details.
+ <div id="quick-start"></div>
 
- ### 📚Hybird Forward Pattern
-
- Sometimes, a Transformer class will contain more than one transformer `blocks`. For example, **FLUX.1** (HiDream, Chroma, etc) contains transformer_blocks and single_transformer_blocks (with different forward patterns). The **BlockAdapter** can also help you solve this problem. Please refer to [📚FLUX.1](https://github.com/vipshop/cache-dit/raw/main/examples/adapter/run_flux_adapter.py) as an example.
+ In most cases, you only need to call ♥️**one-line**♥️ of code, that is `cache_dit.enable_cache(...)`. After this API is called, you just need to call the pipe as normal. The `pipe` param can be **any** Diffusion Pipeline. Please refer to [Qwen-Image](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_qwen_image.py) as an example.
 
  ```python
- # For diffusers <= 0.34.0, FLUX.1 transformer_blocks and
- # single_transformer_blocks have different forward patterns.
- cache_dit.enable_cache(
-     BlockAdapter(
-         pipe=pipe, # FLUX.1, etc.
-         transformer=pipe.transformer,
-         blocks=[
-             pipe.transformer.transformer_blocks,
-             pipe.transformer.single_transformer_blocks,
-         ],
-         forward_pattern=[
-             ForwardPattern.Pattern_1,
-             ForwardPattern.Pattern_3,
-         ],
-     ),
- )
+ >>> import cache_dit
+ >>> from diffusers import DiffusionPipeline
+ >>> pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image") # Can be any diffusion pipeline
+ >>> cache_dit.enable_cache(pipe) # One-line code with default cache options.
+ >>> output = pipe(...) # Just call the pipe as normal.
+ >>> stats = cache_dit.summary(pipe) # Then, get the summary of cache acceleration stats.
+ >>> cache_dit.disable_cache(pipe) # Disable cache and run original pipe.
  ```
 
- Even sometimes you have more complex cases, such as **Wan 2.2 MoE**, which has more than one Transformer (namely `transformer` and `transformer_2`) in its structure. Fortunately, **cache-dit** can also handle this situation very well. Please refer to [📚Wan 2.2 MoE](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline/run_wan_2.2.py) as an example.
-
- ```python
- from cache_dit import ForwardPattern, BlockAdapter, ParamsModifier, BasicCacheConfig
+ ## 📚Forward Pattern Matching
 
- cache_dit.enable_cache(
-     BlockAdapter(
-         pipe=pipe,
-         transformer=[
-             pipe.transformer,
-             pipe.transformer_2,
-         ],
-         blocks=[
-             pipe.transformer.blocks,
-             pipe.transformer_2.blocks,
-         ],
-         forward_pattern=[
-             ForwardPattern.Pattern_2,
-             ForwardPattern.Pattern_2,
-         ],
-         # Setup different cache params for each 'blocks'. You can
-         # pass any specific cache params to ParamModifier, the old
-         # value will be overwrite by the new one.
-         params_modifiers=[
-             ParamsModifier(
-                 cache_config=BasicCacheConfig(
-                     max_warmup_steps=4,
-                     max_cached_steps=8,
-                 ),
-             ),
-             ParamsModifier(
-                 cache_config=BasicCacheConfig(
-                     max_warmup_steps=2,
-                     max_cached_steps=20,
-                 ),
-             ),
-         ],
-         has_separate_cfg=True,
-     ),
- )
- ```
- ### 📚Implement Patch Functor
-
- For any PATTERN not in {0...5}, we introduced the simple abstract concept of **Patch Functor**. Users can implement a subclass of Patch Functor to convert an unknown Pattern into a known PATTERN, and for some models, users may also need to fuse the operations within the blocks for loop into block forward.
+ <div id="supported"></div>
 
- ![](https://github.com/vipshop/cache-dit/raw/main/assets/patch-functor.png)
+ <div id="forward-pattern-matching"></div>
 
- Some Patch functors have already been provided in cache-dit: [📚HiDreamPatchFunctor](https://github.com/vipshop/cache-dit/raw/main/src/cache_dit/cache_factory/patch_functors/functor_hidream.py), [📚ChromaPatchFunctor](https://github.com/vipshop/cache-dit/raw/main/src/cache_dit/cache_factory/patch_functors/functor_chroma.py), etc. After implementing Patch Functor, users need to set the `patch_functor` property of **BlockAdapter**.
+ cache-dit works by matching specific input/output patterns as shown below.
 
- ```python
- @BlockAdapterRegistry.register("HiDream")
- def hidream_adapter(pipe, **kwargs) -> BlockAdapter:
-     from diffusers import HiDreamImageTransformer2DModel
-     from cache_dit.cache_factory.patch_functors import HiDreamPatchFunctor
-
-     assert isinstance(pipe.transformer, HiDreamImageTransformer2DModel)
-     return BlockAdapter(
-         pipe=pipe,
-         transformer=pipe.transformer,
-         blocks=[
-             pipe.transformer.double_stream_blocks,
-             pipe.transformer.single_stream_blocks,
-         ],
-         forward_pattern=[
-             ForwardPattern.Pattern_0,
-             ForwardPattern.Pattern_3,
-         ],
-         # NOTE: Setup your custom patch functor here.
-         patch_functor=HiDreamPatchFunctor(),
-         **kwargs,
-     )
- ```
+ ![](https://github.com/vipshop/cache-dit/raw/main/assets/patterns-v1.png)
 
- ### 🤖Cache Acceleration Stats Summary
+ Please check [🎉Examples](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline) for more details. Here are just some of the tested models listed.
 
- After finishing each inference of `pipe(...)`, you can call the `cache_dit.summary()` API on pipe to get the details of the **Cache Acceleration Stats** for the current inference.
  ```python
- stats = cache_dit.summary(pipe)
+ >>> import cache_dit
+ >>> cache_dit.supported_pipelines()
+ (30, ['Flux*', 'Mochi*', 'CogVideoX*', 'Wan*', 'HunyuanVideo*', 'QwenImage*', 'LTX*', 'Allegro*',
+ 'CogView3Plus*', 'CogView4*', 'Cosmos*', 'EasyAnimate*', 'SkyReelsV2*', 'StableDiffusion3*',
+ 'ConsisID*', 'DiT*', 'Amused*', 'Bria*', 'Lumina*', 'OmniGen*', 'PixArt*', 'Sana*', 'StableAudio*',
+ 'VisualCloze*', 'AuraFlow*', 'Chroma*', 'ShapE*', 'HiDream*', 'HunyuanDiT*', 'HunyuanDiTPAG*'])
  ```
 
- You can set `details` param as `True` to show more details of cache stats. (markdown table format) Sometimes, this may help you analyze what values of the residual diff threshold would be better.
+ <details>
+ <summary> Show all pipelines </summary>
 
- ```python
- ⚡️Cache Steps and Residual Diffs Statistics: QwenImagePipeline
+ - [🚀HunyuanImage-2.1](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀Qwen-Image-Lightning](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀Qwen-Image-Edit](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀Qwen-Image](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀FLUX.1-dev](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀FLUX.1-Fill-dev](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀FLUX.1-Kontext-dev](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀CogView4](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀Wan2.2-T2V](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀HunyuanVideo](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀HiDream-I1-Full](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀HunyuanDiT](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀Wan2.1-T2V](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀Wan2.1-FLF2V](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀SkyReelsV2](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀Chroma1-HD](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀CogVideoX1.5](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀CogView3-Plus](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀CogVideoX](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀VisualCloze](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀LTXVideo](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀OmniGen](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀Lumina2](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀mochi-1-preview](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀AuraFlow-v0.3](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀PixArt-Alpha](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀PixArt-Sigma](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀NVIDIA Sana](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀SD-3/3.5](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀ConsisID](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀Allegro](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀Amused](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - [🚀DiT-XL](https://github.com/vipshop/cache-dit/blob/main/examples)
+ - ...
 
- | Cache Steps | Diffs Min | Diffs P25 | Diffs P50 | Diffs P75 | Diffs P95 | Diffs Max |
- |-------------|-----------|-----------|-----------|-----------|-----------|-----------|
- | 23 | 0.045 | 0.084 | 0.114 | 0.147 | 0.241 | 0.297 |
- ```
+ </details>
 
  ## ⚡️DBCache: Dual Block Cache
 
@@ -543,20 +383,9 @@ You can set `details` param as `True` to show more details of cache stats. (mark
 
  ![](https://github.com/vipshop/cache-dit/raw/main/assets/dbcache-v1.png)
 
- **DBCache**: **Dual Block Caching** for Diffusion Transformers. Different configurations of compute blocks (**F8B12**, etc.) can be customized in DBCache, enabling a balanced trade-off between performance and precision. Moreover, it can be entirely **training**-**free**. Please check [DBCache.md](https://github.com/vipshop/cache-dit/raw/main/docs/DBCache.md) docs for more design details.
-
- - **Fn**: Specifies that DBCache uses the **first n** Transformer blocks to fit the information at time step t, enabling the calculation of a more stable L1 diff and delivering more accurate information to subsequent blocks.
- - **Bn**: Further fuses approximate information in the **last n** Transformer blocks to enhance prediction accuracy. These blocks act as an auto-scaler for approximate hidden states that use residual cache.
+ **DBCache**: **Dual Block Caching** for Diffusion Transformers. Different configurations of compute blocks (**F8B12**, etc.) can be customized in DBCache, enabling a balanced trade-off between performance and precision. Moreover, it can be entirely **training**-**free**. Please check the [DBCache](https://github.com/vipshop/cache-dit/blob/main/docs/DBCache.md) and [User Guide](https://github.com/vipshop/cache-dit/blob/main/docs/User_Guide.md#dbcache) docs for details.
 
  ```python
- import cache_dit
- from diffusers import FluxPipeline
-
- pipe_or_adapter = FluxPipeline.from_pretrained(
-     "black-forest-labs/FLUX.1-dev",
-     torch_dtype=torch.bfloat16,
- ).to("cuda")
-
  # Default options, F8B0, 8 warmup steps, and unlimited cached
  # steps for good balance between performance and precision
  cache_dit.enable_cache(pipe_or_adapter)
@@ -576,28 +405,13 @@ cache_dit.enable_cache(
  )
  ```
 
- <div align="center">
- <p align="center">
- DBCache, <b> L20x1 </b>, Steps: 28, "A cat holding a sign that says hello world with complex background"
- </p>
- </div>
-
- |Baseline(L20x1)|F1B0 (0.08)|F1B0 (0.20)|F8B8 (0.15)|F12B12 (0.20)|F16B16 (0.20)|
- |:---:|:---:|:---:|:---:|:---:|:---:|
- |24.85s|15.59s|8.58s|15.41s|15.11s|17.74s|
- |<img src=https://github.com/vipshop/cache-dit/raw/main/assets/NONE_R0.08_S0.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F1B0S1_R0.08_S11.png width=105px> | <img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F1B0S1_R0.2_S19.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F8B8S1_R0.15_S15.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F12B12S4_R0.2_S16.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F16B16S4_R0.2_S13.png width=105px>|
-
  ## 🔥TaylorSeer Calibrator
 
  <div id="taylorseer"></div>
 
- We have supported the [TaylorSeers: From Reusing to Forecasting: Accelerating Diffusion Models with TaylorSeers](https://arxiv.org/pdf/2503.06923) algorithm to further improve the precision of DBCache in cases where the cached steps are large, namely, **Hybrid TaylorSeer + DBCache**. At timesteps with significant intervals, the feature similarity in diffusion models decreases substantially, significantly harming the generation quality.
-
- $$
- \mathcal{F}\_{\text {pred }, m}\left(x_{t-k}^l\right)=\mathcal{F}\left(x_t^l\right)+\sum_{i=1}^m \frac{\Delta^i \mathcal{F}\left(x_t^l\right)}{i!\cdot N^i}(-k)^i
- $$
+ The [TaylorSeers](https://huggingface.co/papers/2503.06923) algorithm further improves the precision of DBCache in cases where the cached steps are large (Hybrid TaylorSeer + DBCache). At timesteps with significant intervals, the feature similarity in diffusion models decreases substantially, significantly harming the generation quality.
 
- **TaylorSeer** employs a differential method to approximate the higher-order derivatives of features and predict features in future timesteps with Taylor series expansion. The TaylorSeer implemented in cache-dit supports both hidden states and residual cache types. That is $\mathcal{F}\_{\text {pred }, m}\left(x_{t-k}^l\right)$ can be a residual cache or a hidden-state cache.
+ TaylorSeer employs a differential method to approximate the higher-order derivatives of features and predict features in future timesteps with Taylor series expansion. The TaylorSeer implemented in CacheDiT supports both hidden states and residual cache types. F_pred can be a residual cache or a hidden-state cache.
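For reference, the Taylor-expansion predictor that "F_pred" abbreviates here is the formula dropped from the old README text in this same hunk:

$$
\mathcal{F}_{\text{pred},m}\left(x_{t-k}^{l}\right)=\mathcal{F}\left(x_{t}^{l}\right)+\sum_{i=1}^{m}\frac{\Delta^{i}\mathcal{F}\left(x_{t}^{l}\right)}{i!\cdot N^{i}}(-k)^{i}
$$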
 
  ```python
  from cache_dit import BasicCacheConfig, TaylorSeerCalibratorConfig
@@ -620,25 +434,14 @@ cache_dit.enable_cache(
  )
  ```
 
- > [!Important]
- > Please note that if you have used TaylorSeer as the calibrator for approximate hidden states, the **Bn** param of DBCache can be set to **0**. In essence, DBCache's Bn is also act as a calibrator, so you can choose either Bn > 0 or TaylorSeer. We recommend using the configuration scheme of **TaylorSeer** + **DBCache FnB0**.
-
- <div align="center">
- <p align="center">
- <b>DBCache F1B0 + TaylorSeer</b>, L20x1, Steps: 28, <br>"A cat holding a sign that says hello world with complex background"
- </p>
- </div>
-
- |Baseline(L20x1)|F1B0 (0.12)|+TaylorSeer|F1B0 (0.15)|+TaylorSeer|+compile|
- |:---:|:---:|:---:|:---:|:---:|:---:|
- |24.85s|12.85s|12.86s|10.27s|10.28s|8.48s|
- |<img src=https://github.com/vipshop/cache-dit/raw/main/assets/NONE_R0.08_S0.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/U0_C0_DBCACHE_F1B0S1W0T0ET0_R0.12_S14_T12.85s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/U0_C0_DBCACHE_F1B0S1W0T1ET1_R0.12_S14_T12.86s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/U0_C0_DBCACHE_F1B0S1W0T0ET0_R0.15_S17_T10.27s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/U0_C0_DBCACHE_F1B0S1W0T1ET1_R0.15_S17_T10.28s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/U0_C1_DBCACHE_F1B0S1W0T1ET1_R0.15_S17_T8.48s.png width=105px>|
+ > [!TIP]
+ > The `Bn_compute_blocks` parameter of DBCache can be set to `0` if you use TaylorSeer as the calibrator for approximate hidden states. DBCache's `Bn_compute_blocks` also acts as a calibrator, so you can choose either `Bn_compute_blocks` > 0 or TaylorSeer. We recommend using the configuration scheme of TaylorSeer + DBCache FnB0.
 
- ## ⚡️Hybrid Cache CFG
+ ## 📚Hybrid Cache CFG
 
  <div id="cfg"></div>
 
- cache-dit supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `enable_separate_cfg` param to **False (default, None)**. Otherwise, set it to True. For examples:
+ cache-dit supports caching for CFG (classifier-free guidance). For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set the `enable_separate_cfg` parameter to `False (default, None)`. Otherwise, set it to `True`.
 
  ```python
  from cache_dit import BasicCacheConfig
@@ -647,75 +450,35 @@ cache_dit.enable_cache(
  cache_dit.enable_cache(
      pipe_or_adapter,
      cache_config=BasicCacheConfig(
          ...,
-         # CFG: classifier free guidance or not
-         # For model that fused CFG and non-CFG into single forward step,
-         # should set enable_separate_cfg as False. For example, set it as True
-         # for Wan 2.1/Qwen-Image and set it as False for FLUX.1, HunyuanVideo,
-         # CogVideoX, Mochi, LTXVideo, Allegro, CogView3Plus, EasyAnimate, SD3, etc.
-         enable_separate_cfg=True, # Wan 2.1, Qwen-Image, CogView4, Cosmos, SkyReelsV2, etc.
-         # Compute cfg forward first or not, default False, namely,
-         # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
-         cfg_compute_first=False,
-         # Compute separate diff values for CFG and non-CFG step,
-         # default True. If False, we will use the computed diff from
-         # current non-CFG transformer step for current CFG step.
-         cfg_diff_compute_separate=True,
+         # For example, set it as True for Wan 2.1/Qwen-Image
+         # and set it as False for FLUX.1, HunyuanVideo, CogVideoX, etc.
+         enable_separate_cfg=True,
      ),
  )
  ```
 
- ## ⚙️Torch Compile
-
- <div id="compile"></div>
-
- By the way, **cache-dit** is designed to work compatibly with **torch.compile**. You can easily use cache-dit with torch.compile to further achieve a better performance. For example:
-
- ```python
- cache_dit.enable_cache(pipe)
-
- # Compile the Transformer module
- pipe.transformer = torch.compile(pipe.transformer)
- ```
- However, users intending to use **cache-dit** for DiT with **dynamic input shapes** should consider increasing the **recompile** **limit** of `torch._dynamo`. Otherwise, the recompile_limit error may be triggered, causing the module to fall back to eager mode.
- ```python
- torch._dynamo.config.recompile_limit = 96 # default is 8
- torch._dynamo.config.accumulated_recompile_limit = 2048 # default is 256
- ```
-
- Please check [perf.py](https://github.com/vipshop/cache-dit/raw/main/bench/perf.py) for more details.
-
-
- ## 🛠Metrics CLI
-
- <div id="metrics"></div>
-
- You can utilize the APIs provided by cache-dit to quickly evaluate the accuracy losses caused by different cache configurations. For example:
-
- ```python
- from cache_dit.metrics import compute_psnr
- from cache_dit.metrics import compute_ssim
- from cache_dit.metrics import compute_fid
- from cache_dit.metrics import compute_lpips
- from cache_dit.metrics import compute_clip_score
- from cache_dit.metrics import compute_image_reward
-
- psnr, n = compute_psnr("true.png", "test.png") # Num: n
- psnr, n = compute_psnr("true_dir", "test_dir")
- ssim, n = compute_ssim("true_dir", "test_dir")
- fid, n = compute_fid("true_dir", "test_dir")
- lpips, n = compute_lpips("true_dir", "test_dir")
- clip, n = compute_clip_score("DrawBench200.txt", "test_dir")
- reward, n = compute_image_reward("DrawBench200.txt", "test_dir")
- ```
-
- Or, you can use `cache-dit-metrics-cli` tool. For examples:
-
- ```bash
- cache-dit-metrics-cli -h # show usage
- # all: PSNR, FID, SSIM, MSE, ..., etc.
- cache-dit-metrics-cli all -i1 true.png -i2 test.png # image
- cache-dit-metrics-cli all -i1 true_dir -i2 test_dir # image dir
- ```
+ ## 🎉User Guide
+
+ <div id="user-guide"></div>
+
+ For more advanced features such as **Unified Cache APIs**, **Forward Pattern Matching**, **Automatic Block Adapter**, **Hybrid Forward Pattern**, **DBCache**, **TaylorSeer Calibrator**, and **Hybrid Cache CFG**, please refer to the [🎉User_Guide.md](./docs/User_Guide.md) for details.
+
+ - [⚙️Installation](./docs/User_Guide.md#️installation)
+ - [🔥Benchmarks](./docs/User_Guide.md#benchmarks)
+ - [🔥Supported Pipelines](./docs/User_Guide.md#supported-pipelines)
+ - [🎉Unified Cache APIs](./docs/User_Guide.md#unified-cache-apis)
+ - [📚Forward Pattern Matching](./docs/User_Guide.md#forward-pattern-matching)
+ - [📚Cache with One-line Code](./docs/User_Guide.md#%EF%B8%8Fcache-acceleration-with-one-line-code)
+ - [🔥Automatic Block Adapter](./docs/User_Guide.md#automatic-block-adapter)
+ - [📚Hybrid Forward Pattern](./docs/User_Guide.md#hybird-forward-pattern)
+ - [📚Implement Patch Functor](./docs/User_Guide.md#implement-patch-functor)
+ - [🤖Cache Acceleration Stats](./docs/User_Guide.md#cache-acceleration-stats-summary)
+ - [⚡️Dual Block Cache](./docs/User_Guide.md#️dbcache-dual-block-cache)
+ - [🔥TaylorSeer Calibrator](./docs/User_Guide.md#taylorseer-calibrator)
+ - [⚡️Hybrid Cache CFG](./docs/User_Guide.md#️hybrid-cache-cfg)
+ - [⚙️Torch Compile](./docs/User_Guide.md#️torch-compile)
+ - [🛠Metrics CLI](./docs/User_Guide.md#metrics-cli)
+ - [📚API Documents](./docs/User_Guide.md#api-documentation)
 
  ## 👋Contribute
  <div id="contribute"></div>
@@ -738,13 +501,17 @@ How to contribute? Star ⭐️ this repo to support us or check [CONTRIBUTE.md](
 
  The **cache-dit** codebase is adapted from FBCache. Over time its codebase diverged a lot, and the **cache-dit** API is no longer compatible with FBCache.
 
+ ## ©️Special Acknowledgements
+
+ Special thanks to vipshop's Computer Vision AI Team for supporting documentation, testing, and production-level deployment of this project.
+
  ## ©️Citations
 
  <div id="citations"></div>
 
  ```BibTeX
  @misc{cache-dit@2025,
-     title={cache-dit: A Unified, Flexible and Training-free Cache Acceleration Framework for 🤗Diffusers.},
+     title={cache-dit: A Unified, Flexible and Training-free Cache Acceleration Framework for Diffusers.},
      url={https://github.com/vipshop/cache-dit.git},
      note={Open-source software available at https://github.com/vipshop/cache-dit.git},
      author={vipshop.com},
cache_dit-0.3.2.dist-info/RECORD → cache_dit-1.0.0.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
  cache_dit/__init__.py,sha256=sHRg0swXZZiw6lvSQ53fcVtN9JRayx0az2lXAz5OOGI,1510
- cache_dit/_version.py,sha256=e8NqPtZ8fggRgk3GPrqZ_U_BDV8aSULw1u_Gn9NNbnk,704
+ cache_dit/_version.py,sha256=vLA4ITz09S-S435nq6yTF6l3qiSz6w4euS1rOxXgd1M,704
  cache_dit/logger.py,sha256=0zsu42hN-3-rgGC_C29ms1IvVpV4_b4_SwJCKSenxBE,4304
  cache_dit/utils.py,sha256=AyYRwi5XBxYBH4GaXxOxv9-X24Te_IYOYwh54t_1d3A,10674
  cache_dit/cache_factory/.gitignore,sha256=5Cb-qT9wsTUoMJ7vACDF7ZcLpAXhi5v-xdcWSRit988,23
@@ -10,15 +10,16 @@ cache_dit/cache_factory/forward_pattern.py,sha256=FumlCuZ-TSmSYH0hGBHctSJ-oGLCft
  cache_dit/cache_factory/params_modifier.py,sha256=zYJJsInTYCaYHBZ7mZJOP-PZnkSg3iN1WPewNOayXos,3628
  cache_dit/cache_factory/utils.py,sha256=XkVM9AXcB9zYq8-S8QKAsGz80r3tA6U3lBNGDGeHOe4,1871
  cache_dit/cache_factory/block_adapters/__init__.py,sha256=33geXMz56TxFWMp0c-H4__MY5SGRzKMKj3TXnUYOMlc,17512
- cache_dit/cache_factory/block_adapters/block_adapters.py,sha256=jAgzMPTaY4rBuq7DLK2VeEWuYLy7lvw7bZcPY4S93b4,21660
+ cache_dit/cache_factory/block_adapters/block_adapters.py,sha256=2TVK_KqiYXC7AKZ2s07fzdOzUoeUBc9P1SzQtLVzhf4,22249
  cache_dit/cache_factory/block_adapters/block_registers.py,sha256=2L7QeM4ygnaKQpC9PoJod0QRYyxidUKU2AYpysDCUwE,2572
  cache_dit/cache_factory/cache_adapters/__init__.py,sha256=py71WGD3JztQ1uk6qdLVbzYcQ1rvqFidNNaQYo7tqTo,79
- cache_dit/cache_factory/cache_adapters/cache_adapter.py,sha256=GrkSz4was9gg_dYkfBobrOQ_eNqipQBqeuFfqcwkCXc,19650
- cache_dit/cache_factory/cache_blocks/__init__.py,sha256=08Ox7kD05lkRKCOsVTdEZeKAWBheqpxfrAT1Nz7eclI,2916
+ cache_dit/cache_factory/cache_adapters/cache_adapter.py,sha256=7heGoy8LHMP54ISMwfJ-i_ALngkbnUdeQDBRrE-MTgs,21303
+ cache_dit/cache_factory/cache_blocks/__init__.py,sha256=mivvm8YOfqT7YHs8y_MzGOGztPw8LxAqKGXuSRXxCv0,3032
+ cache_dit/cache_factory/cache_blocks/offload_utils.py,sha256=wusgcqaCrwEjvv7Guy-6VXhNOgPPUrBV2sSVuRmGuvo,3513
  cache_dit/cache_factory/cache_blocks/pattern_0_1_2.py,sha256=ElMps6_7uI74tSF9GDR_dEI0bZEhdzcepM29xFWnYo8,428
  cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py,sha256=Bv56qETXhsREvCrNvnZpSqDIIHsi6Ze3FJW4Yk2x3uI,8597
- cache_dit/cache_factory/cache_blocks/pattern_base.py,sha256=d4H9kEB0AgnVMT8aF0Y54SUMUQUxw5HQ8gRkoCuTQ_A,14577
- cache_dit/cache_factory/cache_blocks/utils.py,sha256=dGOC1tMMOvcbvEgx44eTESKn_jsv-0RZ3tRHPa3wmQ4,1315
+ cache_dit/cache_factory/cache_blocks/pattern_base.py,sha256=wdh0bbcpKO08AW2FTsj9X_tTbFCLkDmBjrstMxTf7MQ,14668
+ cache_dit/cache_factory/cache_blocks/pattern_utils.py,sha256=dGOC1tMMOvcbvEgx44eTESKn_jsv-0RZ3tRHPa3wmQ4,1315
  cache_dit/cache_factory/cache_contexts/__init__.py,sha256=T6Vak3x7Rs0Oy15Tou49p-rPQRA2jiuYtJBsbv1lBBU,388
  cache_dit/cache_factory/cache_contexts/cache_context.py,sha256=3EhaMCz3VUQ_NF81VgYwWoSEGIvhScPxPYhjL1OcgxE,15240
  cache_dit/cache_factory/cache_contexts/cache_manager.py,sha256=hSKAeP1CxmO3RFUxjFjAK1xdvVvTmeayh5jEHMaQXNE,30225
@@ -48,9 +49,9 @@ cache_dit/metrics/metrics.py,sha256=7UV-H2NRbhfr6dvrXEzU97Zy-BSQ5zEfm9CKtaK4ldg,
  cache_dit/quantize/__init__.py,sha256=kWYoMAyZgBXu9BJlZjTQ0dRffW9GqeeY9_iTkXrb70A,59
  cache_dit/quantize/quantize_ao.py,sha256=Fx1KW4l3gdEkdrcAYtPoDW7WKBJWrs3glOHiEwW_TgE,6160
  cache_dit/quantize/quantize_interface.py,sha256=2s_R7xPSKuJeFpEGeLwRxnq_CqJcBG3a3lzyW5wh-UM,1241
- cache_dit-0.3.2.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
- cache_dit-0.3.2.dist-info/METADATA,sha256=L8vWXW0w9Z4GXVXylKnqmhnfpKJ8YeL0LKIuwLL8HEo,47858
- cache_dit-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- cache_dit-0.3.2.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
- cache_dit-0.3.2.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
- cache_dit-0.3.2.dist-info/RECORD,,
+ cache_dit-1.0.0.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
+ cache_dit-1.0.0.dist-info/METADATA,sha256=HbV42qlhu8PFIO6FD_PuIo1dO-7K-yBiPCc5fikKIsg,35959
+ cache_dit-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ cache_dit-1.0.0.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
+ cache_dit-1.0.0.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
+ cache_dit-1.0.0.dist-info/RECORD,,