cache-dit 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cache-dit might be problematic. Click here for more details.
- cache_dit/__init__.py +13 -1
- cache_dit/_version.py +2 -2
- cache_dit/cache_factory/block_adapters/__init__.py +44 -0
- cache_dit/cache_factory/block_adapters/block_registers.py +1 -0
- cache_dit/cache_factory/cache_blocks/pattern_base.py +3 -8
- cache_dit/metrics/__init__.py +11 -0
- cache_dit/parallelism/backends/native_diffusers/__init__.py +6 -0
- cache_dit/parallelism/backends/{parallel_difffusers.py → native_diffusers/parallel_difffusers.py} +29 -4
- cache_dit/parallelism/backends/native_pytorch/__init__.py +0 -0
- cache_dit/parallelism/parallel_config.py +8 -0
- cache_dit/parallelism/parallel_interface.py +1 -1
- cache_dit/quantize/__init__.py +7 -0
- cache_dit/utils.py +4 -0
- {cache_dit-1.0.6.dist-info → cache_dit-1.0.8.dist-info}/METADATA +24 -30
- {cache_dit-1.0.6.dist-info → cache_dit-1.0.8.dist-info}/RECORD +19 -17
- {cache_dit-1.0.6.dist-info → cache_dit-1.0.8.dist-info}/WHEEL +0 -0
- {cache_dit-1.0.6.dist-info → cache_dit-1.0.8.dist-info}/entry_points.txt +0 -0
- {cache_dit-1.0.6.dist-info → cache_dit-1.0.8.dist-info}/licenses/LICENSE +0 -0
- {cache_dit-1.0.6.dist-info → cache_dit-1.0.8.dist-info}/top_level.txt +0 -0
cache_dit/__init__.py
CHANGED
|
@@ -26,12 +26,24 @@ from cache_dit.cache_factory import FoCaCalibratorConfig
|
|
|
26
26
|
from cache_dit.cache_factory import supported_pipelines
|
|
27
27
|
from cache_dit.cache_factory import get_adapter
|
|
28
28
|
from cache_dit.compile import set_compile_configs
|
|
29
|
-
from cache_dit.quantize import quantize
|
|
30
29
|
from cache_dit.parallelism import ParallelismBackend
|
|
31
30
|
from cache_dit.parallelism import ParallelismConfig
|
|
32
31
|
from cache_dit.utils import summary
|
|
33
32
|
from cache_dit.utils import strify
|
|
34
33
|
|
|
34
|
+
try:
|
|
35
|
+
from cache_dit.quantize import quantize
|
|
36
|
+
except ImportError as e: # noqa: F841
|
|
37
|
+
err_msg = str(e)
|
|
38
|
+
|
|
39
|
+
def quantize(*args, **kwargs):
|
|
40
|
+
raise ImportError(
|
|
41
|
+
"Quantization requires additional dependencies. "
|
|
42
|
+
"Please install cache-dit[quantization] or cache-dit[all] "
|
|
43
|
+
f"to use this feature. Error message: {err_msg}"
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
35
47
|
NONE = CacheType.NONE
|
|
36
48
|
DBCache = CacheType.DBCache
|
|
37
49
|
DBPrune = CacheType.DBPrune
|
cache_dit/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '1.0.6'
|
|
32
|
-
__version_tuple__ = version_tuple = (1, 0, 6)
|
|
31
|
+
__version__ = version = '1.0.8'
|
|
32
|
+
__version_tuple__ = version_tuple = (1, 0, 8)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -577,3 +577,47 @@ def hunyuanditpag_adapter(pipe, **kwargs) -> BlockAdapter:
|
|
|
577
577
|
patch_functor=HunyuanDiTPatchFunctor(),
|
|
578
578
|
**kwargs,
|
|
579
579
|
)
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
@BlockAdapterRegistry.register("Kandinsky5")
|
|
583
|
+
def kandinsky5_adapter(pipe, **kwargs) -> BlockAdapter:
|
|
584
|
+
try:
|
|
585
|
+
from diffusers import Kandinsky5Transformer3DModel
|
|
586
|
+
|
|
587
|
+
assert isinstance(pipe.transformer, Kandinsky5Transformer3DModel)
|
|
588
|
+
return BlockAdapter(
|
|
589
|
+
pipe=pipe,
|
|
590
|
+
transformer=pipe.transformer,
|
|
591
|
+
blocks=pipe.transformer.visual_transformer_blocks,
|
|
592
|
+
forward_pattern=ForwardPattern.Pattern_3, # or Pattern_2
|
|
593
|
+
has_separate_cfg=True,
|
|
594
|
+
check_forward_pattern=False,
|
|
595
|
+
check_num_outputs=False,
|
|
596
|
+
**kwargs,
|
|
597
|
+
)
|
|
598
|
+
except ImportError:
|
|
599
|
+
raise ImportError(
|
|
600
|
+
"Kandinsky5Transformer3DModel is not available in the current diffusers version. "
|
|
601
|
+
"Please upgrade diffusers>=0.36.dev0 to use this adapter."
|
|
602
|
+
)
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
@BlockAdapterRegistry.register("PRX")
|
|
606
|
+
def prx_adapter(pipe, **kwargs) -> BlockAdapter:
|
|
607
|
+
try:
|
|
608
|
+
from diffusers import PRXTransformer2DModel
|
|
609
|
+
|
|
610
|
+
assert isinstance(pipe.transformer, PRXTransformer2DModel)
|
|
611
|
+
return BlockAdapter(
|
|
612
|
+
pipe=pipe,
|
|
613
|
+
transformer=pipe.transformer,
|
|
614
|
+
blocks=pipe.transformer.blocks,
|
|
615
|
+
forward_pattern=ForwardPattern.Pattern_3,
|
|
616
|
+
check_num_outputs=False,
|
|
617
|
+
**kwargs,
|
|
618
|
+
)
|
|
619
|
+
except ImportError:
|
|
620
|
+
raise ImportError(
|
|
621
|
+
"PRXTransformer2DModel is not available in the current diffusers version. "
|
|
622
|
+
"Please upgrade diffusers>=0.36.dev0 to use this adapter."
|
|
623
|
+
)
|
|
@@ -139,14 +139,9 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
|
|
|
139
139
|
*args,
|
|
140
140
|
**kwargs,
|
|
141
141
|
)
|
|
142
|
-
|
|
143
|
-
hidden_states, encoder_hidden_states
|
|
144
|
-
|
|
145
|
-
hidden_states, encoder_hidden_states = (
|
|
146
|
-
encoder_hidden_states,
|
|
147
|
-
hidden_states,
|
|
148
|
-
)
|
|
149
|
-
|
|
142
|
+
hidden_states, encoder_hidden_states = self._process_block_outputs(
|
|
143
|
+
hidden_states, encoder_hidden_states
|
|
144
|
+
)
|
|
150
145
|
return hidden_states, encoder_hidden_states
|
|
151
146
|
|
|
152
147
|
@torch.compiler.disable
|
cache_dit/metrics/__init__.py
CHANGED
|
@@ -1,3 +1,14 @@
|
|
|
1
|
+
try:
|
|
2
|
+
import ImageReward
|
|
3
|
+
import lpips
|
|
4
|
+
import skimage
|
|
5
|
+
import scipy
|
|
6
|
+
except ImportError:
|
|
7
|
+
raise ImportError(
|
|
8
|
+
"Metrics functionality requires the 'metrics' extra dependencies. "
|
|
9
|
+
"Install with:\npip install cache-dit[metrics]"
|
|
10
|
+
)
|
|
11
|
+
|
|
1
12
|
from cache_dit.metrics.metrics import compute_psnr
|
|
2
13
|
from cache_dit.metrics.metrics import compute_ssim
|
|
3
14
|
from cache_dit.metrics.metrics import compute_mse
|
cache_dit/parallelism/backends/{parallel_difffusers.py → native_diffusers/parallel_difffusers.py}
RENAMED
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
import torch
|
|
2
2
|
|
|
3
3
|
from typing import Optional
|
|
4
|
+
from cache_dit.logger import init_logger
|
|
5
|
+
|
|
6
|
+
logger = init_logger(__name__)
|
|
7
|
+
|
|
4
8
|
|
|
5
9
|
try:
|
|
6
10
|
from diffusers import ContextParallelConfig
|
|
@@ -24,10 +28,18 @@ def maybe_enable_parallelism(
|
|
|
24
28
|
transformer: torch.nn.Module,
|
|
25
29
|
parallelism_config: Optional[ParallelismConfig],
|
|
26
30
|
) -> torch.nn.Module:
|
|
27
|
-
assert isinstance(transformer, ModelMixin)
|
|
31
|
+
assert isinstance(transformer, ModelMixin), (
|
|
32
|
+
"transformer must be an instance of diffusers' ModelMixin, "
|
|
33
|
+
f"but got {type(transformer)}"
|
|
34
|
+
)
|
|
28
35
|
if parallelism_config is None:
|
|
29
36
|
return transformer
|
|
30
37
|
|
|
38
|
+
assert isinstance(parallelism_config, ParallelismConfig), (
|
|
39
|
+
"parallelism_config must be an instance of ParallelismConfig"
|
|
40
|
+
f" but got {type(parallelism_config)}"
|
|
41
|
+
)
|
|
42
|
+
|
|
31
43
|
if (
|
|
32
44
|
parallelism_config.backend == ParallelismBackend.NATIVE_DIFFUSER
|
|
33
45
|
and native_diffusers_parallelism_available()
|
|
@@ -43,11 +55,24 @@ def maybe_enable_parallelism(
|
|
|
43
55
|
)
|
|
44
56
|
if cp_config is not None:
|
|
45
57
|
if hasattr(transformer, "enable_parallelism"):
|
|
46
|
-
if hasattr(transformer, "set_attention_backend"):
|
|
58
|
+
if hasattr(transformer, "set_attention_backend"):
|
|
47
59
|
# Now only _native_cudnn is supported for parallelism
|
|
48
60
|
# issue: https://github.com/huggingface/diffusers/pull/12443
|
|
49
|
-
transformer.set_attention_backend("_native_cudnn")
|
|
50
|
-
|
|
61
|
+
transformer.set_attention_backend("_native_cudnn")
|
|
62
|
+
logger.warning(
|
|
63
|
+
"Set attention backend to _native_cudnn for parallelism because of "
|
|
64
|
+
"the issue: https://github.com/huggingface/diffusers/pull/12443"
|
|
65
|
+
)
|
|
66
|
+
cp_plan = parallelism_config.parallel_kwargs.get(
|
|
67
|
+
"cp_plan", None
|
|
68
|
+
)
|
|
69
|
+
if cp_plan is not None:
|
|
70
|
+
logger.info(
|
|
71
|
+
f"Using custom context parallelism plan: {cp_plan}"
|
|
72
|
+
)
|
|
73
|
+
transformer.enable_parallelism(
|
|
74
|
+
config=cp_config, cp_plan=cp_plan
|
|
75
|
+
)
|
|
51
76
|
else:
|
|
52
77
|
raise ValueError(
|
|
53
78
|
f"{transformer.__class__.__name__} does not support context parallelism."
|
|
File without changes
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import dataclasses
|
|
2
|
+
from typing import Optional, Dict, Any
|
|
2
3
|
from cache_dit.parallelism.parallel_backend import ParallelismBackend
|
|
3
4
|
from cache_dit.logger import init_logger
|
|
4
5
|
|
|
@@ -20,6 +21,13 @@ class ParallelismConfig:
|
|
|
20
21
|
# tp_size (`int`, *optional*):
|
|
21
22
|
# The degree of tensor parallelism.
|
|
22
23
|
tp_size: int = None
|
|
24
|
+
# parallel_kwargs (`dict`, *optional*):
|
|
25
|
+
# Additional kwargs for parallelism backends. For example, for
|
|
26
|
+
# NATIVE_DIFFUSER backend, it can include `cp_plan` and other
|
|
27
|
+
# arguments for `Context Parallelism`.
|
|
28
|
+
parallel_kwargs: Optional[Dict[str, Any]] = dataclasses.field(
|
|
29
|
+
default_factory=dict
|
|
30
|
+
)
|
|
23
31
|
|
|
24
32
|
def __post_init__(self):
|
|
25
33
|
assert ParallelismBackend.is_supported(self.backend), (
|
|
@@ -22,7 +22,7 @@ def enable_parallelism(
|
|
|
22
22
|
return transformer
|
|
23
23
|
|
|
24
24
|
if parallelism_config.backend == ParallelismBackend.NATIVE_DIFFUSER:
|
|
25
|
-
from cache_dit.parallelism.backends.parallel_difffusers import (
|
|
25
|
+
from cache_dit.parallelism.backends.native_diffusers import (
|
|
26
26
|
maybe_enable_parallelism,
|
|
27
27
|
native_diffusers_parallelism_available,
|
|
28
28
|
)
|
cache_dit/quantize/__init__.py
CHANGED
cache_dit/utils.py
CHANGED
|
@@ -183,6 +183,8 @@ def strify(
|
|
|
183
183
|
cached_steps = None
|
|
184
184
|
cache_type = cache_options.get("cache_type", CacheType.NONE)
|
|
185
185
|
|
|
186
|
+
stats = None
|
|
187
|
+
|
|
186
188
|
if cache_type == CacheType.NONE:
|
|
187
189
|
return "NONE"
|
|
188
190
|
else:
|
|
@@ -217,6 +219,8 @@ def strify(
|
|
|
217
219
|
return "T0O0"
|
|
218
220
|
|
|
219
221
|
def parallelism_str():
|
|
222
|
+
if stats is None:
|
|
223
|
+
return ""
|
|
220
224
|
parallelism_config: ParallelismConfig = stats.parallelism_config
|
|
221
225
|
if parallelism_config is not None:
|
|
222
226
|
return f"_{parallelism_config.strify()}"
|
|
@@ -1,37 +1,33 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cache_dit
|
|
3
|
-
Version: 1.0.6
|
|
3
|
+
Version: 1.0.8
|
|
4
4
|
Summary: A Unified, Flexible and Training-free Cache Acceleration Framework for 🤗Diffusers.
|
|
5
5
|
Author: DefTruth, vipshop.com, etc.
|
|
6
6
|
Maintainer: DefTruth, vipshop.com, etc
|
|
7
|
-
Project-URL: Repository, https://github.com/vipshop/cache-dit
|
|
8
|
-
Project-URL: Homepage, https://github.com/vipshop/cache-dit
|
|
7
|
+
Project-URL: Repository, https://github.com/vipshop/cache-dit
|
|
8
|
+
Project-URL: Homepage, https://github.com/vipshop/cache-dit
|
|
9
|
+
Project-URL: GitHub, https://github.com/vipshop/cache-dit
|
|
9
10
|
Requires-Python: >=3.10
|
|
10
11
|
Description-Content-Type: text/markdown
|
|
11
12
|
License-File: LICENSE
|
|
12
|
-
Requires-Dist: packaging
|
|
13
13
|
Requires-Dist: pyyaml
|
|
14
14
|
Requires-Dist: torch>=2.7.1
|
|
15
|
-
Requires-Dist: transformers>=4.55.2
|
|
16
15
|
Requires-Dist: diffusers>=0.35.1
|
|
17
|
-
Requires-Dist:
|
|
18
|
-
|
|
19
|
-
Requires-Dist:
|
|
20
|
-
Requires-Dist: torchao>=0.12.0
|
|
21
|
-
Requires-Dist: image-reward
|
|
22
|
-
Provides-Extra: all
|
|
16
|
+
Requires-Dist: transformers>=4.55.2
|
|
17
|
+
Provides-Extra: quantization
|
|
18
|
+
Requires-Dist: torchao>=0.12.0; extra == "quantization"
|
|
23
19
|
Provides-Extra: metrics
|
|
20
|
+
Requires-Dist: scipy; extra == "metrics"
|
|
21
|
+
Requires-Dist: scikit-image; extra == "metrics"
|
|
24
22
|
Requires-Dist: image-reward; extra == "metrics"
|
|
25
|
-
Requires-Dist: pytorch-fid; extra == "metrics"
|
|
26
23
|
Requires-Dist: lpips==0.1.4; extra == "metrics"
|
|
27
24
|
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: packaging; extra == "dev"
|
|
28
26
|
Requires-Dist: pre-commit; extra == "dev"
|
|
29
27
|
Requires-Dist: pytest<8.0.0,>=7.0.0; extra == "dev"
|
|
30
28
|
Requires-Dist: pytest-html; extra == "dev"
|
|
31
29
|
Requires-Dist: expecttest; extra == "dev"
|
|
32
30
|
Requires-Dist: hypothesis; extra == "dev"
|
|
33
|
-
Requires-Dist: transformers; extra == "dev"
|
|
34
|
-
Requires-Dist: diffusers; extra == "dev"
|
|
35
31
|
Requires-Dist: accelerate; extra == "dev"
|
|
36
32
|
Requires-Dist: peft; extra == "dev"
|
|
37
33
|
Requires-Dist: protobuf; extra == "dev"
|
|
@@ -39,23 +35,21 @@ Requires-Dist: sentencepiece; extra == "dev"
|
|
|
39
35
|
Requires-Dist: opencv-python-headless; extra == "dev"
|
|
40
36
|
Requires-Dist: ftfy; extra == "dev"
|
|
41
37
|
Requires-Dist: scikit-image; extra == "dev"
|
|
42
|
-
|
|
38
|
+
Provides-Extra: all
|
|
39
|
+
Requires-Dist: cache-dit[quantization]; extra == "all"
|
|
40
|
+
Requires-Dist: cache-dit[metrics]; extra == "all"
|
|
43
41
|
Dynamic: license-file
|
|
44
|
-
Dynamic: provides-extra
|
|
45
|
-
Dynamic: requires-dist
|
|
46
42
|
Dynamic: requires-python
|
|
47
43
|
|
|
48
|
-
|
|
44
|
+
📚English | <a href="./README_CN.md">📚中文阅读 </a>
|
|
49
45
|
|
|
50
46
|
<div align="center">
|
|
51
47
|
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/cache-dit-logo.png height="120">
|
|
52
48
|
<p align="center">
|
|
53
49
|
A <b>Unified</b>, Flexible and Training-free <b>Cache Acceleration</b> Framework for <b>🤗Diffusers</b> <br>
|
|
54
|
-
♥️ Cache Acceleration with <b>One-line</b> Code ~ ♥️
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
🔥<b><a href="./docs/User_Guide.md">DBCache</a> | <a href="./docs/User_Guide.md">DBPrune</a> | <a href="./docs/User_Guide.md">Hybird TaylorSeer</a> | <a href="./docs/User_Guide.md">Hybird Cache CFG</a></b>🔥 <br>
|
|
58
|
-
🔥<b><a href="./docs/User_Guide.md">Hybrid Context Paralleism</a> | <a href="./docs/User_Guide.md">PyTorch Native</a> | <a href="./docs/User_Guide.md">SOTA</a></b>🔥
|
|
50
|
+
♥️ Cache Acceleration with <b>One-line</b> Code ~ ♥️ <br>
|
|
51
|
+
🔥<b><a href="./docs/User_Guide.md">DBCache</a> | <a href="./docs/User_Guide.md">DBPrune</a> | <a href="./docs/User_Guide.md">Hybrid TaylorSeer</a> | <a href="./docs/User_Guide.md">Hybrid Cache CFG</a></b>🔥 <br>
|
|
52
|
+
🔥<b><a href="./docs/User_Guide.md">Hybrid Context Paralleism</a> | <a href="./docs/User_Guide.md">Diffusers Native</a> | <a href="./docs/User_Guide.md">SOTA</a></b>🔥
|
|
59
53
|
</p>
|
|
60
54
|
<div align='center'>
|
|
61
55
|
<img src=https://img.shields.io/badge/Language-Python-brightgreen.svg >
|
|
@@ -198,7 +192,7 @@ You can install the stable release of cache-dit from PyPI, or the latest develop
|
|
|
198
192
|
- **[🎉Easy New Model Integration](./docs/User_Guide.md#automatic-block-adapter)**: Features like **Unified Cache APIs**, **Forward Pattern Matching**, **Automatic Block Adapter**, **Hybrid Forward Pattern**, and **Patch Functor** make it highly functional and flexible. For example, we achieved 🎉 Day 1 support for [HunyuanImage-2.1](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1) with 1.7x speedup w/o precision loss—even before it was available in the Diffusers library.
|
|
199
193
|
- **[🎉State-of-the-Art Performance](./bench/)**: Compared with algorithms including Δ-DiT, Chipmunk, FORA, DuCa, TaylorSeer and FoCa, cache-dit achieved the **SOTA** performance w/ **7.4x↑🎉** speedup on ClipScore!
|
|
200
194
|
- **[🎉Support for 4/8-Steps Distilled Models](./bench/)**: Surprisingly, cache-dit's **DBCache** works for extremely few-step distilled models—something many other methods fail to do.
|
|
201
|
-
- **[🎉Compatibility with Other Optimizations](./docs/User_Guide.md#️torch-compile)**: Designed to work seamlessly with torch.compile,
|
|
195
|
+
- **[🎉Compatibility with Other Optimizations](./docs/User_Guide.md#️torch-compile)**: Designed to work seamlessly with torch.compile, Quantization ([torchao](./examples/quantize/), [🔥nunchaku](./examples/quantize/)), CPU or Sequential Offloading, **[🔥Context Parallelism](./docs/User_Guide.md/#️hybrid-context-parallelism)**, Tensor Parallelism, etc.
|
|
202
196
|
- **[🎉Hybrid Cache Acceleration](./docs/User_Guide.md#taylorseer-calibrator)**: Now supports hybrid **Block-wise Cache + Calibrator** schemes (e.g., DBCache or DBPrune + TaylorSeerCalibrator). DBCache or DBPrune acts as the **Indicator** to decide *when* to cache, while the Calibrator decides *how* to cache. More mainstream cache acceleration algorithms (e.g., FoCa) will be supported in the future, along with additional benchmarks—stay tuned for updates!
|
|
203
197
|
- **[🤗Diffusers Ecosystem Integration](https://huggingface.co/docs/diffusers/main/en/optimization/cache_dit)**: 🔥**cache-dit** has joined the Diffusers community ecosystem as the **first** DiT-specific cache acceleration framework! Check out the documentation here: <a href="https://huggingface.co/docs/diffusers/main/en/optimization/cache_dit"><img src=https://img.shields.io/badge/🤗Diffusers-ecosystem-yellow.svg ></a>
|
|
204
198
|
|
|
@@ -206,14 +200,12 @@ You can install the stable release of cache-dit from PyPI, or the latest develop
|
|
|
206
200
|
|
|
207
201
|
## 🔥Important News
|
|
208
202
|
|
|
209
|
-
- 2025.10.20: 🔥Now cache-dit supported the [Hybrid Cache + Context Parallelism](./docs/User_Guide.md/#️hybrid-context-parallelism) scheme!🔥
|
|
203
|
+
- 2025.10.20: 🔥Now cache-dit supported the **[Hybrid Cache + Context Parallelism](./docs/User_Guide.md/#️hybrid-context-parallelism)** scheme!🔥
|
|
210
204
|
- 2025.10.16: 🎉cache-dit + [**🔥nunchaku 4-bits**](https://github.com/nunchaku-tech/nunchaku) supported: [Qwen-Image-Lightning 4/8 steps](./examples/quantize/).
|
|
211
205
|
- 2025.10.15: 🎉cache-dit now supported [**🔥nunchaku**](https://github.com/nunchaku-tech/nunchaku): Qwen-Image/FLUX.1 [4-bits examples](./examples/quantize/)
|
|
212
206
|
- 2025.10.13: 🎉cache-dit achieved the **SOTA** performance w/ **7.4x↑🎉** speedup on ClipScore!
|
|
213
207
|
- 2025.10.10: 🔥[**Qwen-Image-ControlNet-Inpainting**](https://huggingface.co/InstantX/Qwen-Image-ControlNet-Inpainting) **2.3x↑🎉** speedup! Check the [example](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_qwen_image_controlnet_inpaint.py).
|
|
214
208
|
- 2025.09.26: 🔥[**Qwen-Image-Edit-Plus(2509)**](https://github.com/QwenLM/Qwen-Image) **2.1x↑🎉** speedup! Please check the [example](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_qwen_image_edit_plus.py).
|
|
215
|
-
- 2025.09.25: 🎉The **first API-stable version (v1.0.0)** of cache-dit has finally been released!
|
|
216
|
-
- 2025.09.25: 🔥**cache-dit** has joined the Diffusers community ecosystem: <a href="https://huggingface.co/docs/diffusers/main/en/optimization/cache_dit"><img src=https://img.shields.io/badge/🤗Diffusers-ecosystem-yellow.svg ></a>
|
|
217
209
|
- 2025.09.10: 🎉Day 1 support [**HunyuanImage-2.1**](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1) with **1.7x↑🎉** speedup! Check this [example](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_hunyuan_image_2.1.py).
|
|
218
210
|
- 2025.09.08: 🔥[**Qwen-Image-Lightning**](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_qwen_image_lightning.py) **7.1/3.5 steps🎉** inference with **[DBCache: F16B16](https://github.com/vipshop/cache-dit)**.
|
|
219
211
|
- 2025.09.03: 🎉[**Wan2.2-MoE**](https://github.com/Wan-Video) **2.4x↑🎉** speedup! Please refer to [run_wan_2.2.py](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_wan_2.2.py) as an example.
|
|
@@ -223,6 +215,8 @@ You can install the stable release of cache-dit from PyPI, or the latest develop
|
|
|
223
215
|
<details>
|
|
224
216
|
<summary>Previous News</summary>
|
|
225
217
|
|
|
218
|
+
- 2025.09.25: 🎉The **first API-stable version (v1.0.0)** of cache-dit has finally been released!
|
|
219
|
+
- 2025.09.25: 🔥**cache-dit** has joined the Diffusers community ecosystem: <a href="https://huggingface.co/docs/diffusers/main/en/optimization/cache_dit"><img src=https://img.shields.io/badge/🤗Diffusers-ecosystem-yellow.svg ></a>
|
|
226
220
|
- 2025.09.08: 🎉First caching mechanism in [Wan2.2](https://github.com/Wan-Video/Wan2.2) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/Wan-Video/Wan2.2/pull/127) for more details.
|
|
227
221
|
- 2025.09.08: 🎉First caching mechanism in [Qwen-Image-Lightning](https://github.com/ModelTC/Qwen-Image-Lightning) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/ModelTC/Qwen-Image-Lightning/pull/35).
|
|
228
222
|
- 2025.08.10: 🔥[**FLUX.1-Kontext-dev**](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) is supported! Please refer [run_flux_kontext.py](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_flux_kontext.py) as an example.
|
|
@@ -245,13 +239,13 @@ For more advanced features such as **Unified Cache APIs**, **Forward Pattern Mat
|
|
|
245
239
|
- [📚Forward Pattern Matching](./docs/User_Guide.md#forward-pattern-matching)
|
|
246
240
|
- [📚Cache with One-line Code](./docs/User_Guide.md#%EF%B8%8Fcache-acceleration-with-one-line-code)
|
|
247
241
|
- [🔥Automatic Block Adapter](./docs/User_Guide.md#automatic-block-adapter)
|
|
248
|
-
- [📚
|
|
242
|
+
- [📚Hybrid Forward Pattern](./docs/User_Guide.md#hybrid-forward-pattern)
|
|
249
243
|
- [📚Implement Patch Functor](./docs/User_Guide.md#implement-patch-functor)
|
|
250
244
|
- [🤖Cache Acceleration Stats](./docs/User_Guide.md#cache-acceleration-stats-summary)
|
|
251
245
|
- [⚡️DBCache: Dual Block Cache](./docs/User_Guide.md#️dbcache-dual-block-cache)
|
|
252
246
|
- [⚡️DBPrune: Dynamic Block Prune](./docs/User_Guide.md#️dbprune-dynamic-block-prune)
|
|
253
|
-
- [🔥Hybrid TaylorSeer](./docs/User_Guide.md#taylorseer-calibrator)
|
|
254
247
|
- [⚡️Hybrid Cache CFG](./docs/User_Guide.md#️hybrid-cache-cfg)
|
|
248
|
+
- [🔥Hybrid TaylorSeer Calibrator](./docs/User_Guide.md#taylorseer-calibrator)
|
|
255
249
|
- [⚡️Hybrid Context Parallelism](./docs/User_Guide.md#context-paralleism)
|
|
256
250
|
- [🛠Metrics Command Line](./docs/User_Guide.md#metrics-cli)
|
|
257
251
|
- [⚙️Torch Compile](./docs/User_Guide.md#️torch-compile)
|
|
@@ -275,7 +269,7 @@ How to contribute? Star ⭐️ this repo to support us or check [CONTRIBUTE.md](
|
|
|
275
269
|
|
|
276
270
|
## 🎉Projects Using CacheDiT
|
|
277
271
|
|
|
278
|
-
Here is a curated list of open-source projects integrating **CacheDiT**, including popular repositories like [jetson-containers](https://github.com/dusty-nv/jetson-containers/blob/master/packages/diffusion/cache_edit/build.sh) , [flux-fast](https://github.com/huggingface/flux-fast) , and [sdnext](https://github.com/vladmandic/sdnext/
|
|
272
|
+
Here is a curated list of open-source projects integrating **CacheDiT**, including popular repositories like [jetson-containers](https://github.com/dusty-nv/jetson-containers/blob/master/packages/diffusion/cache_edit/build.sh) , [flux-fast](https://github.com/huggingface/flux-fast) , and [sdnext](https://github.com/vladmandic/sdnext/discussions/4269) . **CacheDiT** has also been **recommended** by [Wan2.2](https://github.com/Wan-Video/Wan2.2) , [Qwen-Image-Lightning](https://github.com/ModelTC/Qwen-Image-Lightning) , [Qwen-Image](https://github.com/QwenLM/Qwen-Image) , and <a href="https://huggingface.co/docs/diffusers/main/en/optimization/cache_dit"><img src="https://img.shields.io/badge/🤗Diffusers-ecosystem-yellow.svg"></a> , among others. We would be grateful if you could let us know if you have used CacheDiT.
|
|
279
273
|
|
|
280
274
|
## ©️Acknowledgements
|
|
281
275
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
cache_dit/__init__.py,sha256=
|
|
2
|
-
cache_dit/_version.py,sha256=
|
|
1
|
+
cache_dit/__init__.py,sha256=Azqj-3QMQK4HZDTGgyUtAfatUwuU-YQ4w8erJSyrsbE,2082
|
|
2
|
+
cache_dit/_version.py,sha256=09KGe_qkXH8vhvX180khtkldtbrAX-u8refqVsC-Ky4,704
|
|
3
3
|
cache_dit/logger.py,sha256=0zsu42hN-3-rgGC_C29ms1IvVpV4_b4_SwJCKSenxBE,4304
|
|
4
|
-
cache_dit/utils.py,sha256=
|
|
4
|
+
cache_dit/utils.py,sha256=3NcEb324fNY0NYnrBTjsLURKQuckKeFe3V9Dfc_g4sc,17851
|
|
5
5
|
cache_dit/cache_factory/.gitignore,sha256=5Cb-qT9wsTUoMJ7vACDF7ZcLpAXhi5v-xdcWSRit988,23
|
|
6
6
|
cache_dit/cache_factory/__init__.py,sha256=5UjrpxLVlmjHttTL0O14fD5oU5uKI3FKYevL613ibFQ,1848
|
|
7
7
|
cache_dit/cache_factory/cache_interface.py,sha256=244uTVx83hpCpbCDgEOydi5HqG7hKHHzEoz1ApJW6lI,14627
|
|
@@ -9,16 +9,16 @@ cache_dit/cache_factory/cache_types.py,sha256=QnWfaS52UOXQtnoCUOwwz4ziY0dyBta6vQ
|
|
|
9
9
|
cache_dit/cache_factory/forward_pattern.py,sha256=FumlCuZ-TSmSYH0hGBHctSJ-oGLCftdZjLygqhsmdR4,2258
|
|
10
10
|
cache_dit/cache_factory/params_modifier.py,sha256=2T98IbepAolWW6GwQsqUDsRzu0k65vo7BOrN3V8mKog,3606
|
|
11
11
|
cache_dit/cache_factory/utils.py,sha256=S3SD6Zhexzhkqnmfo830v6oNLm8stZe32nF4VdxD_bA,2497
|
|
12
|
-
cache_dit/cache_factory/block_adapters/__init__.py,sha256=
|
|
12
|
+
cache_dit/cache_factory/block_adapters/__init__.py,sha256=eeBcWUMIvS-x3GcD1LNesW2SuB9V5mtwG9MoUBWHsL8,19765
|
|
13
13
|
cache_dit/cache_factory/block_adapters/block_adapters.py,sha256=2TVK_KqiYXC7AKZ2s07fzdOzUoeUBc9P1SzQtLVzhf4,22249
|
|
14
|
-
cache_dit/cache_factory/block_adapters/block_registers.py,sha256=
|
|
14
|
+
cache_dit/cache_factory/block_adapters/block_registers.py,sha256=KU0cqtLYRlij2WvuQ6erqZbxUWkb6DjvmY_sB3o_fQM,2594
|
|
15
15
|
cache_dit/cache_factory/cache_adapters/__init__.py,sha256=py71WGD3JztQ1uk6qdLVbzYcQ1rvqFidNNaQYo7tqTo,79
|
|
16
16
|
cache_dit/cache_factory/cache_adapters/cache_adapter.py,sha256=WYrgV3DKxOxttl-wEKymyKIB1Po0eW73Q2_vOlGEKdQ,24080
|
|
17
17
|
cache_dit/cache_factory/cache_blocks/__init__.py,sha256=cpxzmDcUhbXcReHqaKSnWyEEbIg1H91Pz5hE3z9Xj3k,9984
|
|
18
18
|
cache_dit/cache_factory/cache_blocks/offload_utils.py,sha256=wusgcqaCrwEjvv7Guy-6VXhNOgPPUrBV2sSVuRmGuvo,3513
|
|
19
19
|
cache_dit/cache_factory/cache_blocks/pattern_0_1_2.py,sha256=j4bTafqU5DLQhzP_X5XwOk-QUVLWkGrX-Q6JZvBGHh0,666
|
|
20
20
|
cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py,sha256=2qPnXVZwpQIm2oJ-Yrn3Avqi3BcXtE2133jPIL_LhK8,19595
|
|
21
|
-
cache_dit/cache_factory/cache_blocks/pattern_base.py,sha256=
|
|
21
|
+
cache_dit/cache_factory/cache_blocks/pattern_base.py,sha256=uNcPZU8b8i_-re_X1xBHkSDQSacQO7Fa69vjbfAYxOA,25275
|
|
22
22
|
cache_dit/cache_factory/cache_blocks/pattern_utils.py,sha256=qOxoVTlYPQzPMrR06-7_Ce_lwNg6n5pt1KQrvxzAJhE,3124
|
|
23
23
|
cache_dit/cache_factory/cache_contexts/__init__.py,sha256=7uY8fX9uhpC71VNm1HH4aDIicYn-dD3kRpPQhvc9-EI,853
|
|
24
24
|
cache_dit/cache_factory/cache_contexts/cache_config.py,sha256=G0PVWgckDqeyARc72Ne_0lRtO_LftsOeMERRhbh2gCA,5739
|
|
@@ -44,7 +44,7 @@ cache_dit/compile/__init__.py,sha256=FcTVzCeyypl-mxlc59_ehHL3lBNiDAFsXuRoJ-5Cfi0
|
|
|
44
44
|
cache_dit/compile/utils.py,sha256=nN2OIrSdwRR5zGxJinKDqb07pXpvTNTF3g_OgLkeeBU,3858
|
|
45
45
|
cache_dit/custom_ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
46
|
cache_dit/custom_ops/triton_taylorseer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
|
-
cache_dit/metrics/__init__.py,sha256=
|
|
47
|
+
cache_dit/metrics/__init__.py,sha256=Y_JrBr9XE6NKXwyXc7d_-PaX9c_rk5FKms-IYgCyHmY,936
|
|
48
48
|
cache_dit/metrics/clip_score.py,sha256=ERNCFQFJKzJdbIX9OAg-1LiSPuXUVHLOFxbf2gcENpc,3938
|
|
49
49
|
cache_dit/metrics/config.py,sha256=ieOgD9ayz722RjVzk24bSIqS2D6o7TZjGk8KeXV-OLQ,551
|
|
50
50
|
cache_dit/metrics/fid.py,sha256=ZM_FM0XERtpnkMUfphmw2aOdljrh1uba-pnYItu0q6M,18219
|
|
@@ -54,15 +54,17 @@ cache_dit/metrics/lpips.py,sha256=hrHrmdM-f2B4TKDs0xLqJO5JFaYcCjq2qNIR8oCrVkc,81
|
|
|
54
54
|
cache_dit/metrics/metrics.py,sha256=AZbQyoavE-djvyRUZ_EfCIrWSQbiWQFo7n2dhn7XptE,40466
|
|
55
55
|
cache_dit/parallelism/__init__.py,sha256=dheBG5_TZCuwctviMslpAEgB-B3N8F816bE51qsw_fU,210
|
|
56
56
|
cache_dit/parallelism/parallel_backend.py,sha256=js1soTMenLeAyPMsBgdI3gWcdXoqjWgBD-PuFEywMr0,508
|
|
57
|
-
cache_dit/parallelism/parallel_config.py,sha256=
|
|
58
|
-
cache_dit/parallelism/parallel_interface.py,sha256=
|
|
59
|
-
cache_dit/parallelism/backends/
|
|
60
|
-
cache_dit/
|
|
57
|
+
cache_dit/parallelism/parallel_config.py,sha256=cOAXaniGf4CDPG5sbVktZy2rTZe49jSsnUIW2IBerGM,2106
|
|
58
|
+
cache_dit/parallelism/parallel_interface.py,sha256=WPPYYaodo0PCYrn6-Haz8GcNZ2RK3EG7q6P_cH51Qj0,2202
|
|
59
|
+
cache_dit/parallelism/backends/native_diffusers/__init__.py,sha256=T_6GeBA7TRiVbvtqGLLH2flkRiK0o7JBREt2xhS_-YE,242
|
|
60
|
+
cache_dit/parallelism/backends/native_diffusers/parallel_difffusers.py,sha256=wHRjxRWK5E92cdSwDkZJpKQCQGZfxY53woW47rMFH2I,2844
|
|
61
|
+
cache_dit/parallelism/backends/native_pytorch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
|
+
cache_dit/quantize/__init__.py,sha256=rUu0V9VRjOgwXuIUHHAI-osivNjAdUsi-jpkDbFp6Gk,278
|
|
61
63
|
cache_dit/quantize/quantize_ao.py,sha256=bbEUwsrMp3bMuRw8qJZREIvCHaJRQoZyfMjlu4ImRMI,6315
|
|
62
64
|
cache_dit/quantize/quantize_interface.py,sha256=2s_R7xPSKuJeFpEGeLwRxnq_CqJcBG3a3lzyW5wh-UM,1241
|
|
63
|
-
cache_dit-1.0.
|
|
64
|
-
cache_dit-1.0.
|
|
65
|
-
cache_dit-1.0.
|
|
66
|
-
cache_dit-1.0.
|
|
67
|
-
cache_dit-1.0.
|
|
68
|
-
cache_dit-1.0.
|
|
65
|
+
cache_dit-1.0.8.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
|
|
66
|
+
cache_dit-1.0.8.dist-info/METADATA,sha256=cs6CuqsbXQ8w69_kg2e7yALeczPAZjd5hz8odNrsvZA,29462
|
|
67
|
+
cache_dit-1.0.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
68
|
+
cache_dit-1.0.8.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
|
|
69
|
+
cache_dit-1.0.8.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
|
|
70
|
+
cache_dit-1.0.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|