cache-dit 1.0.3__py3-none-any.whl → 1.0.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cache_dit/__init__.py +37 -19
- cache_dit/_version.py +2 -2
- cache_dit/caching/__init__.py +36 -0
- cache_dit/{cache_factory → caching}/block_adapters/__init__.py +126 -11
- cache_dit/{cache_factory → caching}/block_adapters/block_adapters.py +78 -7
- cache_dit/caching/block_adapters/block_registers.py +118 -0
- cache_dit/caching/cache_adapters/__init__.py +1 -0
- cache_dit/{cache_factory → caching}/cache_adapters/cache_adapter.py +214 -114
- cache_dit/caching/cache_blocks/__init__.py +226 -0
- cache_dit/caching/cache_blocks/pattern_0_1_2.py +26 -0
- cache_dit/caching/cache_blocks/pattern_3_4_5.py +543 -0
- cache_dit/caching/cache_blocks/pattern_base.py +748 -0
- cache_dit/caching/cache_blocks/pattern_utils.py +86 -0
- cache_dit/caching/cache_contexts/__init__.py +28 -0
- cache_dit/caching/cache_contexts/cache_config.py +120 -0
- cache_dit/{cache_factory → caching}/cache_contexts/cache_context.py +18 -94
- cache_dit/{cache_factory → caching}/cache_contexts/cache_manager.py +133 -12
- cache_dit/{cache_factory → caching}/cache_contexts/calibrators/__init__.py +25 -3
- cache_dit/{cache_factory → caching}/cache_contexts/calibrators/foca.py +1 -1
- cache_dit/{cache_factory → caching}/cache_contexts/calibrators/taylorseer.py +81 -9
- cache_dit/caching/cache_contexts/context_manager.py +36 -0
- cache_dit/caching/cache_contexts/prune_config.py +63 -0
- cache_dit/caching/cache_contexts/prune_context.py +155 -0
- cache_dit/caching/cache_contexts/prune_manager.py +167 -0
- cache_dit/{cache_factory → caching}/cache_interface.py +150 -37
- cache_dit/{cache_factory → caching}/cache_types.py +19 -2
- cache_dit/{cache_factory → caching}/forward_pattern.py +14 -14
- cache_dit/{cache_factory → caching}/params_modifier.py +10 -10
- cache_dit/caching/patch_functors/__init__.py +15 -0
- cache_dit/{cache_factory → caching}/patch_functors/functor_chroma.py +1 -1
- cache_dit/{cache_factory → caching}/patch_functors/functor_dit.py +1 -1
- cache_dit/{cache_factory → caching}/patch_functors/functor_flux.py +1 -1
- cache_dit/{cache_factory → caching}/patch_functors/functor_hidream.py +1 -1
- cache_dit/{cache_factory → caching}/patch_functors/functor_hunyuan_dit.py +1 -1
- cache_dit/{cache_factory → caching}/patch_functors/functor_qwen_image_controlnet.py +1 -1
- cache_dit/{cache_factory → caching}/utils.py +19 -8
- cache_dit/metrics/__init__.py +11 -0
- cache_dit/parallelism/__init__.py +3 -0
- cache_dit/parallelism/backends/native_diffusers/__init__.py +6 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/__init__.py +164 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/attention/__init__.py +4 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/attention/_attention_dispatch.py +304 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_chroma.py +95 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_cogvideox.py +202 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_cogview.py +299 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_cosisid.py +123 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_dit.py +94 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_flux.py +88 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_hunyuan.py +729 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_ltxvideo.py +264 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_nunchaku.py +407 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_pixart.py +285 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_qwen_image.py +104 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_registers.py +84 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_wan.py +101 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_planners.py +117 -0
- cache_dit/parallelism/backends/native_diffusers/parallel_difffusers.py +49 -0
- cache_dit/parallelism/backends/native_diffusers/utils.py +11 -0
- cache_dit/parallelism/backends/native_pytorch/__init__.py +6 -0
- cache_dit/parallelism/backends/native_pytorch/parallel_torch.py +62 -0
- cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/__init__.py +48 -0
- cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_flux.py +171 -0
- cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_kandinsky5.py +79 -0
- cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_qwen_image.py +78 -0
- cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_registers.py +65 -0
- cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_wan.py +153 -0
- cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_planners.py +14 -0
- cache_dit/parallelism/parallel_backend.py +26 -0
- cache_dit/parallelism/parallel_config.py +88 -0
- cache_dit/parallelism/parallel_interface.py +77 -0
- cache_dit/quantize/__init__.py +7 -0
- cache_dit/quantize/backends/__init__.py +1 -0
- cache_dit/quantize/backends/bitsandbytes/__init__.py +0 -0
- cache_dit/quantize/backends/torchao/__init__.py +1 -0
- cache_dit/quantize/{quantize_ao.py → backends/torchao/quantize_ao.py} +40 -30
- cache_dit/quantize/quantize_backend.py +0 -0
- cache_dit/quantize/quantize_config.py +0 -0
- cache_dit/quantize/quantize_interface.py +3 -16
- cache_dit/summary.py +593 -0
- cache_dit/utils.py +46 -290
- {cache_dit-1.0.3.dist-info → cache_dit-1.0.14.dist-info}/METADATA +123 -116
- cache_dit-1.0.14.dist-info/RECORD +102 -0
- cache_dit-1.0.14.dist-info/licenses/LICENSE +203 -0
- cache_dit/cache_factory/__init__.py +0 -28
- cache_dit/cache_factory/block_adapters/block_registers.py +0 -90
- cache_dit/cache_factory/cache_adapters/__init__.py +0 -1
- cache_dit/cache_factory/cache_blocks/__init__.py +0 -76
- cache_dit/cache_factory/cache_blocks/pattern_0_1_2.py +0 -16
- cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py +0 -306
- cache_dit/cache_factory/cache_blocks/pattern_base.py +0 -458
- cache_dit/cache_factory/cache_blocks/pattern_utils.py +0 -41
- cache_dit/cache_factory/cache_contexts/__init__.py +0 -15
- cache_dit/cache_factory/patch_functors/__init__.py +0 -15
- cache_dit-1.0.3.dist-info/RECORD +0 -58
- cache_dit-1.0.3.dist-info/licenses/LICENSE +0 -53
- /cache_dit/{cache_factory → caching}/.gitignore +0 -0
- /cache_dit/{cache_factory → caching}/cache_blocks/offload_utils.py +0 -0
- /cache_dit/{cache_factory → caching}/cache_contexts/calibrators/base.py +0 -0
- /cache_dit/{cache_factory → caching}/patch_functors/functor_base.py +0 -0
- /cache_dit/{custom_ops → kernels}/__init__.py +0 -0
- /cache_dit/{custom_ops → kernels}/triton_taylorseer.py +0 -0
- {cache_dit-1.0.3.dist-info → cache_dit-1.0.14.dist-info}/WHEEL +0 -0
- {cache_dit-1.0.3.dist-info → cache_dit-1.0.14.dist-info}/entry_points.txt +0 -0
- {cache_dit-1.0.3.dist-info → cache_dit-1.0.14.dist-info}/top_level.txt +0 -0
|
@@ -1,37 +1,36 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cache_dit
|
|
3
|
-
Version: 1.0.
|
|
4
|
-
Summary: A Unified
|
|
3
|
+
Version: 1.0.14
|
|
4
|
+
Summary: A Unified and Flexible Inference Engine with Hybrid Cache Acceleration and Parallelism for 🤗Diffusers.
|
|
5
5
|
Author: DefTruth, vipshop.com, etc.
|
|
6
6
|
Maintainer: DefTruth, vipshop.com, etc
|
|
7
|
-
Project-URL: Repository, https://github.com/vipshop/cache-dit
|
|
8
|
-
Project-URL: Homepage, https://github.com/vipshop/cache-dit
|
|
7
|
+
Project-URL: Repository, https://github.com/vipshop/cache-dit
|
|
8
|
+
Project-URL: Homepage, https://github.com/vipshop/cache-dit
|
|
9
|
+
Project-URL: GitHub, https://github.com/vipshop/cache-dit
|
|
9
10
|
Requires-Python: >=3.10
|
|
10
11
|
Description-Content-Type: text/markdown
|
|
11
12
|
License-File: LICENSE
|
|
12
|
-
Requires-Dist: packaging
|
|
13
13
|
Requires-Dist: pyyaml
|
|
14
14
|
Requires-Dist: torch>=2.7.1
|
|
15
|
-
Requires-Dist: transformers>=4.55.2
|
|
16
15
|
Requires-Dist: diffusers>=0.35.1
|
|
17
|
-
Requires-Dist:
|
|
18
|
-
|
|
19
|
-
Requires-Dist:
|
|
20
|
-
|
|
21
|
-
Requires-Dist:
|
|
22
|
-
|
|
16
|
+
Requires-Dist: transformers>=4.55.2
|
|
17
|
+
Provides-Extra: parallelism
|
|
18
|
+
Requires-Dist: einops>=0.8.1; extra == "parallelism"
|
|
19
|
+
Provides-Extra: quantization
|
|
20
|
+
Requires-Dist: torchao>=0.14.1; extra == "quantization"
|
|
21
|
+
Requires-Dist: bitsandbytes>=0.48.1; extra == "quantization"
|
|
23
22
|
Provides-Extra: metrics
|
|
23
|
+
Requires-Dist: scipy; extra == "metrics"
|
|
24
|
+
Requires-Dist: scikit-image; extra == "metrics"
|
|
24
25
|
Requires-Dist: image-reward; extra == "metrics"
|
|
25
|
-
Requires-Dist: pytorch-fid; extra == "metrics"
|
|
26
26
|
Requires-Dist: lpips==0.1.4; extra == "metrics"
|
|
27
27
|
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: packaging; extra == "dev"
|
|
28
29
|
Requires-Dist: pre-commit; extra == "dev"
|
|
29
30
|
Requires-Dist: pytest<8.0.0,>=7.0.0; extra == "dev"
|
|
30
31
|
Requires-Dist: pytest-html; extra == "dev"
|
|
31
32
|
Requires-Dist: expecttest; extra == "dev"
|
|
32
33
|
Requires-Dist: hypothesis; extra == "dev"
|
|
33
|
-
Requires-Dist: transformers; extra == "dev"
|
|
34
|
-
Requires-Dist: diffusers; extra == "dev"
|
|
35
34
|
Requires-Dist: accelerate; extra == "dev"
|
|
36
35
|
Requires-Dist: peft; extra == "dev"
|
|
37
36
|
Requires-Dist: protobuf; extra == "dev"
|
|
@@ -39,40 +38,109 @@ Requires-Dist: sentencepiece; extra == "dev"
|
|
|
39
38
|
Requires-Dist: opencv-python-headless; extra == "dev"
|
|
40
39
|
Requires-Dist: ftfy; extra == "dev"
|
|
41
40
|
Requires-Dist: scikit-image; extra == "dev"
|
|
42
|
-
|
|
41
|
+
Provides-Extra: all
|
|
42
|
+
Requires-Dist: cache-dit[parallelism]; extra == "all"
|
|
43
|
+
Requires-Dist: cache-dit[quantization]; extra == "all"
|
|
44
|
+
Requires-Dist: cache-dit[metrics]; extra == "all"
|
|
43
45
|
Dynamic: license-file
|
|
44
|
-
Dynamic: provides-extra
|
|
45
|
-
Dynamic: requires-dist
|
|
46
46
|
Dynamic: requires-python
|
|
47
47
|
|
|
48
|
-
<a href="./README.md">📚English</a> | <a href="./README_CN.md">📚中文阅读 </a>
|
|
49
|
-
|
|
50
48
|
<div align="center">
|
|
51
|
-
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/cache-dit-logo.png height="120">
|
|
52
49
|
<p align="center">
|
|
53
|
-
|
|
54
|
-
|
|
50
|
+
<h2 align="center">
|
|
51
|
+
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/cache-dit-logo.png height="90" align="left">
|
|
52
|
+
A Unified and Flexible Inference Engine with 🤗🎉<br>Hybrid Cache Acceleration and Parallelism for DiTs<br>
|
|
53
|
+
<a href="https://pepy.tech/projects/cache-dit"><img src=https://static.pepy.tech/personalized-badge/cache-dit?period=total&units=INTERNATIONAL_SYSTEM&left_color=GRAY&right_color=BLUE&left_text=downloads></a>
|
|
54
|
+
<img src=https://img.shields.io/github/stars/vipshop/cache-dit.svg?style=dark >
|
|
55
|
+
<a href="https://huggingface.co/docs/diffusers/main/en/optimization/cache_dit"><img src=https://img.shields.io/badge/🤗Diffusers-ecosystem-yellow.svg ></a>
|
|
56
|
+
<a href="https://hellogithub.com/repository/vipshop/cache-dit" target="_blank"><img src="https://api.hellogithub.com/v1/widgets/recommend.svg?rid=b8b03b3b32a449ea84cfc2b96cd384f3&claim_uid=ofSCbzTmdeQk3FD&theme=small" alt="Featured|HelloGitHub" /></a>
|
|
57
|
+
<img src=https://img.shields.io/badge/Models-30+-hotpink.svg >
|
|
58
|
+
</h2>
|
|
55
59
|
</p>
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
60
|
+
</div>
|
|
61
|
+
|
|
62
|
+
<!--
|
|
63
|
+
<a href="https://pepy.tech/projects/cache-dit"><img src=https://static.pepy.tech/personalized-badge/cache-dit?period=total&units=INTERNATIONAL_SYSTEM&left_color=GRAY&right_color=GREEN&left_text=downloads></a>
|
|
64
|
+
<a href="https://pypi.org/project/cache-dit/"><img src=https://img.shields.io/pypi/dm/cache-dit.svg ></a>
|
|
65
|
+
-->
|
|
66
|
+
|
|
67
|
+
## 🔥Highlight
|
|
68
|
+
|
|
69
|
+
We are excited to announce that the **first API-stable version (v1.0.0)** of cache-dit has finally been released!
|
|
70
|
+
**[cache-dit](https://github.com/vipshop/cache-dit)** is a **Unified** and **Flexible** inference engine for 🤗 Diffusers, enabling acceleration with just ♥️**one line**♥️ of code. Key features: **Unified Cache APIs**, **Forward Pattern Matching**, **Automatic Block Adapter**, **DBCache**, **DBPrune**, **Hybrid TaylorSeer Calibrator**, **Hybrid Cache CFG**, **Context Parallelism**, **Tensor Parallelism**, **Torch Compile Compatible** and **🎉SOTA** performance.
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip3 install -U cache-dit # pip3 install git+https://github.com/vipshop/cache-dit.git
|
|
74
|
+
```
|
|
75
|
+
You can install the stable release of cache-dit from PyPI, or the latest development version from GitHub. Then try ♥️ Cache Acceleration with just **one line** of code ~ ♥️
|
|
76
|
+
```python
|
|
77
|
+
>>> import cache_dit
|
|
78
|
+
>>> from diffusers import DiffusionPipeline
|
|
79
|
+
>>> pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image") # Can be any diffusion pipeline
|
|
80
|
+
>>> cache_dit.enable_cache(pipe) # One-line code with default cache options.
|
|
81
|
+
>>> output = pipe(...) # Just call the pipe as normal.
|
|
82
|
+
>>> stats = cache_dit.summary(pipe) # Then, get the summary of cache acceleration stats.
|
|
83
|
+
>>> cache_dit.disable_cache(pipe) # Disable cache and run original pipe.
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### 📚Core Features
|
|
87
|
+
|
|
88
|
+
- **[🎉Full 🤗Diffusers Support](./docs/User_Guide.md#supported-pipelines)**: Notably, **[cache-dit](https://github.com/vipshop/cache-dit)** now supports nearly **all** of Diffusers' **DiT-based** pipelines, including **[30+](./examples/pipeline/)** series and nearly **[100+](./examples/pipeline/)** pipelines, such as FLUX.1, Qwen-Image, Qwen-Image-Lightning, Wan 2.1/2.2, HunyuanImage-2.1, HunyuanVideo, HiDream, AuraFlow, CogView3Plus, CogView4, CogVideoX, LTXVideo, ConsisID, SkyReelsV2, VisualCloze, PixArt, Chroma, Mochi, SD 3.5, DiT-XL, etc.
|
|
89
|
+
- **[🎉Extremely Easy to Use](./docs/User_Guide.md#unified-cache-apis)**: In most cases, you only need **one line** of code: `cache_dit.enable_cache(...)`. After calling this API, just use the pipeline as normal.
|
|
90
|
+
- **[🎉Easy New Model Integration](./docs/User_Guide.md#automatic-block-adapter)**: Features like **Unified Cache APIs**, **Forward Pattern Matching**, **Automatic Block Adapter**, **Hybrid Forward Pattern**, and **Patch Functor** make it highly functional and flexible. For example, we achieved 🎉 Day 1 support for [HunyuanImage-2.1](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1) with 1.7x speedup w/o precision loss—even before it was available in the Diffusers library.
|
|
91
|
+
- **[🎉State-of-the-Art Performance](./bench/)**: Compared with algorithms including Δ-DiT, Chipmunk, FORA, DuCa, TaylorSeer and FoCa, cache-dit achieved the **SOTA** performance w/ **7.4x↑🎉** speedup on ClipScore!
|
|
92
|
+
- **[🎉Support for 4/8-Steps Distilled Models](./bench/)**: Surprisingly, cache-dit's **DBCache** works for extremely few-step distilled models—something many other methods fail to do.
|
|
93
|
+
- **[🎉Compatibility with Other Optimizations](./docs/User_Guide.md#️torch-compile)**: Designed to work seamlessly with torch.compile, Quantization ([torchao](./examples/quantize/), [🔥nunchaku](./examples/quantize/)), CPU or Sequential Offloading, **[🔥Context Parallelism](./docs/User_Guide.md/#️hybrid-context-parallelism)**, **[🔥Tensor Parallelism](./docs/User_Guide.md#️hybrid-tensor-parallelism)**, etc.
|
|
94
|
+
- **[🎉Hybrid Cache Acceleration](./docs/User_Guide.md#taylorseer-calibrator)**: Now supports hybrid **Block-wise Cache + Calibrator** schemes (e.g., DBCache or DBPrune + TaylorSeerCalibrator). DBCache or DBPrune acts as the **Indicator** to decide *when* to cache, while the Calibrator decides *how* to cache. More mainstream cache acceleration algorithms (e.g., FoCa) will be supported in the future, along with additional benchmarks—stay tuned for updates!
|
|
95
|
+
- **[🤗Diffusers Ecosystem Integration](https://huggingface.co/docs/diffusers/main/en/optimization/cache_dit)**: 🔥**cache-dit** has joined the Diffusers community ecosystem as the **first** DiT-specific cache acceleration framework! Check out the documentation here: <a href="https://huggingface.co/docs/diffusers/main/en/optimization/cache_dit"><img src=https://img.shields.io/badge/🤗Diffusers-ecosystem-yellow.svg ></a>
|
|
96
|
+
|
|
97
|
+

|
|
98
|
+
|
|
99
|
+
## 🔥 Supported DiTs
|
|
100
|
+
|
|
101
|
+
> [!Tip]
|
|
102
|
+
> One **Model Series** may contain **many** pipelines. cache-dit applies optimizations at the **Transformer** level; thus, any pipeline that includes a supported transformer is already supported by cache-dit. ✅: known to work and officially supported now; ✖️: not officially supported now, but may be supported in the future; **4-bits**: w/ nunchaku + svdq int4.
|
|
103
|
+
|
|
104
|
+
<div align="center">
|
|
105
|
+
|
|
106
|
+
| 📚Model | Cache | CP | TP | 📚Model | Cache | CP | TP |
|
|
107
|
+
|:---|:---|:---|:---|:---|:---|:---|:---|
|
|
108
|
+
| **🎉[FLUX.1](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✅ | **🎉[FLUX.1 4-bits](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✖️ |
|
|
109
|
+
| **🎉[Qwen-Image](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✅ | **🎉[Qwen-Image 4-bits](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✖️ |
|
|
110
|
+
| **🎉[Qwen...Lightning](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✅ | **🎉[Qwen...Lightning 4-bits](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✖️ |
|
|
111
|
+
| **🎉[CogVideoX](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✖️ | **🎉[OmniGen](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ |
|
|
112
|
+
| **🎉[Wan 2.1](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✅ | **🎉[PixArt](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✖️ |
|
|
113
|
+
| **🎉[Wan 2.2](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✅ | **🎉[CogVideoX 1.5](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✖️ |
|
|
114
|
+
| **🎉[HunyuanVideo](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✅ | **🎉[Sana](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ |
|
|
115
|
+
| **🎉[LTX](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✖️ | **🎉[VisualCloze](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✅ |
|
|
116
|
+
| **🎉[Allegro](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ | **🎉[AuraFlow](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ |
|
|
117
|
+
| **🎉[CogView4](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✖️ | **🎉[ShapE](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ |
|
|
118
|
+
| **🎉[CogView3Plus](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✖️ | **🎉[Chroma](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✅ |
|
|
119
|
+
| **🎉[Cosmos](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ | **🎉[HiDream](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ |
|
|
120
|
+
| **🎉[EasyAnimate](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ | **🎉[HunyuanDiT](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ |
|
|
121
|
+
| **🎉[SkyReelsV2](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ | **🎉[HunyuanDiTPAG](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ |
|
|
122
|
+
| **🎉[StableDiffusion3](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ | **🎉[Kandinsky5](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✅️ |
|
|
123
|
+
| **🎉[ConsisID](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✖️ | **🎉[PRX](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ |
|
|
124
|
+
| **🎉[DiT](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✖️ | **🎉[HunyuanImage](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✅ | ✅ |
|
|
125
|
+
| **🎉[Amused](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ | **🎉[LongCatVideo](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ |
|
|
126
|
+
| **🎉[StableAudio](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ | **🎉[Bria](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ |
|
|
127
|
+
| **🎉[Mochi](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ | **🎉[Lumina](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline)** | ✅ | ✖️ | ✖️ |
|
|
128
|
+
|
|
129
|
+
</div>
|
|
130
|
+
|
|
131
|
+
<details align='center'>
|
|
132
|
+
<summary>🔥<b>Click</b> here to show many <b>Image/Video</b> cases🔥</summary>
|
|
133
|
+
|
|
134
|
+
<p align='center'>
|
|
135
|
+
🎉Now, cache-dit covers almost All Diffusers' DiT Pipelines🎉 <br>
|
|
136
|
+
🔥<a href="./examples/pipeline">Qwen-Image</a> | <a href="./examples/pipeline">Qwen-Image-Edit</a> | <a href="./examples/pipeline">Qwen-Image-Edit-Plus </a> 🔥<br>
|
|
67
137
|
🔥<a href="./examples/pipeline">FLUX.1</a> | <a href="./examples/pipeline">Qwen-Image-Lightning 4/8 Steps</a> | <a href="./examples/pipeline"> Wan 2.1 </a> | <a href="./examples/pipeline"> Wan 2.2 </a>🔥<br>
|
|
68
138
|
🔥<a href="./examples/pipeline">HunyuanImage-2.1</a> | <a href="./examples/pipeline">HunyuanVideo</a> | <a href="./examples/pipeline">HunyuanDiT</a> | <a href="./examples/pipeline">HiDream</a> | <a href="./examples/pipeline">AuraFlow</a>🔥<br>
|
|
69
139
|
🔥<a href="./examples/pipeline">CogView3Plus</a> | <a href="./examples/pipeline">CogView4</a> | <a href="./examples/pipeline">LTXVideo</a> | <a href="./examples/pipeline">CogVideoX</a> | <a href="./examples/">CogVideoX 1.5</a> | <a href="./examples/">ConsisID</a>🔥<br>
|
|
70
140
|
🔥<a href="./examples/pipeline">Cosmos</a> | <a href="./examples/pipeline">SkyReelsV2</a> | <a href="./examples/pipeline">VisualCloze</a> | <a href="./examples/pipeline">OmniGen 1/2</a> | <a href="./examples/pipeline">Lumina 1/2</a> | <a href="./examples/pipeline">PixArt</a>🔥<br>
|
|
71
141
|
🔥<a href="./examples/pipeline">Chroma</a> | <a href="./examples/pipeline">Sana</a> | <a href="./examples/pipeline">Allegro</a> | <a href="./examples/pipeline">Mochi</a> | <a href="./examples/pipeline">SD 3/3.5</a> | <a href="./examples/pipeline">Amused</a> | <a href="./examples/pipeline"> ... </a> | <a href="./examples/pipeline">DiT-XL</a>🔥
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
</div>
|
|
75
|
-
|
|
142
|
+
</p>
|
|
143
|
+
|
|
76
144
|
<div align='center'>
|
|
77
145
|
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/gifs/wan2.2.C0_Q0_NONE.gif width=124px>
|
|
78
146
|
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/gifs/wan2.2.C1_Q0_DBCACHE_F1B0_W2M8MC2_T1O2_R0.08.gif width=124px>
|
|
@@ -95,10 +163,6 @@ Dynamic: requires-python
|
|
|
95
163
|
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/qwen-image-edit.C0_L0_Q0_DBCACHE_F1B0_W8M0MC2_T0O2_R0.12_S24.png width=125px>
|
|
96
164
|
<p><b>🔥Qwen-Image-Edit</b> | Input w/o Edit | Baseline | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:1.6x↑🎉 | 1.9x↑🎉 </p>
|
|
97
165
|
</div>
|
|
98
|
-
|
|
99
|
-
<details align='center'>
|
|
100
|
-
<summary>🔥<b>Click</b> here to show many <b>Image/Video</b> cases🔥</summary>
|
|
101
|
-
|
|
102
166
|
<div align='center'>
|
|
103
167
|
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/flux-kontext-cat.C0_L0_Q0_NONE.png width=100px>
|
|
104
168
|
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/flux-kontext.C0_L0_Q0_NONE.png width=100px>
|
|
@@ -168,87 +232,35 @@ Dynamic: requires-python
|
|
|
168
232
|
|
|
169
233
|
</details>
|
|
170
234
|
|
|
171
|
-
##
|
|
172
|
-
|
|
173
|
-
We are excited to announce that the **first API-stable version (v1.0.0)** of cache-dit has finally been released!
|
|
174
|
-
**[cache-dit](https://github.com/vipshop/cache-dit)** is a **Unified**, **Flexible**, and **Training-free** cache acceleration framework for 🤗 Diffusers, enabling cache acceleration with just **one line** of code. Key features include **Unified Cache APIs**, **Forward Pattern Matching**, **Automatic Block Adapter**, **Hybrid Forward Pattern**, **DBCache**, **TaylorSeer Calibrator**, and **Cache CFG**.
|
|
175
|
-
|
|
176
|
-
```bash
|
|
177
|
-
pip3 install -U cache-dit # pip3 install git+https://github.com/vipshop/cache-dit.git
|
|
178
|
-
```
|
|
179
|
-
You can install the stable release of cache-dit from PyPI, or the latest development version from GitHub. Then try ♥️ Cache Acceleration with just **one line** of code ~ ♥️
|
|
180
|
-
```python
|
|
181
|
-
>>> import cache_dit
|
|
182
|
-
>>> from diffusers import DiffusionPipeline
|
|
183
|
-
>>> pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image") # Can be any diffusion pipeline
|
|
184
|
-
>>> cache_dit.enable_cache(pipe) # One-line code with default cache options.
|
|
185
|
-
>>> output = pipe(...) # Just call the pipe as normal.
|
|
186
|
-
>>> stats = cache_dit.summary(pipe) # Then, get the summary of cache acceleration stats.
|
|
187
|
-
>>> cache_dit.disable_cache(pipe) # Disable cache and run original pipe.
|
|
188
|
-
```
|
|
189
|
-
|
|
190
|
-
### 📚Core Features
|
|
191
|
-
|
|
192
|
-
- **[🎉Full 🤗Diffusers Support](./docs/User_Guide.md#supported-pipelines)**: Notably, **[cache-dit](https://github.com/vipshop/cache-dit)** now supports nearly **all** of Diffusers' **DiT-based** pipelines, such as Qwen-Image, FLUX.1, Qwen-Image-Lightning, HunyuanImage-2.1, HunyuanVideo, HunyuanDiT, Wan 2.1/2.2, HiDream, AuraFlow, CogView3Plus, CogView4, LTXVideo, CogVideoX 1.5, ConsisID, SkyReelsV2, VisualCloze, OmniGen, Lumina, PixArt, Chroma, Sana, Allegro, Mochi, SD 3.5, Amused, and DiT-XL.
|
|
193
|
-
- **[🎉Extremely Easy to Use](./docs/User_Guide.md#unified-cache-apis)**: In most cases, you only need **one line** of code: `cache_dit.enable_cache(...)`. After calling this API, just use the pipeline as normal.
|
|
194
|
-
- **[🎉Easy New Model Integration](./docs/User_Guide.md#automatic-block-adapter)**: Features like **Unified Cache APIs**, **Forward Pattern Matching**, **Automatic Block Adapter**, **Hybrid Forward Pattern**, and **Patch Functor** make it highly functional and flexible. For example, we achieved 🎉 Day 1 support for [HunyuanImage-2.1](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1) with 1.7x speedup w/o precision loss—even before it was available in the Diffusers library.
|
|
195
|
-
- **[🎉State-of-the-Art Performance](./bench/)**: Compared with algorithms including Δ-DiT, Chipmunk, FORA, DuCa, TaylorSeer and FoCa, cache-dit achieves the best accuracy when the speedup ratio is below 4x.
|
|
196
|
-
- **[🎉Support for 4/8-Steps Distilled Models](./bench/)**: Surprisingly, cache-dit's **DBCache** works for extremely few-step distilled models—something many other methods fail to do.
|
|
197
|
-
- **[🎉Compatibility with Other Optimizations](./docs/User_Guide.md#️torch-compile)**: Designed to work seamlessly with torch.compile, model CPU offload, sequential CPU offload, group offloading, etc.
|
|
198
|
-
- **[🎉Hybrid Cache Acceleration](./docs/User_Guide.md#taylorseer-calibrator)**: Now supports hybrid **DBCache + Calibrator** schemes (e.g., DBCache + TaylorSeerCalibrator). DBCache acts as the **Indicator** to decide *when* to cache, while the Calibrator decides *how* to cache. More mainstream cache acceleration algorithms (e.g., FoCa) will be supported in the future, along with additional benchmarks—stay tuned for updates!
|
|
199
|
-
- **[🤗Diffusers Ecosystem Integration](https://huggingface.co/docs/diffusers/main/en/optimization/cache_dit)**: 🔥**cache-dit** has joined the Diffusers community ecosystem as the **first** DiT-specific cache acceleration framework! Check out the documentation here: <a href="https://huggingface.co/docs/diffusers/main/en/optimization/cache_dit"><img src=https://img.shields.io/badge/🤗Diffusers-ecosystem-yellow.svg ></a>
|
|
200
|
-
|
|
201
|
-

|
|
202
|
-
|
|
203
|
-
## 🔥Important News
|
|
204
|
-
|
|
205
|
-
- 2025.10.10: 🔥[**Qwen-Image-ControlNet-Inpainting**](https://huggingface.co/InstantX/Qwen-Image-ControlNet-Inpainting) **2.3x↑🎉** speedup! Check the [example](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_qwen_image_controlnet_inpaint.py).
|
|
206
|
-
- 2025.09.26: 🔥[**Qwen-Image-Edit-Plus(2509)**](https://github.com/QwenLM/Qwen-Image) **2.1x↑🎉** speedup! Please check the [example](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_qwen_image_edit_plus.py).
|
|
207
|
-
- 2025.09.25: 🎉The **first API-stable version (v1.0.0)** of cache-dit has finally been released!
|
|
208
|
-
- 2025.09.25: 🔥**cache-dit** has joined the Diffusers community ecosystem: <a href="https://huggingface.co/docs/diffusers/main/en/optimization/cache_dit"><img src=https://img.shields.io/badge/🤗Diffusers-ecosystem-yellow.svg ></a>
|
|
209
|
-
- 2025.09.10: 🎉Day 1 support [**HunyuanImage-2.1**](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1) with **1.7x↑🎉** speedup! Check this [example](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_hunyuan_image_2.1.py).
|
|
210
|
-
- 2025.09.08: 🔥[**Qwen-Image-Lightning**](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_qwen_image_lightning.py) **7.1/3.5 steps🎉** inference with **[DBCache: F16B16](https://github.com/vipshop/cache-dit)**.
|
|
211
|
-
- 2025.09.03: 🎉[**Wan2.2-MoE**](https://github.com/Wan-Video) **2.4x↑🎉** speedup! Please refer to [run_wan_2.2.py](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_wan_2.2.py) as an example.
|
|
212
|
-
- 2025.08.19: 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x↑🎉** speedup! Check the example: [run_qwen_image_edit.py](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_qwen_image_edit.py).
|
|
213
|
-
- 2025.08.11: 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x↑🎉** speedup! Please refer to [run_qwen_image.py](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_qwen_image.py) as an example.
|
|
214
|
-
|
|
215
|
-
<details>
|
|
216
|
-
<summary>Previous News</summary>
|
|
217
|
-
|
|
218
|
-
- 2025.09.08: 🎉First caching mechanism in [Wan2.2](https://github.com/Wan-Video/Wan2.2) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/Wan-Video/Wan2.2/pull/127) for more details.
|
|
219
|
-
- 2025.09.08: 🎉First caching mechanism in [Qwen-Image-Lightning](https://github.com/ModelTC/Qwen-Image-Lightning) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/ModelTC/Qwen-Image-Lightning/pull/35).
|
|
220
|
-
- 2025.08.10: 🔥[**FLUX.1-Kontext-dev**](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) is supported! Please refer to [run_flux_kontext.py](https://github.com/vipshop/cache-dit/blob/main/examples/pipeline/run_flux_kontext.py) as an example.
|
|
221
|
-
- 2025.08.12: 🎉First caching mechanism in [QwenLM/Qwen-Image](https://github.com/QwenLM/Qwen-Image) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/QwenLM/Qwen-Image/pull/61).
|
|
222
|
-
- 2025.07.18: 🎉First caching mechanism in [🤗huggingface/flux-fast](https://github.com/huggingface/flux-fast) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check the [PR](https://github.com/huggingface/flux-fast/pull/13).
|
|
223
|
-
- 2025.07.13: 🎉[**FLUX.1-dev**](https://github.com/xlite-dev/flux-faster) **3.3x↑🎉** speedup! NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)** + **compile + FP8 DQ**.
|
|
224
|
-
|
|
225
|
-
</details>
|
|
226
|
-
|
|
227
|
-
## 📚User Guide
|
|
235
|
+
## 📖Table of Contents
|
|
228
236
|
|
|
229
237
|
<div id="user-guide"></div>
|
|
230
238
|
|
|
231
|
-
For more advanced features such as **Unified Cache APIs**, **Forward Pattern Matching**, **Automatic Block Adapter**, **Hybrid Forward Pattern**, **Patch Functor**, **DBCache**, **TaylorSeer Calibrator**,
|
|
239
|
+
For more advanced features such as **Unified Cache APIs**, **Forward Pattern Matching**, **Automatic Block Adapter**, **Hybrid Forward Pattern**, **Patch Functor**, **DBCache**, **DBPrune**, **TaylorSeer Calibrator**, **Hybrid Cache CFG**, **Context Parallelism** and **Tensor Parallelism**, please refer to the [🎉User_Guide.md](./docs/User_Guide.md) for details.
|
|
232
240
|
|
|
233
241
|
- [⚙️Installation](./docs/User_Guide.md#️installation)
|
|
242
|
+
- [🔥Supported DiTs](./docs/User_Guide.md#supported)
|
|
234
243
|
- [🔥Benchmarks](./docs/User_Guide.md#benchmarks)
|
|
235
|
-
- [🔥Supported Pipelines](./docs/User_Guide.md#supported-pipelines)
|
|
236
244
|
- [🎉Unified Cache APIs](./docs/User_Guide.md#unified-cache-apis)
|
|
237
245
|
- [📚Forward Pattern Matching](./docs/User_Guide.md#forward-pattern-matching)
|
|
238
246
|
- [📚Cache with One-line Code](./docs/User_Guide.md#%EF%B8%8Fcache-acceleration-with-one-line-code)
|
|
239
247
|
- [🔥Automatic Block Adapter](./docs/User_Guide.md#automatic-block-adapter)
|
|
240
|
-
- [📚
|
|
248
|
+
- [📚Hybrid Forward Pattern](./docs/User_Guide.md#hybrid-forward-pattern)
|
|
241
249
|
- [📚Implement Patch Functor](./docs/User_Guide.md#implement-patch-functor)
|
|
250
|
+
- [📚Transformer-Only Interface](./docs/User_Guide.md#transformer-only-interface)
|
|
251
|
+
- [📚How to use ParamsModifier](./docs/User_Guide.md#how-to-use-paramsmodifier)
|
|
242
252
|
- [🤖Cache Acceleration Stats](./docs/User_Guide.md#cache-acceleration-stats-summary)
|
|
243
|
-
- [⚡️Dual Block Cache](./docs/User_Guide.md#️dbcache-dual-block-cache)
|
|
244
|
-
- [
|
|
253
|
+
- [⚡️DBCache: Dual Block Cache](./docs/User_Guide.md#️dbcache-dual-block-cache)
|
|
254
|
+
- [⚡️DBPrune: Dynamic Block Prune](./docs/User_Guide.md#️dbprune-dynamic-block-prune)
|
|
245
255
|
- [⚡️Hybrid Cache CFG](./docs/User_Guide.md#️hybrid-cache-cfg)
|
|
246
|
-
- [
|
|
256
|
+
- [🔥Hybrid TaylorSeer Calibrator](./docs/User_Guide.md#taylorseer-calibrator)
|
|
257
|
+
- [⚡️Hybrid Context Parallelism](./docs/User_Guide.md#context-parallelism)
|
|
258
|
+
- [⚡️Hybrid Tensor Parallelism](./docs/User_Guide.md#tensor-parallelism)
|
|
259
|
+
- [🤖Low-bits Quantization](./docs/User_Guide.md#quantization)
|
|
260
|
+
- [🛠Metrics Command Line](./docs/User_Guide.md#metrics-cli)
|
|
247
261
|
- [⚙️Torch Compile](./docs/User_Guide.md#️torch-compile)
|
|
248
262
|
- [📚API Documents](./docs/User_Guide.md#api-documentation)
|
|
249
263
|
|
|
250
|
-
|
|
251
|
-
|
|
252
264
|
## 👋Contribute
|
|
253
265
|
<div id="contribute"></div>
|
|
254
266
|
|
|
@@ -267,15 +279,10 @@ How to contribute? Star ⭐️ this repo to support us or check [CONTRIBUTE.md](
|
|
|
267
279
|
|
|
268
280
|
## 🎉Projects Using CacheDiT
|
|
269
281
|
|
|
270
|
-
Here is a curated list of open-source projects integrating **CacheDiT**, including popular repositories like [jetson-containers](https://github.com/dusty-nv/jetson-containers/blob/master/packages/diffusion/cache_edit/build.sh) , [flux-fast](https://github.com/huggingface/flux-fast) , and [sdnext](https://github.com/vladmandic/sdnext/
|
|
271
|
-
|
|
272
|
-
## ©️Acknowledgements
|
|
282
|
+
Here is a curated list of open-source projects integrating **CacheDiT**, including popular repositories like [jetson-containers](https://github.com/dusty-nv/jetson-containers/blob/master/packages/diffusion/cache_edit/build.sh) , [flux-fast](https://github.com/huggingface/flux-fast) , and [sdnext](https://github.com/vladmandic/sdnext/discussions/4269) . 🎉**CacheDiT** has been **recommended** by: [Wan2.2](https://github.com/Wan-Video/Wan2.2) , [Qwen-Image-Lightning](https://github.com/ModelTC/Qwen-Image-Lightning) , [Qwen-Image](https://github.com/QwenLM/Qwen-Image) , [LongCat-Video](https://github.com/meituan-longcat/LongCat-Video) , [Kandinsky-5](https://github.com/ai-forever/Kandinsky-5) , <a href="https://huggingface.co/docs/diffusers/main/en/optimization/cache_dit"><img src="https://img.shields.io/badge/🤗Diffusers-ecosystem-yellow.svg"></a> , [<a href="https://hellogithub.com/repository/vipshop/cache-dit" target="_blank"><img src="https://api.hellogithub.com/v1/widgets/recommend.svg?rid=b8b03b3b32a449ea84cfc2b96cd384f3&claim_uid=ofSCbzTmdeQk3FD&theme=small" alt="Featured|HelloGitHub" /></a>](https://hellogithub.com/repository/vipshop/cache-dit) , among others.
|
|
273
283
|
|
|
274
|
-
<div id="Acknowledgements"></div>
|
|
275
284
|
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
## ©️Special Acknowledgements
|
|
285
|
+
## ©️Acknowledgements
|
|
279
286
|
|
|
280
287
|
Special thanks to vipshop's Computer Vision AI Team for supporting the documentation, testing, and production-level deployment of this project.
|
|
281
288
|
|
|
@@ -285,10 +292,10 @@ Special thanks to vipshop's Computer Vision AI Team for supporting document, tes
|
|
|
285
292
|
|
|
286
293
|
```BibTeX
|
|
287
294
|
@misc{cache-dit@2025,
|
|
288
|
-
title={cache-dit: A Unified
|
|
295
|
+
title={cache-dit: A Unified and Flexible Inference Engine with Hybrid Cache Acceleration and Parallelism for Diffusers.},
|
|
289
296
|
url={https://github.com/vipshop/cache-dit.git},
|
|
290
297
|
note={Open-source software available at https://github.com/vipshop/cache-dit.git},
|
|
291
|
-
author={vipshop.com},
|
|
298
|
+
author={DefTruth and {vipshop.com}},
|
|
292
299
|
year={2025}
|
|
293
300
|
}
|
|
294
301
|
```
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
cache_dit/__init__.py,sha256=o2p2odulRXB24ZZb4zE0Rn1O085lRu59sO7BEVCPOOs,2025
|
|
2
|
+
cache_dit/_version.py,sha256=6M51k-eDUkvbg1O0UDaNPjBRHmqrs5f127zkdrldxjg,706
|
|
3
|
+
cache_dit/logger.py,sha256=0zsu42hN-3-rgGC_C29ms1IvVpV4_b4_SwJCKSenxBE,4304
|
|
4
|
+
cache_dit/summary.py,sha256=ZSmmpEEV_yKT6d86rqEIwaQNni1WgOEVdbg1o88-7lI,22159
|
|
5
|
+
cache_dit/utils.py,sha256=FeprLDwxnlWWMqBS5ZKRgqJrORkCn64oTqRVNjHYLJM,1846
|
|
6
|
+
cache_dit/caching/.gitignore,sha256=5Cb-qT9wsTUoMJ7vACDF7ZcLpAXhi5v-xdcWSRit988,23
|
|
7
|
+
cache_dit/caching/__init__.py,sha256=47Im7unxC6gB2vFjFgbY-qHuJ-GOPKT6nU3ZkTGUA74,1747
|
|
8
|
+
cache_dit/caching/cache_interface.py,sha256=OWMv6K8jpSFNc9kjdEim_XdFTn0x5ygUKfM-S_pOtZ8,18058
|
|
9
|
+
cache_dit/caching/cache_types.py,sha256=QnWfaS52UOXQtnoCUOwwz4ziY0dyBta6vQ6hvgtdV44,1404
|
|
10
|
+
cache_dit/caching/forward_pattern.py,sha256=DAnldDC-B_FOMK8REtX2hx8mLZ9GLe1UWYfkxzyyMgo,2197
|
|
11
|
+
cache_dit/caching/params_modifier.py,sha256=4GDKkZmol53j9ICZnbbkuTSAjO2p63KFH3hTE_fahBM,3588
|
|
12
|
+
cache_dit/caching/utils.py,sha256=0K2DSvbhLJgajI9tHtwjaYH_YKAUWK8PY74wPiLRC_s,2473
|
|
13
|
+
cache_dit/caching/block_adapters/__init__.py,sha256=duiVXQB8vasnC15PdGc9yTYQvi-NOjmb3MstmjpbX50,22254
|
|
14
|
+
cache_dit/caching/block_adapters/block_adapters.py,sha256=H5BuIm5_1Cy2aHjSu56WQhrB2VozUfeARiWdMpxBHSk,25267
|
|
15
|
+
cache_dit/caching/block_adapters/block_registers.py,sha256=L-PYmh2jz-D8b-rBznm2lC0k6u9Tpo6u68lXJXM86Cw,3821
|
|
16
|
+
cache_dit/caching/cache_adapters/__init__.py,sha256=IRO0IigSRxxwLccCBNLCJuQMtZDZo9WUikFAtn8LUFE,73
|
|
17
|
+
cache_dit/caching/cache_adapters/cache_adapter.py,sha256=1xeFMybrlSQycw_Slq9rZ2RcwJXkqDvvPp3DpST44Wg,25654
|
|
18
|
+
cache_dit/caching/cache_blocks/__init__.py,sha256=FKKGZ_jfFadet4k9jXejCqp1dhS7sIHBGfyWBtlPeXU,9924
|
|
19
|
+
cache_dit/caching/cache_blocks/offload_utils.py,sha256=wusgcqaCrwEjvv7Guy-6VXhNOgPPUrBV2sSVuRmGuvo,3513
|
|
20
|
+
cache_dit/caching/cache_blocks/pattern_0_1_2.py,sha256=bzSVkxFa2uvtNU47GC-poWR6l74i597hIhXjX9u6AGU,654
|
|
21
|
+
cache_dit/caching/cache_blocks/pattern_3_4_5.py,sha256=g-4WStwVJIRF4gj_71UX6Zt0UqHKv42K-Zf4f8cw6Pc,19767
|
|
22
|
+
cache_dit/caching/cache_blocks/pattern_base.py,sha256=ZaLeyFZDTE1qyV-n6bZNLV50-KPxMNxbEYjhAl7VuTQ,27832
|
|
23
|
+
cache_dit/caching/cache_blocks/pattern_utils.py,sha256=AEuWRQxbR2p6yIUwblAT_kQokqIBgbCEjnJXjyT6YMA,3106
|
|
24
|
+
cache_dit/caching/cache_contexts/__init__.py,sha256=OOiCta6yCWr7NIwj45_ERY8SFdDS_m4usubHM4Pgq8A,805
|
|
25
|
+
cache_dit/caching/cache_contexts/cache_config.py,sha256=7lKrCg6v9scWTV59kulfIYuJkdHYTRw9jhiBlfLdN9w,5947
|
|
26
|
+
cache_dit/caching/cache_contexts/cache_context.py,sha256=kHLAyWK9-jC8Up_y6fgcdJR_NU92L6QQ3oeBs8I8Cu4,11406
|
|
27
|
+
cache_dit/caching/cache_contexts/cache_manager.py,sha256=L7hQbyEqf5TZMJ9aCP81NxDlNHs4ASf8XEZ-09DcVH4,35386
|
|
28
|
+
cache_dit/caching/cache_contexts/context_manager.py,sha256=7Q3D5eQperAWaEuVrlwkCMDSCjqdgzSOtSta1aJ0ZYM,1059
|
|
29
|
+
cache_dit/caching/cache_contexts/prune_config.py,sha256=OdMEs_nBO3SDq8vC4JcF4LyINws2QWxFZOkh9N53ScQ,3204
|
|
30
|
+
cache_dit/caching/cache_contexts/prune_context.py,sha256=HlPARnwrRQmkgdcsAYSm7_nhe8X7-dQYrIxbyVW64x0,6341
|
|
31
|
+
cache_dit/caching/cache_contexts/prune_manager.py,sha256=KOn6h2IJmM5GUMUzCNjM6xgmaABzQ_zIV-BuAxL0bBM,6111
|
|
32
|
+
cache_dit/caching/cache_contexts/calibrators/__init__.py,sha256=Kgyy93ftAaJp-GJ1XtJqQ9AaN8rlaMCHcFLwDeKZhtI,6251
|
|
33
|
+
cache_dit/caching/cache_contexts/calibrators/base.py,sha256=mn6ZBkChGpGwN5csrHTUGMoX6BBPvqHXSLbIExiW-EU,748
|
|
34
|
+
cache_dit/caching/cache_contexts/calibrators/foca.py,sha256=dh0LsV_y9RkDxmcL_VaUYTEylkzSuqlBHKUKnSxTGZU,885
|
|
35
|
+
cache_dit/caching/cache_contexts/calibrators/taylorseer.py,sha256=lN2y_baGCG-aRed_xTVjybrOyCH4YivKpQbxQUMHT24,6003
|
|
36
|
+
cache_dit/caching/patch_functors/__init__.py,sha256=mP81SWtlqUGjLMLXIpAN7_R0GW_RZBVv_Rr01IB0v7k,610
|
|
37
|
+
cache_dit/caching/patch_functors/functor_base.py,sha256=Ahk0fTfrHgNdEl-9JSkACvfyyv9G-Ei5OSz7XBIlX5o,357
|
|
38
|
+
cache_dit/caching/patch_functors/functor_chroma.py,sha256=BNhFkSQe8PCZ6aK68xQYcosIeDByxxRF-e5ZE7NZXHg,13424
|
|
39
|
+
cache_dit/caching/patch_functors/functor_dit.py,sha256=wbo8cQZbgtLFAHHFZb5bXMQ572jr9UK7VmVl-MsG_wM,3898
|
|
40
|
+
cache_dit/caching/patch_functors/functor_flux.py,sha256=xyPY2FJ1wy1UJfgX_MVBCJvmH2hydNjHbeyRFr2N39Y,9553
|
|
41
|
+
cache_dit/caching/patch_functors/functor_hidream.py,sha256=OO3_cVTGbG1VKnCHU-nu9k6VMpQKa-oQWVghT64FuyM,15303
|
|
42
|
+
cache_dit/caching/patch_functors/functor_hunyuan_dit.py,sha256=mrSpxbdUY7RY_EEH4X22RGHI00BeTUFOL_-G33vTyBA,6395
|
|
43
|
+
cache_dit/caching/patch_functors/functor_qwen_image_controlnet.py,sha256=FxPcD_fCdJ-9AFn2EMbI32oi61r6Y-3A4W_U-TbCTLA,10513
|
|
44
|
+
cache_dit/compile/__init__.py,sha256=FcTVzCeyypl-mxlc59_ehHL3lBNiDAFsXuRoJ-5Cfi0,56
|
|
45
|
+
cache_dit/compile/utils.py,sha256=nN2OIrSdwRR5zGxJinKDqb07pXpvTNTF3g_OgLkeeBU,3858
|
|
46
|
+
cache_dit/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
|
+
cache_dit/kernels/triton_taylorseer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
48
|
+
cache_dit/metrics/__init__.py,sha256=Y_JrBr9XE6NKXwyXc7d_-PaX9c_rk5FKms-IYgCyHmY,936
|
|
49
|
+
cache_dit/metrics/clip_score.py,sha256=ERNCFQFJKzJdbIX9OAg-1LiSPuXUVHLOFxbf2gcENpc,3938
|
|
50
|
+
cache_dit/metrics/config.py,sha256=ieOgD9ayz722RjVzk24bSIqS2D6o7TZjGk8KeXV-OLQ,551
|
|
51
|
+
cache_dit/metrics/fid.py,sha256=ZM_FM0XERtpnkMUfphmw2aOdljrh1uba-pnYItu0q6M,18219
|
|
52
|
+
cache_dit/metrics/image_reward.py,sha256=N8HalJo1T1js0dsNb2V1KRv4kIdcm3nhx7iOXJuqcns,5421
|
|
53
|
+
cache_dit/metrics/inception.py,sha256=pBVe2X6ylLPIXTG4-GWDM9DWnCviMJbJ45R3ulhktR0,12759
|
|
54
|
+
cache_dit/metrics/lpips.py,sha256=hrHrmdM-f2B4TKDs0xLqJO5JFaYcCjq2qNIR8oCrVkc,811
|
|
55
|
+
cache_dit/metrics/metrics.py,sha256=AZbQyoavE-djvyRUZ_EfCIrWSQbiWQFo7n2dhn7XptE,40466
|
|
56
|
+
cache_dit/parallelism/__init__.py,sha256=dheBG5_TZCuwctviMslpAEgB-B3N8F816bE51qsw_fU,210
|
|
57
|
+
cache_dit/parallelism/parallel_backend.py,sha256=XFmv8GmAKfadJQomUbAgox9aJQxmNxdlPhVE6nJBn0s,939
|
|
58
|
+
cache_dit/parallelism/parallel_config.py,sha256=kgpysG1lKCJ6Wd3llQFSmiY_Hdorv2wTfm2Xcj6qec0,3738
|
|
59
|
+
cache_dit/parallelism/parallel_interface.py,sha256=2OtZiXmpQbSchzH_MKUwMr1iDB3gj1sklQJGA_Kc04I,2664
|
|
60
|
+
cache_dit/parallelism/backends/native_diffusers/__init__.py,sha256=rpYu52_ATmsBpztWSzpwpYvhsQ0ABJe-KyP4UyawgZ8,237
|
|
61
|
+
cache_dit/parallelism/backends/native_diffusers/parallel_difffusers.py,sha256=CY53v3lc36o8HXdc42xc5c0wsKdpDiswNDM3kLAuW-0,1607
|
|
62
|
+
cache_dit/parallelism/backends/native_diffusers/utils.py,sha256=VAqtv9b8PTvcoYzD_CbvtRgdg9_VbtBug_5L38PADl0,266
|
|
63
|
+
cache_dit/parallelism/backends/native_diffusers/context_parallelism/__init__.py,sha256=oOCVeisY7K8JPw5JaGmqhoZZslFDAsYC5oNgbTmt1vU,6228
|
|
64
|
+
cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_chroma.py,sha256=r1LbdyI26sY1Ig5sX24qtUNdk5NfX-DLP0Po2_cCR78,3899
|
|
65
|
+
cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_cogvideox.py,sha256=QKS0xN62N7mTlJIutuOoiuYW9vkjmwrjZM8quPbaDKw,7812
|
|
66
|
+
cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_cogview.py,sha256=fsifJF1Nlh96YVVYk9qa4q7xvno3gqWncpZkrMmCQZY,12403
|
|
67
|
+
cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_cosisid.py,sha256=ry0zmbbVFiu_sBXXGp4XCO9rzZTxJ9dORFVqXIfLUR8,5248
|
|
68
|
+
cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_dit.py,sha256=jrrSciQUYP4T4bc7BuXVt6ha1VJC-L3jrcNywVHIRng,3346
|
|
69
|
+
cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_flux.py,sha256=yaGCqywRGjd_UL8FtwnEbX3NmM7z2XPhMeo3TNfI7mA,3690
|
|
70
|
+
cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_hunyuan.py,sha256=2VcObE9F25tF9NJNTudcQX8nYediF3CvF2GYE85Lm60,28628
|
|
71
|
+
cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_ltxvideo.py,sha256=tzIx3MOMGuO2hVIhaM_z6IXRdYABX4sjfqd5slhRCVc,10470
|
|
72
|
+
cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_nunchaku.py,sha256=VvE4tzKqLU5h4CxyFT1NG2xamcDy11iQQS_NsbYrEbA,16712
|
|
73
|
+
cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_pixart.py,sha256=vftDDzQ62PXCl3cji8uFV9OpQSfX0aP7X0QlNW1mJzM,10872
|
|
74
|
+
cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_qwen_image.py,sha256=XbNE40TMC_RptsXJXIGEWTPTlccQPG5z0oLGWhQKlYE,4314
|
|
75
|
+
cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_registers.py,sha256=W3Uh71YMxwqUqJZp8wrDN_E1Xcxi5UNRKJefPjFm15g,2713
|
|
76
|
+
cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_wan.py,sha256=DZlt3r04KfCpFy43JXO2OTDNqsgHW282mgBh5ViL6yc,3977
|
|
77
|
+
cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_planners.py,sha256=_dTHUdlhVunZOwidDN5zqHtrwLxv6ykZW9TApMTcaqA,5206
|
|
78
|
+
cache_dit/parallelism/backends/native_diffusers/context_parallelism/attention/__init__.py,sha256=6jIdEQGxC3I97MjLmxkXr3bp6QhzGQ7nId-PzA9ffbg,246
|
|
79
|
+
cache_dit/parallelism/backends/native_diffusers/context_parallelism/attention/_attention_dispatch.py,sha256=a6_9PDfBfhQo3fkRpB7y6vDK1vHAmkHTrX3Z0WZ29og,10411
|
|
80
|
+
cache_dit/parallelism/backends/native_pytorch/__init__.py,sha256=EAzdGEP47-y9w6uyPisvDjCF3TE03ZCdhalAY89p-ZA,226
|
|
81
|
+
cache_dit/parallelism/backends/native_pytorch/parallel_torch.py,sha256=1A-Gdv6JRueZNpwJdrVxRT7TBxwo2Bn-CwMbnlhJx0M,2047
|
|
82
|
+
cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/__init__.py,sha256=M-nEf6a22UeoIjZEhIajpUpGSQzWiNn_zmWiBNU70Fs,1662
|
|
83
|
+
cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_flux.py,sha256=yiZeyFigd3kpHLMGQ6tt3iK9CU6gHaLm3MiVZWEb4_0,6549
|
|
84
|
+
cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_kandinsky5.py,sha256=MwNzjj8nyxOzqdmyhAaeslFM5iVHBY0I2Jd9W4TjTjs,2702
|
|
85
|
+
cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_qwen_image.py,sha256=AcRCfML0bwGeVY0gnVfMQtW9GAWc5wO4ZhxWGNDCkKc,2644
|
|
86
|
+
cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_registers.py,sha256=ERgUE6RnweM7BGp9aO4jywAyMrS8KHrL1mTL7XReMkw,2099
|
|
87
|
+
cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_wan.py,sha256=j8w5gZn3MhSsacxOVguss85GSI6bdLL61FMySDZKwJc,5191
|
|
88
|
+
cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_planners.py,sha256=GbhB7-21DiNF786oGw-AeGBUISkjzEgp82Mztjkl6J0,571
|
|
89
|
+
cache_dit/quantize/__init__.py,sha256=rUu0V9VRjOgwXuIUHHAI-osivNjAdUsi-jpkDbFp6Gk,278
|
|
90
|
+
cache_dit/quantize/quantize_backend.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
91
|
+
cache_dit/quantize/quantize_config.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
92
|
+
cache_dit/quantize/quantize_interface.py,sha256=dKLrLF-FDdRLQq-3CnaRzkAh70P4oObJE3-qWF7goM0,882
|
|
93
|
+
cache_dit/quantize/backends/__init__.py,sha256=SL9EupOwBRzRcHZBI1ABqdHjCS9vEpFZXjA9R5ikTk8,33
|
|
94
|
+
cache_dit/quantize/backends/bitsandbytes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
95
|
+
cache_dit/quantize/backends/torchao/__init__.py,sha256=NK1widhJeKSx8ICCcyYZAWGUpGc2uKF5O7pLIcSCUDI,37
|
|
96
|
+
cache_dit/quantize/backends/torchao/quantize_ao.py,sha256=-a99fEp5xjEXpIgD4Hu1lBVk7bBl2XN1tsWhGlK2cN4,6718
|
|
97
|
+
cache_dit-1.0.14.dist-info/licenses/LICENSE,sha256=6LIQc3N_o1SeNXCONTZUJPO4bFIYccq-E4Qp8Dti5CQ,11438
|
|
98
|
+
cache_dit-1.0.14.dist-info/METADATA,sha256=Rhfwk2kYNDQuXKS9VnE_rgClkf1qwmP62ERGK1HqwkI,31359
|
|
99
|
+
cache_dit-1.0.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
100
|
+
cache_dit-1.0.14.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
|
|
101
|
+
cache_dit-1.0.14.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
|
|
102
|
+
cache_dit-1.0.14.dist-info/RECORD,,
|