cache-dit 0.1.1__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cache_dit-0.1.1 → cache_dit-0.1.2}/PKG-INFO +28 -24
- {cache_dit-0.1.1 → cache_dit-0.1.2}/README.md +24 -20
- cache_dit-0.1.2/assets/cache-dit.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/pyproject.toml +3 -3
- {cache_dit-0.1.1 → cache_dit-0.1.2}/setup.py +1 -1
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/_version.py +2 -2
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit.egg-info/PKG-INFO +28 -24
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit.egg-info/SOURCES.txt +1 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/.github/workflows/issue.yml +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/.gitignore +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/.pre-commit-config.yaml +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/CONTRIBUTE.md +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/LICENSE +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/MANIFEST.in +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBCACHE_F12B12S4_R0.2_S16.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBCACHE_F12B16S4_R0.08_S6.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBCACHE_F16B16S2_R0.2_S14.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBCACHE_F16B16S4_R0.2_S13.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBCACHE_F1B0S1_R0.08_S11.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBCACHE_F1B0S1_R0.2_S19.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBCACHE_F8B0S2_R0.12_S12.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBCACHE_F8B16S1_R0.2_S18.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBCACHE_F8B8S1_R0.08_S9.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBCACHE_F8B8S1_R0.12_S12.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBCACHE_F8B8S1_R0.15_S15.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBCache.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBPRUNE_F1B0_R0.03_P24.0_T19.43s.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBPRUNE_F1B0_R0.04_P34.6_T16.82s.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBPRUNE_F1B0_R0.05_P38.3_T15.95s.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBPRUNE_F1B0_R0.06_P45.2_T14.24s.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBPRUNE_F1B0_R0.07_P52.3_T12.53s.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBPRUNE_F1B0_R0.08_P52.4_T12.52s.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBPRUNE_F1B0_R0.09_P59.2_T10.81s.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBPRUNE_F1B0_R0.12_P59.5_T10.76s.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBPRUNE_F1B0_R0.12_P63.0_T9.90s.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBPRUNE_F1B0_R0.1_P62.8_T9.95s.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBPRUNE_F1B0_R0.2_P59.5_T10.66s.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/DBPRUNE_F1B0_R0.3_P63.1_T9.79s.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/assets/NONE_R0.08_S0.png +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/bench/.gitignore +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/bench/bench.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/docs/.gitignore +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/examples/.gitignore +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/examples/run_flux.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/pytest.ini +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/requirements.txt +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/setup.cfg +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/__init__.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/__init__.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/dual_block_cache/__init__.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/dual_block_cache/cache_context.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/__init__.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/cogvideox.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/flux.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/mochi.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/dynamic_block_prune/__init__.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/__init__.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/cogvideox.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/flux.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/mochi.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/dynamic_block_prune/prune_context.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/first_block_cache/__init__.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/first_block_cache/cache_context.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/__init__.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/cogvideox.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/flux.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/mochi.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/wan.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/taylorseer.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/utils.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/logger.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/primitives.py +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit.egg-info/dependency_links.txt +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit.egg-info/requires.txt +0 -0
- {cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit.egg-info/top_level.txt +0 -0
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cache_dit
|
|
3
|
-
Version: 0.1.
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: 🤗 CacheDiT: A Training-free and Easy-to-use Cache Acceleration Toolbox for Diffusion Transformers
|
|
5
5
|
Author: DefTruth, vipshop.com, etc.
|
|
6
6
|
Maintainer: DefTruth, vipshop.com, etc
|
|
7
|
-
Project-URL: Repository, https://github.com/vipshop/
|
|
8
|
-
Project-URL: Homepage, https://github.com/vipshop/
|
|
7
|
+
Project-URL: Repository, https://github.com/vipshop/cache-dit.git
|
|
8
|
+
Project-URL: Homepage, https://github.com/vipshop/cache-dit.git
|
|
9
9
|
Requires-Python: >=3.10
|
|
10
10
|
Description-Content-Type: text/markdown
|
|
11
11
|
License-File: LICENSE
|
|
@@ -35,18 +35,18 @@ Dynamic: requires-python
|
|
|
35
35
|
|
|
36
36
|
<div align="center">
|
|
37
37
|
<p align="center">
|
|
38
|
-
<h3
|
|
38
|
+
<h3>🤗 CacheDiT: A Training-free and Easy-to-use Cache Acceleration <br>Toolbox for Diffusion Transformers</h3>
|
|
39
39
|
</p>
|
|
40
|
-
|
|
40
|
+
<img src=https://github.com/vipshop/cache-dit/raw/dev/assets/cache-dit.png >
|
|
41
41
|
<div align='center'>
|
|
42
42
|
<img src=https://img.shields.io/badge/Language-Python-brightgreen.svg >
|
|
43
43
|
<img src=https://img.shields.io/badge/PRs-welcome-9cf.svg >
|
|
44
44
|
<img src=https://img.shields.io/badge/PyPI-pass-brightgreen.svg >
|
|
45
45
|
<img src=https://img.shields.io/badge/Python-3.10|3.11|3.12-9cf.svg >
|
|
46
|
-
<img src=https://img.shields.io/badge/Release-v0.1.
|
|
46
|
+
<img src=https://img.shields.io/badge/Release-v0.1.2-brightgreen.svg >
|
|
47
47
|
</div>
|
|
48
48
|
<p align="center">
|
|
49
|
-
DeepCache
|
|
49
|
+
DeepCache is for UNet not DiT. Most DiT cache speedups are complex and not training-free. CacheDiT provides <br>a series of training-free, UNet-style cache accelerators for DiT: DBCache, DBPrune, FBCache, etc.
|
|
50
50
|
</p>
|
|
51
51
|
</div>
|
|
52
52
|
|
|
@@ -69,7 +69,7 @@ Dynamic: requires-python
|
|
|
69
69
|
|Baseline(L20x1)|F1B0 (0.08)|F1B0 (0.20)|F8B8 (0.15)|F12B12 (0.20)|F16B16 (0.20)|
|
|
70
70
|
|:---:|:---:|:---:|:---:|:---:|:---:|
|
|
71
71
|
|24.85s|15.59s|8.58s|15.41s|15.11s|17.74s|
|
|
72
|
-
|<img src=https://github.com/vipshop/
|
|
72
|
+
|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/NONE_R0.08_S0.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F1B0S1_R0.08_S11.png width=105px> | <img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F1B0S1_R0.2_S19.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F8B8S1_R0.15_S15.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F12B12S4_R0.2_S16.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F16B16S4_R0.2_S13.png width=105px>|
|
|
73
73
|
|**Baseline(L20x1)**|**F1B0 (0.08)**|**F8B8 (0.12)**|**F8B12 (0.20)**|**F8B16 (0.20)**|**F8B20 (0.20)**|
|
|
74
74
|
|27.85s|6.04s|5.88s|5.77s|6.01s|6.20s|
|
|
75
75
|
|<img src=https://github.com/user-attachments/assets/70ea57f4-d8f2-415b-8a96-d8315974a5e6 width=105px>|<img src=https://github.com/user-attachments/assets/fc0e1a67-19cc-44aa-bf50-04696e7978a0 width=105px> |<img src=https://github.com/user-attachments/assets/d1434896-628c-436b-95ad-43c085a8629e width=105px>|<img src=https://github.com/user-attachments/assets/aaa42cd2-57de-4c4e-8bfb-913018a8251d width=105px>|<img src=https://github.com/user-attachments/assets/dc0ba2a4-ef7c-436d-8a39-67055deab92f width=105px>|<img src=https://github.com/user-attachments/assets/aede466f-61ed-4256-8df0-fecf8020c5ca width=105px>|
|
|
@@ -93,7 +93,7 @@ These case studies demonstrate that even with relatively high thresholds (such a
|
|
|
93
93
|
|Baseline(L20x1)|Pruned(24%)|Pruned(35%)|Pruned(38%)|Pruned(45%)|Pruned(60%)|
|
|
94
94
|
|:---:|:---:|:---:|:---:|:---:|:---:|
|
|
95
95
|
|24.85s|19.43s|16.82s|15.95s|14.24s|10.66s|
|
|
96
|
-
|<img src=https://github.com/vipshop/
|
|
96
|
+
|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/NONE_R0.08_S0.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.03_P24.0_T19.43s.png width=105px> | <img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.04_P34.6_T16.82s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.05_P38.3_T15.95s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.06_P45.2_T14.24s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.2_P59.5_T10.66s.png width=105px>|
|
|
97
97
|
|
|
98
98
|
<div align="center">
|
|
99
99
|
<p align="center">
|
|
@@ -103,13 +103,17 @@ These case studies demonstrate that even with relatively high thresholds (such a
|
|
|
103
103
|
|
|
104
104
|
Moreover, both DBCache and DBPrune are **plug-and-play** solutions that work hand-in-hand with [ParaAttention](https://github.com/chengzeyi/ParaAttention). Users can easily tap into its **Context Parallelism** features for distributed inference.
|
|
105
105
|
|
|
106
|
+
<p align="center">
|
|
107
|
+
♥️ Please consider leaving a ⭐️ Star to support us ~ ♥️
|
|
108
|
+
</p>
|
|
109
|
+
|
|
106
110
|
## ©️Citations
|
|
107
111
|
|
|
108
112
|
```BibTeX
|
|
109
|
-
@misc{
|
|
110
|
-
title={
|
|
111
|
-
url={https://github.com/vipshop/
|
|
112
|
-
note={Open-source software available at https://github.com/vipshop/
|
|
113
|
+
@misc{CacheDiT@2025,
|
|
114
|
+
title={CacheDiT: A Training-free and Easy-to-use cache acceleration Toolbox for Diffusion Transformers},
|
|
115
|
+
url={https://github.com/vipshop/cache-dit.git},
|
|
116
|
+
note={Open-source software available at https://github.com/vipshop/cache-dit.git},
|
|
113
117
|
author={vipshop.com},
|
|
114
118
|
year={2025}
|
|
115
119
|
}
|
|
@@ -119,7 +123,7 @@ Moreover, both DBCache and DBPrune are **plug-and-play** solutions that works ha
|
|
|
119
123
|
|
|
120
124
|
<div id="reference"></div>
|
|
121
125
|
|
|
122
|
-
|
|
126
|
+
The **CacheDiT** codebase was adapted from FBCache's implementation at the [ParaAttention](https://github.com/chengzeyi/ParaAttention/tree/main/src/para_attn/first_block_cache). We would like to express our sincere gratitude for this excellent work!
|
|
123
127
|
|
|
124
128
|
## 📖Contents
|
|
125
129
|
|
|
@@ -140,7 +144,7 @@ Moreover, both DBCache and DBPrune are **plug-and-play** solutions that works ha
|
|
|
140
144
|
|
|
141
145
|
<div id="installation"></div>
|
|
142
146
|
|
|
143
|
-
You can install the stable release of `
|
|
147
|
+
You can install the stable release of `cache-dit` from PyPI:
|
|
144
148
|
|
|
145
149
|
```bash
|
|
146
150
|
pip3 install cache-dit
|
|
@@ -148,7 +152,7 @@ pip3 install cache-dit
|
|
|
148
152
|
Or you can install the latest develop version from GitHub:
|
|
149
153
|
|
|
150
154
|
```bash
|
|
151
|
-
pip3 install git+https://github.com/vipshop/
|
|
155
|
+
pip3 install git+https://github.com/vipshop/cache-dit.git
|
|
152
156
|
```
|
|
153
157
|
|
|
154
158
|
## ⚡️DBCache: Dual Block Cache
|
|
@@ -270,13 +274,13 @@ apply_cache_on_pipe(pipe, **cache_options)
|
|
|
270
274
|
|Baseline(L20x1)|Pruned(24%)|Pruned(35%)|Pruned(38%)|Pruned(45%)|Pruned(60%)|
|
|
271
275
|
|:---:|:---:|:---:|:---:|:---:|:---:|
|
|
272
276
|
|24.85s|19.43s|16.82s|15.95s|14.24s|10.66s|
|
|
273
|
-
|<img src=https://github.com/vipshop/
|
|
277
|
+
|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/NONE_R0.08_S0.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.03_P24.0_T19.43s.png width=105px> | <img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.04_P34.6_T16.82s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.05_P38.3_T15.95s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.06_P45.2_T14.24s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.2_P59.5_T10.66s.png width=105px>|
|
|
274
278
|
|
|
275
279
|
## 🎉Context Parallelism
|
|
276
280
|
|
|
277
281
|
<div id="context-parallelism"></div>
|
|
278
282
|
|
|
279
|
-
|
|
283
|
+
**CacheDiT** provides **plug-and-play** solutions that work hand-in-hand with [ParaAttention](https://github.com/chengzeyi/ParaAttention). Users can **easily tap into** its **Context Parallelism** features for distributed inference. First, install `para-attn` from PyPI:
|
|
280
284
|
|
|
281
285
|
```bash
|
|
282
286
|
pip3 install para-attn # or install `para-attn` from sources.
|
|
@@ -312,7 +316,7 @@ apply_cache_on_pipe(
|
|
|
312
316
|
|
|
313
317
|
<div id="compile"></div>
|
|
314
318
|
|
|
315
|
-
**
|
|
319
|
+
**CacheDiT** is designed to work compatibly with `torch.compile`. For example:
|
|
316
320
|
|
|
317
321
|
```python
|
|
318
322
|
apply_cache_on_pipe(
|
|
@@ -321,7 +325,7 @@ apply_cache_on_pipe(
|
|
|
321
325
|
# Compile the Transformer module
|
|
322
326
|
pipe.transformer = torch.compile(pipe.transformer)
|
|
323
327
|
```
|
|
324
|
-
However, users intending to use
|
|
328
|
+
However, users intending to use **CacheDiT** for DiT with **dynamic input shapes** should consider increasing the **recompile** **limit** of `torch._dynamo` to achieve better performance.
|
|
325
329
|
|
|
326
330
|
```python
|
|
327
331
|
torch._dynamo.config.recompile_limit = 96 # default is 8
|
|
@@ -333,9 +337,9 @@ Otherwise, the recompile_limit error may be triggered, causing the module to fal
|
|
|
333
337
|
|
|
334
338
|
<div id="supported"></div>
|
|
335
339
|
|
|
336
|
-
- [🚀FLUX.1](https://github.com/vipshop/
|
|
337
|
-
- [🚀CogVideoX](https://github.com/vipshop/
|
|
338
|
-
- [🚀Mochi](https://github.com/vipshop/
|
|
340
|
+
- [🚀FLUX.1](https://github.com/vipshop/cache-dit/tree/main/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters)
|
|
341
|
+
- [🚀CogVideoX](https://github.com/vipshop/cache-dit/tree/main/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters)
|
|
342
|
+
- [🚀Mochi](https://github.com/vipshop/cache-dit/tree/main/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters)
|
|
339
343
|
|
|
340
344
|
## 👋Contribute
|
|
341
345
|
<div id="contribute"></div>
|
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
<div align="center">
|
|
2
2
|
<p align="center">
|
|
3
|
-
<h3
|
|
3
|
+
<h3>🤗 CacheDiT: A Training-free and Easy-to-use Cache Acceleration <br>Toolbox for Diffusion Transformers</h3>
|
|
4
4
|
</p>
|
|
5
|
-
|
|
5
|
+
<img src=https://github.com/vipshop/cache-dit/raw/dev/assets/cache-dit.png >
|
|
6
6
|
<div align='center'>
|
|
7
7
|
<img src=https://img.shields.io/badge/Language-Python-brightgreen.svg >
|
|
8
8
|
<img src=https://img.shields.io/badge/PRs-welcome-9cf.svg >
|
|
9
9
|
<img src=https://img.shields.io/badge/PyPI-pass-brightgreen.svg >
|
|
10
10
|
<img src=https://img.shields.io/badge/Python-3.10|3.11|3.12-9cf.svg >
|
|
11
|
-
<img src=https://img.shields.io/badge/Release-v0.1.
|
|
11
|
+
<img src=https://img.shields.io/badge/Release-v0.1.2-brightgreen.svg >
|
|
12
12
|
</div>
|
|
13
13
|
<p align="center">
|
|
14
|
-
DeepCache
|
|
14
|
+
DeepCache is for UNet not DiT. Most DiT cache speedups are complex and not training-free. CacheDiT provides <br>a series of training-free, UNet-style cache accelerators for DiT: DBCache, DBPrune, FBCache, etc.
|
|
15
15
|
</p>
|
|
16
16
|
</div>
|
|
17
17
|
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
|Baseline(L20x1)|F1B0 (0.08)|F1B0 (0.20)|F8B8 (0.15)|F12B12 (0.20)|F16B16 (0.20)|
|
|
35
35
|
|:---:|:---:|:---:|:---:|:---:|:---:|
|
|
36
36
|
|24.85s|15.59s|8.58s|15.41s|15.11s|17.74s|
|
|
37
|
-
|<img src=https://github.com/vipshop/
|
|
37
|
+
|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/NONE_R0.08_S0.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F1B0S1_R0.08_S11.png width=105px> | <img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F1B0S1_R0.2_S19.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F8B8S1_R0.15_S15.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F12B12S4_R0.2_S16.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F16B16S4_R0.2_S13.png width=105px>|
|
|
38
38
|
|**Baseline(L20x1)**|**F1B0 (0.08)**|**F8B8 (0.12)**|**F8B12 (0.20)**|**F8B16 (0.20)**|**F8B20 (0.20)**|
|
|
39
39
|
|27.85s|6.04s|5.88s|5.77s|6.01s|6.20s|
|
|
40
40
|
|<img src=https://github.com/user-attachments/assets/70ea57f4-d8f2-415b-8a96-d8315974a5e6 width=105px>|<img src=https://github.com/user-attachments/assets/fc0e1a67-19cc-44aa-bf50-04696e7978a0 width=105px> |<img src=https://github.com/user-attachments/assets/d1434896-628c-436b-95ad-43c085a8629e width=105px>|<img src=https://github.com/user-attachments/assets/aaa42cd2-57de-4c4e-8bfb-913018a8251d width=105px>|<img src=https://github.com/user-attachments/assets/dc0ba2a4-ef7c-436d-8a39-67055deab92f width=105px>|<img src=https://github.com/user-attachments/assets/aede466f-61ed-4256-8df0-fecf8020c5ca width=105px>|
|
|
@@ -58,7 +58,7 @@ These case studies demonstrate that even with relatively high thresholds (such a
|
|
|
58
58
|
|Baseline(L20x1)|Pruned(24%)|Pruned(35%)|Pruned(38%)|Pruned(45%)|Pruned(60%)|
|
|
59
59
|
|:---:|:---:|:---:|:---:|:---:|:---:|
|
|
60
60
|
|24.85s|19.43s|16.82s|15.95s|14.24s|10.66s|
|
|
61
|
-
|<img src=https://github.com/vipshop/
|
|
61
|
+
|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/NONE_R0.08_S0.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.03_P24.0_T19.43s.png width=105px> | <img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.04_P34.6_T16.82s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.05_P38.3_T15.95s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.06_P45.2_T14.24s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.2_P59.5_T10.66s.png width=105px>|
|
|
62
62
|
|
|
63
63
|
<div align="center">
|
|
64
64
|
<p align="center">
|
|
@@ -68,13 +68,17 @@ These case studies demonstrate that even with relatively high thresholds (such a
|
|
|
68
68
|
|
|
69
69
|
Moreover, both DBCache and DBPrune are **plug-and-play** solutions that work hand-in-hand with [ParaAttention](https://github.com/chengzeyi/ParaAttention). Users can easily tap into its **Context Parallelism** features for distributed inference.
|
|
70
70
|
|
|
71
|
+
<p align="center">
|
|
72
|
+
♥️ Please consider leaving a ⭐️ Star to support us ~ ♥️
|
|
73
|
+
</p>
|
|
74
|
+
|
|
71
75
|
## ©️Citations
|
|
72
76
|
|
|
73
77
|
```BibTeX
|
|
74
|
-
@misc{
|
|
75
|
-
title={
|
|
76
|
-
url={https://github.com/vipshop/
|
|
77
|
-
note={Open-source software available at https://github.com/vipshop/
|
|
78
|
+
@misc{CacheDiT@2025,
|
|
79
|
+
title={CacheDiT: A Training-free and Easy-to-use cache acceleration Toolbox for Diffusion Transformers},
|
|
80
|
+
url={https://github.com/vipshop/cache-dit.git},
|
|
81
|
+
note={Open-source software available at https://github.com/vipshop/cache-dit.git},
|
|
78
82
|
author={vipshop.com},
|
|
79
83
|
year={2025}
|
|
80
84
|
}
|
|
@@ -84,7 +88,7 @@ Moreover, both DBCache and DBPrune are **plug-and-play** solutions that works ha
|
|
|
84
88
|
|
|
85
89
|
<div id="reference"></div>
|
|
86
90
|
|
|
87
|
-
|
|
91
|
+
The **CacheDiT** codebase was adapted from FBCache's implementation at the [ParaAttention](https://github.com/chengzeyi/ParaAttention/tree/main/src/para_attn/first_block_cache). We would like to express our sincere gratitude for this excellent work!
|
|
88
92
|
|
|
89
93
|
## 📖Contents
|
|
90
94
|
|
|
@@ -105,7 +109,7 @@ Moreover, both DBCache and DBPrune are **plug-and-play** solutions that works ha
|
|
|
105
109
|
|
|
106
110
|
<div id="installation"></div>
|
|
107
111
|
|
|
108
|
-
You can install the stable release of `
|
|
112
|
+
You can install the stable release of `cache-dit` from PyPI:
|
|
109
113
|
|
|
110
114
|
```bash
|
|
111
115
|
pip3 install cache-dit
|
|
@@ -113,7 +117,7 @@ pip3 install cache-dit
|
|
|
113
117
|
Or you can install the latest develop version from GitHub:
|
|
114
118
|
|
|
115
119
|
```bash
|
|
116
|
-
pip3 install git+https://github.com/vipshop/
|
|
120
|
+
pip3 install git+https://github.com/vipshop/cache-dit.git
|
|
117
121
|
```
|
|
118
122
|
|
|
119
123
|
## ⚡️DBCache: Dual Block Cache
|
|
@@ -235,13 +239,13 @@ apply_cache_on_pipe(pipe, **cache_options)
|
|
|
235
239
|
|Baseline(L20x1)|Pruned(24%)|Pruned(35%)|Pruned(38%)|Pruned(45%)|Pruned(60%)|
|
|
236
240
|
|:---:|:---:|:---:|:---:|:---:|:---:|
|
|
237
241
|
|24.85s|19.43s|16.82s|15.95s|14.24s|10.66s|
|
|
238
|
-
|<img src=https://github.com/vipshop/
|
|
242
|
+
|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/NONE_R0.08_S0.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.03_P24.0_T19.43s.png width=105px> | <img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.04_P34.6_T16.82s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.05_P38.3_T15.95s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.06_P45.2_T14.24s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.2_P59.5_T10.66s.png width=105px>|
|
|
239
243
|
|
|
240
244
|
## 🎉Context Parallelism
|
|
241
245
|
|
|
242
246
|
<div id="context-parallelism"></div>
|
|
243
247
|
|
|
244
|
-
|
|
248
|
+
**CacheDiT** provides **plug-and-play** solutions that work hand-in-hand with [ParaAttention](https://github.com/chengzeyi/ParaAttention). Users can **easily tap into** its **Context Parallelism** features for distributed inference. First, install `para-attn` from PyPI:
|
|
245
249
|
|
|
246
250
|
```bash
|
|
247
251
|
pip3 install para-attn # or install `para-attn` from sources.
|
|
@@ -277,7 +281,7 @@ apply_cache_on_pipe(
|
|
|
277
281
|
|
|
278
282
|
<div id="compile"></div>
|
|
279
283
|
|
|
280
|
-
**
|
|
284
|
+
**CacheDiT** is designed to work compatibly with `torch.compile`. For example:
|
|
281
285
|
|
|
282
286
|
```python
|
|
283
287
|
apply_cache_on_pipe(
|
|
@@ -286,7 +290,7 @@ apply_cache_on_pipe(
|
|
|
286
290
|
# Compile the Transformer module
|
|
287
291
|
pipe.transformer = torch.compile(pipe.transformer)
|
|
288
292
|
```
|
|
289
|
-
However, users intending to use
|
|
293
|
+
However, users intending to use **CacheDiT** for DiT with **dynamic input shapes** should consider increasing the **recompile** **limit** of `torch._dynamo` to achieve better performance.
|
|
290
294
|
|
|
291
295
|
```python
|
|
292
296
|
torch._dynamo.config.recompile_limit = 96 # default is 8
|
|
@@ -298,9 +302,9 @@ Otherwise, the recompile_limit error may be triggered, causing the module to fal
|
|
|
298
302
|
|
|
299
303
|
<div id="supported"></div>
|
|
300
304
|
|
|
301
|
-
- [🚀FLUX.1](https://github.com/vipshop/
|
|
302
|
-
- [🚀CogVideoX](https://github.com/vipshop/
|
|
303
|
-
- [🚀Mochi](https://github.com/vipshop/
|
|
305
|
+
- [🚀FLUX.1](https://github.com/vipshop/cache-dit/tree/main/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters)
|
|
306
|
+
- [🚀CogVideoX](https://github.com/vipshop/cache-dit/tree/main/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters)
|
|
307
|
+
- [🚀Mochi](https://github.com/vipshop/cache-dit/tree/main/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters)
|
|
304
308
|
|
|
305
309
|
## 👋Contribute
|
|
306
310
|
<div id="contribute"></div>
|
|
Binary file
|
|
@@ -4,14 +4,14 @@ name = "cache_dit"
|
|
|
4
4
|
dynamic = ["version", "dependencies", "optional-dependencies"]
|
|
5
5
|
requires-python = ">=3.10"
|
|
6
6
|
authors = [{name = "DefTruth, vipshop.com, etc."}]
|
|
7
|
-
description = "
|
|
7
|
+
description = "🤗 CacheDiT: A Training-free and Easy-to-use Cache Acceleration Toolbox for Diffusion Transformers"
|
|
8
8
|
maintainers = [{name="DefTruth, vipshop.com, etc"}]
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
|
|
11
11
|
[project.urls]
|
|
12
12
|
|
|
13
|
-
Repository = "https://github.com/vipshop/
|
|
14
|
-
Homepage = "https://github.com/vipshop/
|
|
13
|
+
Repository = "https://github.com/vipshop/cache-dit.git"
|
|
14
|
+
Homepage = "https://github.com/vipshop/cache-dit.git"
|
|
15
15
|
|
|
16
16
|
[build-system]
|
|
17
17
|
|
|
@@ -44,7 +44,7 @@ def fetch_requirements():
|
|
|
44
44
|
|
|
45
45
|
setup(
|
|
46
46
|
name=PACKAGE_NAME,
|
|
47
|
-
description="
|
|
47
|
+
description="🤗 CacheDiT: A Training-free and Easy-to-use Cache Acceleration Toolbox for Diffusion Transformers",
|
|
48
48
|
author="vipshop.com",
|
|
49
49
|
use_scm_version={
|
|
50
50
|
"write_to": path.join("src", "cache_dit", "_version.py"),
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cache_dit
|
|
3
|
-
Version: 0.1.
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: 🤗 CacheDiT: A Training-free and Easy-to-use Cache Acceleration Toolbox for Diffusion Transformers
|
|
5
5
|
Author: DefTruth, vipshop.com, etc.
|
|
6
6
|
Maintainer: DefTruth, vipshop.com, etc
|
|
7
|
-
Project-URL: Repository, https://github.com/vipshop/
|
|
8
|
-
Project-URL: Homepage, https://github.com/vipshop/
|
|
7
|
+
Project-URL: Repository, https://github.com/vipshop/cache-dit.git
|
|
8
|
+
Project-URL: Homepage, https://github.com/vipshop/cache-dit.git
|
|
9
9
|
Requires-Python: >=3.10
|
|
10
10
|
Description-Content-Type: text/markdown
|
|
11
11
|
License-File: LICENSE
|
|
@@ -35,18 +35,18 @@ Dynamic: requires-python
|
|
|
35
35
|
|
|
36
36
|
<div align="center">
|
|
37
37
|
<p align="center">
|
|
38
|
-
<h3
|
|
38
|
+
<h3>🤗 CacheDiT: A Training-free and Easy-to-use Cache Acceleration <br>Toolbox for Diffusion Transformers</h3>
|
|
39
39
|
</p>
|
|
40
|
-
|
|
40
|
+
<img src=https://github.com/vipshop/cache-dit/raw/dev/assets/cache-dit.png >
|
|
41
41
|
<div align='center'>
|
|
42
42
|
<img src=https://img.shields.io/badge/Language-Python-brightgreen.svg >
|
|
43
43
|
<img src=https://img.shields.io/badge/PRs-welcome-9cf.svg >
|
|
44
44
|
<img src=https://img.shields.io/badge/PyPI-pass-brightgreen.svg >
|
|
45
45
|
<img src=https://img.shields.io/badge/Python-3.10|3.11|3.12-9cf.svg >
|
|
46
|
-
<img src=https://img.shields.io/badge/Release-v0.1.
|
|
46
|
+
<img src=https://img.shields.io/badge/Release-v0.1.2-brightgreen.svg >
|
|
47
47
|
</div>
|
|
48
48
|
<p align="center">
|
|
49
|
-
DeepCache
|
|
49
|
+
DeepCache is for UNet not DiT. Most DiT cache speedups are complex and not training-free. CacheDiT provides <br>a series of training-free, UNet-style cache accelerators for DiT: DBCache, DBPrune, FBCache, etc.
|
|
50
50
|
</p>
|
|
51
51
|
</div>
|
|
52
52
|
|
|
@@ -69,7 +69,7 @@ Dynamic: requires-python
|
|
|
69
69
|
|Baseline(L20x1)|F1B0 (0.08)|F1B0 (0.20)|F8B8 (0.15)|F12B12 (0.20)|F16B16 (0.20)|
|
|
70
70
|
|:---:|:---:|:---:|:---:|:---:|:---:|
|
|
71
71
|
|24.85s|15.59s|8.58s|15.41s|15.11s|17.74s|
|
|
72
|
-
|<img src=https://github.com/vipshop/
|
|
72
|
+
|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/NONE_R0.08_S0.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F1B0S1_R0.08_S11.png width=105px> | <img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F1B0S1_R0.2_S19.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F8B8S1_R0.15_S15.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F12B12S4_R0.2_S16.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F16B16S4_R0.2_S13.png width=105px>|
|
|
73
73
|
|**Baseline(L20x1)**|**F1B0 (0.08)**|**F8B8 (0.12)**|**F8B12 (0.20)**|**F8B16 (0.20)**|**F8B20 (0.20)**|
|
|
74
74
|
|27.85s|6.04s|5.88s|5.77s|6.01s|6.20s|
|
|
75
75
|
|<img src=https://github.com/user-attachments/assets/70ea57f4-d8f2-415b-8a96-d8315974a5e6 width=105px>|<img src=https://github.com/user-attachments/assets/fc0e1a67-19cc-44aa-bf50-04696e7978a0 width=105px> |<img src=https://github.com/user-attachments/assets/d1434896-628c-436b-95ad-43c085a8629e width=105px>|<img src=https://github.com/user-attachments/assets/aaa42cd2-57de-4c4e-8bfb-913018a8251d width=105px>|<img src=https://github.com/user-attachments/assets/dc0ba2a4-ef7c-436d-8a39-67055deab92f width=105px>|<img src=https://github.com/user-attachments/assets/aede466f-61ed-4256-8df0-fecf8020c5ca width=105px>|
|
|
@@ -93,7 +93,7 @@ These case studies demonstrate that even with relatively high thresholds (such a
|
|
|
93
93
|
|Baseline(L20x1)|Pruned(24%)|Pruned(35%)|Pruned(38%)|Pruned(45%)|Pruned(60%)|
|
|
94
94
|
|:---:|:---:|:---:|:---:|:---:|:---:|
|
|
95
95
|
|24.85s|19.43s|16.82s|15.95s|14.24s|10.66s|
|
|
96
|
-
|<img src=https://github.com/vipshop/
|
|
96
|
+
|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/NONE_R0.08_S0.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.03_P24.0_T19.43s.png width=105px> | <img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.04_P34.6_T16.82s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.05_P38.3_T15.95s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.06_P45.2_T14.24s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.2_P59.5_T10.66s.png width=105px>|
|
|
97
97
|
|
|
98
98
|
<div align="center">
|
|
99
99
|
<p align="center">
|
|
@@ -103,13 +103,17 @@ These case studies demonstrate that even with relatively high thresholds (such a
|
|
|
103
103
|
|
|
104
104
|
Moreover, both DBCache and DBPrune are **plug-and-play** solutions that work hand-in-hand with [ParaAttention](https://github.com/chengzeyi/ParaAttention). Users can easily tap into its **Context Parallelism** features for distributed inference.
|
|
105
105
|
|
|
106
|
+
<p align="center">
|
|
107
|
+
♥️ Please consider leaving a ⭐️ Star to support us ~ ♥️
|
|
108
|
+
</p>
|
|
109
|
+
|
|
106
110
|
## ©️Citations
|
|
107
111
|
|
|
108
112
|
```BibTeX
|
|
109
|
-
@misc{
|
|
110
|
-
title={
|
|
111
|
-
url={https://github.com/vipshop/
|
|
112
|
-
note={Open-source software available at https://github.com/vipshop/
|
|
113
|
+
@misc{CacheDiT@2025,
|
|
114
|
+
title={CacheDiT: A Training-free and Easy-to-use cache acceleration Toolbox for Diffusion Transformers},
|
|
115
|
+
url={https://github.com/vipshop/cache-dit.git},
|
|
116
|
+
note={Open-source software available at https://github.com/vipshop/cache-dit.git},
|
|
113
117
|
author={vipshop.com},
|
|
114
118
|
year={2025}
|
|
115
119
|
}
|
|
@@ -119,7 +123,7 @@ Moreover, both DBCache and DBPrune are **plug-and-play** solutions that works ha
|
|
|
119
123
|
|
|
120
124
|
<div id="reference"></div>
|
|
121
125
|
|
|
122
|
-
|
|
126
|
+
The **CacheDiT** codebase was adapted from the FBCache implementation in [ParaAttention](https://github.com/chengzeyi/ParaAttention/tree/main/src/para_attn/first_block_cache). We would like to express our sincere gratitude for this excellent work!
|
|
123
127
|
|
|
124
128
|
## 📖Contents
|
|
125
129
|
|
|
@@ -140,7 +144,7 @@ Moreover, both DBCache and DBPrune are **plug-and-play** solutions that works ha
|
|
|
140
144
|
|
|
141
145
|
<div id="installation"></div>
|
|
142
146
|
|
|
143
|
-
You can install the stable release of `
|
|
147
|
+
You can install the stable release of `cache-dit` from PyPI:
|
|
144
148
|
|
|
145
149
|
```bash
|
|
146
150
|
pip3 install cache-dit
|
|
@@ -148,7 +152,7 @@ pip3 install cache-dit
|
|
|
148
152
|
Or you can install the latest development version from GitHub:
|
|
149
153
|
|
|
150
154
|
```bash
|
|
151
|
-
pip3 install git+https://github.com/vipshop/
|
|
155
|
+
pip3 install git+https://github.com/vipshop/cache-dit.git
|
|
152
156
|
```
|
|
153
157
|
|
|
154
158
|
## ⚡️DBCache: Dual Block Cache
|
|
@@ -270,13 +274,13 @@ apply_cache_on_pipe(pipe, **cache_options)
|
|
|
270
274
|
|Baseline(L20x1)|Pruned(24%)|Pruned(35%)|Pruned(38%)|Pruned(45%)|Pruned(60%)|
|
|
271
275
|
|:---:|:---:|:---:|:---:|:---:|:---:|
|
|
272
276
|
|24.85s|19.43s|16.82s|15.95s|14.24s|10.66s|
|
|
273
|
-
|<img src=https://github.com/vipshop/
|
|
277
|
+
|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/NONE_R0.08_S0.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.03_P24.0_T19.43s.png width=105px> | <img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.04_P34.6_T16.82s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.05_P38.3_T15.95s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.06_P45.2_T14.24s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBPRUNE_F1B0_R0.2_P59.5_T10.66s.png width=105px>|
|
|
274
278
|
|
|
275
279
|
## 🎉Context Parallelism
|
|
276
280
|
|
|
277
281
|
<div id="context-parallelism"></div>
|
|
278
282
|
|
|
279
|
-
|
|
283
|
+
**CacheDiT** is a **plug-and-play** solution that works hand-in-hand with [ParaAttention](https://github.com/chengzeyi/ParaAttention). Users can **easily tap into** its **Context Parallelism** features for distributed inference. First, install `para-attn` from PyPI:
|
|
280
284
|
|
|
281
285
|
```bash
|
|
282
286
|
pip3 install para-attn # or install `para-attn` from sources.
|
|
@@ -312,7 +316,7 @@ apply_cache_on_pipe(
|
|
|
312
316
|
|
|
313
317
|
<div id="compile"></div>
|
|
314
318
|
|
|
315
|
-
**
|
|
319
|
+
**CacheDiT** is designed to be compatible with `torch.compile`. For example:
|
|
316
320
|
|
|
317
321
|
```python
|
|
318
322
|
apply_cache_on_pipe(
|
|
@@ -321,7 +325,7 @@ apply_cache_on_pipe(
|
|
|
321
325
|
# Compile the Transformer module
|
|
322
326
|
pipe.transformer = torch.compile(pipe.transformer)
|
|
323
327
|
```
|
|
324
|
-
However, users intending to use
|
|
328
|
+
However, users intending to use **CacheDiT** for DiT with **dynamic input shapes** should consider increasing the **recompile** **limit** of `torch._dynamo` to achieve better performance.
|
|
325
329
|
|
|
326
330
|
```python
|
|
327
331
|
torch._dynamo.config.recompile_limit = 96 # default is 8
|
|
@@ -333,9 +337,9 @@ Otherwise, the recompile_limit error may be triggered, causing the module to fal
|
|
|
333
337
|
|
|
334
338
|
<div id="supported"></div>
|
|
335
339
|
|
|
336
|
-
- [🚀FLUX.1](https://github.com/vipshop/
|
|
337
|
-
- [🚀CogVideoX](https://github.com/vipshop/
|
|
338
|
-
- [🚀Mochi](https://github.com/vipshop/
|
|
340
|
+
- [🚀FLUX.1](https://github.com/vipshop/cache-dit/raw/main/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters)
|
|
341
|
+
- [🚀CogVideoX](https://github.com/vipshop/cache-dit/raw/main/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters)
|
|
342
|
+
- [🚀Mochi](https://github.com/vipshop/cache-dit/raw/main/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters)
|
|
339
343
|
|
|
340
344
|
## 👋Contribute
|
|
341
345
|
<div id="contribute"></div>
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/dual_block_cache/__init__.py
RENAMED
|
File without changes
|
{cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/dual_block_cache/cache_context.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/dynamic_block_prune/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/dynamic_block_prune/prune_context.py
RENAMED
|
File without changes
|
{cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/first_block_cache/__init__.py
RENAMED
|
File without changes
|
{cache_dit-0.1.1 → cache_dit-0.1.2}/src/cache_dit/cache_factory/first_block_cache/cache_context.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|