cache-dit 0.2.4__tar.gz → 0.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: the registry flags this version of cache-dit as a potentially problematic release.
- {cache_dit-0.2.4 → cache_dit-0.2.5}/PKG-INFO +21 -8
- {cache_dit-0.2.4 → cache_dit-0.2.5}/README.md +20 -7
- {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/.gitignore +1 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/README.md +10 -2
- cache_dit-0.2.5/examples/data/flf2v_input_first_frame.png +0 -0
- cache_dit-0.2.5/examples/data/flf2v_input_last_frame.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/requirements.txt +1 -1
- {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/run_cogvideox.py +1 -1
- {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/run_wan.py +8 -2
- cache_dit-0.2.5/examples/run_wan_flf2v.py +191 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/_version.py +2 -2
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dual_block_cache/cache_context.py +138 -33
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/first_block_cache/cache_context.py +2 -2
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit.egg-info/PKG-INFO +21 -8
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit.egg-info/SOURCES.txt +3 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/.github/workflows/issue.yml +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/.gitignore +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/.pre-commit-config.yaml +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/CONTRIBUTE.md +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/LICENSE +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/MANIFEST.in +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F12B12S4_R0.2_S16.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F12B16S4_R0.08_S6.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F16B16S2_R0.2_S14.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F16B16S4_R0.2_S13.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F1B0S1_R0.08_S11.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F1B0S1_R0.2_S19.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F8B0S2_R0.12_S12.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F8B16S1_R0.2_S18.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F8B8S1_R0.08_S9.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F8B8S1_R0.12_S12.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F8B8S1_R0.15_S15.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.03_P24.0_T19.43s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.04_P34.6_T16.82s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.05_P38.3_T15.95s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.06_P45.2_T14.24s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.07_P52.3_T12.53s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.08_P52.4_T12.52s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.09_P59.2_T10.81s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.12_P59.5_T10.76s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.12_P63.0_T9.90s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.1_P62.8_T9.95s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.2_P59.5_T10.66s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.3_P63.1_T9.79s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/NONE_R0.08_S0.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/TEXTURE_DBCACHE_F1B0_R0.08.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/TEXTURE_DBCACHE_F8B12_R0.12.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/TEXTURE_DBCACHE_F8B16_R0.2.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/TEXTURE_DBCACHE_F8B20_R0.2.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/TEXTURE_DBCACHE_F8B8_R0.12.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/TEXTURE_NONE_R0.08.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C0_DBCACHE_F1B0S1W0T0ET0_R0.12_S14_T12.85s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C0_DBCACHE_F1B0S1W0T0ET0_R0.15_S17_T10.27s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C0_DBCACHE_F1B0S1W0T1ET1_R0.12_S14_T12.86s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C0_DBCACHE_F1B0S1W0T1ET1_R0.15_S17_T10.28s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_DBCACHE_F1B0S1W0T1ET1_R0.15_S17_T8.48s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_DBPRUNE_F1B0_R0.03_P24.0_T16.25s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_DBPRUNE_F1B0_R0.045_P38.2_T13.41s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_DBPRUNE_F1B0_R0.04_P34.6_T14.12s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_DBPRUNE_F1B0_R0.055_P45.1_T12.00s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_DBPRUNE_F1B0_R0.05_P41.6_T12.70s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_DBPRUNE_F1B0_R0.2_P59.5_T8.86s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_DBPRUNE_F8B8_R0.08_P23.1_T16.14s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_NONE_R0.08_S0_T20.43s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.03_P27.3_T6.62s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.03_P27.3_T6.63s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.045_P38.2_T5.81s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.045_P38.2_T5.82s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.04_P34.6_T6.06s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.04_P34.6_T6.07s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.04_P34.6_T6.08s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.055_P45.1_T5.27s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.055_P45.1_T5.28s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.2_P59.5_T3.95s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.2_P59.5_T3.96s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_NONE_R0.08_S0_T7.78s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_NONE_R0.08_S0_T7.79s.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/cache-dit-v1.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/dbcache-fnbn-v1.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/dbcache-v1.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/dbprune-v1.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/fbcache-v1.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/bench/.gitignore +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/bench/bench.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/docs/.gitignore +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/data/cup.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/data/cup_mask.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/run_flux.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/run_flux_fill.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/run_hunyuan_video.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/run_mochi.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/pyproject.toml +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/pytest.ini +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/requirements.txt +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/setup.cfg +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/setup.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/__init__.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/__init__.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dual_block_cache/__init__.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/__init__.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/cogvideox.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/flux.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/hunyuan_video.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/mochi.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/wan.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dynamic_block_prune/__init__.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/__init__.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/cogvideox.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/flux.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/hunyuan_video.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/mochi.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/wan.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dynamic_block_prune/prune_context.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/first_block_cache/__init__.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/__init__.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/cogvideox.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/flux.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/hunyuan_video.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/mochi.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/wan.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/taylorseer.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/utils.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/compile/__init__.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/compile/utils.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/custom_ops/__init__.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/custom_ops/triton_taylorseer.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/logger.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/primitives.py +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit.egg-info/dependency_links.txt +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit.egg-info/requires.txt +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit.egg-info/top_level.txt +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/tests/.gitignore +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/tests/README.md +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/tests/taylorseer_approximation_order_2.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/tests/taylorseer_approximation_order_4.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/tests/taylorseer_approximation_test.png +0 -0
- {cache_dit-0.2.4 → cache_dit-0.2.5}/tests/test_taylorseer.py +0 -0
{cache_dit-0.2.4 → cache_dit-0.2.5}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cache_dit
-Version: 0.2.4
+Version: 0.2.5
 Summary: 🤗 CacheDiT: A Training-free and Easy-to-use Cache Acceleration Toolbox for Diffusion Transformers
 Author: DefTruth, vipshop.com, etc.
 Maintainer: DefTruth, vipshop.com, etc
@@ -44,7 +44,7 @@ Dynamic: requires-python
 <img src=https://img.shields.io/badge/PyPI-pass-brightgreen.svg >
 <img src=https://static.pepy.tech/badge/cache-dit >
 <img src=https://img.shields.io/badge/Python-3.10|3.11|3.12-9cf.svg >
-<img src=https://img.shields.io/badge/Release-v0.2
+<img src=https://img.shields.io/badge/Release-v0.2-brightgreen.svg >
 </div>
 <p align="center">
 DeepCache is for UNet not DiT. Most DiT cache speedups are complex and not training-free. CacheDiT offers <br>a set of training-free cache accelerators for DiT: <b>🔥<a href="#dbcache">DBCache</a>, <a href="#dbprune">DBPrune</a>, <a href="#taylorseer">TaylorSeer</a>, <a href="#fbcache">FBCache</a></b>, etc🔥
@@ -169,7 +169,7 @@ The **CacheDiT** codebase is adapted from [FBCache](https://github.com/chengzeyi
 You can install the stable release of `cache-dit` from PyPI:
 
 ```bash
-pip3 install cache-dit
+pip3 install -U cache-dit
 ```
 Or you can install the latest develop version from GitHub:
 
@@ -181,11 +181,13 @@ pip3 install git+https://github.com/vipshop/cache-dit.git
 
 <div id="supported"></div>
 
-- [🚀FLUX.1](https://github.com/vipshop/cache-dit/raw/main/examples)
-- [🚀
+- [🚀FLUX.1-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
+- [🚀FLUX.1-Fill-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
+- [🚀mochi-1-preview](https://github.com/vipshop/cache-dit/raw/main/examples)
 - [🚀CogVideoX](https://github.com/vipshop/cache-dit/raw/main/examples)
 - [🚀CogVideoX1.5](https://github.com/vipshop/cache-dit/raw/main/examples)
-- [🚀Wan2.1](https://github.com/vipshop/cache-dit/raw/main/examples)
+- [🚀Wan2.1-T2V](https://github.com/vipshop/cache-dit/raw/main/examples)
+- [🚀Wan2.1-FLF2V](https://github.com/vipshop/cache-dit/raw/main/examples)
 - [🚀HunyuanVideo](https://github.com/vipshop/cache-dit/raw/main/examples)
 
 
@@ -281,7 +283,7 @@ cache_options = {
     "taylorseer_kwargs": {
         "n_derivatives": 2, # default is 2.
     },
-    "warmup_steps": 3, # n_derivatives + 1
+    "warmup_steps": 3, # prefer: >= n_derivatives + 1
     "residual_diff_threshold": 0.12,
 }
 ```
@@ -304,12 +306,23 @@ cache_options = {
 
 <div id="cfg"></div>
 
-CacheDiT supports caching for CFG (classifier-free guidance)
+CacheDiT supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `do_separate_classifier_free_guidance` param to **False (default)**. Otherwise, set it to True. For examples:
 
 ```python
 cache_options = {
+    # CFG: classifier free guidance or not
+    # For model that fused CFG and non-CFG into single forward step,
+    # should set do_separate_classifier_free_guidance as False.
+    # For example, set it as True for Wan 2.1 and set it as False
+    # for FLUX.1, HunyuanVideo, CogVideoX, Mochi.
     "do_separate_classifier_free_guidance": True, # Wan 2.1
+    # Compute cfg forward first or not, default False, namely,
+    # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
     "cfg_compute_first": False,
+    # Compute spearate diff values for CFG and non-CFG step,
+    # default True. If False, we will use the computed diff from
+    # current non-CFG transformer step for current CFG step.
+    "cfg_diff_compute_separate": True,
 }
 ```
 
{cache_dit-0.2.4 → cache_dit-0.2.5}/README.md

@@ -9,7 +9,7 @@
 <img src=https://img.shields.io/badge/PyPI-pass-brightgreen.svg >
 <img src=https://static.pepy.tech/badge/cache-dit >
 <img src=https://img.shields.io/badge/Python-3.10|3.11|3.12-9cf.svg >
-<img src=https://img.shields.io/badge/Release-v0.2
+<img src=https://img.shields.io/badge/Release-v0.2-brightgreen.svg >
 </div>
 <p align="center">
 DeepCache is for UNet not DiT. Most DiT cache speedups are complex and not training-free. CacheDiT offers <br>a set of training-free cache accelerators for DiT: <b>🔥<a href="#dbcache">DBCache</a>, <a href="#dbprune">DBPrune</a>, <a href="#taylorseer">TaylorSeer</a>, <a href="#fbcache">FBCache</a></b>, etc🔥
@@ -134,7 +134,7 @@ The **CacheDiT** codebase is adapted from [FBCache](https://github.com/chengzeyi
 You can install the stable release of `cache-dit` from PyPI:
 
 ```bash
-pip3 install cache-dit
+pip3 install -U cache-dit
 ```
 Or you can install the latest develop version from GitHub:
 
@@ -146,11 +146,13 @@ pip3 install git+https://github.com/vipshop/cache-dit.git
 
 <div id="supported"></div>
 
-- [🚀FLUX.1](https://github.com/vipshop/cache-dit/raw/main/examples)
-- [🚀
+- [🚀FLUX.1-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
+- [🚀FLUX.1-Fill-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
+- [🚀mochi-1-preview](https://github.com/vipshop/cache-dit/raw/main/examples)
 - [🚀CogVideoX](https://github.com/vipshop/cache-dit/raw/main/examples)
 - [🚀CogVideoX1.5](https://github.com/vipshop/cache-dit/raw/main/examples)
-- [🚀Wan2.1](https://github.com/vipshop/cache-dit/raw/main/examples)
+- [🚀Wan2.1-T2V](https://github.com/vipshop/cache-dit/raw/main/examples)
+- [🚀Wan2.1-FLF2V](https://github.com/vipshop/cache-dit/raw/main/examples)
 - [🚀HunyuanVideo](https://github.com/vipshop/cache-dit/raw/main/examples)
 
 
@@ -246,7 +248,7 @@ cache_options = {
     "taylorseer_kwargs": {
         "n_derivatives": 2, # default is 2.
     },
-    "warmup_steps": 3, # n_derivatives + 1
+    "warmup_steps": 3, # prefer: >= n_derivatives + 1
     "residual_diff_threshold": 0.12,
 }
 ```
@@ -269,12 +271,23 @@ cache_options = {
 
 <div id="cfg"></div>
 
-CacheDiT supports caching for CFG (classifier-free guidance)
+CacheDiT supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `do_separate_classifier_free_guidance` param to **False (default)**. Otherwise, set it to True. For examples:
 
 ```python
 cache_options = {
+    # CFG: classifier free guidance or not
+    # For model that fused CFG and non-CFG into single forward step,
+    # should set do_separate_classifier_free_guidance as False.
+    # For example, set it as True for Wan 2.1 and set it as False
+    # for FLUX.1, HunyuanVideo, CogVideoX, Mochi.
     "do_separate_classifier_free_guidance": True, # Wan 2.1
+    # Compute cfg forward first or not, default False, namely,
+    # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
     "cfg_compute_first": False,
+    # Compute spearate diff values for CFG and non-CFG step,
+    # default True. If False, we will use the computed diff from
+    # current non-CFG transformer step for current CFG step.
+    "cfg_diff_compute_separate": True,
 }
 ```
 
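For orientation, the README hunks above document the TaylorSeer warmup rule and the new CFG flags in separate snippets; the sketch below shows how they might be combined into a single cache_options dict and applied with apply_cache_on_pipe. This is not part of this release's diff: the option names and values are taken from the hunks above, while the pipeline loader and model id are illustrative assumptions.

```python
import torch
from diffusers import DiffusionPipeline

from cache_dit.cache_factory import CacheType, apply_cache_on_pipe

# Illustrative pipeline; any DiT pipeline from the supported list above is used the same way.
# The model id here is an assumption -- substitute your own checkpoint.
pipe = DiffusionPipeline.from_pretrained(
    "Wan-AI/Wan2.1-T2V-1.3B-Diffusers", torch_dtype=torch.bfloat16
).to("cuda")

cache_options = {
    "cache_type": CacheType.DBCache,
    # TaylorSeer: keep warmup_steps >= n_derivatives + 1, per the README note above.
    "enable_taylorseer": True,
    "taylorseer_kwargs": {"n_derivatives": 2},
    "warmup_steps": 3,
    "residual_diff_threshold": 0.12,
    # CFG flags: True for pipelines that run CFG as a separate forward pass (e.g. Wan 2.1);
    # leave the default False for pipelines that fuse CFG and non-CFG into one step
    # (FLUX.1, HunyuanVideo, CogVideoX, Mochi).
    "do_separate_classifier_free_guidance": True,
    "cfg_compute_first": False,
    "cfg_diff_compute_separate": True,
}
apply_cache_on_pipe(pipe, **cache_options)
```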
{cache_dit-0.2.4 → cache_dit-0.2.5}/examples/README.md

@@ -32,7 +32,7 @@ python3 run_cogvideox.py --cache --Fn 8 --Bn 8
 python3 run_cogvideox.py --cache --Fn 8 --Bn 0 --taylorseer
 ```
 
-- Wan2.1
+- Wan2.1 T2V
 
 ```bash
 python3 run_wan.py # baseline
@@ -40,7 +40,15 @@ python3 run_wan.py --cache --Fn 8 --Bn 8
 python3 run_wan.py --cache --Fn 8 --Bn 0 --taylorseer
 ```
 
--
+- Wan2.1 FLF2V
+
+```bash
+python3 run_wan_flf2v.py # baseline
+python3 run_wan_flf2v.py --cache --Fn 8 --Bn 8
+python3 run_wan_flf2v.py --cache --Fn 8 --Bn 0 --taylorseer
+```
+
+- mochi-1-preview
 
 ```bash
 python3 run_mochi.py # baseline
cache_dit-0.2.5/examples/data/flf2v_input_first_frame.png
Binary file

cache_dit-0.2.5/examples/data/flf2v_input_last_frame.png
Binary file
{cache_dit-0.2.4 → cache_dit-0.2.5}/examples/run_cogvideox.py

@@ -70,7 +70,7 @@ if args.cache:
         "enable_taylorseer": args.taylorseer,
         "enable_encoder_taylorseer": args.taylorseer,
         # Taylorseer cache type cache be hidden_states or residual
-        "taylorseer_cache_type": "
+        "taylorseer_cache_type": "hidden_states",
         "taylorseer_kwargs": {
             "n_derivatives": args.taylorseer_order,
         },
{cache_dit-0.2.4 → cache_dit-0.2.5}/examples/run_wan.py

@@ -63,7 +63,13 @@ if args.cache:
         # For model that fused CFG and non-CFG into single forward step,
         # should set do_separate_classifier_free_guidance as False.
         "do_separate_classifier_free_guidance": True,
+        # Compute cfg forward first or not, default False, namely,
+        # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
         "cfg_compute_first": False,
+        # Compute spearate diff values for CFG and non-CFG step,
+        # default True. If False, we will use the computed diff from
+        # current non-CFG transformer step for current CFG step.
+        "cfg_diff_compute_separate": True,
         "enable_taylorseer": args.taylorseer,
         "enable_encoder_taylorseer": args.taylorseer,
         # Taylorseer cache type cache be hidden_states or residual
@@ -89,12 +95,12 @@ pipe.enable_model_cpu_offload()
 
 # Wan currently requires installing diffusers from source
 assert isinstance(pipe.vae, AutoencoderKLWan) # enable type check for IDE
-if diffusers.__version__ >= "0.34.0
+if diffusers.__version__ >= "0.34.0":
     pipe.vae.enable_tiling()
     pipe.vae.enable_slicing()
 else:
     print(
-        "Wan pipeline requires diffusers version >= 0.34.0
+        "Wan pipeline requires diffusers version >= 0.34.0 "
         "for vae tiling and slicing, please install diffusers "
         "from source."
     )
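One note on the version check in the hunk above (the same pattern appears in the new run_wan_flf2v.py below): comparing diffusers.__version__ against "0.34.0" as plain strings happens to work for these releases, but lexicographic ordering is not a reliable version ordering in general (for instance, "0.9.0" compares greater than "0.34.0" as a string). A small sketch of a more robust check, assuming the packaging library is available; this is a suggestion, not part of the package:

```python
import diffusers
from packaging.version import Version  # assumes the `packaging` library is installed

# Semantic version comparison instead of lexicographic string comparison.
supports_vae_tiling = Version(diffusers.__version__) >= Version("0.34.0")
if not supports_vae_tiling:
    print(
        "Wan VAE tiling/slicing needs diffusers >= 0.34.0; "
        "please install diffusers from source."
    )
```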
cache_dit-0.2.5/examples/run_wan_flf2v.py (new file)

@@ -0,0 +1,191 @@
+import os
+import time
+import torch
+import diffusers
+import argparse
+import numpy as np
+import torchvision.transforms.functional as TF
+from diffusers import AutoencoderKLWan, WanImageToVideoPipeline
+from diffusers.utils import export_to_video, load_image
+from transformers import CLIPVisionModel
+
+from cache_dit.cache_factory import CacheType, apply_cache_on_pipe
+
+
+def get_args() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser()
+    # General arguments
+    parser.add_argument("--cache", action="store_true", default=False)
+    parser.add_argument("--taylorseer", action="store_true", default=False)
+    parser.add_argument("--taylorseer-order", "--order", type=int, default=2)
+    parser.add_argument("--Fn-compute-blocks", "--Fn", type=int, default=1)
+    parser.add_argument("--Bn-compute-blocks", "--Bn", type=int, default=0)
+    parser.add_argument("--downsample-factor", "--df", type=int, default=4)
+    parser.add_argument("--rdt", type=float, default=0.08)
+    parser.add_argument("--warmup-steps", type=int, default=0)
+    return parser.parse_args()
+
+
+def aspect_ratio_resize(image, pipe, max_area=720 * 1280):
+    aspect_ratio = image.height / image.width
+    mod_value = (
+        pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
+    )
+    height = round(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value
+    width = round(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value
+    image = image.resize((width, height))
+    return image, height, width
+
+
+def center_crop_resize(image, height, width):
+    # Calculate resize ratio to match first frame dimensions
+    resize_ratio = max(width / image.width, height / image.height)
+
+    # Resize the image
+    width = round(image.width * resize_ratio)
+    height = round(image.height * resize_ratio)
+    size = [width, height]
+    image = TF.center_crop(image, size)
+
+    return image, height, width
+
+
+def prepare_pipeline(
+    pipe: WanImageToVideoPipeline,
+    args: argparse.ArgumentParser,
+):
+    if args.cache:
+        cache_options = {
+            "cache_type": CacheType.DBCache,
+            "warmup_steps": args.warmup_steps,
+            "max_cached_steps": -1, # -1 means no limit
+            "downsample_factor": args.downsample_factor,
+            # Fn=1, Bn=0, means FB Cache, otherwise, Dual Block Cache
+            "Fn_compute_blocks": args.Fn_compute_blocks, # Fn, F8, etc.
+            "Bn_compute_blocks": args.Bn_compute_blocks, # Bn, B16, etc.
+            "residual_diff_threshold": args.rdt,
+            # releative token diff threshold, default is 0.0
+            "important_condition_threshold": 0.00,
+            # CFG: classifier free guidance or not
+            # For model that fused CFG and non-CFG into single forward step,
+            # should set do_separate_classifier_free_guidance as False.
+            "do_separate_classifier_free_guidance": True,
+            # Compute cfg forward first or not, default False, namely,
+            # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
+            "cfg_compute_first": False,
+            # Compute spearate diff values for CFG and non-CFG step,
+            # default True. If False, we will use the computed diff from
+            # current non-CFG transformer step for current CFG step.
+            "cfg_diff_compute_separate": True,
+            "enable_taylorseer": args.taylorseer,
+            "enable_encoder_taylorseer": args.taylorseer,
+            # Taylorseer cache type cache be hidden_states or residual
+            "taylorseer_cache_type": "residual",
+            "taylorseer_kwargs": {
+                "n_derivatives": args.taylorseer_order,
+            },
+        }
+        cache_type_str = "DBCACHE"
+        cache_type_str = (
+            f"{cache_type_str}_F{args.Fn_compute_blocks}"
+            f"B{args.Bn_compute_blocks}W{args.warmup_steps}"
+            f"T{int(args.taylorseer)}O{args.taylorseer_order}"
+        )
+        print(f"cache options:\n{cache_options}")
+
+        apply_cache_on_pipe(pipe, **cache_options)
+    else:
+        cache_type_str = "NONE"
+
+    # Enable memory savings
+    pipe.enable_model_cpu_offload()
+
+    # Wan currently requires installing diffusers from source
+    assert isinstance(pipe.vae, AutoencoderKLWan) # enable type check for IDE
+    if diffusers.__version__ >= "0.34.0":
+        pipe.vae.enable_tiling()
+        pipe.vae.enable_slicing()
+    else:
+        print(
+            "Wan pipeline requires diffusers version >= 0.34.0 "
+            "for vae tiling and slicing, please install diffusers "
+            "from source."
+        )
+
+    return cache_type_str, pipe
+
+
+def main():
+    args = get_args()
+    print(args)
+
+    model_id = os.environ.get(
+        "WAN_FLF2V_DIR",
+        "Wan-AI/Wan2.1-FLF2V-14B-720P-Diffusers",
+    )
+    image_encoder = CLIPVisionModel.from_pretrained(
+        model_id, subfolder="image_encoder", torch_dtype=torch.float32
+    )
+    vae = AutoencoderKLWan.from_pretrained(
+        model_id, subfolder="vae", torch_dtype=torch.float32
+    )
+    pipe = WanImageToVideoPipeline.from_pretrained(
+        model_id,
+        vae=vae,
+        image_encoder=image_encoder,
+        torch_dtype=torch.bfloat16,
+    )
+    pipe.to("cuda")
+
+    cache_type_str, pipe = prepare_pipeline(pipe, args)
+
+    first_frame = load_image("data/flf2v_input_first_frame.png")
+    last_frame = load_image("data/flf2v_input_last_frame.png")
+
+    first_frame, height, width = aspect_ratio_resize(first_frame, pipe)
+    if last_frame.size != first_frame.size:
+        last_frame, _, _ = center_crop_resize(last_frame, height, width)
+
+    prompt = (
+        "CG animation style, a small blue bird takes off from the ground, flapping its wings. "
+        + "The bird's feathers are delicate, with a unique pattern on its chest. The background shows "
+        + "a blue sky with white clouds under bright sunshine. The camera follows the bird upward, "
+        + "capturing its flight and the vastness of the sky from a close-up, low-angle perspective."
+    )
+
+    start = time.time()
+    output = pipe(
+        image=first_frame,
+        last_image=last_frame,
+        prompt=prompt,
+        height=height,
+        width=width,
+        guidance_scale=5.5,
+        num_frames=49,
+        num_inference_steps=35,
+        generator=torch.Generator("cpu").manual_seed(0),
+    ).frames[0]
+    end = time.time()
+
+    if hasattr(pipe.transformer, "_cached_steps"):
+        cached_steps = pipe.transformer._cached_steps
+        residual_diffs = pipe.transformer._residual_diffs
+        print(f"Cache Steps: {len(cached_steps)}, {cached_steps}")
+        print(f"Residual Diffs: {len(residual_diffs)}, {residual_diffs}")
+    if hasattr(pipe.transformer, "_cfg_cached_steps"):
+        cfg_cached_steps = pipe.transformer._cfg_cached_steps
+        cfg_residual_diffs = pipe.transformer._cfg_residual_diffs
+        print(f"CFG Cache Steps: {len(cfg_cached_steps)}, {cfg_cached_steps} ")
+        print(
+            f"CFG Residual Diffs: {len(cfg_residual_diffs)}, {cfg_residual_diffs}"
+        )
+
+    time_cost = end - start
+    save_path = f"wan.flf2v.{cache_type_str}.mp4"
+    print(f"Time cost: {time_cost:.2f}s")
+    print(f"Saving video to {save_path}")
+    export_to_video(output, save_path, fps=16)
+
+
+if __name__ == "__main__":
+    main()