cache-dit 0.2.5__tar.gz → 0.2.28__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cache-dit might be problematic. Click here for more details.
- {cache_dit-0.2.5 → cache_dit-0.2.28}/.github/workflows/issue.yml +2 -2
- {cache_dit-0.2.5 → cache_dit-0.2.28}/CONTRIBUTE.md +4 -4
- cache_dit-0.2.28/MANIFEST.in +5 -0
- cache_dit-0.2.28/PKG-INFO +478 -0
- cache_dit-0.2.28/README.md +436 -0
- cache_dit-0.2.28/bench/bench.py +225 -0
- cache_dit-0.2.28/bench/cache_config.yaml +12 -0
- cache_dit-0.2.28/docs/BlockAdapter.md +57 -0
- cache_dit-0.2.28/docs/DBCache.md +161 -0
- cache_dit-0.2.28/examples/README.md +90 -0
- cache_dit-0.2.28/examples/adapter/run_flux_adapter.py +94 -0
- cache_dit-0.2.28/examples/adapter/run_qwen_image_adapter.py +103 -0
- cache_dit-0.2.28/examples/pipeline/run_cogvideox.py +75 -0
- cache_dit-0.2.28/examples/pipeline/run_flux.py +45 -0
- cache_dit-0.2.28/examples/pipeline/run_flux_fill.py +50 -0
- cache_dit-0.2.28/examples/pipeline/run_flux_kontext.py +49 -0
- cache_dit-0.2.28/examples/pipeline/run_hunyuan_video.py +77 -0
- cache_dit-0.2.28/examples/pipeline/run_mochi.py +53 -0
- cache_dit-0.2.28/examples/pipeline/run_qwen_image.py +127 -0
- cache_dit-0.2.28/examples/pipeline/run_qwen_image_edit.py +86 -0
- cache_dit-0.2.28/examples/pipeline/run_wan.py +87 -0
- cache_dit-0.2.28/examples/pipeline/run_wan_2.2.py +163 -0
- cache_dit-0.2.28/examples/pipeline/run_wan_flf2v.py +138 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/examples/requirements.txt +2 -1
- cache_dit-0.2.28/examples/utils.py +42 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/pyproject.toml +6 -2
- cache_dit-0.2.28/requirements.txt +9 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/setup.py +8 -1
- cache_dit-0.2.28/src/cache_dit/__init__.py +32 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/src/cache_dit/_version.py +16 -3
- cache_dit-0.2.28/src/cache_dit/cache_factory/.gitignore +2 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/__init__.py +23 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/block_adapters/__init__.py +555 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/block_adapters/block_adapters.py +538 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/block_adapters/block_registers.py +77 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/cache_adapters.py +394 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/cache_blocks/__init__.py +69 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/cache_blocks/pattern_0_1_2.py +16 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py +274 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/cache_blocks/pattern_base.py +522 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/cache_blocks/utils.py +25 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/cache_contexts/__init__.py +5 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/cache_contexts/cache_context.py +327 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/cache_contexts/cache_manager.py +833 -0
- {cache_dit-0.2.5/src/cache_dit/cache_factory → cache_dit-0.2.28/src/cache_dit/cache_factory/cache_contexts}/taylorseer.py +5 -6
- cache_dit-0.2.28/src/cache_dit/cache_factory/cache_interface.py +161 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/cache_types.py +39 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/forward_pattern.py +84 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/patch_functors/__init__.py +5 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/patch_functors/functor_base.py +18 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/patch_functors/functor_chroma.py +276 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/patch_functors/functor_flux.py +267 -0
- cache_dit-0.2.28/src/cache_dit/cache_factory/utils.py +57 -0
- cache_dit-0.2.28/src/cache_dit/compile/__init__.py +1 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/src/cache_dit/compile/utils.py +16 -10
- cache_dit-0.2.28/src/cache_dit/metrics/__init__.py +14 -0
- cache_dit-0.2.28/src/cache_dit/metrics/config.py +34 -0
- cache_dit-0.2.28/src/cache_dit/metrics/fid.py +498 -0
- cache_dit-0.2.28/src/cache_dit/metrics/inception.py +353 -0
- cache_dit-0.2.28/src/cache_dit/metrics/lpips.py +43 -0
- cache_dit-0.2.28/src/cache_dit/metrics/metrics.py +882 -0
- cache_dit-0.2.28/src/cache_dit/quantize/__init__.py +1 -0
- cache_dit-0.2.28/src/cache_dit/quantize/quantize_ao.py +197 -0
- cache_dit-0.2.28/src/cache_dit/quantize/quantize_interface.py +46 -0
- cache_dit-0.2.28/src/cache_dit/utils.py +196 -0
- cache_dit-0.2.28/src/cache_dit.egg-info/PKG-INFO +478 -0
- cache_dit-0.2.28/src/cache_dit.egg-info/SOURCES.txt +91 -0
- cache_dit-0.2.28/src/cache_dit.egg-info/entry_points.txt +2 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/src/cache_dit.egg-info/requires.txt +10 -3
- cache_dit-0.2.28/tests/README.md +63 -0
- cache_dit-0.2.28/tests/cache_config.yaml +12 -0
- cache_dit-0.2.28/tests/data/.gitignore +2 -0
- cache_dit-0.2.28/tests/test_cache_loader.py +7 -0
- cache_dit-0.2.28/tests/test_forward_pattern.py +401 -0
- cache_dit-0.2.28/tests/test_metrics.py +84 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/tests/test_taylorseer.py +3 -3
- cache_dit-0.2.5/MANIFEST.in +0 -6
- cache_dit-0.2.5/PKG-INFO +0 -513
- cache_dit-0.2.5/README.md +0 -478
- cache_dit-0.2.5/assets/DBCACHE_F12B12S4_R0.2_S16.png +0 -0
- cache_dit-0.2.5/assets/DBCACHE_F12B16S4_R0.08_S6.png +0 -0
- cache_dit-0.2.5/assets/DBCACHE_F16B16S2_R0.2_S14.png +0 -0
- cache_dit-0.2.5/assets/DBCACHE_F16B16S4_R0.2_S13.png +0 -0
- cache_dit-0.2.5/assets/DBCACHE_F1B0S1_R0.08_S11.png +0 -0
- cache_dit-0.2.5/assets/DBCACHE_F1B0S1_R0.2_S19.png +0 -0
- cache_dit-0.2.5/assets/DBCACHE_F8B0S2_R0.12_S12.png +0 -0
- cache_dit-0.2.5/assets/DBCACHE_F8B16S1_R0.2_S18.png +0 -0
- cache_dit-0.2.5/assets/DBCACHE_F8B8S1_R0.08_S9.png +0 -0
- cache_dit-0.2.5/assets/DBCACHE_F8B8S1_R0.12_S12.png +0 -0
- cache_dit-0.2.5/assets/DBCACHE_F8B8S1_R0.15_S15.png +0 -0
- cache_dit-0.2.5/assets/DBPRUNE_F1B0_R0.03_P24.0_T19.43s.png +0 -0
- cache_dit-0.2.5/assets/DBPRUNE_F1B0_R0.04_P34.6_T16.82s.png +0 -0
- cache_dit-0.2.5/assets/DBPRUNE_F1B0_R0.05_P38.3_T15.95s.png +0 -0
- cache_dit-0.2.5/assets/DBPRUNE_F1B0_R0.06_P45.2_T14.24s.png +0 -0
- cache_dit-0.2.5/assets/DBPRUNE_F1B0_R0.07_P52.3_T12.53s.png +0 -0
- cache_dit-0.2.5/assets/DBPRUNE_F1B0_R0.08_P52.4_T12.52s.png +0 -0
- cache_dit-0.2.5/assets/DBPRUNE_F1B0_R0.09_P59.2_T10.81s.png +0 -0
- cache_dit-0.2.5/assets/DBPRUNE_F1B0_R0.12_P59.5_T10.76s.png +0 -0
- cache_dit-0.2.5/assets/DBPRUNE_F1B0_R0.12_P63.0_T9.90s.png +0 -0
- cache_dit-0.2.5/assets/DBPRUNE_F1B0_R0.1_P62.8_T9.95s.png +0 -0
- cache_dit-0.2.5/assets/DBPRUNE_F1B0_R0.2_P59.5_T10.66s.png +0 -0
- cache_dit-0.2.5/assets/DBPRUNE_F1B0_R0.3_P63.1_T9.79s.png +0 -0
- cache_dit-0.2.5/assets/NONE_R0.08_S0.png +0 -0
- cache_dit-0.2.5/assets/TEXTURE_DBCACHE_F1B0_R0.08.png +0 -0
- cache_dit-0.2.5/assets/TEXTURE_DBCACHE_F8B12_R0.12.png +0 -0
- cache_dit-0.2.5/assets/TEXTURE_DBCACHE_F8B16_R0.2.png +0 -0
- cache_dit-0.2.5/assets/TEXTURE_DBCACHE_F8B20_R0.2.png +0 -0
- cache_dit-0.2.5/assets/TEXTURE_DBCACHE_F8B8_R0.12.png +0 -0
- cache_dit-0.2.5/assets/TEXTURE_NONE_R0.08.png +0 -0
- cache_dit-0.2.5/assets/U0_C0_DBCACHE_F1B0S1W0T0ET0_R0.12_S14_T12.85s.png +0 -0
- cache_dit-0.2.5/assets/U0_C0_DBCACHE_F1B0S1W0T0ET0_R0.15_S17_T10.27s.png +0 -0
- cache_dit-0.2.5/assets/U0_C0_DBCACHE_F1B0S1W0T1ET1_R0.12_S14_T12.86s.png +0 -0
- cache_dit-0.2.5/assets/U0_C0_DBCACHE_F1B0S1W0T1ET1_R0.15_S17_T10.28s.png +0 -0
- cache_dit-0.2.5/assets/U0_C1_DBCACHE_F1B0S1W0T1ET1_R0.15_S17_T8.48s.png +0 -0
- cache_dit-0.2.5/assets/U0_C1_DBPRUNE_F1B0_R0.03_P24.0_T16.25s.png +0 -0
- cache_dit-0.2.5/assets/U0_C1_DBPRUNE_F1B0_R0.045_P38.2_T13.41s.png +0 -0
- cache_dit-0.2.5/assets/U0_C1_DBPRUNE_F1B0_R0.04_P34.6_T14.12s.png +0 -0
- cache_dit-0.2.5/assets/U0_C1_DBPRUNE_F1B0_R0.055_P45.1_T12.00s.png +0 -0
- cache_dit-0.2.5/assets/U0_C1_DBPRUNE_F1B0_R0.05_P41.6_T12.70s.png +0 -0
- cache_dit-0.2.5/assets/U0_C1_DBPRUNE_F1B0_R0.2_P59.5_T8.86s.png +0 -0
- cache_dit-0.2.5/assets/U0_C1_DBPRUNE_F8B8_R0.08_P23.1_T16.14s.png +0 -0
- cache_dit-0.2.5/assets/U0_C1_NONE_R0.08_S0_T20.43s.png +0 -0
- cache_dit-0.2.5/assets/U4_C1_DBPRUNE_F1B0_R0.03_P27.3_T6.62s.png +0 -0
- cache_dit-0.2.5/assets/U4_C1_DBPRUNE_F1B0_R0.03_P27.3_T6.63s.png +0 -0
- cache_dit-0.2.5/assets/U4_C1_DBPRUNE_F1B0_R0.045_P38.2_T5.81s.png +0 -0
- cache_dit-0.2.5/assets/U4_C1_DBPRUNE_F1B0_R0.045_P38.2_T5.82s.png +0 -0
- cache_dit-0.2.5/assets/U4_C1_DBPRUNE_F1B0_R0.04_P34.6_T6.06s.png +0 -0
- cache_dit-0.2.5/assets/U4_C1_DBPRUNE_F1B0_R0.04_P34.6_T6.07s.png +0 -0
- cache_dit-0.2.5/assets/U4_C1_DBPRUNE_F1B0_R0.04_P34.6_T6.08s.png +0 -0
- cache_dit-0.2.5/assets/U4_C1_DBPRUNE_F1B0_R0.055_P45.1_T5.27s.png +0 -0
- cache_dit-0.2.5/assets/U4_C1_DBPRUNE_F1B0_R0.055_P45.1_T5.28s.png +0 -0
- cache_dit-0.2.5/assets/U4_C1_DBPRUNE_F1B0_R0.2_P59.5_T3.95s.png +0 -0
- cache_dit-0.2.5/assets/U4_C1_DBPRUNE_F1B0_R0.2_P59.5_T3.96s.png +0 -0
- cache_dit-0.2.5/assets/U4_C1_NONE_R0.08_S0_T7.78s.png +0 -0
- cache_dit-0.2.5/assets/U4_C1_NONE_R0.08_S0_T7.79s.png +0 -0
- cache_dit-0.2.5/assets/cache-dit-v1.png +0 -0
- cache_dit-0.2.5/assets/dbcache-fnbn-v1.png +0 -0
- cache_dit-0.2.5/assets/dbcache-v1.png +0 -0
- cache_dit-0.2.5/assets/dbprune-v1.png +0 -0
- cache_dit-0.2.5/assets/fbcache-v1.png +0 -0
- cache_dit-0.2.5/bench/bench.py +0 -300
- cache_dit-0.2.5/examples/README.md +0 -65
- cache_dit-0.2.5/examples/data/cup.png +0 -0
- cache_dit-0.2.5/examples/data/cup_mask.png +0 -0
- cache_dit-0.2.5/examples/data/flf2v_input_first_frame.png +0 -0
- cache_dit-0.2.5/examples/data/flf2v_input_last_frame.png +0 -0
- cache_dit-0.2.5/examples/run_cogvideox.py +0 -142
- cache_dit-0.2.5/examples/run_flux.py +0 -96
- cache_dit-0.2.5/examples/run_flux_fill.py +0 -100
- cache_dit-0.2.5/examples/run_hunyuan_video.py +0 -145
- cache_dit-0.2.5/examples/run_mochi.py +0 -101
- cache_dit-0.2.5/examples/run_wan.py +0 -140
- cache_dit-0.2.5/examples/run_wan_flf2v.py +0 -191
- cache_dit-0.2.5/requirements.txt +0 -6
- cache_dit-0.2.5/src/cache_dit/cache_factory/__init__.py +0 -168
- cache_dit-0.2.5/src/cache_dit/cache_factory/dual_block_cache/cache_context.py +0 -1893
- cache_dit-0.2.5/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/__init__.py +0 -51
- cache_dit-0.2.5/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/cogvideox.py +0 -87
- cache_dit-0.2.5/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/flux.py +0 -98
- cache_dit-0.2.5/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/hunyuan_video.py +0 -294
- cache_dit-0.2.5/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/mochi.py +0 -87
- cache_dit-0.2.5/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/wan.py +0 -97
- cache_dit-0.2.5/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/__init__.py +0 -51
- cache_dit-0.2.5/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/cogvideox.py +0 -87
- cache_dit-0.2.5/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/flux.py +0 -98
- cache_dit-0.2.5/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/hunyuan_video.py +0 -294
- cache_dit-0.2.5/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/mochi.py +0 -87
- cache_dit-0.2.5/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/wan.py +0 -97
- cache_dit-0.2.5/src/cache_dit/cache_factory/dynamic_block_prune/prune_context.py +0 -987
- cache_dit-0.2.5/src/cache_dit/cache_factory/first_block_cache/__init__.py +0 -0
- cache_dit-0.2.5/src/cache_dit/cache_factory/first_block_cache/cache_context.py +0 -719
- cache_dit-0.2.5/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/__init__.py +0 -57
- cache_dit-0.2.5/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/cogvideox.py +0 -89
- cache_dit-0.2.5/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/flux.py +0 -100
- cache_dit-0.2.5/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/hunyuan_video.py +0 -295
- cache_dit-0.2.5/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/mochi.py +0 -89
- cache_dit-0.2.5/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/wan.py +0 -98
- cache_dit-0.2.5/src/cache_dit/cache_factory/utils.py +0 -0
- cache_dit-0.2.5/src/cache_dit/compile/__init__.py +0 -1
- cache_dit-0.2.5/src/cache_dit/custom_ops/__init__.py +0 -0
- cache_dit-0.2.5/src/cache_dit/primitives.py +0 -152
- cache_dit-0.2.5/src/cache_dit.egg-info/PKG-INFO +0 -513
- cache_dit-0.2.5/src/cache_dit.egg-info/SOURCES.txt +0 -136
- cache_dit-0.2.5/tests/README.md +0 -9
- cache_dit-0.2.5/tests/taylorseer_approximation_order_2.png +0 -0
- cache_dit-0.2.5/tests/taylorseer_approximation_order_4.png +0 -0
- cache_dit-0.2.5/tests/taylorseer_approximation_test.png +0 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/.gitignore +0 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/.pre-commit-config.yaml +0 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/LICENSE +0 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/bench/.gitignore +0 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/docs/.gitignore +0 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/examples/.gitignore +0 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/pytest.ini +0 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/setup.cfg +0 -0
- {cache_dit-0.2.5/src/cache_dit → cache_dit-0.2.28/src/cache_dit/custom_ops}/__init__.py +0 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/src/cache_dit/custom_ops/triton_taylorseer.py +0 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/src/cache_dit/logger.py +0 -0
- /cache_dit-0.2.5/src/cache_dit/cache_factory/dual_block_cache/__init__.py → /cache_dit-0.2.28/src/cache_dit/quantize/quantize_svdq.py +0 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/src/cache_dit.egg-info/dependency_links.txt +0 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/src/cache_dit.egg-info/top_level.txt +0 -0
- {cache_dit-0.2.5 → cache_dit-0.2.28}/tests/.gitignore +0 -0
- /cache_dit-0.2.5/src/cache_dit/cache_factory/dynamic_block_prune/__init__.py → /cache_dit-0.2.28/tests/test_patch_functor.py +0 -0
|
@@ -12,8 +12,8 @@ jobs:
|
|
|
12
12
|
steps:
|
|
13
13
|
- uses: actions/stale@v9.0.0
|
|
14
14
|
with:
|
|
15
|
-
days-before-issue-stale:
|
|
16
|
-
days-before-issue-close:
|
|
15
|
+
days-before-issue-stale: 360
|
|
16
|
+
days-before-issue-close: 360
|
|
17
17
|
stale-issue-label: "stale"
|
|
18
18
|
stale-issue-message: "This issue is stale because it has been open for 360 days with no activity."
|
|
19
19
|
close-issue-message: "This issue was closed because it has been inactive for 360 days since being marked as stale."
|
|
@@ -5,9 +5,9 @@
|
|
|
5
5
|
Before submitting code, configure pre-commit, for example:
|
|
6
6
|
|
|
7
7
|
```bash
|
|
8
|
-
# fork vipshop/
|
|
9
|
-
git clone git@github.com:your-github-page/your-fork-
|
|
10
|
-
cd your-fork-
|
|
8
|
+
# fork vipshop/cache-dit to your own github page, then:
|
|
9
|
+
git clone git@github.com:your-github-page/your-fork-cache-dit.git
|
|
10
|
+
cd your-fork-cache-dit && git checkout -b dev
|
|
11
11
|
# update submodule
|
|
12
12
|
git submodule update --init --recursive --force
|
|
13
13
|
# install pre-commit
|
|
@@ -24,5 +24,5 @@ pre-commit run --all-files
|
|
|
24
24
|
git add .
|
|
25
25
|
git commit -m "support xxx-cache method"
|
|
26
26
|
git push
|
|
27
|
-
# then, open a PR from your personal branch to
|
|
27
|
+
# then, open a PR from your personal branch to cache-dit:main
|
|
28
28
|
```
|
|
@@ -0,0 +1,478 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cache_dit
|
|
3
|
+
Version: 0.2.28
|
|
4
|
+
Summary: 🤗 A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers
|
|
5
|
+
Author: DefTruth, vipshop.com, etc.
|
|
6
|
+
Maintainer: DefTruth, vipshop.com, etc
|
|
7
|
+
Project-URL: Repository, https://github.com/vipshop/cache-dit.git
|
|
8
|
+
Project-URL: Homepage, https://github.com/vipshop/cache-dit.git
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: packaging
|
|
13
|
+
Requires-Dist: pyyaml
|
|
14
|
+
Requires-Dist: torch>=2.7.1
|
|
15
|
+
Requires-Dist: transformers>=4.55.2
|
|
16
|
+
Requires-Dist: diffusers>=0.35.1
|
|
17
|
+
Requires-Dist: scikit-image
|
|
18
|
+
Requires-Dist: scipy
|
|
19
|
+
Requires-Dist: lpips==0.1.4
|
|
20
|
+
Requires-Dist: torchao>=0.12.0
|
|
21
|
+
Provides-Extra: all
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pre-commit; extra == "dev"
|
|
24
|
+
Requires-Dist: pytest<8.0.0,>=7.0.0; extra == "dev"
|
|
25
|
+
Requires-Dist: pytest-html; extra == "dev"
|
|
26
|
+
Requires-Dist: expecttest; extra == "dev"
|
|
27
|
+
Requires-Dist: hypothesis; extra == "dev"
|
|
28
|
+
Requires-Dist: transformers; extra == "dev"
|
|
29
|
+
Requires-Dist: diffusers; extra == "dev"
|
|
30
|
+
Requires-Dist: accelerate; extra == "dev"
|
|
31
|
+
Requires-Dist: peft; extra == "dev"
|
|
32
|
+
Requires-Dist: protobuf; extra == "dev"
|
|
33
|
+
Requires-Dist: sentencepiece; extra == "dev"
|
|
34
|
+
Requires-Dist: opencv-python-headless; extra == "dev"
|
|
35
|
+
Requires-Dist: ftfy; extra == "dev"
|
|
36
|
+
Requires-Dist: scikit-image; extra == "dev"
|
|
37
|
+
Requires-Dist: pytorch-fid; extra == "dev"
|
|
38
|
+
Dynamic: license-file
|
|
39
|
+
Dynamic: provides-extra
|
|
40
|
+
Dynamic: requires-dist
|
|
41
|
+
Dynamic: requires-python
|
|
42
|
+
|
|
43
|
+
<div align="center">
|
|
44
|
+
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/cache-dit-logo.png height="120">
|
|
45
|
+
|
|
46
|
+
<p align="center">
|
|
47
|
+
A <b>Unified</b> and Training-free <b>Cache Acceleration</b> Toolbox for <b>Diffusion Transformers</b> <br>
|
|
48
|
+
♥️ <b>Cache Acceleration</b> with <b>One-line</b> Code ~ ♥️
|
|
49
|
+
</p>
|
|
50
|
+
<div align='center'>
|
|
51
|
+
<img src=https://img.shields.io/badge/Language-Python-brightgreen.svg >
|
|
52
|
+
<img src=https://img.shields.io/badge/PRs-welcome-9cf.svg >
|
|
53
|
+
<img src=https://img.shields.io/badge/PyPI-pass-brightgreen.svg >
|
|
54
|
+
<img src=https://static.pepy.tech/badge/cache-dit >
|
|
55
|
+
<img src=https://img.shields.io/badge/Python-3.10|3.11|3.12-9cf.svg >
|
|
56
|
+
<img src=https://img.shields.io/badge/Release-v0.2-brightgreen.svg >
|
|
57
|
+
</div>
|
|
58
|
+
<p align="center">
|
|
59
|
+
🔥<b><a href="#unified">Unified Cache APIs</a> | <a href="#dbcache">DBCache</a> | <a href="#taylorseer">Hybrid TaylorSeer</a> | <a href="#cfg">Hybrid Cache CFG</a></b>🔥
|
|
60
|
+
</p>
|
|
61
|
+
<p align="center">
|
|
62
|
+
🎉Now, <b>cache-dit</b> covers <b>100%</b> Diffusers' <b>DiT-based</b> Pipelines🎉<br>
|
|
63
|
+
🔥<b><a href="#supported">Qwen-Image</a> | <a href="#supported">FLUX.1</a> | <a href="#supported">Wan 2.1/2.2</a> | <a href="#supported"> ... </a> | <a href="#supported">CogVideoX</a></b>🔥
|
|
64
|
+
</p>
|
|
65
|
+
</div>
|
|
66
|
+
|
|
67
|
+
## 🔥News
|
|
68
|
+
|
|
69
|
+
- [2025-09-03] 🎉[**Wan2.2-MoE**](https://github.com/Wan-Video) **2.4x⚡️** speedup! Please refer to [run_wan_2.2.py](./examples/pipeline/run_wan_2.2.py) as an example.
|
|
70
|
+
- [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x⚡️** speedup! Check the example: [run_qwen_image_edit.py](./examples/pipeline/run_qwen_image_edit.py).
|
|
71
|
+
- [2025-08-12] 🎉First caching mechanism in [QwenLM/Qwen-Image](https://github.com/QwenLM/Qwen-Image) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/QwenLM/Qwen-Image/pull/61).
|
|
72
|
+
- [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x⚡️** speedup! Please refer to [run_qwen_image.py](./examples/pipeline/run_qwen_image.py) as an example.
|
|
73
|
+
|
|
74
|
+
<details>
|
|
75
|
+
<summary> Previous News </summary>
|
|
76
|
+
|
|
77
|
+
- [2025-09-01] 📚[**Hybrid Forward Pattern**](#unified) is supported! Please check [FLUX.1-dev](./examples/adapter/run_flux_adapter.py) as an example.
|
|
78
|
+
- [2025-08-29] 🔥Covers <b>100%</b> Diffusers' <b>DiT-based</b> Pipelines: **[BlockAdapter](#unified) + [Pattern Matching](#unified).**
|
|
79
|
+
- [2025-08-10] 🔥[**FLUX.1-Kontext-dev**](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) is supported! Please refer [run_flux_kontext.py](./examples/pipeline/run_flux_kontext.py) as an example.
|
|
80
|
+
- [2025-07-18] 🎉First caching mechanism in [🤗huggingface/flux-fast](https://github.com/huggingface/flux-fast) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check the [PR](https://github.com/huggingface/flux-fast/pull/13).
|
|
81
|
+
- [2025-07-13] **[🤗flux-faster](https://github.com/xlite-dev/flux-faster)** is released! **3.3x** speedup for FLUX.1 on NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)**.
|
|
82
|
+
|
|
83
|
+
</details>
|
|
84
|
+
|
|
85
|
+
## 📖Contents
|
|
86
|
+
|
|
87
|
+
<div id="contents"></div>
|
|
88
|
+
|
|
89
|
+
- [⚙️Installation](#️installation)
|
|
90
|
+
- [🔥Supported Models](#supported)
|
|
91
|
+
- [🎉Unified Cache APIs](#unified)
|
|
92
|
+
- [📚Forward Pattern Matching](#unified)
|
|
93
|
+
- [🎉Cache with One-line Code](#unified)
|
|
94
|
+
- [🔥Automatic Block Adapter](#unified)
|
|
95
|
+
- [📚Hybrid Forward Pattern](#unified)
|
|
96
|
+
- [🤖Cache Acceleration Stats](#unified)
|
|
97
|
+
- [⚡️Dual Block Cache](#dbcache)
|
|
98
|
+
- [🔥Hybrid TaylorSeer](#taylorseer)
|
|
99
|
+
- [⚡️Hybrid Cache CFG](#cfg)
|
|
100
|
+
- [⚙️Torch Compile](#compile)
|
|
101
|
+
- [🛠Metrics CLI](#metrics)
|
|
102
|
+
|
|
103
|
+
## ⚙️Installation
|
|
104
|
+
|
|
105
|
+
<div id="installation"></div>
|
|
106
|
+
|
|
107
|
+
You can install the stable release of `cache-dit` from PyPI:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
pip3 install -U cache-dit
|
|
111
|
+
```
|
|
112
|
+
Or you can install the latest develop version from GitHub:
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
pip3 install git+https://github.com/vipshop/cache-dit.git
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## 🔥Supported Models
|
|
119
|
+
|
|
120
|
+
<div id="supported"></div>
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
>>> import cache_dit
|
|
124
|
+
>>> cache_dit.supported_pipelines()
|
|
125
|
+
(31, ['Flux*', 'Mochi*', 'CogVideoX*', 'Wan*', 'HunyuanVideo*', 'QwenImage*', 'LTXVideo*',
|
|
126
|
+
'Allegro*', 'CogView3Plus*', 'CogView4*', 'Cosmos*', 'EasyAnimate*', 'SkyReelsV2*', 'SD3*',
|
|
127
|
+
'ConsisID*', 'DiT*', 'Amused*', 'Bria*', 'HunyuanDiT*', 'HunyuanDiTPAG*', 'Lumina*', 'Lumina2*',
|
|
128
|
+
'OmniGen*', 'PixArt*', 'Sana*', 'ShapE*', 'StableAudio*', 'VisualCloze*', 'AuraFlow*',
|
|
129
|
+
'Chroma*', 'HiDream*'])
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
Currently, **cache-dit** library supports almost **Any** Diffusion Transformers (with **Transformer Blocks** that match the specific Input and Output **patterns**). Please check [🎉Unified Cache APIs](#unified) for more details. Here are just some of the tested models listed:
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
- [🚀Qwen-Image-Edit](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
136
|
+
- [🚀Qwen-Image](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
137
|
+
- [🚀FLUX.1-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
138
|
+
- [🚀FLUX.1-Fill-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
139
|
+
- [🚀FLUX.1-Kontext-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
140
|
+
- [🚀CogVideoX](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
141
|
+
- [🚀CogVideoX1.5](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
142
|
+
- [🚀Wan2.2-T2V](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
143
|
+
- [🚀Wan2.1-T2V](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
144
|
+
- [🚀Wan2.1-FLF2V](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
145
|
+
- [🚀HunyuanVideo](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
146
|
+
- [🚀HunyuanDiT](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
147
|
+
|
|
148
|
+
<details>
|
|
149
|
+
<summary> More Pipelines </summary>
|
|
150
|
+
|
|
151
|
+
- [🚀mochi-1-preview](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
152
|
+
- [🚀LTXVideo](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
153
|
+
- [🚀Allegro](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
154
|
+
- [🚀CogView3Plus](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
155
|
+
- [🚀CogView4](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
156
|
+
- [🚀Cosmos](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
157
|
+
- [🚀EasyAnimate](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
158
|
+
- [🚀SkyReelsV2](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
159
|
+
- [🚀SD3](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
160
|
+
- [🚀ConsisID](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
161
|
+
- [🚀DiT](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
162
|
+
- [🚀Amused](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
163
|
+
- [🚀HunyuanDiTPAG](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
164
|
+
- [🚀Lumina](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
165
|
+
- [🚀Lumina2](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
166
|
+
- [🚀OmniGen](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
167
|
+
- [🚀PixArt](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
168
|
+
- [🚀Sana](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
169
|
+
- [🚀StableAudio](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
170
|
+
- [🚀VisualCloze](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
171
|
+
- [🚀AuraFlow](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
172
|
+
- [🚀Chroma](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
173
|
+
- [🚀HiDream](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
174
|
+
|
|
175
|
+
</details>
|
|
176
|
+
|
|
177
|
+
## 🎉Unified Cache APIs
|
|
178
|
+
|
|
179
|
+
<div id="unified"></div>
|
|
180
|
+
|
|
181
|
+
### 📚Forward Pattern Matching
|
|
182
|
+
|
|
183
|
+
Currently, for any **Diffusion** models with **Transformer Blocks** that match the specific **Input/Output patterns**, we can use the **Unified Cache APIs** from **cache-dit**, namely, the `cache_dit.enable_cache(...)` API. The **Unified Cache APIs** are currently in the experimental phase; please stay tuned for updates. The supported patterns are listed as follows:
|
|
184
|
+
|
|
185
|
+

|
|
186
|
+
|
|
187
|
+
### ♥️Cache Acceleration with One-line Code
|
|
188
|
+
|
|
189
|
+
In most cases, you only need to call **one-line** of code, that is `cache_dit.enable_cache(...)`. After this API is called, you just need to call the pipe as normal. The `pipe` param can be **any** Diffusion Pipeline. Please refer to [Qwen-Image](./examples/pipeline/run_qwen_image.py) as an example.
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
import cache_dit
|
|
193
|
+
from diffusers import DiffusionPipeline
|
|
194
|
+
|
|
195
|
+
# Can be any diffusion pipeline
|
|
196
|
+
pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image")
|
|
197
|
+
|
|
198
|
+
# One-line code with default cache options.
|
|
199
|
+
cache_dit.enable_cache(pipe)
|
|
200
|
+
|
|
201
|
+
# Just call the pipe as normal.
|
|
202
|
+
output = pipe(...)
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### 🔥Automatic Block Adapter
|
|
206
|
+
|
|
207
|
+
But in some cases, you may have a **modified** Diffusion Pipeline or Transformer that is not located in the diffusers library or not officially supported by **cache-dit** at this time. The **BlockAdapter** can help you solve this problem. Please refer to [🔥Qwen-Image w/ BlockAdapter](./examples/adapter/run_qwen_image_adapter.py) as an example.
|
|
208
|
+
|
|
209
|
+
```python
|
|
210
|
+
from cache_dit import ForwardPattern, BlockAdapter
|
|
211
|
+
|
|
212
|
+
# Use 🔥BlockAdapter with `auto` mode.
|
|
213
|
+
cache_dit.enable_cache(
|
|
214
|
+
BlockAdapter(
|
|
215
|
+
# Any DiffusionPipeline, Qwen-Image, etc.
|
|
216
|
+
pipe=pipe, auto=True,
|
|
217
|
+
# Check `📚Forward Pattern Matching` documentation and hack the code of
|
|
218
|
+
# Qwen-Image, you will find that it has satisfied `FORWARD_PATTERN_1`.
|
|
219
|
+
forward_pattern=ForwardPattern.Pattern_1,
|
|
220
|
+
),
|
|
221
|
+
)
|
|
222
|
+
|
|
223
|
+
# Or, manually setup transformer configurations.
|
|
224
|
+
cache_dit.enable_cache(
|
|
225
|
+
BlockAdapter(
|
|
226
|
+
pipe=pipe, # Qwen-Image, etc.
|
|
227
|
+
transformer=pipe.transformer,
|
|
228
|
+
blocks=pipe.transformer.transformer_blocks,
|
|
229
|
+
blocks_name="transformer_blocks",
|
|
230
|
+
forward_pattern=ForwardPattern.Pattern_1,
|
|
231
|
+
),
|
|
232
|
+
)
|
|
233
|
+
```
|
|
234
|
+
For such situations, **BlockAdapter** can help you quickly apply various cache acceleration features to your own Diffusion Pipelines and Transformers. Please check the [📚BlockAdapter.md](./docs/BlockAdapter.md) for more details.
|
|
235
|
+
|
|
236
|
+
### 📚Hybrid Forward Pattern
|
|
237
|
+
|
|
238
|
+
Sometimes, a Transformer class will contain more than one group of transformer `blocks`. For example, **FLUX.1** (HiDream, Chroma, etc) contains transformer_blocks and single_transformer_blocks (with different forward patterns). The **BlockAdapter** can also help you solve this problem. Please refer to [📚FLUX.1](./examples/adapter/run_flux_adapter.py) as an example.
|
|
239
|
+
|
|
240
|
+
```python
|
|
241
|
+
# For diffusers <= 0.34.0, FLUX.1 transformer_blocks and
|
|
242
|
+
# single_transformer_blocks have different forward patterns.
|
|
243
|
+
cache_dit.enable_cache(
|
|
244
|
+
BlockAdapter(
|
|
245
|
+
pipe=pipe, # FLUX.1, etc.
|
|
246
|
+
transformer=pipe.transformer,
|
|
247
|
+
blocks=[
|
|
248
|
+
pipe.transformer.transformer_blocks,
|
|
249
|
+
pipe.transformer.single_transformer_blocks,
|
|
250
|
+
],
|
|
251
|
+
blocks_name=[
|
|
252
|
+
"transformer_blocks",
|
|
253
|
+
"single_transformer_blocks",
|
|
254
|
+
],
|
|
255
|
+
forward_pattern=[
|
|
256
|
+
ForwardPattern.Pattern_1,
|
|
257
|
+
ForwardPattern.Pattern_3,
|
|
258
|
+
],
|
|
259
|
+
),
|
|
260
|
+
)
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
### 🤖Cache Acceleration Stats Summary
|
|
264
|
+
|
|
265
|
+
After finishing each inference of `pipe(...)`, you can call the `cache_dit.summary()` API on pipe to get the details of the **Cache Acceleration Stats** for the current inference.
|
|
266
|
+
```python
|
|
267
|
+
stats = cache_dit.summary(pipe)
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
You can set the `details` param to `True` to show more details of the cache stats (in markdown table format). Sometimes, this may help you analyze what values of the residual diff threshold would be better.
|
|
271
|
+
|
|
272
|
+
```python
|
|
273
|
+
⚡️Cache Steps and Residual Diffs Statistics: QwenImagePipeline
|
|
274
|
+
|
|
275
|
+
| Cache Steps | Diffs Min | Diffs P25 | Diffs P50 | Diffs P75 | Diffs P95 | Diffs Max |
|
|
276
|
+
|-------------|-----------|-----------|-----------|-----------|-----------|-----------|
|
|
277
|
+
| 23 | 0.045 | 0.084 | 0.114 | 0.147 | 0.241 | 0.297 |
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
## ⚡️DBCache: Dual Block Cache
|
|
281
|
+
|
|
282
|
+
<div id="dbcache"></div>
|
|
283
|
+
|
|
284
|
+

|
|
285
|
+
|
|
286
|
+
**DBCache**: **Dual Block Caching** for Diffusion Transformers. Different configurations of compute blocks (**F8B12**, etc.) can be customized in DBCache, enabling a balanced trade-off between performance and precision. Moreover, it can be entirely **training**-**free**. Please check [DBCache.md](./docs/DBCache.md) docs for more design details.
|
|
287
|
+
|
|
288
|
+
- **Fn**: Specifies that DBCache uses the **first n** Transformer blocks to fit the information at time step t, enabling the calculation of a more stable L1 diff and delivering more accurate information to subsequent blocks.
|
|
289
|
+
- **Bn**: Further fuses approximate information in the **last n** Transformer blocks to enhance prediction accuracy. These blocks act as an auto-scaler for approximate hidden states that use residual cache.
|
|
290
|
+
|
|
291
|
+
```python
|
|
292
|
+
import cache_dit
|
|
293
|
+
from diffusers import FluxPipeline
|
|
294
|
+
|
|
295
|
+
pipe = FluxPipeline.from_pretrained(
|
|
296
|
+
"black-forest-labs/FLUX.1-dev",
|
|
297
|
+
torch_dtype=torch.bfloat16,
|
|
298
|
+
).to("cuda")
|
|
299
|
+
|
|
300
|
+
# Default options, F8B0, 8 warmup steps, and unlimited cached
|
|
301
|
+
# steps for good balance between performance and precision
|
|
302
|
+
cache_dit.enable_cache(pipe)
|
|
303
|
+
|
|
304
|
+
# Custom options, F8B8, higher precision
|
|
305
|
+
cache_dit.enable_cache(
|
|
306
|
+
pipe,
|
|
307
|
+
max_warmup_steps=8, # steps do not cache
|
|
308
|
+
max_cached_steps=-1, # -1 means no limit
|
|
309
|
+
Fn_compute_blocks=8, # Fn, F8, etc.
|
|
310
|
+
Bn_compute_blocks=8, # Bn, B8, etc.
|
|
311
|
+
residual_diff_threshold=0.12,
|
|
312
|
+
)
|
|
313
|
+
```
|
|
314
|
+
Moreover, users configuring higher **Bn** values (e.g., **F8B16**) while aiming to maintain good performance can specify **Bn_compute_blocks_ids** to work with Bn. DBCache will only compute the specified blocks, with the remaining estimated using the previous step's residual cache.
|
|
315
|
+
|
|
316
|
+
```python
|
|
317
|
+
# Custom options, F8B16, higher precision with good performance.
|
|
318
|
+
cache_dit.enable_cache(
|
|
319
|
+
pipe,
|
|
320
|
+
Fn_compute_blocks=8, # Fn, F8, etc.
|
|
321
|
+
Bn_compute_blocks=16, # Bn, B16, etc.
|
|
322
|
+
# 0, 2, 4, ..., 12, 14; namely, even ids in [0, 16) with step 2
|
|
323
|
+
Bn_compute_blocks_ids=cache_dit.block_range(0, 16, 2),
|
|
324
|
+
# If the L1 difference is below this threshold, skip Bn blocks
|
|
325
|
+
# not in `Bn_compute_blocks_ids` (1, 3, ..., etc). Otherwise,
|
|
326
|
+
# compute these blocks.
|
|
327
|
+
non_compute_blocks_diff_threshold=0.08,
|
|
328
|
+
)
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
<div align="center">
|
|
332
|
+
<p align="center">
|
|
333
|
+
DBCache, <b> L20x1 </b>, Steps: 28, "A cat holding a sign that says hello world with complex background"
|
|
334
|
+
</p>
|
|
335
|
+
</div>
|
|
336
|
+
|
|
337
|
+
|Baseline(L20x1)|F1B0 (0.08)|F1B0 (0.20)|F8B8 (0.15)|F12B12 (0.20)|F16B16 (0.20)|
|
|
338
|
+
|:---:|:---:|:---:|:---:|:---:|:---:|
|
|
339
|
+
|24.85s|15.59s|8.58s|15.41s|15.11s|17.74s|
|
|
340
|
+
|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/NONE_R0.08_S0.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F1B0S1_R0.08_S11.png width=105px> | <img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F1B0S1_R0.2_S19.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F8B8S1_R0.15_S15.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F12B12S4_R0.2_S16.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F16B16S4_R0.2_S13.png width=105px>|
|
|
341
|
+
|
|
342
|
+
## 🔥Hybrid TaylorSeer
|
|
343
|
+
|
|
344
|
+
<div id="taylorseer"></div>
|
|
345
|
+
|
|
346
|
+
We have supported the [TaylorSeers: From Reusing to Forecasting: Accelerating Diffusion Models with TaylorSeers](https://arxiv.org/pdf/2503.06923) algorithm to further improve the precision of DBCache in cases where the cached steps are large, namely, **Hybrid TaylorSeer + DBCache**. At timesteps with significant intervals, the feature similarity in diffusion models decreases substantially, significantly harming the generation quality.
|
|
347
|
+
|
|
348
|
+
$$
|
|
349
|
+
\mathcal{F}\_{\text {pred }, m}\left(x_{t-k}^l\right)=\mathcal{F}\left(x_t^l\right)+\sum_{i=1}^m \frac{\Delta^i \mathcal{F}\left(x_t^l\right)}{i!\cdot N^i}(-k)^i
|
|
350
|
+
$$
|
|
351
|
+
|
|
352
|
+
**TaylorSeer** employs a differential method to approximate the higher-order derivatives of features and predict features in future timesteps with Taylor series expansion. The TaylorSeer implemented in cache-dit supports both hidden states and residual cache types. That is $\mathcal{F}\_{\text {pred }, m}\left(x_{t-k}^l\right)$ can be a residual cache or a hidden-state cache.
|
|
353
|
+
|
|
354
|
+
```python
|
|
355
|
+
cache_dit.enable_cache(
|
|
356
|
+
pipe,
|
|
357
|
+
enable_taylorseer=True,
|
|
358
|
+
enable_encoder_taylorseer=True,
|
|
359
|
+
# Taylorseer cache type can be hidden_states or residual.
|
|
360
|
+
taylorseer_cache_type="residual",
|
|
361
|
+
# Higher values of order will lead to longer computation time
|
|
362
|
+
taylorseer_order=2, # default is 2.
|
|
363
|
+
max_warmup_steps=3, # prefer: >= order + 1
|
|
364
|
+
residual_diff_threshold=0.12
|
|
365
|
+
)
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
> [!Important]
|
|
369
|
+
> Please note that if you have used TaylorSeer as the calibrator for approximate hidden states, the **Bn** param of DBCache can be set to **0**. In essence, DBCache's Bn also acts as a calibrator, so you can choose either Bn > 0 or TaylorSeer. We recommend using the configuration scheme of **TaylorSeer** + **DBCache FnB0**.
|
|
370
|
+
|
|
371
|
+
<div align="center">
|
|
372
|
+
<p align="center">
|
|
373
|
+
<b>DBCache F1B0 + TaylorSeer</b>, L20x1, Steps: 28, <br>"A cat holding a sign that says hello world with complex background"
|
|
374
|
+
</p>
|
|
375
|
+
</div>
|
|
376
|
+
|
|
377
|
+
|Baseline(L20x1)|F1B0 (0.12)|+TaylorSeer|F1B0 (0.15)|+TaylorSeer|+compile|
|
|
378
|
+
|:---:|:---:|:---:|:---:|:---:|:---:|
|
|
379
|
+
|24.85s|12.85s|12.86s|10.27s|10.28s|8.48s|
|
|
380
|
+
|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/NONE_R0.08_S0.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/U0_C0_DBCACHE_F1B0S1W0T0ET0_R0.12_S14_T12.85s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/U0_C0_DBCACHE_F1B0S1W0T1ET1_R0.12_S14_T12.86s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/U0_C0_DBCACHE_F1B0S1W0T0ET0_R0.15_S17_T10.27s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/U0_C0_DBCACHE_F1B0S1W0T1ET1_R0.15_S17_T10.28s.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/U0_C1_DBCACHE_F1B0S1W0T1ET1_R0.15_S17_T8.48s.png width=105px>|
|
|
381
|
+
|
|
382
|
+
## ⚡️Hybrid Cache CFG
|
|
383
|
+
|
|
384
|
+
<div id="cfg"></div>
|
|
385
|
+
|
|
386
|
+
cache-dit supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG in the forward step, please set the `enable_spearate_cfg` param to **False (default)**. Otherwise, set it to True. For example:
|
|
387
|
+
|
|
388
|
+
```python
|
|
389
|
+
cache_dit.enable_cache(
|
|
390
|
+
pipe,
|
|
391
|
+
...,
|
|
392
|
+
# CFG: classifier free guidance or not
|
|
393
|
+
# For model that fused CFG and non-CFG into single forward step,
|
|
394
|
+
# should set enable_spearate_cfg as False. For example, set it as True
|
|
395
|
+
# for Wan 2.1/Qwen-Image and set it as False for FLUX.1, HunyuanVideo,
|
|
396
|
+
# CogVideoX, Mochi, LTXVideo, Allegro, CogView3Plus, EasyAnimate, SD3, etc.
|
|
397
|
+
enable_spearate_cfg=True, # Wan 2.1, Qwen-Image, CogView4, Cosmos, SkyReelsV2, etc.
|
|
398
|
+
# Compute cfg forward first or not, default False, namely,
|
|
399
|
+
# 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
|
|
400
|
+
cfg_compute_first=False,
|
|
401
|
+
# Compute separate diff values for CFG and non-CFG steps,
|
|
402
|
+
# default True. If False, we will use the computed diff from
|
|
403
|
+
# current non-CFG transformer step for current CFG step.
|
|
404
|
+
cfg_diff_compute_separate=True,
|
|
405
|
+
)
|
|
406
|
+
```
|
|
407
|
+
|
|
408
|
+
## ⚙️Torch Compile
|
|
409
|
+
|
|
410
|
+
<div id="compile"></div>
|
|
411
|
+
|
|
412
|
+
By the way, **cache-dit** is designed to work compatibly with **torch.compile**. You can easily use cache-dit with torch.compile to further achieve better performance. For example:
|
|
413
|
+
|
|
414
|
+
```python
|
|
415
|
+
cache_dit.enable_cache(pipe)
|
|
416
|
+
|
|
417
|
+
# Compile the Transformer module
|
|
418
|
+
pipe.transformer = torch.compile(pipe.transformer)
|
|
419
|
+
```
|
|
420
|
+
However, users intending to use **cache-dit** for DiT with **dynamic input shapes** should consider increasing the **recompile** **limit** of `torch._dynamo`. Otherwise, the recompile_limit error may be triggered, causing the module to fall back to eager mode.
|
|
421
|
+
```python
|
|
422
|
+
torch._dynamo.config.recompile_limit = 96 # default is 8
|
|
423
|
+
torch._dynamo.config.accumulated_recompile_limit = 2048 # default is 256
|
|
424
|
+
```
|
|
425
|
+
|
|
426
|
+
Please check [bench.py](./bench/bench.py) for more details.
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
## 🛠Metrics CLI
|
|
430
|
+
|
|
431
|
+
<div id="metrics"></div>
|
|
432
|
+
|
|
433
|
+
You can utilize the APIs provided by cache-dit to quickly evaluate the accuracy losses caused by different cache configurations. For example:
|
|
434
|
+
|
|
435
|
+
```python
|
|
436
|
+
from cache_dit.metrics import compute_psnr
|
|
437
|
+
from cache_dit.metrics import compute_video_psnr
|
|
438
|
+
from cache_dit.metrics import FrechetInceptionDistance # FID
|
|
439
|
+
|
|
440
|
+
FID = FrechetInceptionDistance()
|
|
441
|
+
image_psnr, n = compute_psnr("true.png", "test.png") # Num: n
|
|
442
|
+
image_fid, n = FID.compute_fid("true_dir", "test_dir")
|
|
443
|
+
video_psnr, n = compute_video_psnr("true.mp4", "test.mp4") # Frames: n
|
|
444
|
+
```
|
|
445
|
+
|
|
446
|
+
Please check [test_metrics.py](./tests/test_metrics.py) for more details. Or, you can use `cache-dit-metrics-cli` tool. For examples:
|
|
447
|
+
|
|
448
|
+
```bash
|
|
449
|
+
cache-dit-metrics-cli -h # show usage
|
|
450
|
+
# all: PSNR, FID, SSIM, MSE, ..., etc.
|
|
451
|
+
cache-dit-metrics-cli all -i1 true.png -i2 test.png # image
|
|
452
|
+
cache-dit-metrics-cli all -i1 true_dir -i2 test_dir # image dir
|
|
453
|
+
```
|
|
454
|
+
|
|
455
|
+
## 👋Contribute
|
|
456
|
+
<div id="contribute"></div>
|
|
457
|
+
|
|
458
|
+
How to contribute? Star ⭐️ this repo to support us or check [CONTRIBUTE.md](./CONTRIBUTE.md).
|
|
459
|
+
|
|
460
|
+
## ©️License
|
|
461
|
+
|
|
462
|
+
<div id="license"></div>
|
|
463
|
+
|
|
464
|
+
The **cache-dit** codebase is adapted from FBCache. Special thanks to their excellent work! We have followed the original License from FBCache, please check [LICENSE](./LICENSE) for more details.
|
|
465
|
+
|
|
466
|
+
## ©️Citations
|
|
467
|
+
|
|
468
|
+
<div id="citations"></div>
|
|
469
|
+
|
|
470
|
+
```BibTeX
|
|
471
|
+
@misc{cache-dit@2025,
|
|
472
|
+
title={cache-dit: A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers},
|
|
473
|
+
url={https://github.com/vipshop/cache-dit.git},
|
|
474
|
+
note={Open-source software available at https://github.com/vipshop/cache-dit.git},
|
|
475
|
+
author={vipshop.com},
|
|
476
|
+
year={2025}
|
|
477
|
+
}
|
|
478
|
+
```
|