cache-dit 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. cache_dit-0.1.0/.github/workflows/issue.yml +22 -0
  2. cache_dit-0.1.0/.gitignore +167 -0
  3. cache_dit-0.1.0/.pre-commit-config.yaml +31 -0
  4. cache_dit-0.1.0/CONTRIBUTE.md +28 -0
  5. cache_dit-0.1.0/LICENSE +53 -0
  6. cache_dit-0.1.0/MANIFEST.in +6 -0
  7. cache_dit-0.1.0/PKG-INFO +350 -0
  8. cache_dit-0.1.0/README.md +315 -0
  9. cache_dit-0.1.0/assets/DBCACHE_F12B12S4_R0.2_S16.png +0 -0
  10. cache_dit-0.1.0/assets/DBCACHE_F12B16S4_R0.08_S6.png +0 -0
  11. cache_dit-0.1.0/assets/DBCACHE_F16B16S2_R0.2_S14.png +0 -0
  12. cache_dit-0.1.0/assets/DBCACHE_F16B16S4_R0.2_S13.png +0 -0
  13. cache_dit-0.1.0/assets/DBCACHE_F1B0S1_R0.08_S11.png +0 -0
  14. cache_dit-0.1.0/assets/DBCACHE_F1B0S1_R0.2_S19.png +0 -0
  15. cache_dit-0.1.0/assets/DBCACHE_F8B0S2_R0.12_S12.png +0 -0
  16. cache_dit-0.1.0/assets/DBCACHE_F8B16S1_R0.2_S18.png +0 -0
  17. cache_dit-0.1.0/assets/DBCACHE_F8B8S1_R0.08_S9.png +0 -0
  18. cache_dit-0.1.0/assets/DBCACHE_F8B8S1_R0.12_S12.png +0 -0
  19. cache_dit-0.1.0/assets/DBCACHE_F8B8S1_R0.15_S15.png +0 -0
  20. cache_dit-0.1.0/assets/DBCache.png +0 -0
  21. cache_dit-0.1.0/assets/DBPRUNE_F1B0_R0.03_P24.0_T19.43s.png +0 -0
  22. cache_dit-0.1.0/assets/DBPRUNE_F1B0_R0.04_P34.6_T16.82s.png +0 -0
  23. cache_dit-0.1.0/assets/DBPRUNE_F1B0_R0.05_P38.3_T15.95s.png +0 -0
  24. cache_dit-0.1.0/assets/DBPRUNE_F1B0_R0.06_P45.2_T14.24s.png +0 -0
  25. cache_dit-0.1.0/assets/DBPRUNE_F1B0_R0.07_P52.3_T12.53s.png +0 -0
  26. cache_dit-0.1.0/assets/DBPRUNE_F1B0_R0.08_P52.4_T12.52s.png +0 -0
  27. cache_dit-0.1.0/assets/DBPRUNE_F1B0_R0.09_P59.2_T10.81s.png +0 -0
  28. cache_dit-0.1.0/assets/DBPRUNE_F1B0_R0.12_P59.5_T10.76s.png +0 -0
  29. cache_dit-0.1.0/assets/DBPRUNE_F1B0_R0.12_P63.0_T9.90s.png +0 -0
  30. cache_dit-0.1.0/assets/DBPRUNE_F1B0_R0.1_P62.8_T9.95s.png +0 -0
  31. cache_dit-0.1.0/assets/DBPRUNE_F1B0_R0.2_P59.5_T10.66s.png +0 -0
  32. cache_dit-0.1.0/assets/DBPRUNE_F1B0_R0.3_P63.1_T9.79s.png +0 -0
  33. cache_dit-0.1.0/assets/NONE_R0.08_S0.png +0 -0
  34. cache_dit-0.1.0/bench/.gitignore +168 -0
  35. cache_dit-0.1.0/bench/bench.py +208 -0
  36. cache_dit-0.1.0/docs/.gitignore +166 -0
  37. cache_dit-0.1.0/examples/.gitignore +168 -0
  38. cache_dit-0.1.0/examples/run_flux.py +23 -0
  39. cache_dit-0.1.0/pyproject.toml +28 -0
  40. cache_dit-0.1.0/pytest.ini +7 -0
  41. cache_dit-0.1.0/requirements.txt +6 -0
  42. cache_dit-0.1.0/setup.cfg +20 -0
  43. cache_dit-0.1.0/setup.py +78 -0
  44. cache_dit-0.1.0/src/cache_dit/__init__.py +0 -0
  45. cache_dit-0.1.0/src/cache_dit/_version.py +21 -0
  46. cache_dit-0.1.0/src/cache_dit/cache_factory/__init__.py +166 -0
  47. cache_dit-0.1.0/src/cache_dit/cache_factory/dual_block_cache/__init__.py +0 -0
  48. cache_dit-0.1.0/src/cache_dit/cache_factory/dual_block_cache/cache_context.py +1361 -0
  49. cache_dit-0.1.0/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/__init__.py +45 -0
  50. cache_dit-0.1.0/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/cogvideox.py +89 -0
  51. cache_dit-0.1.0/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/flux.py +100 -0
  52. cache_dit-0.1.0/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/mochi.py +88 -0
  53. cache_dit-0.1.0/src/cache_dit/cache_factory/dynamic_block_prune/__init__.py +0 -0
  54. cache_dit-0.1.0/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/__init__.py +45 -0
  55. cache_dit-0.1.0/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/cogvideox.py +89 -0
  56. cache_dit-0.1.0/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/flux.py +100 -0
  57. cache_dit-0.1.0/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/mochi.py +89 -0
  58. cache_dit-0.1.0/src/cache_dit/cache_factory/dynamic_block_prune/prune_context.py +979 -0
  59. cache_dit-0.1.0/src/cache_dit/cache_factory/first_block_cache/__init__.py +0 -0
  60. cache_dit-0.1.0/src/cache_dit/cache_factory/first_block_cache/cache_context.py +727 -0
  61. cache_dit-0.1.0/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/__init__.py +53 -0
  62. cache_dit-0.1.0/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/cogvideox.py +89 -0
  63. cache_dit-0.1.0/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/flux.py +100 -0
  64. cache_dit-0.1.0/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/mochi.py +89 -0
  65. cache_dit-0.1.0/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/wan.py +98 -0
  66. cache_dit-0.1.0/src/cache_dit/cache_factory/taylorseer.py +76 -0
  67. cache_dit-0.1.0/src/cache_dit/cache_factory/utils.py +0 -0
  68. cache_dit-0.1.0/src/cache_dit/logger.py +97 -0
  69. cache_dit-0.1.0/src/cache_dit/primitives.py +152 -0
  70. cache_dit-0.1.0/src/cache_dit.egg-info/PKG-INFO +350 -0
  71. cache_dit-0.1.0/src/cache_dit.egg-info/SOURCES.txt +73 -0
  72. cache_dit-0.1.0/src/cache_dit.egg-info/dependency_links.txt +1 -0
  73. cache_dit-0.1.0/src/cache_dit.egg-info/requires.txt +21 -0
  74. cache_dit-0.1.0/src/cache_dit.egg-info/top_level.txt +1 -0
@@ -0,0 +1,22 @@
1
+ name: issues
2
+ on:
3
+ schedule:
4
+ - cron: "0 0 * * 0"
5
+
6
+ jobs:
7
+ close-issues:
8
+ runs-on: ubuntu-latest
9
+ permissions:
10
+ issues: write
11
+ pull-requests: write
12
+ steps:
13
+ - uses: actions/stale@v9.0.0
14
+ with:
15
+ days-before-issue-stale: 30
16
+ days-before-issue-close: 7
17
+ stale-issue-label: "stale"
18
+ stale-issue-message: "This issue is stale because it has been open for 30 days with no activity."
19
+ close-issue-message: "This issue was closed because it has been inactive for 7 days since being marked as stale."
20
+ days-before-pr-stale: -1
21
+ days-before-pr-close: -1
22
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -0,0 +1,167 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
161
+
162
+ _version.py
163
+
164
+ report*.html
165
+
166
+ .DS_Store
167
+ tmp
@@ -0,0 +1,31 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v4.0.1
4
+ hooks:
5
+ - id: check-docstring-first
6
+ - id: check-toml
7
+ - id: check-yaml
8
+ exclude: packaging/.*
9
+ args:
10
+ - --allow-multiple-documents
11
+ - id: mixed-line-ending
12
+ args: [--fix=lf]
13
+ - id: end-of-file-fixer
14
+
15
+ - repo: https://github.com/PyCQA/flake8
16
+ rev: 7.1.1
17
+ hooks:
18
+ - id: flake8
19
+ args: [--config=setup.cfg]
20
+
21
+ - repo: https://github.com/PyCQA/pydocstyle
22
+ rev: 6.1.1
23
+ hooks:
24
+ - id: pydocstyle
25
+
26
+ - repo: https://github.com/psf/black
27
+ rev: 24.10.0
28
+ hooks:
29
+ - id: black-jupyter
30
+ args:
31
+ - --line-length=80
@@ -0,0 +1,28 @@
1
+ # Developer Guide
2
+
3
+ ## 👨‍💻Pre-commit
4
+
5
+ Before submitting code, configure pre-commit, for example:
6
+
7
+ ```bash
8
+ # fork vipshop/DBCache to your own github page, then:
9
+ git clone git@github.com:your-github-page/your-fork-DBCache.git
10
+ cd your-fork-DBCache && git checkout -b dev
11
+ # update submodule
12
+ git submodule update --init --recursive --force
13
+ # install pre-commit
14
+ pip3 install pre-commit
15
+ pre-commit install
16
+ pre-commit run --all-files
17
+ ```
18
+
19
+ ## 👨‍💻Add a new feature
20
+
21
+ ```bash
22
+ # feat: support xxx-cache method
23
+ # add your commits
24
+ git add .
25
+ git commit -m "support xxx-cache method"
26
+ git push
27
+ # then, open a PR from your personal branch to DBCache:main
28
+ ```
@@ -0,0 +1,53 @@
1
+ # License
2
+
3
+ ## Acceptance
4
+
5
+ By using the software, you agree to all of the terms and conditions below.
6
+
7
+ ## Copyright License
8
+
9
+ The licensor grants you a non-exclusive, royalty-free, worldwide, non-sublicensable, non-transferable license to use, copy, distribute, make available, and prepare derivative works of the software, in each case subject to the limitations and conditions below.
10
+
11
+ ## Limitations
12
+
13
+ You may not provide the software to third parties as a hosted or managed service, where the service provides users with access to any substantial set of the features or functionality of the software.
14
+
15
+ You may not move, change, disable, or circumvent the license key functionality in the software, and you may not remove or obscure any functionality in the software that is protected by the license key.
16
+
17
+ You may not alter, remove, or obscure any licensing, copyright, or other notices of the licensor in the software. Any use of the licensor’s trademarks is subject to applicable law.
18
+
19
+ ## Patents
20
+
21
+ The licensor grants you a license, under any patent claims the licensor can license, or becomes able to license, to make, have made, use, sell, offer for sale, import and have imported the software, in each case subject to the limitations and conditions in this license. This license does not cover any patent claims that you cause to be infringed by modifications or additions to the software. If you or your company make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. If your company makes such a claim, your patent license ends immediately for work on behalf of your company.
22
+
23
+ ## Notices
24
+
25
+ You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms.
26
+
27
+ If you modify the software, you must include in any modified copies of the software prominent notices stating that you have modified the software.
28
+ ## No Other Rights
29
+
30
+ These terms do not imply any licenses other than those expressly granted in these terms.
31
+
32
+ ## Termination
33
+
34
+ If you use the software in violation of these terms, such use is not licensed, and your licenses will automatically terminate. If the licensor provides you with a notice of your violation, and you cease all violation of this license no later than 30 days after you receive that notice, your licenses will be reinstated retroactively. However, if you violate these terms after such reinstatement, any additional violation of these terms will cause your licenses to terminate automatically and permanently.
35
+
36
+ ## No Liability
37
+
38
+ As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim.
39
+ ## Definitions
40
+
41
+ The licensor is the entity offering these terms, and the software is the software the licensor makes available under these terms, including any portion of it.
42
+
43
+ ## Definitions
44
+
45
+ you refers to the individual or entity agreeing to these terms.
46
+
47
+ your company is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. control means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect.
48
+
49
+ your licenses are all the licenses granted to you for the software under these terms.
50
+
51
+ use means anything you do with the software requiring one of your licenses.
52
+
53
+ trademark means trademarks, service marks, and similar rights.
@@ -0,0 +1,6 @@
1
+ include MANIFEST.in
2
+ include LICENSE
3
+ include requirements.txt
4
+ recursive-include tests *
5
+ prune */__pycache__
6
+ global-exclude *.o *.so *.dylib *.a .git *.pyc *.swp
@@ -0,0 +1,350 @@
1
+ Metadata-Version: 2.4
2
+ Name: cache_dit
3
+ Version: 0.1.0
4
+ Summary: ⚡️DBCache: A Training-free UNet-style Cache Acceleration for Diffusion Transformers
5
+ Author: DefTruth, vipshop.com, etc.
6
+ Maintainer: DefTruth, vipshop.com, etc
7
+ Project-URL: Repository, https://github.com/vipshop/DBCache.git
8
+ Project-URL: Homepage, https://github.com/vipshop/DBCache.git
9
+ Requires-Python: >=3.10
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: packaging
13
+ Requires-Dist: torch
14
+ Requires-Dist: transformers
15
+ Requires-Dist: diffusers
16
+ Provides-Extra: all
17
+ Provides-Extra: dev
18
+ Requires-Dist: pre-commit; extra == "dev"
19
+ Requires-Dist: pytest<8.0.0,>=7.0.0; extra == "dev"
20
+ Requires-Dist: pytest-html; extra == "dev"
21
+ Requires-Dist: expecttest; extra == "dev"
22
+ Requires-Dist: hypothesis; extra == "dev"
23
+ Requires-Dist: transformers; extra == "dev"
24
+ Requires-Dist: diffusers; extra == "dev"
25
+ Requires-Dist: accelerate; extra == "dev"
26
+ Requires-Dist: peft; extra == "dev"
27
+ Requires-Dist: protobuf; extra == "dev"
28
+ Requires-Dist: sentencepiece; extra == "dev"
29
+ Requires-Dist: opencv-python-headless; extra == "dev"
30
+ Requires-Dist: ftfy; extra == "dev"
31
+ Dynamic: license-file
32
+ Dynamic: provides-extra
33
+ Dynamic: requires-dist
34
+ Dynamic: requires-python
35
+
36
+ <div align="center">
37
+ <p align="center">
38
+ <h3>⚡️DBCache: A Training-free UNet-style Cache Acceleration for <br>Diffusion Transformers</h3>
39
+ </p>
40
+ <img src=https://github.com/vipshop/DBCache/blob/main/assets/DBCache.png >
41
+ <div align='center'>
42
+ <img src=https://img.shields.io/badge/Language-Python-brightgreen.svg >
43
+ <img src=https://img.shields.io/badge/PRs-welcome-9cf.svg >
44
+ <img src=https://img.shields.io/badge/Build-pass-brightgreen.svg >
45
+ <img src=https://img.shields.io/badge/Python-3.10|3.11|3.12-9cf.svg >
46
+ <img src=https://img.shields.io/badge/Release-v0.1.0-brightgreen.svg >
47
+ </div>
48
+ <p align="center">
49
+ DeepCache requires UNet’s U-shape, but DiT lacks it. Most DiT cache accelerators are complex and not training-free. DBCache builds on FBCache to create a training-free, UNet-style cache accelerator for DiT.
50
+ </p>
51
+ </div>
52
+
53
+ ## 🤗 Introduction
54
+
55
+ <div align="center">
56
+ <p align="center">
57
+ <h3>DBCache: Dual Block Caching for Diffusion Transformers</h3>
58
+ </p>
59
+ </div>
60
+
61
+ **DBCache**: **Dual Block Caching** for Diffusion Transformers. We have enhanced `FBCache` into a more general and customizable cache algorithm, namely `DBCache`, enabling it to achieve fully `UNet-style` cache acceleration for DiT models. Different configurations of compute blocks (**F8B12**, etc.) can be customized in DBCache. Moreover, it can be entirely **training**-**free**. DBCache can strike a perfect **balance** between performance and precision!
62
+
63
+ <div align="center">
64
+ <p align="center">
65
+ DBCache, <b> L20x1 </b>, Steps: 28, "A cat holding a sign that says hello world with complex background"
66
+ </p>
67
+ </div>
68
+
69
+ |Baseline(L20x1)|F1B0 (0.08)|F1B0 (0.20)|F8B8 (0.15)|F12B12 (0.20)|F16B16 (0.20)|
70
+ |:---:|:---:|:---:|:---:|:---:|:---:|
71
+ |24.85s|15.59s|8.58s|15.41s|15.11s|17.74s|
72
+ |<img src=https://github.com/vipshop/DBCache/blob/main/assets/NONE_R0.08_S0.png width=105px>|<img src=https://github.com/vipshop/DBCache/blob/main/assets/DBCACHE_F1B0S1_R0.08_S11.png width=105px> | <img src=https://github.com/vipshop/DBCache/blob/main/assets/DBCACHE_F1B0S1_R0.2_S19.png width=105px>|<img src=https://github.com/vipshop/DBCache/blob/main/assets/DBCACHE_F8B8S1_R0.15_S15.png width=105px>|<img src=https://github.com/vipshop/DBCache/blob/main/assets/DBCACHE_F12B12S4_R0.2_S16.png width=105px>|<img src=https://github.com/vipshop/DBCache/blob/main/assets/DBCACHE_F16B16S4_R0.2_S13.png width=105px>|
73
+ |**Baseline(L20x1)**|**F1B0 (0.08)**|**F8B8 (0.12)**|**F8B12 (0.20)**|**F8B16 (0.20)**|**F8B20 (0.20)**|
74
+ |27.85s|6.04s|5.88s|5.77s|6.01s|6.20s|
75
+ |<img src=https://github.com/user-attachments/assets/70ea57f4-d8f2-415b-8a96-d8315974a5e6 width=105px>|<img src=https://github.com/user-attachments/assets/fc0e1a67-19cc-44aa-bf50-04696e7978a0 width=105px> |<img src=https://github.com/user-attachments/assets/d1434896-628c-436b-95ad-43c085a8629e width=105px>|<img src=https://github.com/user-attachments/assets/aaa42cd2-57de-4c4e-8bfb-913018a8251d width=105px>|<img src=https://github.com/user-attachments/assets/dc0ba2a4-ef7c-436d-8a39-67055deab92f width=105px>|<img src=https://github.com/user-attachments/assets/aede466f-61ed-4256-8df0-fecf8020c5ca width=105px>|
76
+
77
+ <div align="center">
78
+ <p align="center">
79
+ DBCache, <b> L20x4 </b>, Steps: 20, case to show the texture recovery ability of DBCache
80
+ </p>
81
+ </div>
82
+
83
+ These case studies demonstrate that even with relatively high thresholds (such as 0.12, 0.15, 0.2, etc.) under the DBCache **F12B12** or **F8B16** configuration, the detailed texture of the kitten's fur, colored cloth, and the clarity of text can still be preserved. This suggests that users can leverage DBCache to effectively balance performance and precision in their workflows!
84
+
85
+ <div align="center">
86
+ <p align="center">
87
+ <h3>DBPrune: Dynamic Block Prune with Residual Caching</h3>
88
+ </p>
89
+ </div>
90
+
91
+ **DBPrune**: We have further implemented a new **Dynamic Block Prune** algorithm based on **Residual Caching** for Diffusion Transformers, referred to as DBPrune. DBPrune caches each block's hidden states and residuals, then **dynamically prunes** blocks during inference by computing the L1 distance between previous hidden states. When a block is pruned, its output is approximated using the cached residuals.
92
+
93
+ |Baseline(L20x1)|Pruned(24%)|Pruned(35%)|Pruned(38%)|Pruned(45%)|Pruned(60%)|
94
+ |:---:|:---:|:---:|:---:|:---:|:---:|
95
+ |24.85s|19.43s|16.82s|15.95s|14.24s|10.66s|
96
+ |<img src=https://github.com/vipshop/DBCache/blob/main/assets/NONE_R0.08_S0.png width=105px>|<img src=https://github.com/vipshop/DBCache/blob/main/assets/DBPRUNE_F1B0_R0.03_P24.0_T19.43s.png width=105px> | <img src=https://github.com/vipshop/DBCache/blob/main/assets/DBPRUNE_F1B0_R0.04_P34.6_T16.82s.png width=105px>|<img src=https://github.com/vipshop/DBCache/blob/main/assets/DBPRUNE_F1B0_R0.05_P38.3_T15.95s.png width=105px>|<img src=https://github.com/vipshop/DBCache/blob/main/assets/DBPRUNE_F1B0_R0.06_P45.2_T14.24s.png width=105px>|<img src=https://github.com/vipshop/DBCache/blob/main/assets/DBPRUNE_F1B0_R0.2_P59.5_T10.66s.png width=105px>|
97
+
98
+ <div align="center">
99
+ <p align="center">
100
+ DBPrune, <b> L20x1 </b>, Steps: 28, "A cat holding a sign that says hello world with complex background"
101
+ </p>
102
+ </div>
103
+
104
+ Moreover, both DBCache and DBPrune are **plug-and-play** solutions that work hand-in-hand with [ParaAttention](https://github.com/chengzeyi/ParaAttention). Users can easily tap into its **Context Parallelism** features for distributed inference.
105
+
106
+ ## ©️Citations
107
+
108
+ ```BibTeX
109
+ @misc{DBCache@2025,
110
+ title={DBCache: A Training-free UNet-style Cache Acceleration for Diffusion Transformers},
111
+ url={https://github.com/vipshop/DBCache.git},
112
+ note={Open-source software available at https://github.com/vipshop/DBCache.git},
113
+ author={vipshop.com},
114
+ year={2025}
115
+ }
116
+ ```
117
+
118
+ ## 👋Reference
119
+
120
+ <div id="reference"></div>
121
+
122
+ **DBCache** is built upon **FBCache**. The **DBCache** codebase was adapted from the FBCache implementation in [ParaAttention](https://github.com/chengzeyi/ParaAttention/tree/main/src/para_attn/first_block_cache). We would like to express our sincere gratitude for this excellent work!
123
+
124
+ ## 📖Contents
125
+
126
+ <div id="contents"></div>
127
+
128
+ - [⚙️Installation](#installation)
129
+ - [⚡️Dual Block Cache](#dbcache)
130
+ - [🎉First Block Cache](#fbcache)
131
+ - [⚡️Dynamic Block Prune](#dbprune)
132
+ - [🎉Context Parallelism](#context-parallelism)
133
+ - [⚡️Torch Compile](#compile)
134
+ - [🎉Supported Models](#supported)
135
+ - [👋Contribute](#contribute)
136
+ - [©️License](#license)
137
+
138
+
139
+ ## ⚙️Installation
140
+
141
+ <div id="installation"></div>
142
+
143
+ You can install the stable release of `DBCache` from PyPI:
144
+
145
+ ```bash
146
+ pip3 install cache-dit
147
+ ```
148
+ Or you can install the latest development version from GitHub:
149
+
150
+ ```bash
151
+ pip3 install git+https://github.com/vipshop/DBCache.git
152
+ ```
153
+
154
+ ## ⚡️DBCache: Dual Block Cache
155
+
156
+ <div id="dbcache"></div>
157
+
158
+ ![](https://github.com/user-attachments/assets/c2a382b9-0ccd-46f4-aacc-87857b4a4de8)
159
+
160
+ **DBCache** provides configurable parameters for custom optimization, enabling a balanced trade-off between performance and precision:
161
+
162
+ - **Fn**: Specifies that DBCache uses the **first n** Transformer blocks to fit the information at time step t, enabling the calculation of a more stable L1 diff and delivering more accurate information to subsequent blocks.
163
+ - **Bn**: Further fuses approximate information in the **last n** Transformer blocks to enhance prediction accuracy. These blocks act as an auto-scaler for approximate hidden states that use residual cache.
164
+ - **warmup_steps**: (default: 0) DBCache does not apply the caching strategy when the number of running steps is less than or equal to this value, ensuring the model sufficiently learns basic features during warmup.
165
+ - **max_cached_steps**: (default: -1) DBCache disables the caching strategy when the running steps exceed this value to prevent precision degradation.
166
+ - **residual_diff_threshold**: The residual diff threshold. A higher value leads to faster performance at the cost of lower precision.
167
+
168
+ For a good balance between performance and precision, DBCache is configured by default with **F8B8**, 8 warmup steps, and unlimited cached steps.
169
+
170
+ ```python
171
+ from diffusers import FluxPipeline
172
+ from cache_dit.cache_factory import apply_cache_on_pipe, CacheType
173
+
174
+ pipe = FluxPipeline.from_pretrained(
175
+ "black-forest-labs/FLUX.1-dev",
176
+ torch_dtype=torch.bfloat16,
177
+ ).to("cuda")
178
+
179
+ # Default options, F8B8, good balance between performance and precision
180
+ cache_options = CacheType.default_options(CacheType.DBCache)
181
+
182
+ # Custom options, F8B16, higher precision
183
+ cache_options = {
184
+ "cache_type": CacheType.DBCache,
185
+ "warmup_steps": 8,
186
+ "max_cached_steps": 8, # -1 means no limit
187
+ "Fn_compute_blocks": 8, # Fn, F8, etc.
188
+ "Bn_compute_blocks": 16, # Bn, B16, etc.
189
+ "residual_diff_threshold": 0.12,
190
+ }
191
+
192
+ apply_cache_on_pipe(pipe, **cache_options)
193
+ ```
194
+ Moreover, users configuring higher **Bn** values (e.g., **F8B16**) while aiming to maintain good performance can specify **Bn_compute_blocks_ids** to work with Bn. DBCache will only compute the specified blocks, with the remaining estimated using the previous step's residual cache.
195
+
196
+ ```python
197
+ # Custom options, F8B16, higher precision with good performance.
198
+ cache_options = {
199
+ # 0, 2, 4, ..., 14, 15, etc. [0,16)
200
+ "Bn_compute_blocks_ids": CacheType.range(0, 16, 2),
201
+ # Skip Bn blocks (1, 3, 5, ..., etc.) only if the L1 diff is
202
+ # lower than this value; otherwise, compute them.
203
+ "non_compute_blocks_diff_threshold": 0.08,
204
+ }
205
+ ```
206
+
207
+ ## 🎉FBCache: First Block Cache
208
+
209
+ <div id="fbcache"></div>
210
+
211
+ ![](https://github.com/user-attachments/assets/0fb66656-b711-457a-92a7-a830f134272d)
212
+
213
+ **DBCache** is a more general cache algorithm than **FBCache**. When Fn=1 and Bn=0, DBCache behaves identically to FBCache. Therefore, you can either use the original FBCache implementation directly or configure **DBCache** with **F1B0** settings to achieve the same functionality.
214
+
215
+ ```python
216
+ from diffusers import FluxPipeline
217
+ from cache_dit.cache_factory import apply_cache_on_pipe, CacheType
218
+
219
+ pipe = FluxPipeline.from_pretrained(
220
+ "black-forest-labs/FLUX.1-dev",
221
+ torch_dtype=torch.bfloat16,
222
+ ).to("cuda")
223
+
224
+ # Using FBCache directly
225
+ cache_options = CacheType.default_options(CacheType.FBCache)
226
+
227
+ # Or using DBCache with F1B0.
228
+ # Fn=1, Bn=0, means FB Cache, otherwise, Dual Block Cache
229
+ cache_options = {
230
+ "cache_type": CacheType.DBCache,
231
+ "warmup_steps": 8,
232
+ "max_cached_steps": 8, # -1 means no limit
233
+ "Fn_compute_blocks": 1, # Fn, F1, etc.
234
+ "Bn_compute_blocks": 0, # Bn, B0, etc.
235
+ "residual_diff_threshold": 0.12,
236
+ }
237
+
238
+ apply_cache_on_pipe(pipe, **cache_options)
239
+ ```
240
+
241
+ ## ⚡️DBPrune: Dynamic Block Prune
242
+
243
+ <div id="dbprune"></div>
244
+
245
+ ![](https://github.com/user-attachments/assets/932b6360-9533-4352-b176-4c4d84bd4695)
246
+
247
+ We have further implemented a new **Dynamic Block Prune** algorithm based on **Residual Caching** for Diffusion Transformers, which is referred to as **DBPrune**. DBPrune caches each block's hidden states and residuals, then dynamically prunes blocks during inference by computing the L1 distance between previous hidden states. When a block is pruned, its output is approximated using the cached residuals. DBPrune is currently in the experimental phase, and we kindly invite you to stay tuned for upcoming updates.
248
+
249
+ ```python
250
+ from diffusers import FluxPipeline
251
+ from cache_dit.cache_factory import apply_cache_on_pipe, CacheType
252
+
253
+ pipe = FluxPipeline.from_pretrained(
254
+ "black-forest-labs/FLUX.1-dev",
255
+ torch_dtype=torch.bfloat16,
256
+ ).to("cuda")
257
+
258
+ # Using DBPrune
259
+ cache_options = CacheType.default_options(CacheType.DBPrune)
260
+
261
+ apply_cache_on_pipe(pipe, **cache_options)
262
+ ```
263
+
264
+ <div align="center">
265
+ <p align="center">
266
+ DBPrune, <b> L20x1 </b>, Steps: 28, "A cat holding a sign that says hello world with complex background"
267
+ </p>
268
+ </div>
269
+
270
+ |Baseline(L20x1)|Pruned(24%)|Pruned(35%)|Pruned(38%)|Pruned(45%)|Pruned(60%)|
271
+ |:---:|:---:|:---:|:---:|:---:|:---:|
272
+ |24.85s|19.43s|16.82s|15.95s|14.24s|10.66s|
273
+ |<img src=https://github.com/vipshop/DBCache/blob/main/assets/NONE_R0.08_S0.png width=105px>|<img src=https://github.com/vipshop/DBCache/blob/main/assets/DBPRUNE_F1B0_R0.03_P24.0_T19.43s.png width=105px> | <img src=https://github.com/vipshop/DBCache/blob/main/assets/DBPRUNE_F1B0_R0.04_P34.6_T16.82s.png width=105px>|<img src=https://github.com/vipshop/DBCache/blob/main/assets/DBPRUNE_F1B0_R0.05_P38.3_T15.95s.png width=105px>|<img src=https://github.com/vipshop/DBCache/blob/main/assets/DBPRUNE_F1B0_R0.06_P45.2_T14.24s.png width=105px>|<img src=https://github.com/vipshop/DBCache/blob/main/assets/DBPRUNE_F1B0_R0.2_P59.5_T10.66s.png width=105px>|
274
+
275
+ ## 🎉Context Parallelism
276
+
277
+ <div id="context-parallelism"></div>
278
+
279
+ DBCache and DBPrune are **plug-and-play** solutions that work hand-in-hand with [ParaAttention](https://github.com/chengzeyi/ParaAttention). Users can **easily tap into** its **Context Parallelism** features for distributed inference. First, install `para-attn` from PyPI:
280
+
281
+ ```bash
282
+ pip3 install para-attn # or install `para-attn` from sources.
283
+ ```
284
+
285
+ Then, you can run **DBCache** with **Context Parallelism** on 4 GPUs:
286
+
287
+ ```python
288
+ from diffusers import FluxPipeline
289
+ from para_attn.context_parallel import init_context_parallel_mesh
290
+ from para_attn.context_parallel.diffusers_adapters import parallelize_pipe
291
+ from cache_dit.cache_factory import apply_cache_on_pipe, CacheType
292
+
293
+ pipe = FluxPipeline.from_pretrained(
294
+ "black-forest-labs/FLUX.1-dev",
295
+ torch_dtype=torch.bfloat16,
296
+ ).to("cuda")
297
+
298
+ # Context Parallel from ParaAttention
299
+ parallelize_pipe(
300
+ pipe, mesh=init_context_parallel_mesh(
301
+ pipe.device.type, max_ulysses_dim_size=4
302
+ )
303
+ )
304
+
305
+ # DBCache with F8B8 from this library
306
+ apply_cache_on_pipe(
307
+ pipe, **CacheType.default_options(CacheType.DBCache)
308
+ )
309
+ ```
310
+
311
+ ## ⚡️Torch Compile
312
+
313
+ <div id="compile"></div>
314
+
315
+ **DBCache** and **DBPrune** are designed to be compatible with `torch.compile`. For example:
316
+
317
+ ```python
318
+ apply_cache_on_pipe(
319
+ pipe, **CacheType.default_options(CacheType.DBCache)
320
+ )
321
+ # Compile the Transformer module
322
+ pipe.transformer = torch.compile(pipe.transformer)
323
+ ```
324
+ However, users intending to use DBCache and DBPrune for DiT with **dynamic input shapes** should consider increasing the **recompile** **limit** of `torch._dynamo` to achieve better performance.
325
+
326
+ ```python
327
+ torch._dynamo.config.recompile_limit = 96 # default is 8
328
+ torch._dynamo.config.accumulated_recompile_limit = 2048 # default is 256
329
+ ```
330
+ Otherwise, the recompile_limit error may be triggered, causing the module to fall back to eager mode.
331
+
332
+ ## 🎉Supported Models
333
+
334
+ <div id="supported"></div>
335
+
336
+ - [🚀FLUX.1](https://github.com/vipshop/DBCache/blob/main/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters)
337
+ - [🚀CogVideoX](https://github.com/vipshop/DBCache/blob/main/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters)
338
+ - [🚀Mochi](https://github.com/vipshop/DBCache/blob/main/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters)
339
+
340
+ ## 👋Contribute
341
+ <div id="contribute"></div>
342
+
343
+ How to contribute? Star this repo or check [CONTRIBUTE.md](./CONTRIBUTE.md).
344
+
345
+ ## ©️License
346
+
347
+ <div id="license"></div>
348
+
349
+
350
+ We have followed the original License from [ParaAttention](https://github.com/chengzeyi/ParaAttention); please check [LICENSE](./LICENSE) for more details.