cache-dit 0.2.4__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cache-dit might be problematic. Click here for more details.

Files changed (137) hide show
  1. {cache_dit-0.2.4 → cache_dit-0.2.5}/PKG-INFO +21 -8
  2. {cache_dit-0.2.4 → cache_dit-0.2.5}/README.md +20 -7
  3. {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/.gitignore +1 -0
  4. {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/README.md +10 -2
  5. cache_dit-0.2.5/examples/data/flf2v_input_first_frame.png +0 -0
  6. cache_dit-0.2.5/examples/data/flf2v_input_last_frame.png +0 -0
  7. {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/requirements.txt +1 -1
  8. {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/run_cogvideox.py +1 -1
  9. {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/run_wan.py +8 -2
  10. cache_dit-0.2.5/examples/run_wan_flf2v.py +191 -0
  11. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/_version.py +2 -2
  12. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dual_block_cache/cache_context.py +138 -33
  13. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/first_block_cache/cache_context.py +2 -2
  14. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit.egg-info/PKG-INFO +21 -8
  15. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit.egg-info/SOURCES.txt +3 -0
  16. {cache_dit-0.2.4 → cache_dit-0.2.5}/.github/workflows/issue.yml +0 -0
  17. {cache_dit-0.2.4 → cache_dit-0.2.5}/.gitignore +0 -0
  18. {cache_dit-0.2.4 → cache_dit-0.2.5}/.pre-commit-config.yaml +0 -0
  19. {cache_dit-0.2.4 → cache_dit-0.2.5}/CONTRIBUTE.md +0 -0
  20. {cache_dit-0.2.4 → cache_dit-0.2.5}/LICENSE +0 -0
  21. {cache_dit-0.2.4 → cache_dit-0.2.5}/MANIFEST.in +0 -0
  22. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F12B12S4_R0.2_S16.png +0 -0
  23. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F12B16S4_R0.08_S6.png +0 -0
  24. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F16B16S2_R0.2_S14.png +0 -0
  25. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F16B16S4_R0.2_S13.png +0 -0
  26. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F1B0S1_R0.08_S11.png +0 -0
  27. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F1B0S1_R0.2_S19.png +0 -0
  28. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F8B0S2_R0.12_S12.png +0 -0
  29. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F8B16S1_R0.2_S18.png +0 -0
  30. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F8B8S1_R0.08_S9.png +0 -0
  31. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F8B8S1_R0.12_S12.png +0 -0
  32. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBCACHE_F8B8S1_R0.15_S15.png +0 -0
  33. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.03_P24.0_T19.43s.png +0 -0
  34. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.04_P34.6_T16.82s.png +0 -0
  35. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.05_P38.3_T15.95s.png +0 -0
  36. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.06_P45.2_T14.24s.png +0 -0
  37. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.07_P52.3_T12.53s.png +0 -0
  38. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.08_P52.4_T12.52s.png +0 -0
  39. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.09_P59.2_T10.81s.png +0 -0
  40. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.12_P59.5_T10.76s.png +0 -0
  41. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.12_P63.0_T9.90s.png +0 -0
  42. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.1_P62.8_T9.95s.png +0 -0
  43. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.2_P59.5_T10.66s.png +0 -0
  44. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/DBPRUNE_F1B0_R0.3_P63.1_T9.79s.png +0 -0
  45. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/NONE_R0.08_S0.png +0 -0
  46. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/TEXTURE_DBCACHE_F1B0_R0.08.png +0 -0
  47. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/TEXTURE_DBCACHE_F8B12_R0.12.png +0 -0
  48. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/TEXTURE_DBCACHE_F8B16_R0.2.png +0 -0
  49. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/TEXTURE_DBCACHE_F8B20_R0.2.png +0 -0
  50. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/TEXTURE_DBCACHE_F8B8_R0.12.png +0 -0
  51. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/TEXTURE_NONE_R0.08.png +0 -0
  52. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C0_DBCACHE_F1B0S1W0T0ET0_R0.12_S14_T12.85s.png +0 -0
  53. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C0_DBCACHE_F1B0S1W0T0ET0_R0.15_S17_T10.27s.png +0 -0
  54. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C0_DBCACHE_F1B0S1W0T1ET1_R0.12_S14_T12.86s.png +0 -0
  55. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C0_DBCACHE_F1B0S1W0T1ET1_R0.15_S17_T10.28s.png +0 -0
  56. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_DBCACHE_F1B0S1W0T1ET1_R0.15_S17_T8.48s.png +0 -0
  57. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_DBPRUNE_F1B0_R0.03_P24.0_T16.25s.png +0 -0
  58. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_DBPRUNE_F1B0_R0.045_P38.2_T13.41s.png +0 -0
  59. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_DBPRUNE_F1B0_R0.04_P34.6_T14.12s.png +0 -0
  60. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_DBPRUNE_F1B0_R0.055_P45.1_T12.00s.png +0 -0
  61. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_DBPRUNE_F1B0_R0.05_P41.6_T12.70s.png +0 -0
  62. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_DBPRUNE_F1B0_R0.2_P59.5_T8.86s.png +0 -0
  63. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_DBPRUNE_F8B8_R0.08_P23.1_T16.14s.png +0 -0
  64. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U0_C1_NONE_R0.08_S0_T20.43s.png +0 -0
  65. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.03_P27.3_T6.62s.png +0 -0
  66. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.03_P27.3_T6.63s.png +0 -0
  67. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.045_P38.2_T5.81s.png +0 -0
  68. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.045_P38.2_T5.82s.png +0 -0
  69. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.04_P34.6_T6.06s.png +0 -0
  70. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.04_P34.6_T6.07s.png +0 -0
  71. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.04_P34.6_T6.08s.png +0 -0
  72. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.055_P45.1_T5.27s.png +0 -0
  73. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.055_P45.1_T5.28s.png +0 -0
  74. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.2_P59.5_T3.95s.png +0 -0
  75. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_DBPRUNE_F1B0_R0.2_P59.5_T3.96s.png +0 -0
  76. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_NONE_R0.08_S0_T7.78s.png +0 -0
  77. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/U4_C1_NONE_R0.08_S0_T7.79s.png +0 -0
  78. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/cache-dit-v1.png +0 -0
  79. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/dbcache-fnbn-v1.png +0 -0
  80. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/dbcache-v1.png +0 -0
  81. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/dbprune-v1.png +0 -0
  82. {cache_dit-0.2.4 → cache_dit-0.2.5}/assets/fbcache-v1.png +0 -0
  83. {cache_dit-0.2.4 → cache_dit-0.2.5}/bench/.gitignore +0 -0
  84. {cache_dit-0.2.4 → cache_dit-0.2.5}/bench/bench.py +0 -0
  85. {cache_dit-0.2.4 → cache_dit-0.2.5}/docs/.gitignore +0 -0
  86. {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/data/cup.png +0 -0
  87. {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/data/cup_mask.png +0 -0
  88. {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/run_flux.py +0 -0
  89. {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/run_flux_fill.py +0 -0
  90. {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/run_hunyuan_video.py +0 -0
  91. {cache_dit-0.2.4 → cache_dit-0.2.5}/examples/run_mochi.py +0 -0
  92. {cache_dit-0.2.4 → cache_dit-0.2.5}/pyproject.toml +0 -0
  93. {cache_dit-0.2.4 → cache_dit-0.2.5}/pytest.ini +0 -0
  94. {cache_dit-0.2.4 → cache_dit-0.2.5}/requirements.txt +0 -0
  95. {cache_dit-0.2.4 → cache_dit-0.2.5}/setup.cfg +0 -0
  96. {cache_dit-0.2.4 → cache_dit-0.2.5}/setup.py +0 -0
  97. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/__init__.py +0 -0
  98. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/__init__.py +0 -0
  99. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dual_block_cache/__init__.py +0 -0
  100. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/__init__.py +0 -0
  101. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/cogvideox.py +0 -0
  102. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/flux.py +0 -0
  103. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/hunyuan_video.py +0 -0
  104. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/mochi.py +0 -0
  105. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dual_block_cache/diffusers_adapters/wan.py +0 -0
  106. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dynamic_block_prune/__init__.py +0 -0
  107. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/__init__.py +0 -0
  108. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/cogvideox.py +0 -0
  109. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/flux.py +0 -0
  110. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/hunyuan_video.py +0 -0
  111. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/mochi.py +0 -0
  112. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dynamic_block_prune/diffusers_adapters/wan.py +0 -0
  113. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/dynamic_block_prune/prune_context.py +0 -0
  114. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/first_block_cache/__init__.py +0 -0
  115. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/__init__.py +0 -0
  116. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/cogvideox.py +0 -0
  117. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/flux.py +0 -0
  118. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/hunyuan_video.py +0 -0
  119. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/mochi.py +0 -0
  120. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/first_block_cache/diffusers_adapters/wan.py +0 -0
  121. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/taylorseer.py +0 -0
  122. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/cache_factory/utils.py +0 -0
  123. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/compile/__init__.py +0 -0
  124. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/compile/utils.py +0 -0
  125. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/custom_ops/__init__.py +0 -0
  126. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/custom_ops/triton_taylorseer.py +0 -0
  127. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/logger.py +0 -0
  128. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit/primitives.py +0 -0
  129. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit.egg-info/dependency_links.txt +0 -0
  130. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit.egg-info/requires.txt +0 -0
  131. {cache_dit-0.2.4 → cache_dit-0.2.5}/src/cache_dit.egg-info/top_level.txt +0 -0
  132. {cache_dit-0.2.4 → cache_dit-0.2.5}/tests/.gitignore +0 -0
  133. {cache_dit-0.2.4 → cache_dit-0.2.5}/tests/README.md +0 -0
  134. {cache_dit-0.2.4 → cache_dit-0.2.5}/tests/taylorseer_approximation_order_2.png +0 -0
  135. {cache_dit-0.2.4 → cache_dit-0.2.5}/tests/taylorseer_approximation_order_4.png +0 -0
  136. {cache_dit-0.2.4 → cache_dit-0.2.5}/tests/taylorseer_approximation_test.png +0 -0
  137. {cache_dit-0.2.4 → cache_dit-0.2.5}/tests/test_taylorseer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cache_dit
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: 🤗 CacheDiT: A Training-free and Easy-to-use Cache Acceleration Toolbox for Diffusion Transformers
5
5
  Author: DefTruth, vipshop.com, etc.
6
6
  Maintainer: DefTruth, vipshop.com, etc
@@ -44,7 +44,7 @@ Dynamic: requires-python
44
44
  <img src=https://img.shields.io/badge/PyPI-pass-brightgreen.svg >
45
45
  <img src=https://static.pepy.tech/badge/cache-dit >
46
46
  <img src=https://img.shields.io/badge/Python-3.10|3.11|3.12-9cf.svg >
47
- <img src=https://img.shields.io/badge/Release-v0.2.2-brightgreen.svg >
47
+ <img src=https://img.shields.io/badge/Release-v0.2-brightgreen.svg >
48
48
  </div>
49
49
  <p align="center">
50
50
  DeepCache is for UNet not DiT. Most DiT cache speedups are complex and not training-free. CacheDiT offers <br>a set of training-free cache accelerators for DiT: <b>🔥<a href="#dbcache">DBCache</a>, <a href="#dbprune">DBPrune</a>, <a href="#taylorseer">TaylorSeer</a>, <a href="#fbcache">FBCache</a></b>, etc🔥
@@ -169,7 +169,7 @@ The **CacheDiT** codebase is adapted from [FBCache](https://github.com/chengzeyi
169
169
  You can install the stable release of `cache-dit` from PyPI:
170
170
 
171
171
  ```bash
172
- pip3 install cache-dit
172
+ pip3 install -U cache-dit
173
173
  ```
174
174
  Or you can install the latest develop version from GitHub:
175
175
 
@@ -181,11 +181,13 @@ pip3 install git+https://github.com/vipshop/cache-dit.git
181
181
 
182
182
  <div id="supported"></div>
183
183
 
184
- - [🚀FLUX.1](https://github.com/vipshop/cache-dit/raw/main/examples)
185
- - [🚀Mochi](https://github.com/vipshop/cache-dit/raw/main/examples)
184
+ - [🚀FLUX.1-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
185
+ - [🚀FLUX.1-Fill-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
186
+ - [🚀mochi-1-preview](https://github.com/vipshop/cache-dit/raw/main/examples)
186
187
  - [🚀CogVideoX](https://github.com/vipshop/cache-dit/raw/main/examples)
187
188
  - [🚀CogVideoX1.5](https://github.com/vipshop/cache-dit/raw/main/examples)
188
- - [🚀Wan2.1](https://github.com/vipshop/cache-dit/raw/main/examples)
189
+ - [🚀Wan2.1-T2V](https://github.com/vipshop/cache-dit/raw/main/examples)
190
+ - [🚀Wan2.1-FLF2V](https://github.com/vipshop/cache-dit/raw/main/examples)
189
191
  - [🚀HunyuanVideo](https://github.com/vipshop/cache-dit/raw/main/examples)
190
192
 
191
193
 
@@ -281,7 +283,7 @@ cache_options = {
281
283
  "taylorseer_kwargs": {
282
284
  "n_derivatives": 2, # default is 2.
283
285
  },
284
- "warmup_steps": 3, # n_derivatives + 1
286
+ "warmup_steps": 3, # prefer: >= n_derivatives + 1
285
287
  "residual_diff_threshold": 0.12,
286
288
  }
287
289
  ```
@@ -304,12 +306,23 @@ cache_options = {
304
306
 
305
307
  <div id="cfg"></div>
306
308
 
307
- CacheDiT supports caching for CFG (classifier-free guidance). For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `do_separate_classifier_free_guidance` param to False. Otherwise, set it to True. Wan 2.1: True. FLUX.1, HunyunVideo, CogVideoX, Mochi: False.
309
+ CacheDiT supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `do_separate_classifier_free_guidance` param to **False (default)**. Otherwise, set it to True. For example:
308
310
 
309
311
  ```python
310
312
  cache_options = {
313
+ # CFG: classifier free guidance or not
314
+ # For model that fused CFG and non-CFG into single forward step,
315
+ # should set do_separate_classifier_free_guidance as False.
316
+ # For example, set it as True for Wan 2.1 and set it as False
317
+ # for FLUX.1, HunyuanVideo, CogVideoX, Mochi.
311
318
  "do_separate_classifier_free_guidance": True, # Wan 2.1
319
+ # Compute cfg forward first or not, default False, namely,
320
+ # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
312
321
  "cfg_compute_first": False,
322
+ # Compute separate diff values for CFG and non-CFG step,
323
+ # default True. If False, we will use the computed diff from
324
+ # current non-CFG transformer step for current CFG step.
325
+ "cfg_diff_compute_separate": True,
313
326
  }
314
327
  ```
315
328
 
@@ -9,7 +9,7 @@
9
9
  <img src=https://img.shields.io/badge/PyPI-pass-brightgreen.svg >
10
10
  <img src=https://static.pepy.tech/badge/cache-dit >
11
11
  <img src=https://img.shields.io/badge/Python-3.10|3.11|3.12-9cf.svg >
12
- <img src=https://img.shields.io/badge/Release-v0.2.2-brightgreen.svg >
12
+ <img src=https://img.shields.io/badge/Release-v0.2-brightgreen.svg >
13
13
  </div>
14
14
  <p align="center">
15
15
  DeepCache is for UNet not DiT. Most DiT cache speedups are complex and not training-free. CacheDiT offers <br>a set of training-free cache accelerators for DiT: <b>🔥<a href="#dbcache">DBCache</a>, <a href="#dbprune">DBPrune</a>, <a href="#taylorseer">TaylorSeer</a>, <a href="#fbcache">FBCache</a></b>, etc🔥
@@ -134,7 +134,7 @@ The **CacheDiT** codebase is adapted from [FBCache](https://github.com/chengzeyi
134
134
  You can install the stable release of `cache-dit` from PyPI:
135
135
 
136
136
  ```bash
137
- pip3 install cache-dit
137
+ pip3 install -U cache-dit
138
138
  ```
139
139
  Or you can install the latest develop version from GitHub:
140
140
 
@@ -146,11 +146,13 @@ pip3 install git+https://github.com/vipshop/cache-dit.git
146
146
 
147
147
  <div id="supported"></div>
148
148
 
149
- - [🚀FLUX.1](https://github.com/vipshop/cache-dit/raw/main/examples)
150
- - [🚀Mochi](https://github.com/vipshop/cache-dit/raw/main/examples)
149
+ - [🚀FLUX.1-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
150
+ - [🚀FLUX.1-Fill-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
151
+ - [🚀mochi-1-preview](https://github.com/vipshop/cache-dit/raw/main/examples)
151
152
  - [🚀CogVideoX](https://github.com/vipshop/cache-dit/raw/main/examples)
152
153
  - [🚀CogVideoX1.5](https://github.com/vipshop/cache-dit/raw/main/examples)
153
- - [🚀Wan2.1](https://github.com/vipshop/cache-dit/raw/main/examples)
154
+ - [🚀Wan2.1-T2V](https://github.com/vipshop/cache-dit/raw/main/examples)
155
+ - [🚀Wan2.1-FLF2V](https://github.com/vipshop/cache-dit/raw/main/examples)
154
156
  - [🚀HunyuanVideo](https://github.com/vipshop/cache-dit/raw/main/examples)
155
157
 
156
158
 
@@ -246,7 +248,7 @@ cache_options = {
246
248
  "taylorseer_kwargs": {
247
249
  "n_derivatives": 2, # default is 2.
248
250
  },
249
- "warmup_steps": 3, # n_derivatives + 1
251
+ "warmup_steps": 3, # prefer: >= n_derivatives + 1
250
252
  "residual_diff_threshold": 0.12,
251
253
  }
252
254
  ```
@@ -269,12 +271,23 @@ cache_options = {
269
271
 
270
272
  <div id="cfg"></div>
271
273
 
272
- CacheDiT supports caching for CFG (classifier-free guidance). For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `do_separate_classifier_free_guidance` param to False. Otherwise, set it to True. Wan 2.1: True. FLUX.1, HunyunVideo, CogVideoX, Mochi: False.
274
+ CacheDiT supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `do_separate_classifier_free_guidance` param to **False (default)**. Otherwise, set it to True. For example:
273
275
 
274
276
  ```python
275
277
  cache_options = {
278
+ # CFG: classifier free guidance or not
279
+ # For model that fused CFG and non-CFG into single forward step,
280
+ # should set do_separate_classifier_free_guidance as False.
281
+ # For example, set it as True for Wan 2.1 and set it as False
282
+ # for FLUX.1, HunyuanVideo, CogVideoX, Mochi.
276
283
  "do_separate_classifier_free_guidance": True, # Wan 2.1
284
+ # Compute cfg forward first or not, default False, namely,
285
+ # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
277
286
  "cfg_compute_first": False,
287
+ # Compute separate diff values for CFG and non-CFG step,
288
+ # default True. If False, we will use the computed diff from
289
+ # current non-CFG transformer step for current CFG step.
290
+ "cfg_diff_compute_separate": True,
278
291
  }
279
292
  ```
280
293
 
@@ -166,3 +166,4 @@ report*.html
166
166
  .DS_Store
167
167
  *.png
168
168
  *.mp4
169
+ tmp*
@@ -32,7 +32,7 @@ python3 run_cogvideox.py --cache --Fn 8 --Bn 8
32
32
  python3 run_cogvideox.py --cache --Fn 8 --Bn 0 --taylorseer
33
33
  ```
34
34
 
35
- - Wan2.1
35
+ - Wan2.1 T2V
36
36
 
37
37
  ```bash
38
38
  python3 run_wan.py # baseline
@@ -40,7 +40,15 @@ python3 run_wan.py --cache --Fn 8 --Bn 8
40
40
  python3 run_wan.py --cache --Fn 8 --Bn 0 --taylorseer
41
41
  ```
42
42
 
43
- - Mochi
43
+ - Wan2.1 FLF2V
44
+
45
+ ```bash
46
+ python3 run_wan_flf2v.py # baseline
47
+ python3 run_wan_flf2v.py --cache --Fn 8 --Bn 8
48
+ python3 run_wan_flf2v.py --cache --Fn 8 --Bn 0 --taylorseer
49
+ ```
50
+
51
+ - mochi-1-preview
44
52
 
45
53
  ```bash
46
54
  python3 run_mochi.py # baseline
@@ -1,4 +1,4 @@
1
1
  imageio-ffmpeg
2
2
  # wan currently requires installing from source
3
- diffusers @ git+https://github.com/huggingface/diffusers
3
+ diffusers>=0.34.0
4
4
  ftfy
@@ -70,7 +70,7 @@ if args.cache:
70
70
  "enable_taylorseer": args.taylorseer,
71
71
  "enable_encoder_taylorseer": args.taylorseer,
72
72
  # Taylorseer cache type can be hidden_states or residual
73
- "taylorseer_cache_type": "residual",
73
+ "taylorseer_cache_type": "hidden_states",
74
74
  "taylorseer_kwargs": {
75
75
  "n_derivatives": args.taylorseer_order,
76
76
  },
@@ -63,7 +63,13 @@ if args.cache:
63
63
  # For model that fused CFG and non-CFG into single forward step,
64
64
  # should set do_separate_classifier_free_guidance as False.
65
65
  "do_separate_classifier_free_guidance": True,
66
+ # Compute cfg forward first or not, default False, namely,
67
+ # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
66
68
  "cfg_compute_first": False,
69
+ # Compute separate diff values for CFG and non-CFG step,
70
+ # default True. If False, we will use the computed diff from
71
+ # current non-CFG transformer step for current CFG step.
72
+ "cfg_diff_compute_separate": True,
67
73
  "enable_taylorseer": args.taylorseer,
68
74
  "enable_encoder_taylorseer": args.taylorseer,
69
75
  # Taylorseer cache type can be hidden_states or residual
@@ -89,12 +95,12 @@ pipe.enable_model_cpu_offload()
89
95
 
90
96
  # Wan currently requires installing diffusers from source
91
97
  assert isinstance(pipe.vae, AutoencoderKLWan) # enable type check for IDE
92
- if diffusers.__version__ >= "0.34.0.dev0":
98
+ if diffusers.__version__ >= "0.34.0":
93
99
  pipe.vae.enable_tiling()
94
100
  pipe.vae.enable_slicing()
95
101
  else:
96
102
  print(
97
- "Wan pipeline requires diffusers version >= 0.34.0.dev0 "
103
+ "Wan pipeline requires diffusers version >= 0.34.0 "
98
104
  "for vae tiling and slicing, please install diffusers "
99
105
  "from source."
100
106
  )
@@ -0,0 +1,191 @@
1
+ import os
2
+ import time
3
+ import torch
4
+ import diffusers
5
+ import argparse
6
+ import numpy as np
7
+ import torchvision.transforms.functional as TF
8
+ from diffusers import AutoencoderKLWan, WanImageToVideoPipeline
9
+ from diffusers.utils import export_to_video, load_image
10
+ from transformers import CLIPVisionModel
11
+
12
+ from cache_dit.cache_factory import CacheType, apply_cache_on_pipe
13
+
14
+
15
+ def get_args() -> argparse.ArgumentParser:
16
+ parser = argparse.ArgumentParser()
17
+ # General arguments
18
+ parser.add_argument("--cache", action="store_true", default=False)
19
+ parser.add_argument("--taylorseer", action="store_true", default=False)
20
+ parser.add_argument("--taylorseer-order", "--order", type=int, default=2)
21
+ parser.add_argument("--Fn-compute-blocks", "--Fn", type=int, default=1)
22
+ parser.add_argument("--Bn-compute-blocks", "--Bn", type=int, default=0)
23
+ parser.add_argument("--downsample-factor", "--df", type=int, default=4)
24
+ parser.add_argument("--rdt", type=float, default=0.08)
25
+ parser.add_argument("--warmup-steps", type=int, default=0)
26
+ return parser.parse_args()
27
+
28
+
29
+ def aspect_ratio_resize(image, pipe, max_area=720 * 1280):
30
+ aspect_ratio = image.height / image.width
31
+ mod_value = (
32
+ pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
33
+ )
34
+ height = round(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value
35
+ width = round(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value
36
+ image = image.resize((width, height))
37
+ return image, height, width
38
+
39
+
40
+ def center_crop_resize(image, height, width):
41
+ # Calculate resize ratio to match first frame dimensions
42
+ resize_ratio = max(width / image.width, height / image.height)
43
+
44
+ # Resize the image
45
+ width = round(image.width * resize_ratio)
46
+ height = round(image.height * resize_ratio)
47
+ size = [width, height]
48
+ image = TF.center_crop(image, size)
49
+
50
+ return image, height, width
51
+
52
+
53
+ def prepare_pipeline(
54
+ pipe: WanImageToVideoPipeline,
55
+ args: argparse.ArgumentParser,
56
+ ):
57
+ if args.cache:
58
+ cache_options = {
59
+ "cache_type": CacheType.DBCache,
60
+ "warmup_steps": args.warmup_steps,
61
+ "max_cached_steps": -1, # -1 means no limit
62
+ "downsample_factor": args.downsample_factor,
63
+ # Fn=1, Bn=0, means FB Cache, otherwise, Dual Block Cache
64
+ "Fn_compute_blocks": args.Fn_compute_blocks, # Fn, F8, etc.
65
+ "Bn_compute_blocks": args.Bn_compute_blocks, # Bn, B16, etc.
66
+ "residual_diff_threshold": args.rdt,
67
+ # relative token diff threshold, default is 0.0
68
+ "important_condition_threshold": 0.00,
69
+ # CFG: classifier free guidance or not
70
+ # For model that fused CFG and non-CFG into single forward step,
71
+ # should set do_separate_classifier_free_guidance as False.
72
+ "do_separate_classifier_free_guidance": True,
73
+ # Compute cfg forward first or not, default False, namely,
74
+ # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
75
+ "cfg_compute_first": False,
76
+ # Compute separate diff values for CFG and non-CFG step,
77
+ # default True. If False, we will use the computed diff from
78
+ # current non-CFG transformer step for current CFG step.
79
+ "cfg_diff_compute_separate": True,
80
+ "enable_taylorseer": args.taylorseer,
81
+ "enable_encoder_taylorseer": args.taylorseer,
82
+ # Taylorseer cache type can be hidden_states or residual
83
+ "taylorseer_cache_type": "residual",
84
+ "taylorseer_kwargs": {
85
+ "n_derivatives": args.taylorseer_order,
86
+ },
87
+ }
88
+ cache_type_str = "DBCACHE"
89
+ cache_type_str = (
90
+ f"{cache_type_str}_F{args.Fn_compute_blocks}"
91
+ f"B{args.Bn_compute_blocks}W{args.warmup_steps}"
92
+ f"T{int(args.taylorseer)}O{args.taylorseer_order}"
93
+ )
94
+ print(f"cache options:\n{cache_options}")
95
+
96
+ apply_cache_on_pipe(pipe, **cache_options)
97
+ else:
98
+ cache_type_str = "NONE"
99
+
100
+ # Enable memory savings
101
+ pipe.enable_model_cpu_offload()
102
+
103
+ # Wan currently requires installing diffusers from source
104
+ assert isinstance(pipe.vae, AutoencoderKLWan) # enable type check for IDE
105
+ if diffusers.__version__ >= "0.34.0":
106
+ pipe.vae.enable_tiling()
107
+ pipe.vae.enable_slicing()
108
+ else:
109
+ print(
110
+ "Wan pipeline requires diffusers version >= 0.34.0 "
111
+ "for vae tiling and slicing, please install diffusers "
112
+ "from source."
113
+ )
114
+
115
+ return cache_type_str, pipe
116
+
117
+
118
+ def main():
119
+ args = get_args()
120
+ print(args)
121
+
122
+ model_id = os.environ.get(
123
+ "WAN_FLF2V_DIR",
124
+ "Wan-AI/Wan2.1-FLF2V-14B-720P-Diffusers",
125
+ )
126
+ image_encoder = CLIPVisionModel.from_pretrained(
127
+ model_id, subfolder="image_encoder", torch_dtype=torch.float32
128
+ )
129
+ vae = AutoencoderKLWan.from_pretrained(
130
+ model_id, subfolder="vae", torch_dtype=torch.float32
131
+ )
132
+ pipe = WanImageToVideoPipeline.from_pretrained(
133
+ model_id,
134
+ vae=vae,
135
+ image_encoder=image_encoder,
136
+ torch_dtype=torch.bfloat16,
137
+ )
138
+ pipe.to("cuda")
139
+
140
+ cache_type_str, pipe = prepare_pipeline(pipe, args)
141
+
142
+ first_frame = load_image("data/flf2v_input_first_frame.png")
143
+ last_frame = load_image("data/flf2v_input_last_frame.png")
144
+
145
+ first_frame, height, width = aspect_ratio_resize(first_frame, pipe)
146
+ if last_frame.size != first_frame.size:
147
+ last_frame, _, _ = center_crop_resize(last_frame, height, width)
148
+
149
+ prompt = (
150
+ "CG animation style, a small blue bird takes off from the ground, flapping its wings. "
151
+ + "The bird's feathers are delicate, with a unique pattern on its chest. The background shows "
152
+ + "a blue sky with white clouds under bright sunshine. The camera follows the bird upward, "
153
+ + "capturing its flight and the vastness of the sky from a close-up, low-angle perspective."
154
+ )
155
+
156
+ start = time.time()
157
+ output = pipe(
158
+ image=first_frame,
159
+ last_image=last_frame,
160
+ prompt=prompt,
161
+ height=height,
162
+ width=width,
163
+ guidance_scale=5.5,
164
+ num_frames=49,
165
+ num_inference_steps=35,
166
+ generator=torch.Generator("cpu").manual_seed(0),
167
+ ).frames[0]
168
+ end = time.time()
169
+
170
+ if hasattr(pipe.transformer, "_cached_steps"):
171
+ cached_steps = pipe.transformer._cached_steps
172
+ residual_diffs = pipe.transformer._residual_diffs
173
+ print(f"Cache Steps: {len(cached_steps)}, {cached_steps}")
174
+ print(f"Residual Diffs: {len(residual_diffs)}, {residual_diffs}")
175
+ if hasattr(pipe.transformer, "_cfg_cached_steps"):
176
+ cfg_cached_steps = pipe.transformer._cfg_cached_steps
177
+ cfg_residual_diffs = pipe.transformer._cfg_residual_diffs
178
+ print(f"CFG Cache Steps: {len(cfg_cached_steps)}, {cfg_cached_steps} ")
179
+ print(
180
+ f"CFG Residual Diffs: {len(cfg_residual_diffs)}, {cfg_residual_diffs}"
181
+ )
182
+
183
+ time_cost = end - start
184
+ save_path = f"wan.flf2v.{cache_type_str}.mp4"
185
+ print(f"Time cost: {time_cost:.2f}s")
186
+ print(f"Saving video to {save_path}")
187
+ export_to_video(output, save_path, fps=16)
188
+
189
+
190
+ if __name__ == "__main__":
191
+ main()
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.2.4'
21
- __version_tuple__ = version_tuple = (0, 2, 4)
20
+ __version__ = version = '0.2.5'
21
+ __version_tuple__ = version_tuple = (0, 2, 5)