cache-dit 0.2.34__py3-none-any.whl → 0.2.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cache-dit might be problematic. Click here for more details.
- cache_dit/__init__.py +5 -3
- cache_dit/_version.py +2 -2
- cache_dit/metrics/clip_score.py +135 -0
- cache_dit/metrics/fid.py +42 -0
- cache_dit/metrics/image_reward.py +177 -0
- cache_dit/metrics/lpips.py +2 -14
- cache_dit/metrics/metrics.py +449 -93
- cache_dit/utils.py +15 -0
- {cache_dit-0.2.34.dist-info → cache_dit-0.2.37.dist-info}/METADATA +142 -35
- {cache_dit-0.2.34.dist-info → cache_dit-0.2.37.dist-info}/RECORD +14 -12
- {cache_dit-0.2.34.dist-info → cache_dit-0.2.37.dist-info}/WHEEL +0 -0
- {cache_dit-0.2.34.dist-info → cache_dit-0.2.37.dist-info}/entry_points.txt +0 -0
- {cache_dit-0.2.34.dist-info → cache_dit-0.2.37.dist-info}/licenses/LICENSE +0 -0
- {cache_dit-0.2.34.dist-info → cache_dit-0.2.37.dist-info}/top_level.txt +0 -0
cache_dit/utils.py
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import torch
|
|
2
2
|
import dataclasses
|
|
3
3
|
import diffusers
|
|
4
|
+
import builtins as __builtin__
|
|
5
|
+
import contextlib
|
|
6
|
+
|
|
4
7
|
import numpy as np
|
|
5
8
|
from pprint import pprint
|
|
6
9
|
from diffusers import DiffusionPipeline
|
|
@@ -13,6 +16,18 @@ from cache_dit.logger import init_logger
|
|
|
13
16
|
logger = init_logger(__name__)
|
|
14
17
|
|
|
15
18
|
|
|
19
|
+
def dummy_print(*args, **kwargs):
    """Accept any arguments and do nothing.

    Used by ``disable_print`` as a silent stand-in for the builtin ``print``.
    """
    pass


@contextlib.contextmanager
def disable_print():
    """Temporarily replace the builtin ``print`` with a no-op.

    While the ``with`` body runs, ``builtins.print`` is bound to
    ``dummy_print``, so every ``print(...)`` call in the process (including
    calls made by third-party code) produces no output. The binding that was
    active on entry is put back on exit.

    The swap is process-global: it is not limited to the current thread.
    """
    origin_print = __builtin__.print
    __builtin__.print = dummy_print
    try:
        yield
    finally:
        # Restore in ``finally`` so that an exception raised inside the
        # ``with`` body cannot leave ``print`` silenced for the rest of the
        # process.
        __builtin__.print = origin_print
|
|
29
|
+
|
|
30
|
+
|
|
16
31
|
@torch.compiler.disable
def is_diffusers_at_least_0_3_5() -> bool:
    """Return True when the installed diffusers release is 0.35.0 or newer.

    The release components of ``diffusers.__version__`` are compared as
    integers rather than as text. A plain string comparison orders versions
    lexicographically, which misclassifies e.g. ``"0.4.0"`` (reported as
    newer than ``"0.35.0"``) and ``"0.100.0"`` (reported as older).

    Only the leading digits of the first three dot-separated fields are
    used, so suffixed builds such as ``"0.35.0.dev0"`` or ``"0.35.0rc1"``
    are treated as 0.35.0, matching what the previous string comparison
    returned for them.
    """
    release = []
    for field in diffusers.__version__.split(".")[:3]:
        digits = ""
        for ch in field:
            if not "0" <= ch <= "9":
                break
            digits += ch
        if not digits:
            # Non-numeric field (e.g. "dev0"): the numeric release part ended.
            break
        release.append(int(digits))
    # Pad short versions such as "1" or "0.35" so the tuples compare by value.
    release.extend([0] * (3 - len(release)))
    return tuple(release) >= (0, 35, 0)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cache_dit
|
|
3
|
-
Version: 0.2.34
|
|
3
|
+
Version: 0.2.37
|
|
4
4
|
Summary: 🤗 A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers
|
|
5
5
|
Author: DefTruth, vipshop.com, etc.
|
|
6
6
|
Maintainer: DefTruth, vipshop.com, etc
|
|
@@ -18,7 +18,12 @@ Requires-Dist: scikit-image
|
|
|
18
18
|
Requires-Dist: scipy
|
|
19
19
|
Requires-Dist: lpips==0.1.4
|
|
20
20
|
Requires-Dist: torchao>=0.12.0
|
|
21
|
+
Requires-Dist: image-reward
|
|
21
22
|
Provides-Extra: all
|
|
23
|
+
Provides-Extra: metrics
|
|
24
|
+
Requires-Dist: image-reward; extra == "metrics"
|
|
25
|
+
Requires-Dist: pytorch-fid; extra == "metrics"
|
|
26
|
+
Requires-Dist: lpips==0.1.4; extra == "metrics"
|
|
22
27
|
Provides-Extra: dev
|
|
23
28
|
Requires-Dist: pre-commit; extra == "dev"
|
|
24
29
|
Requires-Dist: pytest<8.0.0,>=7.0.0; extra == "dev"
|
|
@@ -56,7 +61,9 @@ Dynamic: requires-python
|
|
|
56
61
|
<img src=https://img.shields.io/badge/Release-v0.2-brightgreen.svg >
|
|
57
62
|
</div>
|
|
58
63
|
<p align="center">
|
|
59
|
-
|
|
64
|
+
<b><a href="#unified">📚Unified Cache APIs</a></b> | <a href="#forward-pattern-matching">📚Forward Pattern Matching</a> | <a href="#automatic-block-adapter">📚Automatic Block Adapter</a><br>
|
|
65
|
+
<a href="#hybird-forward-pattern">📚Hybrid Forward Pattern</a> | <a href="#dbcache">📚DBCache</a> | <a href="#taylorseer">📚Hybrid TaylorSeer</a> | <a href="#cfg">📚Cache CFG</a><br>
|
|
66
|
+
<a href="#benchmarks">📚Text2Image DrawBench</a> | <a href="#benchmarks">📚Text2Image Distillation DrawBench</a>
|
|
60
67
|
</p>
|
|
61
68
|
<p align="center">
|
|
62
69
|
🎉Now, <b>cache-dit</b> covers <b>most</b> mainstream Diffusers' <b>DiT</b> Pipelines🎉<br>
|
|
@@ -111,8 +118,8 @@ Dynamic: requires-python
|
|
|
111
118
|
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/gifs/skyreels_v2.C0_L0_Q0_NONE.gif width=125px>
|
|
112
119
|
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/gifs/skyreels_v2.C0_L0_Q0_DBCACHE_F8B0_W8M0MC0_T0O2_R0.12_S17.gif width=125px>
|
|
113
120
|
<p><b>🔥Mochi-1-preview</b> | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:1.8x↑🎉 | <b>SkyReelsV2</b> | <a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:1.6x↑🎉</p>
|
|
114
|
-
<img src
|
|
115
|
-
<img src
|
|
121
|
+
<img src=https://github.com/vipshop/cache-dit/raw/main/examples/data/visualcloze/00555_00.jpg width=100px>
|
|
122
|
+
<img src=https://github.com/vipshop/cache-dit/raw/main/examples/data/visualcloze/12265_00.jpg width=100px>
|
|
116
123
|
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/visualcloze-512.C0_L0_Q0_NONE.png width=100px>
|
|
117
124
|
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/visualcloze-512.C0_L0_Q0_DBCACHE_F8B0_W8M0MC0_T0O2_R0.08_S15.png width=100px>
|
|
118
125
|
<img src=https://github.com/vipshop/cache-dit/raw/main/assets/visualcloze-512.C0_L0_Q0_DBCACHE_F1B0_W8M0MC0_T0O2_R0.08_S18.png width=100px>
|
|
@@ -180,14 +187,15 @@ Dynamic: requires-python
|
|
|
180
187
|
<div id="contents"></div>
|
|
181
188
|
|
|
182
189
|
- [⚙️Installation](#️installation)
|
|
183
|
-
- [🔥
|
|
190
|
+
- [🔥Benchmarks](#benchmarks)
|
|
191
|
+
- [🔥Supported Pipelines](#supported)
|
|
184
192
|
- [🎉Unified Cache APIs](#unified)
|
|
185
|
-
- [📚Forward Pattern Matching](#
|
|
186
|
-
- [♥️Cache with One-line Code](
|
|
187
|
-
- [🔥Automatic Block Adapter](#
|
|
188
|
-
- [📚Hybird Forward Pattern](#
|
|
189
|
-
- [📚Implement Patch Functor](#
|
|
190
|
-
- [🤖Cache Acceleration Stats](#
|
|
193
|
+
- [📚Forward Pattern Matching](#forward-pattern-matching)
|
|
194
|
+
- [♥️Cache with One-line Code](#%EF%B8%8Fcache-acceleration-with-one-line-code)
|
|
195
|
+
- [🔥Automatic Block Adapter](#automatic-block-adapter)
|
|
196
|
+
- [📚Hybrid Forward Pattern](#hybird-forward-pattern)
|
|
197
|
+
- [📚Implement Patch Functor](#implement-patch-functor)
|
|
198
|
+
- [🤖Cache Acceleration Stats](#cache-acceleration-stats-summary)
|
|
191
199
|
- [⚡️Dual Block Cache](#dbcache)
|
|
192
200
|
- [🔥Hybrid TaylorSeer](#taylorseer)
|
|
193
201
|
- [⚡️Hybrid Cache CFG](#cfg)
|
|
@@ -209,36 +217,48 @@ Or you can install the latest develop version from GitHub:
|
|
|
209
217
|
pip3 install git+https://github.com/vipshop/cache-dit.git
|
|
210
218
|
```
|
|
211
219
|
|
|
212
|
-
## 🔥Supported
|
|
220
|
+
## 🔥Supported Pipelines
|
|
213
221
|
|
|
214
222
|
<div id="supported"></div>
|
|
215
223
|
|
|
216
|
-
Currently, **cache-dit** library supports almost **Any** Diffusion Transformers (with **Transformer Blocks** that match the specific Input and Output **patterns**). Please check [🎉
|
|
224
|
+
Currently, the **cache-dit** library supports almost **any** Diffusion Transformer (with **Transformer Blocks** that match the specific input and output **patterns**). Please check [🎉Examples](./examples/pipeline) for more details. Only some of the tested models are listed here.
|
|
225
|
+
|
|
226
|
+
```python
|
|
227
|
+
>>> import cache_dit
|
|
228
|
+
>>> cache_dit.supported_pipelines()
|
|
229
|
+
(30, ['Flux*', 'Mochi*', 'CogVideoX*', 'Wan*', 'HunyuanVideo*', 'QwenImage*', 'LTX*', 'Allegro*',
|
|
230
|
+
'CogView3Plus*', 'CogView4*', 'Cosmos*', 'EasyAnimate*', 'SkyReelsV2*', 'StableDiffusion3*',
|
|
231
|
+
'ConsisID*', 'DiT*', 'Amused*', 'Bria*', 'Lumina*', 'OmniGen*', 'PixArt*', 'Sana*', 'StableAudio*',
|
|
232
|
+
'VisualCloze*', 'AuraFlow*', 'Chroma*', 'ShapE*', 'HiDream*', 'HunyuanDiT*', 'HunyuanDiTPAG*'])
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
<details>
|
|
236
|
+
<summary> Show all pipelines </summary>
|
|
217
237
|
|
|
218
238
|
- [🚀HunyuanImage-2.1](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
219
|
-
- [🚀Qwen-Image-Lightning](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
239
|
+
- [🚀Qwen-Image-Lightning](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
220
240
|
- [🚀Qwen-Image-Edit](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
221
241
|
- [🚀Qwen-Image](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
222
|
-
- [🚀SkyReelsV2](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
223
|
-
- [🚀LTXVideo](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
224
|
-
- [🚀OmniGen](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
225
|
-
- [🚀Lumina2](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
226
242
|
- [🚀FLUX.1-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
227
243
|
- [🚀FLUX.1-Fill-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
228
|
-
- [🚀FLUX.1-Kontext-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
229
|
-
- [🚀Chroma1-HD](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
230
|
-
- [🚀VisualCloze](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
231
|
-
- [🚀CogVideoX](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
232
|
-
- [🚀CogVideoX1.5](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
233
|
-
- [🚀CogView3-Plus](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
244
|
+
- [🚀FLUX.1-Kontext-dev](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
234
245
|
- [🚀CogView4](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
235
246
|
- [🚀Wan2.2-T2V](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
247
|
+
- [🚀HunyuanVideo](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
248
|
+
- [🚀HiDream-I1-Full](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
249
|
+
- [🚀HunyuanDiT](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
236
250
|
- [🚀Wan2.1-T2V](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
237
251
|
- [🚀Wan2.1-FLF2V](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
252
|
+
- [🚀SkyReelsV2](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
253
|
+
- [🚀Chroma1-HD](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
254
|
+
- [🚀CogVideoX1.5](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
255
|
+
- [🚀CogView3-Plus](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
256
|
+
- [🚀CogVideoX](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
257
|
+
- [🚀VisualCloze](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
258
|
+
- [🚀LTXVideo](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
259
|
+
- [🚀OmniGen](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
260
|
+
- [🚀Lumina2](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
238
261
|
- [🚀mochi-1-preview](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
239
|
-
- [🚀HunyuanVideo](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
240
|
-
- [🚀HunyuanDiT](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
241
|
-
- [🚀HiDream-I1-Full](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
242
262
|
- [🚀AuraFlow-v0.3](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
243
263
|
- [🚀PixArt-Alpha](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
244
264
|
- [🚀PixArt-Sigma](https://github.com/vipshop/cache-dit/raw/main/examples)
|
|
@@ -252,6 +272,87 @@ Currently, **cache-dit** library supports almost **Any** Diffusion Transformers
|
|
|
252
272
|
|
|
253
273
|
</details>
|
|
254
274
|
|
|
275
|
+
## 🔥Benchmarks
|
|
276
|
+
|
|
277
|
+
<div id="benchmarks"></div>
|
|
278
|
+
|
|
279
|
+
cache-dit will support more mainstream cache acceleration algorithms in the future. More benchmarks will be released; please stay tuned for updates. Here, only some of the precision and performance benchmark results are presented. The test dataset is **DrawBench**. For a complete benchmark, please refer to [📚Benchmarks](./bench/).
|
|
280
|
+
|
|
281
|
+
### 📚Text2Image DrawBench: FLUX.1-dev
|
|
282
|
+
|
|
283
|
+
Comparisons between different FnBn compute block configurations show that **more compute blocks result in higher precision**. For example, the F8B0_W8MC0 configuration achieves the best Clip Score (33.007) and a near-best ImageReward (1.0333) among the cached configurations. **Device**: NVIDIA L20. **F**: Fn_compute_blocks, **B**: Bn_compute_blocks, 50 steps.
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
| Config | Clip Score(↑) | ImageReward(↑) | PSNR(↑) | TFLOPs(↓) | SpeedUp(↑) |
|
|
287
|
+
| --- | --- | --- | --- | --- | --- |
|
|
288
|
+
| [**FLUX.1**-dev]: 50 steps | 32.9217 | 1.0412 | INF | 3726.87 | 1.00x |
|
|
289
|
+
| F8B0_W8MC0_R0.08 | 33.0070 | 1.0333 | 35.2008 | 2162.19 | 1.72x |
|
|
290
|
+
| F8B0_W4MC0_R0.08 | 32.9871 | 1.0370 | 33.8317 | 2064.81 | 1.80x |
|
|
291
|
+
| F4B0_W4MC2_R0.12 | 32.9718 | 1.0301 | 31.9394 | 1678.98 | 2.22x |
|
|
292
|
+
| F8B0_W8MC3_R0.12 | 32.9613 | 1.0270 | 34.2834 | 1977.69 | 1.88x |
|
|
293
|
+
| F8B0_W4MC2_R0.12 | 32.9535 | 1.0185 | 32.7346 | 1935.73 | 1.93x |
|
|
294
|
+
| F8B0_W8MC2_R0.12 | 32.9302 | 1.0227 | 34.7449 | 2072.18 | 1.80x |
|
|
295
|
+
| F8B0_W4MC3_R0.12 | 32.9234 | 1.0085 | 32.5385 | 1816.58 | 2.05x |
|
|
296
|
+
| F8B0_W8MC4_R0.12 | 32.9041 | 1.0140 | 33.9466 | 1897.61 | 1.96x |
|
|
297
|
+
| F4B0_W4MC3_R0.12 | 32.8981 | 1.0130 | 31.8031 | 1507.83 | 2.47x |
|
|
298
|
+
| F4B0_W4MC0_R0.08 | 32.8544 | 1.0065 | 32.3555 | 1654.72 | 2.25x |
|
|
299
|
+
| F8B0_W4MC4_R0.12 | 32.8443 | 1.0102 | 32.4231 | 1753.48 | 2.13x |
|
|
300
|
+
| F4B0_W4MC4_R0.12 | 32.8384 | 1.0065 | 31.5292 | 1400.08 | 2.66x |
|
|
301
|
+
| F1B0_W4MC4_R0.12 | 32.8291 | 1.0181 | 32.9462 | 1401.61 | 2.66x |
|
|
302
|
+
| F1B0_W4MC3_R0.12 | 32.8236 | 1.0166 | 33.0037 | 1457.62 | 2.56x |
|
|
303
|
+
| F1B0_W4MC10_R1.0 | 32.3183 | 0.8796 | 29.6757 | 651.90 | 5.72x |
|
|
304
|
+
|
|
305
|
+
The comparison between **cache-dit: DBCache** and algorithms such as Δ-DiT, Chipmunk, FORA, DuCa, TaylorSeer and FoCa is as follows. Among the methods with a speedup ratio below **3x**, cache-dit achieves the best accuracy. Please check [📚How to Reproduce?](./bench/) for more details.
|
|
306
|
+
|
|
307
|
+
| Method | TFLOPs(↓) | SpeedUp(↑) | ImageReward(↑) | Clip Score(↑) |
|
|
308
|
+
| --- | --- | --- | --- | --- |
|
|
309
|
+
| [**FLUX.1**-dev]: 50 steps | 3726.87 | 1.00× | 0.9898 | 32.404 |
|
|
310
|
+
| [**FLUX.1**-dev]: 60% steps | 2231.70 | 1.67× | 0.9663 | 32.312 |
|
|
311
|
+
| Δ-DiT(N=2) | 2480.01 | 1.50× | 0.9444 | 32.273 |
|
|
312
|
+
| Δ-DiT(N=3) | 1686.76 | 2.21× | 0.8721 | 32.102 |
|
|
313
|
+
| [**FLUX.1**-dev]: 34% steps | 1264.63 | 3.13× | 0.9453 | 32.114 |
|
|
314
|
+
| Chipmunk | 1505.87 | 2.47× | 0.9936 | 32.776 |
|
|
315
|
+
| FORA (N=3) | 1320.07 | 2.82× | 0.9776 | 32.266 |
|
|
316
|
+
| **[DBCache(F=4,B=0,W=4,MC=4)](https://github.com/vipshop/cache-dit)** | **1400.08** | **2.66×** | **1.0065** | **32.838** |
|
|
317
|
+
| DuCa(N=5) | 978.76 | 3.80× | 0.9955 | 32.241 |
|
|
318
|
+
| TaylorSeer(N=4,O=2) | 1042.27 | 3.57× | 0.9857 | 32.413 |
|
|
319
|
+
| **[DBCache+TaylorSeer(F=1,B=0,O=1)](https://github.com/vipshop/cache-dit)** | **1153.05** | **3.23×** | **1.0221** | **32.819** |
|
|
320
|
+
| **[FoCa(N=5) arxiv.2508.16211](https://arxiv.org/pdf/2508.16211)** | **893.54** | **4.16×** | **1.0029** | **32.948** |
|
|
321
|
+
| [**FLUX.1**-dev]: 22% steps | 818.29 | 4.55× | 0.8183 | 31.772 |
|
|
322
|
+
| FORA(N=4) | 967.91 | 3.84× | 0.9730 | 32.142 |
|
|
323
|
+
| ToCa(N=8) | 784.54 | 4.74× | 0.9451 | 31.993 |
|
|
324
|
+
| DuCa(N=7) | 760.14 | 4.89× | 0.9757 | 32.066 |
|
|
325
|
+
| TeaCache(l=0.8) | 892.35 | 4.17× | 0.8683 | 31.704 |
|
|
326
|
+
| **[DBCache(F=4,B=0,W=4,MC=10)](https://github.com/vipshop/cache-dit)** | 816.65 | 4.56x | 0.8245 | 32.191 |
|
|
327
|
+
| TaylorSeer(N=5,O=2) | 893.54 | 4.16× | 0.9768 | 32.467 |
|
|
328
|
+
| **[FoCa(N=7) arxiv.2508.16211](https://arxiv.org/pdf/2508.16211)** | **670.44** | **5.54×** | **0.9891** | **32.920** |
|
|
329
|
+
| FORA(N=7) | 670.14 | 5.55× | 0.7418 | 31.519 |
|
|
330
|
+
| ToCa(N=12) | 644.70 | 5.77× | 0.7155 | 31.808 |
|
|
331
|
+
| DuCa(N=10) | 606.91 | 6.13× | 0.8382 | 31.759 |
|
|
332
|
+
| TeaCache(l=1.2) | 669.27 | 5.56× | 0.7394 | 31.704 |
|
|
333
|
+
| **[DBCache(F=1,B=0,W=4,MC=10)](https://github.com/vipshop/cache-dit)** | **651.90** | **5.72x** | 0.8796 | **32.318** |
|
|
334
|
+
| TaylorSeer(N=7,O=2) | 670.44 | 5.54× | 0.9128 | 32.128 |
|
|
335
|
+
| **[FoCa(N=8) arxiv.2508.16211](https://arxiv.org/pdf/2508.16211)** | **596.07** | **6.24×** | **0.9502** | **32.706** |
|
|
336
|
+
|
|
337
|
+
NOTE: Except for DBCache, other performance data are referenced from the paper [FoCa, arxiv.2508.16211](https://arxiv.org/pdf/2508.16211).
|
|
338
|
+
|
|
339
|
+
### 📚Text2Image Distillation DrawBench: Qwen-Image-Lightning
|
|
340
|
+
|
|
341
|
+
Surprisingly, cache-dit: DBCache still works on extremely few-step distilled models. For example, for **Qwen-Image-Lightning w/ 4 steps** with the F16B16 configuration, the PSNR is 34.8163, the Clip Score is 35.6109, and the ImageReward is 1.2614, so it maintains relatively high precision.
|
|
342
|
+
|
|
343
|
+
| Config | PSNR(↑) | Clip Score(↑) | ImageReward(↑) | TFLOPs(↓) | SpeedUp(↑) |
|
|
344
|
+
|----------------------------|-----------|------------|--------------|----------|------------|
|
|
345
|
+
| [**Lightning**]: 4 steps | INF | 35.5797 | 1.2630 | 274.33 | 1.00x |
|
|
346
|
+
| F24B24_W2MC1_R0.8 | 36.3242 | 35.6224 | 1.2630 | 264.74 | 1.04x |
|
|
347
|
+
| F16B16_W2MC1_R0.8 | 34.8163 | 35.6109 | 1.2614 | 244.25 | 1.12x |
|
|
348
|
+
| F12B12_W2MC1_R0.8 | 33.8953 | 35.6535 | 1.2549 | 234.63 | 1.17x |
|
|
349
|
+
| F8B8_W2MC1_R0.8 | 33.1374 | 35.7284 | 1.2517 | 224.29 | 1.22x |
|
|
350
|
+
| F48B0_W2MC1_R0.8 | 30.0533 | 35.8483 | 1.1979 | 265.56 | 1.03x |
|
|
351
|
+
| F32B0_W2MC1_R0.8 | 29.6490 | 35.7684 | 1.2302 | 261.05 | 1.05x |
|
|
352
|
+
| F24B0_W2MC1_R0.8 | 29.6081 | 35.8599 | 1.1874 | 245.54 | 1.12x |
|
|
353
|
+
| F16B0_W2MC1_R0.8 | 29.4844 | 36.0810 | 1.1586 | 227.06 | 1.21x |
|
|
354
|
+
|
|
355
|
+
|
|
255
356
|
## 🎉Unified Cache APIs
|
|
256
357
|
|
|
257
358
|
<div id="unified"></div>
|
|
@@ -552,7 +653,7 @@ torch._dynamo.config.recompile_limit = 96 # default is 8
|
|
|
552
653
|
torch._dynamo.config.accumulated_recompile_limit = 2048 # default is 256
|
|
553
654
|
```
|
|
554
655
|
|
|
555
|
-
Please check [
|
|
656
|
+
Please check [perf.py](./bench/perf.py) for more details.
|
|
556
657
|
|
|
557
658
|
|
|
558
659
|
## 🛠Metrics CLI
|
|
@@ -563,13 +664,19 @@ You can utilize the APIs provided by cache-dit to quickly evaluate the accuracy
|
|
|
563
664
|
|
|
564
665
|
```python
|
|
565
666
|
from cache_dit.metrics import compute_psnr
|
|
566
|
-
from cache_dit.metrics import
|
|
567
|
-
from cache_dit.metrics import
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
667
|
+
from cache_dit.metrics import compute_ssim
|
|
668
|
+
from cache_dit.metrics import compute_fid
|
|
669
|
+
from cache_dit.metrics import compute_lpips
|
|
670
|
+
from cache_dit.metrics import compute_clip_score
|
|
671
|
+
from cache_dit.metrics import compute_image_reward
|
|
672
|
+
|
|
673
|
+
psnr, n = compute_psnr("true.png", "test.png") # Num: n
|
|
674
|
+
psnr, n = compute_psnr("true_dir", "test_dir")
|
|
675
|
+
ssim, n = compute_ssim("true_dir", "test_dir")
|
|
676
|
+
fid, n = compute_fid("true_dir", "test_dir")
|
|
677
|
+
lpips, n = compute_lpips("true_dir", "test_dir")
|
|
678
|
+
clip_score, n = compute_clip_score("DrawBench200.txt", "test_dir")
|
|
679
|
+
reward, n = compute_image_reward("DrawBench200.txt", "test_dir")
|
|
573
680
|
```
|
|
574
681
|
|
|
575
682
|
Please check [test_metrics.py](./tests/test_metrics.py) for more details. Or, you can use the `cache-dit-metrics-cli` tool. For example:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
cache_dit/__init__.py,sha256=
|
|
2
|
-
cache_dit/_version.py,sha256=
|
|
1
|
+
cache_dit/__init__.py,sha256=hzaexC1VQ0TxiWY6TJ1lTm-04e65WOTNHOfYryu1vFA,1284
|
|
2
|
+
cache_dit/_version.py,sha256=jVUPlUOcnlQRBFP8i5PUv2oJntFMrKgk1rs1guuDZ34,706
|
|
3
3
|
cache_dit/logger.py,sha256=0zsu42hN-3-rgGC_C29ms1IvVpV4_b4_SwJCKSenxBE,4304
|
|
4
|
-
cache_dit/utils.py,sha256=
|
|
4
|
+
cache_dit/utils.py,sha256=nuHHr6NB286qE9u6klLNfhAVRMOGipihOhM8LRqznmU,10775
|
|
5
5
|
cache_dit/cache_factory/.gitignore,sha256=5Cb-qT9wsTUoMJ7vACDF7ZcLpAXhi5v-xdcWSRit988,23
|
|
6
6
|
cache_dit/cache_factory/__init__.py,sha256=Iw6-iJLFbdzCsIDZXXOw371L-HPmoeZO_P9a3sDjP5s,1103
|
|
7
7
|
cache_dit/cache_factory/cache_adapters.py,sha256=OFJlxxyODhoZstN4EfPgC7tE8M1ZdQFcE25gDNrW7NA,18212
|
|
@@ -33,17 +33,19 @@ cache_dit/compile/utils.py,sha256=nN2OIrSdwRR5zGxJinKDqb07pXpvTNTF3g_OgLkeeBU,38
|
|
|
33
33
|
cache_dit/custom_ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
34
|
cache_dit/custom_ops/triton_taylorseer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
35
|
cache_dit/metrics/__init__.py,sha256=RaUhl5dieF40RqnizGzR30qoJJ9dyMUEADwgwMaMQrE,575
|
|
36
|
+
cache_dit/metrics/clip_score.py,sha256=ERNCFQFJKzJdbIX9OAg-1LiSPuXUVHLOFxbf2gcENpc,3938
|
|
36
37
|
cache_dit/metrics/config.py,sha256=ieOgD9ayz722RjVzk24bSIqS2D6o7TZjGk8KeXV-OLQ,551
|
|
37
|
-
cache_dit/metrics/fid.py,sha256=
|
|
38
|
+
cache_dit/metrics/fid.py,sha256=ZM_FM0XERtpnkMUfphmw2aOdljrh1uba-pnYItu0q6M,18219
|
|
39
|
+
cache_dit/metrics/image_reward.py,sha256=N8HalJo1T1js0dsNb2V1KRv4kIdcm3nhx7iOXJuqcns,5421
|
|
38
40
|
cache_dit/metrics/inception.py,sha256=pBVe2X6ylLPIXTG4-GWDM9DWnCviMJbJ45R3ulhktR0,12759
|
|
39
|
-
cache_dit/metrics/lpips.py,sha256=
|
|
40
|
-
cache_dit/metrics/metrics.py,sha256=
|
|
41
|
+
cache_dit/metrics/lpips.py,sha256=hrHrmdM-f2B4TKDs0xLqJO5JFaYcCjq2qNIR8oCrVkc,811
|
|
42
|
+
cache_dit/metrics/metrics.py,sha256=7UV-H2NRbhfr6dvrXEzU97Zy-BSQ5zEfm9CKtaK4ldg,40231
|
|
41
43
|
cache_dit/quantize/__init__.py,sha256=kWYoMAyZgBXu9BJlZjTQ0dRffW9GqeeY9_iTkXrb70A,59
|
|
42
44
|
cache_dit/quantize/quantize_ao.py,sha256=Fx1KW4l3gdEkdrcAYtPoDW7WKBJWrs3glOHiEwW_TgE,6160
|
|
43
45
|
cache_dit/quantize/quantize_interface.py,sha256=2s_R7xPSKuJeFpEGeLwRxnq_CqJcBG3a3lzyW5wh-UM,1241
|
|
44
|
-
cache_dit-0.2.
|
|
45
|
-
cache_dit-0.2.
|
|
46
|
-
cache_dit-0.2.
|
|
47
|
-
cache_dit-0.2.
|
|
48
|
-
cache_dit-0.2.
|
|
49
|
-
cache_dit-0.2.
|
|
46
|
+
cache_dit-0.2.37.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
|
|
47
|
+
cache_dit-0.2.37.dist-info/METADATA,sha256=dLOxpSzGT1izGxxPdbFc7jDKtiSgl-XAl-JZXRkX138,45826
|
|
48
|
+
cache_dit-0.2.37.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
49
|
+
cache_dit-0.2.37.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
|
|
50
|
+
cache_dit-0.2.37.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
|
|
51
|
+
cache_dit-0.2.37.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|