cache-dit 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. cache_dit/__init__.py +1 -0
  2. cache_dit/_version.py +2 -2
  3. cache_dit/cache_factory/__init__.py +3 -6
  4. cache_dit/cache_factory/block_adapters/block_adapters.py +8 -64
  5. cache_dit/cache_factory/cache_adapters/__init__.py +0 -1
  6. cache_dit/cache_factory/cache_adapters/cache_adapter.py +47 -14
  7. cache_dit/cache_factory/cache_contexts/__init__.py +10 -8
  8. cache_dit/cache_factory/cache_contexts/cache_context.py +186 -117
  9. cache_dit/cache_factory/cache_contexts/cache_manager.py +63 -131
  10. cache_dit/cache_factory/cache_contexts/calibrators/__init__.py +132 -0
  11. cache_dit/cache_factory/cache_contexts/{v2/calibrators → calibrators}/foca.py +1 -1
  12. cache_dit/cache_factory/cache_contexts/{v2/calibrators → calibrators}/taylorseer.py +7 -2
  13. cache_dit/cache_factory/cache_interface.py +128 -111
  14. cache_dit/cache_factory/params_modifier.py +87 -0
  15. cache_dit/metrics/__init__.py +3 -1
  16. cache_dit/utils.py +12 -21
  17. {cache_dit-0.3.1.dist-info → cache_dit-0.3.2.dist-info}/METADATA +78 -64
  18. {cache_dit-0.3.1.dist-info → cache_dit-0.3.2.dist-info}/RECORD +23 -28
  19. cache_dit/cache_factory/cache_adapters/v2/__init__.py +0 -3
  20. cache_dit/cache_factory/cache_adapters/v2/cache_adapter_v2.py +0 -524
  21. cache_dit/cache_factory/cache_contexts/taylorseer.py +0 -102
  22. cache_dit/cache_factory/cache_contexts/v2/__init__.py +0 -13
  23. cache_dit/cache_factory/cache_contexts/v2/cache_context_v2.py +0 -288
  24. cache_dit/cache_factory/cache_contexts/v2/cache_manager_v2.py +0 -799
  25. cache_dit/cache_factory/cache_contexts/v2/calibrators/__init__.py +0 -81
  26. /cache_dit/cache_factory/cache_contexts/{v2/calibrators → calibrators}/base.py +0 -0
  27. {cache_dit-0.3.1.dist-info → cache_dit-0.3.2.dist-info}/WHEEL +0 -0
  28. {cache_dit-0.3.1.dist-info → cache_dit-0.3.2.dist-info}/entry_points.txt +0 -0
  29. {cache_dit-0.3.1.dist-info → cache_dit-0.3.2.dist-info}/licenses/LICENSE +0 -0
  30. {cache_dit-0.3.1.dist-info → cache_dit-0.3.2.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: cache_dit
- Version: 0.3.1
+ Version: 0.3.2
  Summary: A Unified, Flexible and Training-free Cache Acceleration Framework for 🤗Diffusers.
  Author: DefTruth, vipshop.com, etc.
  Maintainer: DefTruth, vipshop.com, etc
@@ -163,11 +163,11 @@ Dynamic: requires-python

  ## 🔥News

- - [2025-09-10] 🎉Day 1 support [**HunyuanImage-2.1**](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1) with **1.7x↑🎉** speedup! Check this [example](./examples/pipeline/run_hunyuan_image_2.1.py).
- - [2025-09-08] 🔥[**Qwen-Image-Lightning**](./examples/pipeline/run_qwen_image_lightning.py) **7.1/3.5 steps🎉** inference with **[DBCache: F16B16](https://github.com/vipshop/cache-dit)**.
- - [2025-09-03] 🎉[**Wan2.2-MoE**](https://github.com/Wan-Video) **2.4x↑🎉** speedup! Please refer to [run_wan_2.2.py](./examples/pipeline/run_wan_2.2.py) as an example.
- - [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x↑🎉** speedup! Check the example: [run_qwen_image_edit.py](./examples/pipeline/run_qwen_image_edit.py).
- - [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x↑🎉** speedup! Please refer to [run_qwen_image.py](./examples/pipeline/run_qwen_image.py) as an example.
+ - [2025-09-10] 🎉Day 1 support [**HunyuanImage-2.1**](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1) with **1.7x↑🎉** speedup! Check this [example](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline/run_hunyuan_image_2.1.py).
+ - [2025-09-08] 🔥[**Qwen-Image-Lightning**](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline/run_qwen_image_lightning.py) **7.1/3.5 steps🎉** inference with **[DBCache: F16B16](https://github.com/vipshop/cache-dit)**.
+ - [2025-09-03] 🎉[**Wan2.2-MoE**](https://github.com/Wan-Video) **2.4x↑🎉** speedup! Please refer to [run_wan_2.2.py](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline/run_wan_2.2.py) as an example.
+ - [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x↑🎉** speedup! Check the example: [run_qwen_image_edit.py](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline/run_qwen_image_edit.py).
+ - [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x↑🎉** speedup! Please refer to [run_qwen_image.py](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline/run_qwen_image.py) as an example.
  - [2025-07-13] 🎉[**FLUX.1-dev**](https://github.com/xlite-dev/flux-faster) **3.3x↑🎉** speedup! NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)** + **compile + FP8 DQ**.

  <details>
@@ -176,8 +176,8 @@ Dynamic: requires-python
  - [2025-09-08] 🎉First caching mechanism in [Qwen-Image-Lightning](https://github.com/ModelTC/Qwen-Image-Lightning) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/ModelTC/Qwen-Image-Lightning/pull/35).
  - [2025-09-08] 🎉First caching mechanism in [Wan2.2](https://github.com/Wan-Video/Wan2.2) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/Wan-Video/Wan2.2/pull/127) for more details.
  - [2025-08-12] 🎉First caching mechanism in [QwenLM/Qwen-Image](https://github.com/QwenLM/Qwen-Image) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/QwenLM/Qwen-Image/pull/61).
- - [2025-09-01] 📚[**Hybrid Forward Pattern**](#unified) is supported! Please check [FLUX.1-dev](./examples/run_flux_adapter.py) as an example.
- - [2025-08-10] 🔥[**FLUX.1-Kontext-dev**](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) is supported! Please refer to [run_flux_kontext.py](./examples/pipeline/run_flux_kontext.py) as an example.
+ - [2025-09-01] 📚[**Hybrid Forward Pattern**](#unified) is supported! Please check [FLUX.1-dev](https://github.com/vipshop/cache-dit/raw/main/examples/run_flux_adapter.py) as an example.
+ - [2025-08-10] 🔥[**FLUX.1-Kontext-dev**](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) is supported! Please refer to [run_flux_kontext.py](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline/run_flux_kontext.py) as an example.
  - [2025-07-18] 🎉First caching mechanism in [🤗huggingface/flux-fast](https://github.com/huggingface/flux-fast) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check the [PR](https://github.com/huggingface/flux-fast/pull/13).

  </details>
@@ -221,7 +221,7 @@ pip3 install git+https://github.com/vipshop/cache-dit.git

  <div id="supported"></div>

- Currently, the **cache-dit** library supports almost **Any** Diffusion Transformers (with **Transformer Blocks** that match the specific Input and Output **patterns**). Please check [🎉Examples](./examples/pipeline) for more details. Only some of the tested models are listed here.
+ Currently, the **cache-dit** library supports almost **Any** Diffusion Transformers (with **Transformer Blocks** that match the specific Input and Output **patterns**). Please check [🎉Examples](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline) for more details. Only some of the tested models are listed here.

  ```python
  >>> import cache_dit
@@ -276,7 +276,7 @@ Currently, **cache-dit** library supports almost **Any** Diffusion Transformers

  <div id="benchmarks"></div>

- cache-dit will support more mainstream cache acceleration algorithms in the future. More benchmarks will be released, so please stay tuned for updates. Here, only the results of selected precision and performance benchmarks are presented. The test dataset is **DrawBench**. For the complete benchmark, please refer to [📚Benchmarks](./bench/).
+ cache-dit will support more mainstream cache acceleration algorithms in the future. More benchmarks will be released, so please stay tuned for updates. Here, only the results of selected precision and performance benchmarks are presented. The test dataset is **DrawBench**. For the complete benchmark, please refer to [📚Benchmarks](https://github.com/vipshop/cache-dit/raw/main/bench/).

  ### 📚Text2Image DrawBench: FLUX.1-dev

@@ -292,7 +292,7 @@ Comparisons between different FnBn compute block configurations show that **more
  | F4B0_W4MC3_R0.12 | 32.8981 | 1.0130 | 31.8031 | 1507.83 | 2.47x |
  | F4B0_W4MC4_R0.12 | 32.8384 | 1.0065 | 31.5292 | 1400.08 | 2.66x |

- The comparison between **cache-dit: DBCache** and algorithms such as Δ-DiT, Chipmunk, FORA, DuCa, TaylorSeer and FoCa is as follows. Among methods with a speedup ratio below **3x**, cache-dit achieves the best accuracy. Please check [📚How to Reproduce?](./bench/) for more details.
+ The comparison between **cache-dit: DBCache** and algorithms such as Δ-DiT, Chipmunk, FORA, DuCa, TaylorSeer and FoCa is as follows. Among methods with a speedup ratio below **3x**, cache-dit achieves the best accuracy. Please check [📚How to Reproduce?](https://github.com/vipshop/cache-dit/raw/main/bench/) for more details.

  | Method | TFLOPs(↓) | SpeedUp(↑) | ImageReward(↑) | Clip Score(↑) |
  | --- | --- | --- | --- | --- |
@@ -375,7 +375,7 @@ Currently, for any **Diffusion** models with **Transformer Blocks** that match t

  ### ♥️Cache Acceleration with One-line Code

- In most cases, you only need **one line** of code: `cache_dit.enable_cache(...)`. After this API is called, just call the pipe as normal. The `pipe` param can be **any** Diffusion Pipeline. Please refer to [Qwen-Image](./examples/pipeline/run_qwen_image.py) as an example.
+ In most cases, you only need **one line** of code: `cache_dit.enable_cache(...)`. After this API is called, just call the pipe as normal. The `pipe` param can be **any** Diffusion Pipeline. Please refer to [Qwen-Image](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline/run_qwen_image.py) as an example.

  ```python
  import cache_dit
@@ -396,7 +396,7 @@ cache_dit.disable_cache(pipe)

  ### 🔥Automatic Block Adapter

- But in some cases, you may have a **modified** Diffusion Pipeline or Transformer that is not in the diffusers library or not yet officially supported by **cache-dit**. The **BlockAdapter** can help you solve this problem. Please refer to [🔥Qwen-Image w/ BlockAdapter](./examples/adapter/run_qwen_image_adapter.py) as an example.
+ But in some cases, you may have a **modified** Diffusion Pipeline or Transformer that is not in the diffusers library or not yet officially supported by **cache-dit**. The **BlockAdapter** can help you solve this problem. Please refer to [🔥Qwen-Image w/ BlockAdapter](https://github.com/vipshop/cache-dit/raw/main/examples/adapter/run_qwen_image_adapter.py) as an example.

  ```python
  from cache_dit import ForwardPattern, BlockAdapter
@@ -422,11 +422,11 @@ cache_dit.enable_cache(
  ),
  )
  ```
- For such situations, **BlockAdapter** can help you quickly apply various cache acceleration features to your own Diffusion Pipelines and Transformers. Please check [📚BlockAdapter.md](./docs/BlockAdapter.md) for more details.
+ For such situations, **BlockAdapter** can help you quickly apply various cache acceleration features to your own Diffusion Pipelines and Transformers. Please check [📚BlockAdapter.md](https://github.com/vipshop/cache-dit/raw/main/docs/BlockAdapter.md) for more details.

  ### 📚Hybrid Forward Pattern

- Sometimes, a Transformer class will contain more than one set of transformer `blocks`. For example, **FLUX.1** (HiDream, Chroma, etc.) contains transformer_blocks and single_transformer_blocks (with different forward patterns). The **BlockAdapter** can also help you solve this problem. Please refer to [📚FLUX.1](./examples/adapter/run_flux_adapter.py) as an example.
+ Sometimes, a Transformer class will contain more than one set of transformer `blocks`. For example, **FLUX.1** (HiDream, Chroma, etc.) contains transformer_blocks and single_transformer_blocks (with different forward patterns). The **BlockAdapter** can also help you solve this problem. Please refer to [📚FLUX.1](https://github.com/vipshop/cache-dit/raw/main/examples/adapter/run_flux_adapter.py) as an example.

  ```python
  # For diffusers <= 0.34.0, FLUX.1 transformer_blocks and
@@ -447,10 +447,10 @@ cache_dit.enable_cache(
  )
  ```

- Sometimes you may have even more complex cases, such as **Wan 2.2 MoE**, which has more than one Transformer (namely `transformer` and `transformer_2`) in its structure. Fortunately, **cache-dit** can also handle this situation very well. Please refer to [📚Wan 2.2 MoE](./examples/pipeline/run_wan_2.2.py) as an example.
+ Sometimes you may have even more complex cases, such as **Wan 2.2 MoE**, which has more than one Transformer (namely `transformer` and `transformer_2`) in its structure. Fortunately, **cache-dit** can also handle this situation very well. Please refer to [📚Wan 2.2 MoE](https://github.com/vipshop/cache-dit/raw/main/examples/pipeline/run_wan_2.2.py) as an example.

  ```python
- from cache_dit import ForwardPattern, BlockAdapter, ParamsModifier
+ from cache_dit import ForwardPattern, BlockAdapter, ParamsModifier, BasicCacheConfig

  cache_dit.enable_cache(
  BlockAdapter(
@@ -472,12 +472,16 @@ cache_dit.enable_cache(
  # value will be overwritten by the new one.
  params_modifiers=[
  ParamsModifier(
- max_warmup_steps=4,
- max_cached_steps=8,
+ cache_config=BasicCacheConfig(
+ max_warmup_steps=4,
+ max_cached_steps=8,
+ ),
  ),
  ParamsModifier(
- max_warmup_steps=2,
- max_cached_steps=20,
+ cache_config=BasicCacheConfig(
+ max_warmup_steps=2,
+ max_cached_steps=20,
+ ),
  ),
  ],
  has_separate_cfg=True,
@@ -486,11 +490,11 @@ cache_dit.enable_cache(
  ```
  ### 📚Implement Patch Functor

- For any PATTERN not {0...5}, we introduced the simple abstract concept of **Patch Functor**. Users can implement a subclass of Patch Functor to convert an unknown Pattern into a known PATTERN, and for some models, users may also need to fuse the operations within the blocks for loop into block forward.
+ For any PATTERN not in {0...5}, we introduced the simple abstract concept of **Patch Functor**. Users can implement a subclass of Patch Functor to convert an unknown Pattern into a known PATTERN, and for some models, users may also need to fuse the operations within the blocks for loop into block forward.

  ![](https://github.com/vipshop/cache-dit/raw/main/assets/patch-functor.png)

- Some Patch functors have already been provided in cache-dit: [📚HiDreamPatchFunctor](./src/cache_dit/cache_factory/patch_functors/functor_hidream.py), [📚ChromaPatchFunctor](./src/cache_dit/cache_factory/patch_functors/functor_chroma.py), etc. After implementing Patch Functor, users need to set the `patch_functor` property of **BlockAdapter**.
+ Some Patch functors have already been provided in cache-dit: [📚HiDreamPatchFunctor](https://github.com/vipshop/cache-dit/raw/main/src/cache_dit/cache_factory/patch_functors/functor_hidream.py), [📚ChromaPatchFunctor](https://github.com/vipshop/cache-dit/raw/main/src/cache_dit/cache_factory/patch_functors/functor_chroma.py), etc. After implementing Patch Functor, users need to set the `patch_functor` property of **BlockAdapter**.

  ```python
  @BlockAdapterRegistry.register("HiDream")
@@ -539,7 +543,7 @@ You can set `details` param as `True` to show more details of cache stats. (mark

  ![](https://github.com/vipshop/cache-dit/raw/main/assets/dbcache-v1.png)

- **DBCache**: **Dual Block Caching** for Diffusion Transformers. Different configurations of compute blocks (**F8B12**, etc.) can be customized in DBCache, enabling a balanced trade-off between performance and precision. Moreover, it can be entirely **training**-**free**. Please check the [DBCache.md](./docs/DBCache.md) docs for more design details.
+ **DBCache**: **Dual Block Caching** for Diffusion Transformers. Different configurations of compute blocks (**F8B12**, etc.) can be customized in DBCache, enabling a balanced trade-off between performance and precision. Moreover, it can be entirely **training**-**free**. Please check the [DBCache.md](https://github.com/vipshop/cache-dit/raw/main/docs/DBCache.md) docs for more design details.

  - **Fn**: Specifies that DBCache uses the **first n** Transformer blocks to fit the information at time step t, enabling the calculation of a more stable L1 diff and delivering more accurate information to subsequent blocks.
  - **Bn**: Further fuses approximate information in the **last n** Transformer blocks to enhance prediction accuracy. These blocks act as an auto-scaler for approximate hidden states that use residual cache.
@@ -548,23 +552,27 @@ You can set `details` param as `True` to show more details of cache stats. (mark
  import cache_dit
  from diffusers import FluxPipeline

- pipe = FluxPipeline.from_pretrained(
+ pipe_or_adapter = FluxPipeline.from_pretrained(
  "black-forest-labs/FLUX.1-dev",
  torch_dtype=torch.bfloat16,
  ).to("cuda")

  # Default options, F8B0, 8 warmup steps, and unlimited cached
  # steps for good balance between performance and precision
- cache_dit.enable_cache(pipe)
+ cache_dit.enable_cache(pipe_or_adapter)

  # Custom options, F8B8, higher precision
+ from cache_dit import BasicCacheConfig
+
  cache_dit.enable_cache(
- pipe,
- max_warmup_steps=8, # steps do not cache
- max_cached_steps=-1, # -1 means no limit
- Fn_compute_blocks=8, # Fn, F8, etc.
- Bn_compute_blocks=8, # Bn, B8, etc.
- residual_diff_threshold=0.12,
+ pipe_or_adapter,
+ cache_config=BasicCacheConfig(
+ max_warmup_steps=8, # steps do not cache
+ max_cached_steps=-1, # -1 means no limit
+ Fn_compute_blocks=8, # Fn, F8, etc.
+ Bn_compute_blocks=8, # Bn, B8, etc.
+ residual_diff_threshold=0.12,
+ ),
  )
  ```
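The `residual_diff_threshold` option above drives the cache decision. A minimal sketch of that decision rule, assuming a relative L1 metric (illustrative only, not the package's internal code):

```python
import torch

# Sketch of the DBCache reuse rule (an assumption, not cache-dit source):
# reuse cached residuals when the relative L1 change of the Fn-block
# output between adjacent diffusion steps stays below the threshold.
def can_use_cache(
    fn_out: torch.Tensor,       # output of the first Fn blocks, current step
    prev_fn_out: torch.Tensor,  # output of the first Fn blocks, previous step
    residual_diff_threshold: float = 0.12,
) -> bool:
    rel_l1 = (fn_out - prev_fn_out).abs().mean() / (
        prev_fn_out.abs().mean() + 1e-8  # guard against division by zero
    )
    return rel_l1.item() < residual_diff_threshold
```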
 
@@ -592,16 +600,18 @@ $$
  **TaylorSeer** employs a differential method to approximate the higher-order derivatives of features and predict features in future timesteps with Taylor series expansion. The TaylorSeer implemented in cache-dit supports both hidden states and residual cache types. That is, $\mathcal{F}\_{\text {pred }, m}\left(x_{t-k}^l\right)$ can be a residual cache or a hidden-state cache.

  ```python
- from cache_dit import TaylorSeerCalibratorConfig
+ from cache_dit import BasicCacheConfig, TaylorSeerCalibratorConfig

  cache_dit.enable_cache(
- pipe,
+ pipe_or_adapter,
  # Basic DBCache w/ FnBn configurations
- max_warmup_steps=8, # steps do not cache
- max_cached_steps=-1, # -1 means no limit
- Fn_compute_blocks=8, # Fn, F8, etc.
- Bn_compute_blocks=8, # Bn, B8, etc.
- residual_diff_threshold=0.12,
+ cache_config=BasicCacheConfig(
+ max_warmup_steps=8, # steps do not cache
+ max_cached_steps=-1, # -1 means no limit
+ Fn_compute_blocks=8, # Fn, F8, etc.
+ Bn_compute_blocks=8, # Bn, B8, etc.
+ residual_diff_threshold=0.12,
+ ),
  # Then, you can use the TaylorSeer Calibrator to approximate
  # the values in cached steps, taylorseer_order default is 1.
  calibrator_config=TaylorSeerCalibratorConfig(
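The Taylor-series prediction described above reduces, at first order, to a finite-difference extrapolation. A minimal sketch, assuming unit step spacing (the package's calibrator generalizes this to higher `taylorseer_order`):

```python
import torch

# First-order Taylor prediction of a cached feature (illustrative sketch):
# F_pred(x_{t+k}) ≈ F(x_t) + k * (F(x_t) - F(x_{t-1})), where the finite
# difference approximates the first derivative. Works for hidden states
# or residuals, matching the two cache types described above.
def taylor_predict(
    curr: torch.Tensor,  # feature at step t
    prev: torch.Tensor,  # feature at step t-1
    k: int = 1,          # steps ahead to predict
) -> torch.Tensor:
    first_derivative = curr - prev
    return curr + k * first_derivative
```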
@@ -631,22 +641,26 @@ cache_dit.enable_cache(
  cache-dit supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG in the forward step, please set the `enable_separate_cfg` param to **False (default, None)**. Otherwise, set it to True. For example:

  ```python
+ from cache_dit import BasicCacheConfig
+
  cache_dit.enable_cache(
- pipe,
- ...,
- # CFG: classifier free guidance or not
- # For model that fused CFG and non-CFG into single forward step,
- # should set enable_separate_cfg as False. For example, set it as True
- # for Wan 2.1/Qwen-Image and set it as False for FLUX.1, HunyuanVideo,
- # CogVideoX, Mochi, LTXVideo, Allegro, CogView3Plus, EasyAnimate, SD3, etc.
- enable_separate_cfg=True, # Wan 2.1, Qwen-Image, CogView4, Cosmos, SkyReelsV2, etc.
- # Compute cfg forward first or not, default False, namely,
- # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
- cfg_compute_first=False,
- # Compute separate diff values for CFG and non-CFG step,
- # default True. If False, we will use the computed diff from
- # current non-CFG transformer step for current CFG step.
- cfg_diff_compute_separate=True,
+ pipe_or_adapter,
+ cache_config=BasicCacheConfig(
+ ...,
+ # CFG: classifier free guidance or not
+ # For model that fused CFG and non-CFG into single forward step,
+ # should set enable_separate_cfg as False. For example, set it as True
+ # for Wan 2.1/Qwen-Image and set it as False for FLUX.1, HunyuanVideo,
+ # CogVideoX, Mochi, LTXVideo, Allegro, CogView3Plus, EasyAnimate, SD3, etc.
+ enable_separate_cfg=True, # Wan 2.1, Qwen-Image, CogView4, Cosmos, SkyReelsV2, etc.
+ # Compute cfg forward first or not, default False, namely,
+ # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
+ cfg_compute_first=False,
+ # Compute separate diff values for CFG and non-CFG step,
+ # default True. If False, we will use the computed diff from
+ # current non-CFG transformer step for current CFG step.
+ cfg_diff_compute_separate=True,
+ ),
  )
  ```
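The step-parity comments in the block above can be stated as a one-line rule. A small sketch, derived only from those comments (the helper name here is hypothetical, not a cache-dit API):

```python
# Which steps are CFG steps under separate-CFG caching (illustrative):
# with cfg_compute_first=False, steps 0, 2, 4, ... are non-CFG and
# 1, 3, 5, ... are CFG; cfg_compute_first=True flips the parity.
def is_cfg_step(step_index: int, cfg_compute_first: bool = False) -> bool:
    return (step_index % 2 == 1) != cfg_compute_first

print(["CFG" if is_cfg_step(i) else "non-CFG" for i in range(4)])
# -> ['non-CFG', 'CFG', 'non-CFG', 'CFG']
```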
 
@@ -668,7 +682,7 @@ torch._dynamo.config.recompile_limit = 96 # default is 8
  torch._dynamo.config.accumulated_recompile_limit = 2048 # default is 256
  ```

- Please check [perf.py](./bench/perf.py) for more details.
+ Please check [perf.py](https://github.com/vipshop/cache-dit/raw/main/bench/perf.py) for more details.
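Caching and compilation are typically combined along the following lines; this is a sketch assembled from the FLUX.1 example earlier in this README, not the contents of perf.py:

```python
import torch
import cache_dit
from diffusers import FluxPipeline

pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.bfloat16,
).to("cuda")

# Enable caching first, then compile the transformer; the raised dynamo
# recompile limits above keep compilation from bailing out when cached
# and non-cached steps trace differently.
cache_dit.enable_cache(pipe)
pipe.transformer = torch.compile(pipe.transformer)
```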
 
  ## 🛠Metrics CLI
@@ -685,16 +699,16 @@ from cache_dit.metrics import compute_lpips
  from cache_dit.metrics import compute_clip_score
  from cache_dit.metrics import compute_image_reward

- psnr, n = compute_psnr("true.png", "test.png") # Num: n
- psnr, n = compute_psnr("true_dir", "test_dir")
- ssim, n = compute_ssim("true_dir", "test_dir")
- fid, n = compute_fid("true_dir", "test_dir")
- lpips, n = compute_lpips("true_dir", "test_dir")
- clip_score, n = compute_clip_score("DrawBench200.txt", "test_dir")
- reward, n = compute_image_reward("DrawBench200.txt", "test_dir")
+ psnr, n = compute_psnr("true.png", "test.png") # Num: n
+ psnr, n = compute_psnr("true_dir", "test_dir")
+ ssim, n = compute_ssim("true_dir", "test_dir")
+ fid, n = compute_fid("true_dir", "test_dir")
+ lpips, n = compute_lpips("true_dir", "test_dir")
+ clip, n = compute_clip_score("DrawBench200.txt", "test_dir")
+ reward, n = compute_image_reward("DrawBench200.txt", "test_dir")
  ```

- Please check [test_metrics.py](./tests/test_metrics.py) for more details. Or, you can use the `cache-dit-metrics-cli` tool. For example:
+ Or, you can use the `cache-dit-metrics-cli` tool. For example:

  ```bash
  cache-dit-metrics-cli -h # show usage
@@ -706,7 +720,7 @@ cache-dit-metrics-cli all -i1 true_dir -i2 test_dir # image dir
  ## 👋Contribute
  <div id="contribute"></div>

- How to contribute? Star ⭐️ this repo to support us or check [CONTRIBUTE.md](./CONTRIBUTE.md).
+ How to contribute? Star ⭐️ this repo to support us or check [CONTRIBUTE.md](https://github.com/vipshop/cache-dit/raw/main/CONTRIBUTE.md).

  <div align='center'>
  <a href="https://star-history.com/#vipshop/cache-dit&Date">
@@ -1,36 +1,31 @@
- cache_dit/__init__.py,sha256=Nd4a609z8PLFMSO8J0sUe2xRaFDIYK8778ff8yBU7uQ,1457
- cache_dit/_version.py,sha256=gGLpQUQx-ty9SEy9PYw9OgJWWzJLBnCpfJOfzL7SjlI,704
+ cache_dit/__init__.py,sha256=sHRg0swXZZiw6lvSQ53fcVtN9JRayx0az2lXAz5OOGI,1510
+ cache_dit/_version.py,sha256=e8NqPtZ8fggRgk3GPrqZ_U_BDV8aSULw1u_Gn9NNbnk,704
  cache_dit/logger.py,sha256=0zsu42hN-3-rgGC_C29ms1IvVpV4_b4_SwJCKSenxBE,4304
- cache_dit/utils.py,sha256=bERXpCaCpOPThXB8Rkk52yAjjLrvxbt12ntpzpWdfUQ,11131
+ cache_dit/utils.py,sha256=AyYRwi5XBxYBH4GaXxOxv9-X24Te_IYOYwh54t_1d3A,10674
  cache_dit/cache_factory/.gitignore,sha256=5Cb-qT9wsTUoMJ7vACDF7ZcLpAXhi5v-xdcWSRit988,23
- cache_dit/cache_factory/__init__.py,sha256=Jj_Op6ACV35XilFPax3HEEsf_hOomjmogmNyWWteq_4,1539
- cache_dit/cache_factory/cache_interface.py,sha256=xpC-CWZDBfMb5BfnXnVW25xJhV8cYMRns-LKcPDksPU,9846
+ cache_dit/cache_factory/__init__.py,sha256=vy9I6Ofkj9jWeUoOvh-cY5a9QlDDKfj2FVPlVTf7BeA,1390
+ cache_dit/cache_factory/cache_interface.py,sha256=A_8bBsLfGOE5wM3_rniQKPJ223_-fSpNIq65uv00sF0,10620
  cache_dit/cache_factory/cache_types.py,sha256=ooukxQRG55uTLmaZ0SKw6gIeY6SQHhMxkbv55uj2Sqk,991
  cache_dit/cache_factory/forward_pattern.py,sha256=FumlCuZ-TSmSYH0hGBHctSJ-oGLCftdZjLygqhsmdR4,2258
+ cache_dit/cache_factory/params_modifier.py,sha256=zYJJsInTYCaYHBZ7mZJOP-PZnkSg3iN1WPewNOayXos,3628
  cache_dit/cache_factory/utils.py,sha256=XkVM9AXcB9zYq8-S8QKAsGz80r3tA6U3lBNGDGeHOe4,1871
  cache_dit/cache_factory/block_adapters/__init__.py,sha256=33geXMz56TxFWMp0c-H4__MY5SGRzKMKj3TXnUYOMlc,17512
- cache_dit/cache_factory/block_adapters/block_adapters.py,sha256=HlmStNIny0rZiRBYw-xdYYViVk9AEt0XlquoacEGr1U,24203
+ cache_dit/cache_factory/block_adapters/block_adapters.py,sha256=jAgzMPTaY4rBuq7DLK2VeEWuYLy7lvw7bZcPY4S93b4,21660
  cache_dit/cache_factory/block_adapters/block_registers.py,sha256=2L7QeM4ygnaKQpC9PoJod0QRYyxidUKU2AYpysDCUwE,2572
- cache_dit/cache_factory/cache_adapters/__init__.py,sha256=qB4bu1m3LgotOeNKluIkbQIf72PXpZWQMaSn1MOFEmY,149
- cache_dit/cache_factory/cache_adapters/cache_adapter.py,sha256=6WArUrTmtkZg147_Qef5jfzMVRg2hfYwvSB9Cvpf_HA,18297
- cache_dit/cache_factory/cache_adapters/v2/__init__.py,sha256=9PAH5YwpG_m0feE5eFQ7d2450nQR_Ctq8cd9Xu1Ldtk,96
- cache_dit/cache_factory/cache_adapters/v2/cache_adapter_v2.py,sha256=ove_pDh2QC3vjXWIYtrb8anc-NOmPIrDZN7hu16fjwU,18309
+ cache_dit/cache_factory/cache_adapters/__init__.py,sha256=py71WGD3JztQ1uk6qdLVbzYcQ1rvqFidNNaQYo7tqTo,79
+ cache_dit/cache_factory/cache_adapters/cache_adapter.py,sha256=GrkSz4was9gg_dYkfBobrOQ_eNqipQBqeuFfqcwkCXc,19650
  cache_dit/cache_factory/cache_blocks/__init__.py,sha256=08Ox7kD05lkRKCOsVTdEZeKAWBheqpxfrAT1Nz7eclI,2916
  cache_dit/cache_factory/cache_blocks/pattern_0_1_2.py,sha256=ElMps6_7uI74tSF9GDR_dEI0bZEhdzcepM29xFWnYo8,428
  cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py,sha256=Bv56qETXhsREvCrNvnZpSqDIIHsi6Ze3FJW4Yk2x3uI,8597
  cache_dit/cache_factory/cache_blocks/pattern_base.py,sha256=d4H9kEB0AgnVMT8aF0Y54SUMUQUxw5HQ8gRkoCuTQ_A,14577
  cache_dit/cache_factory/cache_blocks/utils.py,sha256=dGOC1tMMOvcbvEgx44eTESKn_jsv-0RZ3tRHPa3wmQ4,1315
- cache_dit/cache_factory/cache_contexts/__init__.py,sha256=MQRxis-5gMhdJ6ZXIVN2nZEGPZoRLy59gSLniTYrWGY,437
- cache_dit/cache_factory/cache_contexts/cache_context.py,sha256=FWdgInClWY8VZBsZIevtYk--rX-RL8c3QfNOJtqR8a4,11855
- cache_dit/cache_factory/cache_contexts/cache_manager.py,sha256=Ig5VKoQ46iG3lKmsaMulYxd2vCm__2rY8NBvERwexwM,32719
- cache_dit/cache_factory/cache_contexts/taylorseer.py,sha256=4nxgSEZvDy-w-7XuJYzsyzdtF1_uFrDwlF06XBDFVKQ,3922
- cache_dit/cache_factory/cache_contexts/v2/__init__.py,sha256=GVafOd9BUa-Tyv7FZbTSkd4bGJPpMonb1AZv78qLeHU,385
- cache_dit/cache_factory/cache_contexts/v2/cache_context_v2.py,sha256=JkMJSm-zme9ayonSFq6Y6esCb6RMuGLvhVINM-LFj2Y,11776
- cache_dit/cache_factory/cache_contexts/v2/cache_manager_v2.py,sha256=ZRTl0M7jIPTIBS9lXoSh_pY6-hNu3JJ94WShv2CPWkk,32788
- cache_dit/cache_factory/cache_contexts/v2/calibrators/__init__.py,sha256=BLCV0EtOcu30iytErL_IK6J9ZwmpE6P9ffNt4OL-IaU,2343
- cache_dit/cache_factory/cache_contexts/v2/calibrators/base.py,sha256=mn6ZBkChGpGwN5csrHTUGMoX6BBPvqHXSLbIExiW-EU,748
- cache_dit/cache_factory/cache_contexts/v2/calibrators/foca.py,sha256=jrEkoiLgDR2fiX_scIpaLIDT0pTMc9stg6L9HBkgsZw,894
- cache_dit/cache_factory/cache_contexts/v2/calibrators/taylorseer.py,sha256=q5xBmT4EmpF_b3KPAjMIangTBvovE_c8ZfFjIN_E9tg,3834
+ cache_dit/cache_factory/cache_contexts/__init__.py,sha256=T6Vak3x7Rs0Oy15Tou49p-rPQRA2jiuYtJBsbv1lBBU,388
+ cache_dit/cache_factory/cache_contexts/cache_context.py,sha256=3EhaMCz3VUQ_NF81VgYwWoSEGIvhScPxPYhjL1OcgxE,15240
+ cache_dit/cache_factory/cache_contexts/cache_manager.py,sha256=hSKAeP1CxmO3RFUxjFjAK1xdvVvTmeayh5jEHMaQXNE,30225
+ cache_dit/cache_factory/cache_contexts/calibrators/__init__.py,sha256=mzYXO8tbytGpJJ9rpPu20kMoj1Iu_7Ym9tjfzV8rA98,5574
+ cache_dit/cache_factory/cache_contexts/calibrators/base.py,sha256=mn6ZBkChGpGwN5csrHTUGMoX6BBPvqHXSLbIExiW-EU,748
+ cache_dit/cache_factory/cache_contexts/calibrators/foca.py,sha256=nhHGs_hxwW1M942BQDMJb9-9IuHdnOxp774Jrna1bJI,891
+ cache_dit/cache_factory/cache_contexts/calibrators/taylorseer.py,sha256=aGxr9SpytYznTepDWGPAxWDnuVMSuNyn6uNXnLh2acQ,4001
  cache_dit/cache_factory/patch_functors/__init__.py,sha256=oI6F3N9ezahRHaFUOZ1GfrAw1qFdKrxFXXmlwwehHj4,530
  cache_dit/cache_factory/patch_functors/functor_base.py,sha256=Ahk0fTfrHgNdEl-9JSkACvfyyv9G-Ei5OSz7XBIlX5o,357
  cache_dit/cache_factory/patch_functors/functor_chroma.py,sha256=xD0Q96VArp1vYBLQ0pcjRIyFB1i_Y7muZ2q07Hz8Oqs,13430
@@ -42,7 +37,7 @@ cache_dit/compile/__init__.py,sha256=FcTVzCeyypl-mxlc59_ehHL3lBNiDAFsXuRoJ-5Cfi0
  cache_dit/compile/utils.py,sha256=nN2OIrSdwRR5zGxJinKDqb07pXpvTNTF3g_OgLkeeBU,3858
  cache_dit/custom_ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  cache_dit/custom_ops/triton_taylorseer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- cache_dit/metrics/__init__.py,sha256=RaUhl5dieF40RqnizGzR30qoJJ9dyMUEADwgwMaMQrE,575
+ cache_dit/metrics/__init__.py,sha256=UjPJ69DyyjZDfERTpKAjZKOxOTx58aWnkze7VfH3en8,673
  cache_dit/metrics/clip_score.py,sha256=ERNCFQFJKzJdbIX9OAg-1LiSPuXUVHLOFxbf2gcENpc,3938
  cache_dit/metrics/config.py,sha256=ieOgD9ayz722RjVzk24bSIqS2D6o7TZjGk8KeXV-OLQ,551
  cache_dit/metrics/fid.py,sha256=ZM_FM0XERtpnkMUfphmw2aOdljrh1uba-pnYItu0q6M,18219
@@ -53,9 +48,9 @@ cache_dit/metrics/metrics.py,sha256=7UV-H2NRbhfr6dvrXEzU97Zy-BSQ5zEfm9CKtaK4ldg,
  cache_dit/quantize/__init__.py,sha256=kWYoMAyZgBXu9BJlZjTQ0dRffW9GqeeY9_iTkXrb70A,59
  cache_dit/quantize/quantize_ao.py,sha256=Fx1KW4l3gdEkdrcAYtPoDW7WKBJWrs3glOHiEwW_TgE,6160
  cache_dit/quantize/quantize_interface.py,sha256=2s_R7xPSKuJeFpEGeLwRxnq_CqJcBG3a3lzyW5wh-UM,1241
- cache_dit-0.3.1.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
- cache_dit-0.3.1.dist-info/METADATA,sha256=I3gHe9m40_Ja0VurS7CDBYx_x_4rpra8zN245gBKv-A,46536
- cache_dit-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- cache_dit-0.3.1.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
- cache_dit-0.3.1.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
- cache_dit-0.3.1.dist-info/RECORD,,
+ cache_dit-0.3.2.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
+ cache_dit-0.3.2.dist-info/METADATA,sha256=L8vWXW0w9Z4GXVXylKnqmhnfpKJ8YeL0LKIuwLL8HEo,47858
+ cache_dit-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ cache_dit-0.3.2.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
+ cache_dit-0.3.2.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
+ cache_dit-0.3.2.dist-info/RECORD,,
@@ -1,3 +0,0 @@
- from cache_dit.cache_factory.cache_adapters.v2.cache_adapter_v2 import (
- CachedAdapterV2,
- )