cache-dit 0.2.27__py3-none-any.whl → 0.2.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cache_dit/__init__.py +2 -0
- cache_dit/_version.py +2 -2
- cache_dit/cache_factory/__init__.py +3 -0
- cache_dit/cache_factory/block_adapters/__init__.py +105 -111
- cache_dit/cache_factory/block_adapters/block_adapters.py +314 -41
- cache_dit/cache_factory/block_adapters/block_registers.py +15 -6
- cache_dit/cache_factory/cache_adapters.py +244 -116
- cache_dit/cache_factory/cache_blocks/__init__.py +55 -4
- cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py +36 -37
- cache_dit/cache_factory/cache_blocks/pattern_base.py +83 -76
- cache_dit/cache_factory/cache_blocks/utils.py +26 -8
- cache_dit/cache_factory/cache_contexts/__init__.py +4 -1
- cache_dit/cache_factory/cache_contexts/cache_context.py +14 -876
- cache_dit/cache_factory/cache_contexts/cache_manager.py +847 -0
- cache_dit/cache_factory/cache_interface.py +91 -24
- cache_dit/cache_factory/patch_functors/functor_chroma.py +1 -1
- cache_dit/cache_factory/patch_functors/functor_flux.py +1 -1
- cache_dit/utils.py +164 -58
- {cache_dit-0.2.27.dist-info → cache_dit-0.2.29.dist-info}/METADATA +59 -34
- {cache_dit-0.2.27.dist-info → cache_dit-0.2.29.dist-info}/RECORD +24 -24
- {cache_dit-0.2.27.dist-info → cache_dit-0.2.29.dist-info}/WHEEL +0 -0
- {cache_dit-0.2.27.dist-info → cache_dit-0.2.29.dist-info}/entry_points.txt +0 -0
- {cache_dit-0.2.27.dist-info → cache_dit-0.2.29.dist-info}/licenses/LICENSE +0 -0
- {cache_dit-0.2.27.dist-info → cache_dit-0.2.29.dist-info}/top_level.txt +0 -0
cache_dit/cache_factory/cache_interface.py
CHANGED

@@ -1,9 +1,11 @@
+import torch
 from typing import Any, Tuple, List
 from diffusers import DiffusionPipeline
 from cache_dit.cache_factory.cache_types import CacheType
 from cache_dit.cache_factory.block_adapters import BlockAdapter
 from cache_dit.cache_factory.block_adapters import BlockAdapterRegistry
 from cache_dit.cache_factory.cache_adapters import CachedAdapter
+from cache_dit.cache_factory.cache_contexts import CachedContextManager
 
 from cache_dit.logger import init_logger
 
@@ -12,7 +14,7 @@ logger = init_logger(__name__)
 
 def enable_cache(
     # DiffusionPipeline or BlockAdapter
-    pipe_or_adapter: DiffusionPipeline | BlockAdapter
+    pipe_or_adapter: DiffusionPipeline | BlockAdapter,
     # Cache context kwargs
     Fn_compute_blocks: int = 8,
     Bn_compute_blocks: int = 0,
@@ -21,7 +23,7 @@ def enable_cache(
     max_continuous_cached_steps: int = -1,
     residual_diff_threshold: float = 0.08,
     # Cache CFG or not
-
+    enable_spearate_cfg: bool = False,
     cfg_compute_first: bool = False,
     cfg_diff_compute_separate: bool = True,
     # Hybird TaylorSeer
@@ -30,7 +32,7 @@ def enable_cache(
     taylorseer_cache_type: str = "residual",
     taylorseer_order: int = 2,
     **other_cache_context_kwargs,
-) ->
+) -> BlockAdapter:
     r"""
     Unified Cache API for almost Any Diffusion Transformers (with Transformer Blocks
     that match the specific Input and Output patterns).
@@ -64,9 +66,9 @@ def enable_cache(
         residual_diff_threshold (`float`, *required*, defaults to 0.08):
             he value of residual diff threshold, a higher value leads to faster performance at the
             cost of lower precision.
-
+        enable_spearate_cfg (`bool`, *required*, defaults to False):
             Whether to do separate cfg or not, such as Wan 2.1, Qwen-Image. For model that fused CFG
-            and non-CFG into single forward step, should set
+            and non-CFG into single forward step, should set enable_spearate_cfg as False, for example:
             CogVideoX, HunyuanVideo, Mochi, etc.
         cfg_compute_first (`bool`, *required*, defaults to False):
             Compute cfg forward first or not, default False, namely, 0, 2, 4, ..., -> non-CFG step;
@@ -89,7 +91,7 @@ def enable_cache(
            The order of taylorseer, higher values of n_derivatives will lead to longer computation time,
            but may improve precision significantly.
        other_cache_kwargs: (`dict`, *optional*, defaults to {})
-           Other cache context kwargs, please check https://github.com/vipshop/cache-dit/blob/main/src/cache_dit/cache_factory/cache_context.py
+           Other cache context kwargs, please check https://github.com/vipshop/cache-dit/blob/main/src/cache_dit/cache_factory/cache_contexts/cache_context.py
            for more details.
 
     Examples:
@@ -104,6 +106,10 @@ def enable_cache(
 
     # Collect cache context kwargs
     cache_context_kwargs = other_cache_context_kwargs.copy()
+    if cache_type := cache_context_kwargs.get("cache_type", None):
+        if cache_type == CacheType.NONE:
+            return pipe_or_adapter
+
     cache_context_kwargs["cache_type"] = CacheType.DBCache
     cache_context_kwargs["Fn_compute_blocks"] = Fn_compute_blocks
     cache_context_kwargs["Bn_compute_blocks"] = Bn_compute_blocks
@@ -113,7 +119,7 @@ def enable_cache(
         max_continuous_cached_steps
     )
     cache_context_kwargs["residual_diff_threshold"] = residual_diff_threshold
-    cache_context_kwargs["
+    cache_context_kwargs["enable_spearate_cfg"] = enable_spearate_cfg
     cache_context_kwargs["cfg_compute_first"] = cfg_compute_first
     cache_context_kwargs["cfg_diff_compute_separate"] = (
         cfg_diff_compute_separate
@@ -123,25 +129,11 @@ def enable_cache(
         enable_encoder_taylorseer
     )
     cache_context_kwargs["taylorseer_cache_type"] = taylorseer_cache_type
-
-        cache_context_kwargs["taylorseer_kwargs"][
-            "n_derivatives"
-        ] = taylorseer_order
-    else:
-        cache_context_kwargs["taylorseer_kwargs"] = {
-            "n_derivatives": taylorseer_order
-        }
+    cache_context_kwargs["taylorseer_order"] = taylorseer_order
 
-    if isinstance(pipe_or_adapter, BlockAdapter):
-        return CachedAdapter.apply(
-            pipe=None,
-            block_adapter=pipe_or_adapter,
-            **cache_context_kwargs,
-        )
-    elif isinstance(pipe_or_adapter, DiffusionPipeline):
+    if isinstance(pipe_or_adapter, (DiffusionPipeline, BlockAdapter)):
         return CachedAdapter.apply(
-
-            block_adapter=None,
+            pipe_or_adapter,
             **cache_context_kwargs,
         )
     else:
@@ -152,6 +144,81 @@ def enable_cache(
         )
 
 
+def disable_cache(
+    # DiffusionPipeline or BlockAdapter
+    pipe_or_adapter: DiffusionPipeline | BlockAdapter,
+):
+    from cache_dit.cache_factory.cache_blocks.utils import (
+        remove_cached_stats,
+    )
+
+    def _disable_blocks(blocks: torch.nn.ModuleList):
+        if blocks is None:
+            return
+        if hasattr(blocks, "_forward_pattern"):
+            del blocks._forward_pattern
+        if hasattr(blocks, "_cache_context_kwargs"):
+            del blocks._cache_context_kwargs
+        remove_cached_stats(blocks)
+
+    def _disable_transformer(transformer: torch.nn.Module):
+        if transformer is None or not BlockAdapter.is_cached(transformer):
+            return
+        if original_forward := getattr(transformer, "_original_forward"):
+            transformer.forward = original_forward.__get__(transformer)
+            del transformer._original_forward
+        if hasattr(transformer, "_is_cached"):
+            del transformer._is_cached
+        if hasattr(transformer, "_forward_pattern"):
+            del transformer._forward_pattern
+        if hasattr(transformer, "_has_separate_cfg"):
+            del transformer._has_separate_cfg
+        if hasattr(transformer, "_cache_context_kwargs"):
+            del transformer._cache_context_kwargs
+        remove_cached_stats(transformer)
+        for blocks in BlockAdapter.find_blocks(transformer):
+            _disable_blocks(blocks)
+
+    def _disable_pipe(pipe: DiffusionPipeline):
+        if pipe is None or not BlockAdapter.is_cached(pipe):
+            return
+        if original_call := getattr(pipe, "_original_call"):
+            pipe.__class__.__call__ = original_call
+            del pipe.__class__._original_call
+        if cache_manager := getattr(pipe, "_cache_manager"):
+            assert isinstance(cache_manager, CachedContextManager)
+            cache_manager.clear_contexts()
+            del pipe._cache_manager
+        if hasattr(pipe, "_is_cached"):
+            del pipe.__class__._is_cached
+        if hasattr(pipe, "_cache_context_kwargs"):
+            del pipe._cache_context_kwargs
+        remove_cached_stats(pipe)
+
+    if isinstance(pipe_or_adapter, DiffusionPipeline):
+        pipe = pipe_or_adapter
+        _disable_pipe(pipe)
+        if hasattr(pipe, "transformer"):
+            _disable_transformer(pipe.transformer)
+        if hasattr(pipe, "transformer_2"):  # Wan 2.2
+            _disable_transformer(pipe.transformer_2)
+        pipe_cls_name = pipe.__class__.__name__
+        logger.warning(f"Cache Acceleration is disabled for: {pipe_cls_name}")
+    elif isinstance(pipe_or_adapter, BlockAdapter):
+        # BlockAdapter
+        adapter = pipe_or_adapter
+        BlockAdapter.assert_normalized(adapter)
+        _disable_pipe(adapter.pipe)
+        for transformer in BlockAdapter.flatten(adapter.transformer):
+            _disable_transformer(transformer)
+        for blocks in BlockAdapter.flatten(adapter.blocks):
+            _disable_blocks(blocks)
+        pipe_cls_name = adapter.pipe.__class__.__name__
+        logger.warning(f"Cache Acceleration is disabled for: {pipe_cls_name}")
+    else:
+        pass  # do nothing
+
+
 def supported_pipelines(
     **kwargs,
 ) -> Tuple[int, List[str]]:
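Taken together, these `cache_interface.py` changes make `enable_cache` accept either a `DiffusionPipeline` or a `BlockAdapter` through a single positional `CachedAdapter.apply(...)` call, return early when `cache_type` is `CacheType.NONE`, and gain a `disable_cache` counterpart that unwinds the hooks (note `original_forward.__get__(transformer)`, which re-binds the saved function as a method). A minimal round-trip sketch; the checkpoint name and prompt are illustrative placeholders, not taken from the diff:

```python
import cache_dit
from diffusers import DiffusionPipeline

# Illustrative checkpoint; any supported DiT-based pipeline should work.
pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image")

# One-line cache acceleration (DBCache). With cache_type=CacheType.NONE
# in the context kwargs, 0.2.29 now returns the pipe untouched instead.
cache_dit.enable_cache(pipe, residual_diff_threshold=0.08)
image = pipe("a cat sitting on a chair")  # cached forward

# New in 0.2.29: restore the original forward/__call__ bindings.
cache_dit.disable_cache(pipe)
image = pipe("a cat sitting on a chair")  # original, uncached forward
```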
cache_dit/cache_factory/patch_functors/functor_chroma.py
CHANGED

@@ -30,7 +30,7 @@ class ChromaPatchFunctor(PatchFunctor):
         blocks: torch.nn.ModuleList = None,
         **kwargs,
     ) -> ChromaTransformer2DModel:
-        if
+        if hasattr(transformer, "_is_patched"):
             return transformer
 
         if blocks is None:
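This change completes a previously truncated guard so that patching is idempotent: a second `apply` returns the transformer as-is once it carries the `_is_patched` flag (the file list shows the same one-line fix in `functor_flux.py`). A minimal sketch of the guard pattern; apart from the `_is_patched` flag shown in the diff, all names here are illustrative stand-ins for the real functor:

```python
import torch

def apply_patch(transformer: torch.nn.Module) -> torch.nn.Module:
    # Idempotency guard: if a previous call already patched this module,
    # hand it back unchanged rather than wrapping forward() twice.
    if hasattr(transformer, "_is_patched"):
        return transformer

    original_forward = transformer.forward

    def patched_forward(*args, **kwargs):
        # The real functor rewrites the block loop here; this stub
        # simply delegates to the saved original forward.
        return original_forward(*args, **kwargs)

    transformer.forward = patched_forward
    transformer._is_patched = True  # mark so re-entry becomes a no-op
    return transformer
```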
cache_dit/utils.py
CHANGED

@@ -5,9 +5,9 @@ import numpy as np
 from pprint import pprint
 from diffusers import DiffusionPipeline
 
-from typing import Dict, Any
+from typing import Dict, Any, List, Union
+from cache_dit.cache_factory import BlockAdapter
 from cache_dit.logger import init_logger
-from cache_dit.cache_factory import CacheType
 
 
 logger = init_logger(__name__)
@@ -30,9 +30,168 @@ class CacheStats:
 
 
 def summary(
-
+    adapter_or_others: Union[
+        BlockAdapter,
+        DiffusionPipeline,
+        torch.nn.Module,
+    ],
     details: bool = False,
     logging: bool = True,
+    **kwargs,
+) -> List[CacheStats]:
+    if adapter_or_others is None:
+        return [CacheStats()]
+
+    if not isinstance(adapter_or_others, BlockAdapter):
+        if not isinstance(adapter_or_others, DiffusionPipeline):
+            transformer = adapter_or_others
+            transformer_2 = None
+        else:
+            transformer = adapter_or_others.transformer
+            transformer_2 = None
+            if hasattr(adapter_or_others, "transformer_2"):
+                transformer_2 = adapter_or_others.transformer_2
+
+        blocks_stats: List[CacheStats] = []
+        for blocks in BlockAdapter.find_blocks(transformer):
+            blocks_stats.append(
+                _summary(
+                    blocks,
+                    details=details,
+                    logging=logging,
+                    **kwargs,
+                )
+            )
+
+        if transformer_2 is not None:
+            for blocks in BlockAdapter.find_blocks(transformer_2):
+                blocks_stats.append(
+                    _summary(
+                        blocks,
+                        details=details,
+                        logging=logging,
+                        **kwargs,
+                    )
+                )
+
+        blocks_stats.append(
+            _summary(
+                transformer,
+                details=details,
+                logging=logging,
+                **kwargs,
+            )
+        )
+        if transformer_2 is not None:
+            blocks_stats.append(
+                _summary(
+                    transformer_2,
+                    details=details,
+                    logging=logging,
+                    **kwargs,
+                )
+            )
+
+        blocks_stats = [stats for stats in blocks_stats if stats.cache_options]
+
+        return blocks_stats if len(blocks_stats) else [CacheStats()]
+
+    adapter = adapter_or_others
+    if not BlockAdapter.check_block_adapter(adapter):
+        return [CacheStats()]
+
+    blocks_stats = []
+    flatten_blocks = BlockAdapter.flatten(adapter.blocks)
+    for blocks in flatten_blocks:
+        blocks_stats.append(
+            _summary(
+                blocks,
+                details=details,
+                logging=logging,
+                **kwargs,
+            )
+        )
+
+    blocks_stats = [stats for stats in blocks_stats if stats.cache_options]
+
+    return blocks_stats if len(blocks_stats) else [CacheStats()]
+
+
+def strify(
+    adapter_or_others: Union[
+        BlockAdapter,
+        DiffusionPipeline,
+        CacheStats,
+        List[CacheStats],
+        Dict[str, Any],
+    ],
+) -> str:
+    if isinstance(adapter_or_others, BlockAdapter):
+        stats = summary(adapter_or_others, logging=False)[-1]
+        cache_options = stats.cache_options
+        cached_steps = len(stats.cached_steps)
+    elif isinstance(adapter_or_others, DiffusionPipeline):
+        stats = summary(adapter_or_others, logging=False)[-1]
+        cache_options = stats.cache_options
+        cached_steps = len(stats.cached_steps)
+    elif isinstance(adapter_or_others, CacheStats):
+        stats = adapter_or_others
+        cache_options = stats.cache_options
+        cached_steps = len(stats.cached_steps)
+    elif isinstance(adapter_or_others, list):
+        stats = adapter_or_others[0]
+        cache_options = stats.cache_options
+        cached_steps = len(stats.cached_steps)
+    elif isinstance(adapter_or_others, dict):
+        from cache_dit.cache_factory import CacheType
+
+        # Assume cache_context_kwargs
+        cache_options = adapter_or_others
+        cached_steps = None
+        cache_type = cache_options.get("cache_type", CacheType.NONE)
+
+        if cache_type == CacheType.NONE:
+            return "NONE"
+    else:
+        raise ValueError(
+            "Please set pipe_or_stats param as one of: "
+            "DiffusionPipeline | CacheStats | Dict[str, Any]"
+        )
+
+    if not cache_options:
+        return "NONE"
+
+    def get_taylorseer_order():
+        taylorseer_order = 0
+        if "taylorseer_order" in cache_options:
+            taylorseer_order = cache_options["taylorseer_order"]
+        return taylorseer_order
+
+    cache_type_str = (
+        f"DBCACHE_F{cache_options.get('Fn_compute_blocks', 1)}"
+        f"B{cache_options.get('Bn_compute_blocks', 0)}_"
+        f"W{cache_options.get('max_warmup_steps', 0)}"
+        f"M{max(0, cache_options.get('max_cached_steps', -1))}"
+        f"MC{max(0, cache_options.get('max_continuous_cached_steps', -1))}_"
+        f"T{int(cache_options.get('enable_taylorseer', False))}"
+        f"O{get_taylorseer_order()}_"
+        f"R{cache_options.get('residual_diff_threshold', 0.08)}"
+    )
+
+    if cached_steps:
+        cache_type_str += f"_S{cached_steps}"
+
+    return cache_type_str
+
+
+def _summary(
+    pipe_or_module: Union[
+        DiffusionPipeline,
+        torch.nn.Module,
+    ],
+    details: bool = False,
+    logging: bool = True,
+    **kwargs,
 ) -> CacheStats:
     cache_stats = CacheStats()
 
@@ -52,6 +211,8 @@ def summary(
         cache_stats.cache_options = cache_options
         if logging:
             print(f"\n🤗Cache Options: {cls_name}\n\n{cache_options}")
+    else:
+        logger.warning(f"Can't find Cache Options for: {cls_name}")
 
     if hasattr(module, "_cached_steps"):
         cached_steps: list[int] = module._cached_steps
@@ -142,58 +303,3 @@ def summary(
     )
 
     return cache_stats
-
-
-def strify(
-    pipe_or_stats: DiffusionPipeline | CacheStats | Dict[str, Any],
-) -> str:
-    if isinstance(pipe_or_stats, DiffusionPipeline):
-        stats = summary(pipe_or_stats, logging=False)
-        cache_options = stats.cache_options
-        cached_steps = len(stats.cached_steps)
-    elif isinstance(pipe_or_stats, CacheStats):
-        stats = pipe_or_stats
-        cache_options = stats.cache_options
-        cached_steps = len(stats.cached_steps)
-    elif isinstance(pipe_or_stats, dict):
-        # Assume cache_context_kwargs
-        cache_options = pipe_or_stats
-        cached_steps = None
-    else:
-        raise ValueError(
-            "Please set pipe_or_stats param as one of: "
-            "DiffusionPipeline | CacheStats | Dict[str, Any]"
-        )
-
-    if not cache_options:
-        return "NONE"
-
-    if cache_options.get("cache_type", None) != CacheType.DBCache:
-        return "NONE"
-
-    def get_taylorseer_order():
-        taylorseer_order = 0
-        if "taylorseer_kwargs" in cache_options:
-            if "n_derivatives" in cache_options["taylorseer_kwargs"]:
-                taylorseer_order = cache_options["taylorseer_kwargs"][
-                    "n_derivatives"
-                ]
-        elif "taylorseer_order" in cache_options:
-            taylorseer_order = cache_options["taylorseer_order"]
-        return taylorseer_order
-
-    cache_type_str = (
-        f"DBCACHE_F{cache_options.get('Fn_compute_blocks', 1)}"
-        f"B{cache_options.get('Bn_compute_blocks', 0)}_"
-        f"W{cache_options.get('max_warmup_steps', 0)}"
-        f"M{max(0, cache_options.get('max_cached_steps', -1))}"
-        f"MC{max(0, cache_options.get('max_continuous_cached_steps', -1))}_"
-        f"T{int(cache_options.get('enable_taylorseer', False))}"
-        f"O{get_taylorseer_order()}_"
-        f"R{cache_options.get('residual_diff_threshold', 0.08)}"
-    )
-
-    if cached_steps:
-        cache_type_str += f"_S{cached_steps}"
-
-    return cache_type_str
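After this rewrite, `summary()` collects one `CacheStats` per cached blocks/transformer it finds (including Wan 2.2's `transformer_2`) and returns a `List[CacheStats]`, while `strify()` additionally accepts a `BlockAdapter`, a `List[CacheStats]`, or a raw options dict. A usage sketch, assuming both helpers are re-exported at the package root as in the README examples; the checkpoint and the printed tag are illustrative:

```python
import cache_dit
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image")  # placeholder
cache_dit.enable_cache(pipe)
pipe("a cat sitting on a chair")

# One CacheStats per cached blocks/transformer, in a list.
stats = cache_dit.summary(pipe, details=False, logging=True)

# strify() encodes the options as DBCACHE_F{Fn}B{Bn}_W{warmup}M{max_cached}
# MC{max_continuous}_T{taylorseer}O{order}_R{threshold}[_S{cached_steps}].
print(cache_dit.strify(stats))  # e.g. "DBCACHE_F8B0_W0M0MC0_T0O2_R0.08_S23"
```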
{cache_dit-0.2.27.dist-info → cache_dit-0.2.29.dist-info}/METADATA
CHANGED

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: cache_dit
-Version: 0.2.
-Summary: 🤗
+Version: 0.2.29
+Summary: 🤗 A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers
 Author: DefTruth, vipshop.com, etc.
 Maintainer: DefTruth, vipshop.com, etc
 Project-URL: Repository, https://github.com/vipshop/cache-dit.git
@@ -43,8 +43,8 @@ Dynamic: requires-python
 <div align="center">
   <img src=https://github.com/vipshop/cache-dit/raw/main/assets/cache-dit-logo.png height="120">
 
-
-
+  <p align="center">
+    A <b>Unified</b> and Training-free <b>Cache Acceleration</b> Toolbox for <b>Diffusion Transformers</b> <br>
     ♥️ <b>Cache Acceleration</b> with <b>One-line</b> Code ~ ♥️
   </p>
   <div align='center'>
@@ -59,32 +59,39 @@ Dynamic: requires-python
   🔥<b><a href="#unified">Unified Cache APIs</a> | <a href="#dbcache">DBCache</a> | <a href="#taylorseer">Hybrid TaylorSeer</a> | <a href="#cfg">Hybrid Cache CFG</a></b>🔥
   </p>
   <p align="center">
-    🎉Now, <b>cache-dit</b> covers <b>
+    🎉Now, <b>cache-dit</b> covers <b>100%</b> Diffusers' <b>DiT-based</b> Pipelines🎉<br>
     🔥<b><a href="#supported">Qwen-Image</a> | <a href="#supported">FLUX.1</a> | <a href="#supported">Wan 2.1/2.2</a> | <a href="#supported"> ... </a> | <a href="#supported">CogVideoX</a></b>🔥
   </p>
 </div>
+<div align='center'>
+  <img src=./assets/gifs/wan2.2.C0_Q0_NONE.gif width=160px>
+  <img src=./assets/gifs/wan2.2.C1_Q0_DBCACHE_F1B0_W2M8MC2_T1O2_R0.08.gif width=160px>
+  <img src=./assets/gifs/wan2.2.C1_Q1_fp8_w8a8_dq_DBCACHE_F1B0_W2M8MC2_T1O2_R0.08.gif width=160px>
+  <p><b>🔥Wan2.2 MoE</b> Baseline | <b><a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:~2.0x↑🎉</b> | +FP8 DQ:<b>~2.4x↑🎉</b></p>
+  <img src=./assets/qwen-image.C0_Q0_NONE.png width=160px>
+  <img src=./assets/qwen-image.C1_Q0_DBCACHE_F8B0_W8M0MC0_T1O4_R0.12_S23.png width=160px>
+  <img src=./assets/qwen-image.C1_Q1_fp8_w8a8_dq_DBCACHE_F8B0_W8M0MC0_T1O4_R0.12_S18.png width=160px>
+  <p><b>🔥Qwen-Image</b> Baseline | <b><a href="https://github.com/vipshop/cache-dit">+cache-dit</a>:~1.8x↑🎉</b> | +FP8 DQ:<b>~2.2x↑🎉</b><br>♥️ Please consider to leave a <b>⭐️ Star</b> to support us ~ ♥️</p>
+</p>
+</div>
 
 ## 🔥News
 
-- [2025-09-
-- [2025-08-
-- [2025-08-26] 🎉[**Wan2.2**](https://github.com/Wan-Video) **1.8x⚡️** speedup with `cache-dit + compile`! Please check the [example](./examples/run_wan_2.2.py).
-- [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x⚡️** speedup! Check the example at [run_qwen_image_edit.py](./examples/run_qwen_image_edit.py).
+- [2025-09-03] 🎉[**Wan2.2-MoE**](https://github.com/Wan-Video) **2.4x↑🎉** speedup! Please refer to [run_wan_2.2.py](./examples/pipeline/run_wan_2.2.py) as an example.
+- [2025-08-19] 🔥[**Qwen-Image-Edit**](https://github.com/QwenLM/Qwen-Image) **2x↑🎉** speedup! Check the example: [run_qwen_image_edit.py](./examples/pipeline/run_qwen_image_edit.py).
 - [2025-08-12] 🎉First caching mechanism in [QwenLM/Qwen-Image](https://github.com/QwenLM/Qwen-Image) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check this [PR](https://github.com/QwenLM/Qwen-Image/pull/61).
-- [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x
-- [2025-
-- [2025-07-18] 🎉First caching mechanism in [🤗huggingface/flux-fast](https://github.com/huggingface/flux-fast) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check the [PR](https://github.com/huggingface/flux-fast/pull/13).
-- [2025-07-13] **[🤗flux-faster](https://github.com/xlite-dev/flux-faster)** is released! **3.3x** speedup for FLUX.1 on NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)**.
+- [2025-08-11] 🔥[**Qwen-Image**](https://github.com/QwenLM/Qwen-Image) **1.8x↑🎉** speedup! Please refer to [run_qwen_image.py](./examples/pipeline/run_qwen_image.py) as an example.
+- [2025-07-13] 🎉[**FLUX.1-Dev**](https://github.com/xlite-dev/flux-faster) **3.3x↑🎉** speedup! NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)** + `compile + FP8 DQ`.
 
-<!--
 <details>
 <summary> Previous News </summary>
 
+- [2025-09-01] 📚[**Hybird Forward Pattern**](#unified) is supported! Please check [FLUX.1-dev](./examples/run_flux_adapter.py) as an example.
+- [2025-08-29] 🔥</b>Covers <b>100%</b> Diffusers' <b>DiT-based</b> Pipelines: **[BlockAdapter](#unified) + [Pattern Matching](#unified).**
+- [2025-08-10] 🔥[**FLUX.1-Kontext-dev**](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) is supported! Please refer [run_flux_kontext.py](./examples/pipeline/run_flux_kontext.py) as an example.
 - [2025-07-18] 🎉First caching mechanism in [🤗huggingface/flux-fast](https://github.com/huggingface/flux-fast) with **[cache-dit](https://github.com/vipshop/cache-dit)**, check the [PR](https://github.com/huggingface/flux-fast/pull/13).
-- [2025-07-13] **[🤗flux-faster](https://github.com/xlite-dev/flux-faster)** is released! **3.3x** speedup for FLUX.1 on NVIDIA L20 with **[cache-dit](https://github.com/vipshop/cache-dit)**.
 
 </details>
--->
 
 ## 📖Contents
 
@@ -93,6 +100,11 @@ Dynamic: requires-python
 - [⚙️Installation](#️installation)
 - [🔥Supported Models](#supported)
 - [🎉Unified Cache APIs](#unified)
+  - [📚Forward Pattern Matching](#unified)
+  - [🎉Cache with One-line Code](#unified)
+  - [🔥Automatic Block Adapter](#unified)
+  - [📚Hybird Forward Pattern](#unified)
+  - [🤖Cache Acceleration Stats](#unified)
 - [⚡️Dual Block Cache](#dbcache)
 - [🔥Hybrid TaylorSeer](#taylorseer)
 - [⚡️Hybrid Cache CFG](#cfg)
@@ -185,7 +197,7 @@ Currently, for any **Diffusion** models with **Transformer Blocks** that match t
 
 ### ♥️Cache Acceleration with One-line Code
 
-In most cases, you only need to call **one-line** of code, that is `cache_dit.enable_cache(...)`. After this API is called, you just need to call the pipe as normal. The `pipe` param can be **any** Diffusion Pipeline. Please refer to [Qwen-Image](./examples/run_qwen_image.py) as an example.
+In most cases, you only need to call **one-line** of code, that is `cache_dit.enable_cache(...)`. After this API is called, you just need to call the pipe as normal. The `pipe` param can be **any** Diffusion Pipeline. Please refer to [Qwen-Image](./examples/pipeline/run_qwen_image.py) as an example.
 
 ```python
 import cache_dit
@@ -199,11 +211,14 @@ cache_dit.enable_cache(pipe)
 
 # Just call the pipe as normal.
 output = pipe(...)
+
+# Disable cache and run original pipe.
+cache_dit.disable_cache(pipe)
 ```
 
-### 🔥Automatic Block Adapter
+### 🔥Automatic Block Adapter
 
-But in some cases, you may have a **modified** Diffusion Pipeline or Transformer that is not located in the diffusers library or not officially supported by **cache-dit** at this time. The **BlockAdapter** can help you solve this problems. Please refer to [🔥Qwen-Image w/ BlockAdapter](./examples/run_qwen_image_adapter.py)
+But in some cases, you may have a **modified** Diffusion Pipeline or Transformer that is not located in the diffusers library or not officially supported by **cache-dit** at this time. The **BlockAdapter** can help you solve this problems. Please refer to [🔥Qwen-Image w/ BlockAdapter](./examples/adapter/run_qwen_image_adapter.py) as an example.
 
 ```python
 from cache_dit import ForwardPattern, BlockAdapter
@@ -225,14 +240,19 @@ cache_dit.enable_cache(
         pipe=pipe, # Qwen-Image, etc.
         transformer=pipe.transformer,
         blocks=pipe.transformer.transformer_blocks,
-        blocks_name="transformer_blocks",
         forward_pattern=ForwardPattern.Pattern_1,
     ),
 )
+```
+For such situations, **BlockAdapter** can help you quickly apply various cache acceleration features to your own Diffusion Pipelines and Transformers. Please check the [📚BlockAdapter.md](./docs/BlockAdapter.md) for more details.
+
+### 📚Hybird Forward Pattern
 
-
+Sometimes, a Transformer class will contain more than one transformer `blocks`. For example, **FLUX.1** (HiDream, Chroma, etc) contains transformer_blocks and single_transformer_blocks (with different forward patterns). The **BlockAdapter** can also help you solve this problem. Please refer to [📚FLUX.1](./examples/adapter/run_flux_adapter.py) as an example.
+
+```python
 # For diffusers <= 0.34.0, FLUX.1 transformer_blocks and
-# single_transformer_blocks
+# single_transformer_blocks have different forward patterns.
 cache_dit.enable_cache(
     BlockAdapter(
         pipe=pipe, # FLUX.1, etc.
@@ -241,10 +261,6 @@ cache_dit.enable_cache(
             pipe.transformer.transformer_blocks,
             pipe.transformer.single_transformer_blocks,
         ],
-        blocks_name=[
-            "transformer_blocks",
-            "single_transformer_blocks",
-        ],
         forward_pattern=[
             ForwardPattern.Pattern_1,
             ForwardPattern.Pattern_3,
@@ -252,7 +268,6 @@ cache_dit.enable_cache(
     ),
 )
 ```
-For such situations, **BlockAdapter** can help you quickly apply various cache acceleration features to your own Diffusion Pipelines and Transformers. Please check the [📚BlockAdapter.md](./docs/BlockAdapter.md) for more details.
 
 ### 🤖Cache Acceleration Stats Summary
 
@@ -377,7 +392,7 @@ cache_dit.enable_cache(
 
 <div id="cfg"></div>
 
-cache-dit supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `
+cache-dit supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `enable_spearate_cfg` param to **False (default)**. Otherwise, set it to True. For examples:
 
 ```python
 cache_dit.enable_cache(
@@ -385,10 +400,10 @@ cache_dit.enable_cache(
     ...,
     # CFG: classifier free guidance or not
     # For model that fused CFG and non-CFG into single forward step,
-    # should set
+    # should set enable_spearate_cfg as False. For example, set it as True
     # for Wan 2.1/Qwen-Image and set it as False for FLUX.1, HunyuanVideo,
     # CogVideoX, Mochi, LTXVideo, Allegro, CogView3Plus, EasyAnimate, SD3, etc.
-
+    enable_spearate_cfg=True, # Wan 2.1, Qwen-Image, CogView4, Cosmos, SkyReelsV2, etc.
     # Compute cfg forward first or not, default False, namely,
     # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
     cfg_compute_first=False,
@@ -451,11 +466,21 @@ cache-dit-metrics-cli all -i1 true_dir -i2 test_dir # image dir
 
 How to contribute? Star ⭐️ this repo to support us or check [CONTRIBUTE.md](./CONTRIBUTE.md).
 
-
+<div align='center'>
+<a href="https://star-history.com/#vipshop/cache-dit&Date">
+  <picture align='center'>
+    <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=vipshop/cache-dit&type=Date&theme=dark" />
+    <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=vipshop/cache-dit&type=Date" />
+    <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=vipshop/cache-dit&type=Date" width=400px />
+  </picture>
+</a>
+</div>
+
+## ©️Acknowledgements
 
-<div id="
+<div id="Acknowledgements"></div>
 
-The **cache-dit** codebase is adapted from FBCache.
+The **cache-dit** codebase is adapted from FBCache. Over time its codebase diverged a lot, and **cache-dit** API is no longer compatible with FBCache.
 
 ## ©️Citations
 
@@ -463,7 +488,7 @@ The **cache-dit** codebase is adapted from FBCache. Special thanks to their exce
 
 ```BibTeX
 @misc{cache-dit@2025,
-  title={cache-dit:
+  title={cache-dit: A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers},
   url={https://github.com/vipshop/cache-dit.git},
   note={Open-source software available at https://github.com/vipshop/cache-dit.git},
   author={vipshop.com},