PyPI - cache-dit - Versions diffs - 0.2.37__py3-none-any.whl → 0.3.1__py3-none-any.whl - Mend

cache-dit 0.2.37__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cache-dit might be problematic. Click here for more details.

Files changed (24) hide show

cache_dit/cache_factory/cache_contexts/v2/calibrators/__init__.py ADDED Viewed

@@ -0,0 +1,81 @@
+from cache_dit.cache_factory.cache_contexts.v2.calibrators.base import (
+    CalibratorBase,
+)
+from cache_dit.cache_factory.cache_contexts.v2.calibrators.taylorseer import (
+    TaylorSeerCalibrator,
+)
+from cache_dit.cache_factory.cache_contexts.v2.calibrators.foca import (
+    FoCaCalibrator,
+)
+import dataclasses
+from typing import Any, Dict
+from cache_dit.logger import init_logger
+logger = init_logger(__name__)
+@dataclasses.dataclass
+class CalibratorConfig:  # no V1
+    enable_calibrator: bool = False
+    enable_encoder_calibrator: bool = False
+    calibrator_type: str = "taylorseer"  # taylorseer or foca, etc.
+    calibrator_cache_type: str = "residual"  # residual or hidden_states
+    calibrator_kwargs: Dict[str, Any] = dataclasses.field(default_factory=dict)
+    def strify(self) -> str:
+        return "CalibratorBase"
+    def to_kwargs(self) -> Dict:
+        return self.calibrator_kwargs.copy()
+@dataclasses.dataclass
+class TaylorSeerCalibratorConfig(CalibratorConfig):
+    enable_calibrator: bool = True
+    enable_encoder_calibrator: bool = True
+    calibrator_type: str = "taylorseer"
+    taylorseer_order: int = 1
+    def strify(self) -> str:
+        if self.taylorseer_order:
+            return f"TaylorSeer_O({self.taylorseer_order})"
+        return "TaylorSeer_O(0)"
+    def to_kwargs(self) -> Dict:
+        kwargs = self.calibrator_kwargs.copy()
+        kwargs["n_derivatives"] = self.taylorseer_order
+        return kwargs
+@dataclasses.dataclass
+class FoCaCalibratorConfig(CalibratorConfig):
+    enable_calibrator: bool = True
+    enable_encoder_calibrator: bool = True
+    calibrator_type: str = "foca"
+    def strify(self) -> str:
+        return "FoCa"
+class Calibrator:
+    _supported_calibrators = [
+        "taylorseer",
+    ]
+    def __new__(
+        cls,
+        calibrator_config: CalibratorConfig,
+    ) -> CalibratorBase:
+        assert (
+            calibrator_config.calibrator_type in cls._supported_calibrators
+        ), f"Calibrator {calibrator_config.calibrator_type} is not supported now!"
+        if calibrator_config.calibrator_type.lower() == "taylorseer":
+            return TaylorSeerCalibrator(**calibrator_config.to_kwargs())
+        else:
+            raise ValueError(
+                f"Calibrator {calibrator_config.calibrator_type} is not supported now!"
+            )

cache_dit/cache_factory/cache_contexts/v2/calibrators/base.py ADDED Viewed

@@ -0,0 +1,27 @@
+from abc import abstractmethod
+from cache_dit.logger import init_logger
+logger = init_logger(__name__)
+class CalibratorBase:
+    @abstractmethod
+    def reset_cache(self, *args, **kwargs):
+        raise NotImplementedError("reset_cache method is not implemented.")
+    @abstractmethod
+    def approximate(self, *args, **kwargs):
+        raise NotImplementedError("approximate method is not implemented.")
+    @abstractmethod
+    def mark_step_begin(self, *args, **kwargs):
+        raise NotImplementedError("mark_step_begin method is not implemented.")
+    @abstractmethod
+    def update(self, *args, **kwargs):
+        raise NotImplementedError("update method is not implemented.")
+    def __repr__(self):
+        return "CalibratorBase"

cache_dit/cache_factory/cache_contexts/v2/calibrators/foca.py ADDED Viewed

@@ -0,0 +1,26 @@
+from cache_dit.cache_factory.cache_contexts.v2.calibrators.base import (
+    CalibratorBase,
+)
+class FoCaCalibrator(CalibratorBase):
+    # TODO: Support FoCa, Forecast then Calibrate: Feature Caching as ODE for
+    # Efficient Diffusion Transformers, https://arxiv.org/pdf/2508.16211
+    def __init__(self, *args, **kwargs):
+        super().__init__()
+    def reset_cache(self, *args, **kwargs):
+        raise NotImplementedError("reset_cache method is not implemented.")
+    def approximate(self, *args, **kwargs):
+        raise NotImplementedError("approximate method is not implemented.")
+    def mark_step_begin(self, *args, **kwargs):
+        raise NotImplementedError("mark_step_begin method is not implemented.")
+    def update(self, *args, **kwargs):
+        raise NotImplementedError("update method is not implemented.")
+    def __repr__(self):
+        return "FoCaCalibrator"

cache_dit/cache_factory/cache_contexts/v2/calibrators/taylorseer.py ADDED Viewed

@@ -0,0 +1,105 @@
+import math
+import torch
+from typing import List, Dict
+from cache_dit.cache_factory.cache_contexts.v2.calibrators.base import (
+    CalibratorBase,
+)
+from cache_dit.logger import init_logger
+logger = init_logger(__name__)
+class TaylorSeerCalibrator(CalibratorBase):
+    def __init__(
+        self,
+        n_derivatives=1,
+        max_warmup_steps=1,
+        skip_interval_steps=1,
+        **kwargs,
+    ):
+        self.n_derivatives = n_derivatives
+        self.order = n_derivatives + 1
+        self.max_warmup_steps = max_warmup_steps
+        self.skip_interval_steps = skip_interval_steps
+        self.reset_cache()
+        logger.info(f"Created {self.__repr__()}_{id(self)}")
+    def reset_cache(self):  # NEED
+        self.state: Dict[str, List[torch.Tensor]] = {
+            "dY_prev": [None] * self.order,
+            "dY_current": [None] * self.order,
+        }
+        self.current_step = -1
+        self.last_non_approximated_step = -1
+    def should_compute(self, step=None):
+        step = self.current_step if step is None else step
+        if (
+            step < self.max_warmup_steps
+            or (step - self.max_warmup_steps + 1) % self.skip_interval_steps
+            == 0
+        ):
+            return True
+        return False
+    def derivative(self, Y: torch.Tensor) -> List[torch.Tensor]:
+        # Y(t) = Y(0) + dY(0)/dt * t + d^2Y(0)/dt^2 * t^2 / 2!
+        #        + ... + d^nY(0)/dt^n * t^n / n!
+        dY_current: List[torch.Tensor] = [None] * self.order
+        dY_current[0] = Y
+        window = self.current_step - self.last_non_approximated_step
+        if self.state["dY_prev"][0] is not None:
+            if dY_current[0].shape != self.state["dY_prev"][0].shape:
+                self.reset_cache()
+        for i in range(self.n_derivatives):
+            if self.state["dY_prev"][i] is not None and self.current_step > 1:
+                dY_current[i + 1] = (
+                    dY_current[i] - self.state["dY_prev"][i]
+                ) / window
+            else:
+                break
+        return dY_current
+    def approximate(self) -> torch.Tensor:  # NEED
+        elapsed = self.current_step - self.last_non_approximated_step
+        output = 0
+        for i, derivative in enumerate(self.state["dY_current"]):
+            if derivative is not None:
+                output += (1 / math.factorial(i)) * derivative * (elapsed**i)
+            else:
+                break
+        return output
+    def mark_step_begin(self):  # NEED
+        self.current_step += 1
+    def update(self, Y: torch.Tensor):  # NEED
+        # Directly call this method will ingnore the warmup
+        # policy and force full computation.
+        # Assume warmup steps is 3, and n_derivatives is 3.
+        # step 0: dY_prev    = [None, None,   None,    None   ]
+        #         dY_current = [Y0,   None,   None,    None   ]
+        # step 1: dY_prev    = [Y0,   None,   None,    None   ]
+        #         dY_current = [Y1,   dY1,    None,    None   ]
+        # step 2: dY_prev    = [Y1,   dY1,    None,    None   ]
+        #         dY_current = [Y2,   dY2/Y1, dY2/dY1, None   ]
+        # step 3: dY_prev    = [Y2,   dY2/Y1, dY2/dY1, None   ],
+        #         dY_current = [Y3,   dY3/Y2, dY3/dY2, dY3/dY1]
+        # step 4: dY_prev    = [Y3,   dY3/Y2, dY3/dY2, dY3/dY1]
+        #         dY_current = [Y4,   dY4/Y3, dY4/dY3, dY4/dY2]
+        self.state["dY_prev"] = self.state["dY_current"]
+        self.state["dY_current"] = self.derivative(Y)
+        self.last_non_approximated_step = self.current_step
+    def step(self, Y: torch.Tensor):
+        self.mark_step_begin()
+        if self.should_compute():
+            self.update(Y)
+            return Y
+        else:
+            return self.approximate()
+    def __repr__(self):
+        return f"TaylorSeerCalibrator_O({self.n_derivatives})"

cache_dit/cache_factory/cache_interface.py CHANGED Viewed

@@ -1,9 +1,11 @@
-from typing import Any, Tuple, List, Union
+from typing import Any, Tuple, List, Union, Optional
 from diffusers import DiffusionPipeline
 from cache_dit.cache_factory.cache_types import CacheType
 from cache_dit.cache_factory.block_adapters import BlockAdapter
 from cache_dit.cache_factory.block_adapters import BlockAdapterRegistry
 from cache_dit.cache_factory.cache_adapters import CachedAdapter
+from cache_dit.cache_factory.cache_adapters import CachedAdapterV2
+from cache_dit.cache_factory.cache_contexts import CalibratorConfig
 from cache_dit.logger import init_logger
@@ -32,6 +34,8 @@ def enable_cache(
     enable_encoder_taylorseer: bool = False,
     taylorseer_cache_type: str = "residual",
     taylorseer_order: int = 1,
+    # New param only for v2 API
+    calibrator_config: Optional[CalibratorConfig] = None,
     **other_cache_context_kwargs,
 ) -> Union[
     DiffusionPipeline,
@@ -94,6 +98,9 @@ def enable_cache(
         taylorseer_order (`int`, *required*, defaults to 1):
             The order of taylorseer, higher values of n_derivatives will lead to longer computation time,
             the recommended value is 1 or 2.
+        calibrator_config (`CalibratorConfig`, *optional*, defaults to None):
+            # config for calibrator, if calibrator_config is not None, means that user want to use CachedAdapterV2
+            # with specific calibrator, such as taylorseer, foca, and so on.
         other_cache_context_kwargs: (`dict`, *optional*, defaults to {})
             Other cache context kwargs, please check https://github.com/vipshop/cache-dit/blob/main/src/cache_dit/cache_factory/cache_contexts/cache_context.py
             for more details.
@@ -128,18 +135,32 @@ def enable_cache(
     cache_context_kwargs["cfg_diff_compute_separate"] = (
         cfg_diff_compute_separate
     )
-    cache_context_kwargs["enable_taylorseer"] = enable_taylorseer
-    cache_context_kwargs["enable_encoder_taylorseer"] = (
-        enable_encoder_taylorseer
-    )
-    cache_context_kwargs["taylorseer_cache_type"] = taylorseer_cache_type
-    cache_context_kwargs["taylorseer_order"] = taylorseer_order
-    if isinstance(pipe_or_adapter, (DiffusionPipeline, BlockAdapter)):
-        return CachedAdapter.apply(
-            pipe_or_adapter,
-            **cache_context_kwargs,
+    # V1 only supports the Taylorseer calibrator. We have decided to
+    # keep this code for API compatibility reasons.
+    if calibrator_config is None:
+        cache_context_kwargs["enable_taylorseer"] = enable_taylorseer
+        cache_context_kwargs["enable_encoder_taylorseer"] = (
+            enable_encoder_taylorseer
         )
+        cache_context_kwargs["taylorseer_cache_type"] = taylorseer_cache_type
+        cache_context_kwargs["taylorseer_order"] = taylorseer_order
+    else:
+        cache_context_kwargs["calibrator_config"] = calibrator_config
+    if isinstance(pipe_or_adapter, (DiffusionPipeline, BlockAdapter)):
+        if calibrator_config is None:
+            return CachedAdapter.apply(
+                pipe_or_adapter,
+                **cache_context_kwargs,
+            )
+        else:
+            logger.warning("You are using the un-stable V2 API!")
+            pipe_or_adapter._is_v2_api = True
+            return CachedAdapterV2.apply(
+                pipe_or_adapter,
+                **cache_context_kwargs,
+            )
     else:
         raise ValueError(
             f"type: {type(pipe_or_adapter)} is not valid, "
@@ -154,7 +175,13 @@ def disable_cache(
         BlockAdapter,
     ],
 ):
-    CachedAdapter.maybe_release_hooks(pipe_or_adapter)
+    if getattr(pipe_or_adapter, "_is_v2_api", False):
+        logger.warning("You are using the un-stable V2 API!")
+        CachedAdapterV2.maybe_release_hooks(pipe_or_adapter)
+        del pipe_or_adapter._is_v2_api
+    else:
+        CachedAdapter.maybe_release_hooks(pipe_or_adapter)
     logger.warning(
         f"Cache Acceleration is disabled for: "
         f"{pipe_or_adapter.__class__.__name__}."

cache_dit/utils.py CHANGED Viewed

@@ -10,6 +10,7 @@ from diffusers import DiffusionPipeline
 from typing import Dict, Any, List, Union
 from cache_dit.cache_factory import BlockAdapter
+from cache_dit.cache_factory import CalibratorConfig
 from cache_dit.logger import init_logger
@@ -179,11 +180,21 @@ def strify(
     if not cache_options:
         return "NONE"
-    def get_taylorseer_order():
-        taylorseer_order = 0
-        if "taylorseer_order" in cache_options:
-            taylorseer_order = cache_options["taylorseer_order"]
-        return taylorseer_order
+    def calibrator_str():
+        if not getattr(adapter_or_others, "_is_v2_api", False):
+            taylorseer_order = 0
+            if "taylorseer_order" in cache_options:
+                taylorseer_order = cache_options["taylorseer_order"]
+            return (
+                f"T{int(cache_options.get('enable_taylorseer', False))}"
+                f"O{taylorseer_order}"
+            )
+        calibrator_config: CalibratorConfig = cache_options.get(
+            "calibrator_config", None
+        )
+        return calibrator_config.strify() if calibrator_config else "NONE"
     cache_type_str = (
         f"DBCACHE_F{cache_options.get('Fn_compute_blocks', 1)}"
@@ -191,8 +202,7 @@ def strify(
         f"W{cache_options.get('max_warmup_steps', 0)}"
         f"M{max(0, cache_options.get('max_cached_steps', -1))}"
         f"MC{max(0, cache_options.get('max_continuous_cached_steps', -1))}_"
-        f"T{int(cache_options.get('enable_taylorseer', False))}"
-        f"O{get_taylorseer_order()}_"
+        f"{calibrator_str()}_"
         f"R{cache_options.get('residual_diff_threshold', 0.08)}"
     )

{cache_dit-0.2.37.dist-info → cache_dit-0.3.1.dist-info}/METADATA RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: cache_dit
-Version: 0.2.37
-Summary: 🤗 A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers
+Version: 0.3.1
+Summary: A Unified, Flexible and Training-free Cache Acceleration Framework for 🤗Diffusers.
 Author: DefTruth, vipshop.com, etc.
 Maintainer: DefTruth, vipshop.com, etc
 Project-URL: Repository, https://github.com/vipshop/cache-dit.git
@@ -49,8 +49,8 @@ Dynamic: requires-python
   <img src=https://github.com/vipshop/cache-dit/raw/main/assets/cache-dit-logo.png height="120">
 <p align="center">
-    A <b>Unified</b> and Training-free <b>Cache Acceleration</b> Toolbox for <b>Diffusion Transformers</b> <br>
-    ♥️ <b>Cache Acceleration</b> with <b>One-line</b> Code ~ ♥️
+    A <b>Unified</b>, Flexible and Training-free <b>Cache Acceleration</b> Framework for <b>🤗Diffusers</b> <br>
+    ♥️ Cache Acceleration with <b>One-line</b> Code ~ ♥️
   </p>
   <div align='center'>
       <img src=https://img.shields.io/badge/Language-Python-brightgreen.svg >
@@ -58,15 +58,15 @@ Dynamic: requires-python
       <img src=https://img.shields.io/badge/PyPI-pass-brightgreen.svg >
       <img src=https://static.pepy.tech/badge/cache-dit >
       <img src=https://img.shields.io/badge/Python-3.10|3.11|3.12-9cf.svg >
-      <img src=https://img.shields.io/badge/Release-v0.2-brightgreen.svg >
+      <img src=https://img.shields.io/badge/Release-v0.3-brightgreen.svg >
  </div>
   <p align="center">
     <b><a href="#unified">📚Unified Cache APIs</a></b> | <a href="#forward-pattern-matching">📚Forward Pattern Matching</a> | <a href="#automatic-block-adapter">📚Automatic Block Adapter</a><br>
-    <a href="#hybird-forward-pattern">📚Hybrid Forward Pattern</a> | <a href="#dbcache">📚DBCache</a> | <a href="#taylorseer">📚Hybrid TaylorSeer</a> | <a href="#cfg">📚Cache CFG</a><br>
+    <a href="#hybird-forward-pattern">📚Hybrid Forward Pattern</a> | <a href="#dbcache">📚DBCache</a> | <a href="#taylorseer">📚TaylorSeer Calibrator</a> | <a href="#cfg">📚Cache CFG</a><br>
     <a href="#benchmarks">📚Text2Image DrawBench</a> | <a href="#benchmarks">📚Text2Image Distillation DrawBench</a>
   </p>
   <p align="center">
-    🎉Now, <b>cache-dit</b> covers <b>most</b> mainstream Diffusers' <b>DiT</b> Pipelines🎉<br>
+    🎉Now, <b>cache-dit</b> covers almost <b>All</b> Diffusers' <b>DiT</b> Pipelines🎉<br>
     🔥<a href="#supported">Qwen-Image</a> | <a href="#supported">FLUX.1</a> | <a href="#supported">Qwen-Image-Lightning</a> | <a href="#supported"> Wan 2.1 </a> | <a href="#supported"> Wan 2.2 </a>🔥<br>
     🔥<a href="#supported">HunyuanImage-2.1</a> | <a href="#supported">HunyuanVideo</a> | <a href="#supported">HunyuanDiT</a> | <a href="#supported">HiDream</a> | <a href="#supported">AuraFlow</a>🔥<br>
     🔥<a href="#supported">CogView3Plus</a> | <a href="#supported">CogView4</a> | <a href="#supported">LTXVideo</a> | <a href="#supported">CogVideoX</a> | <a href="#supported">CogVideoX 1.5</a> | <a href="#supported">ConsisID</a>🔥<br>
@@ -197,7 +197,7 @@ Dynamic: requires-python
   - [📚Implement Patch Functor](#implement-patch-functor)
   - [🤖Cache Acceleration Stats](#cache-acceleration-stats-summary)
 - [⚡️Dual Block Cache](#dbcache)
-- [🔥Hybrid TaylorSeer](#taylorseer)
+- [🔥TaylorSeer Calibrator](#taylorseer)
 - [⚡️Hybrid Cache CFG](#cfg)
 - [⚙️Torch Compile](#compile)
 - [🛠Metrics CLI](#metrics)
@@ -286,21 +286,11 @@ Comparisons between different FnBn compute block configurations show that **more
 | Config | Clip Score(↑) | ImageReward(↑) | PSNR(↑) | TFLOPs(↓) | SpeedUp(↑) |
 | --- | --- | --- | --- | --- | --- |
 | [**FLUX.1**-dev]: 50 steps | 32.9217 | 1.0412 | INF | 3726.87 | 1.00x |
-| F8B0_W8MC0_R0.08 | 33.0070 | 1.0333 | 35.2008 | 2162.19 | 1.72x |
 | F8B0_W4MC0_R0.08 | 32.9871 | 1.0370 | 33.8317 | 2064.81 | 1.80x |
-| F4B0_W4MC2_R0.12 | 32.9718 | 1.0301 | 31.9394 | 1678.98 | 2.22x |
-| F8B0_W8MC3_R0.12 | 32.9613 | 1.0270 | 34.2834 | 1977.69 | 1.88x |
 | F8B0_W4MC2_R0.12 | 32.9535 | 1.0185 | 32.7346 | 1935.73 | 1.93x |
-| F8B0_W8MC2_R0.12 | 32.9302 | 1.0227 | 34.7449 | 2072.18 | 1.80x |
 | F8B0_W4MC3_R0.12 | 32.9234 | 1.0085 | 32.5385 | 1816.58 | 2.05x |
-| F8B0_W8MC4_R0.12 | 32.9041 | 1.0140 | 33.9466 | 1897.61 | 1.96x |
 | F4B0_W4MC3_R0.12 | 32.8981 | 1.0130 | 31.8031 | 1507.83 | 2.47x |
-| F4B0_W4MC0_R0.08 | 32.8544 | 1.0065 | 32.3555 | 1654.72 | 2.25x |
-| F8B0_W4MC4_R0.12 | 32.8443 | 1.0102 | 32.4231 | 1753.48 | 2.13x |
 | F4B0_W4MC4_R0.12 | 32.8384 | 1.0065 | 31.5292 | 1400.08 | 2.66x |
-| F1B0_W4MC4_R0.12 | 32.8291 | 1.0181 | 32.9462 | 1401.61 | 2.66x |
-| F1B0_W4MC3_R0.12 | 32.8236 | 1.0166 | 33.0037 | 1457.62 | 2.56x |
-| F1B0_W4MC10_R1.0 | 32.3183 | 0.8796 | 29.6757 | 651.90 | 5.72x |
 The comparison between **cache-dit: DBCache** and algorithms such as Δ-DiT, Chipmunk, FORA, DuCa, TaylorSeer and FoCa is as follows. Now, in the comparison with a speedup ratio less than **3x**, cache-dit achieved the best accuracy. Please check [📚How to Reproduce?](./bench/) for more details.
@@ -312,12 +302,34 @@ The comparison between **cache-dit: DBCache** and algorithms such as Δ-DiT, Chi
 | Δ-DiT(N=3) | 1686.76 | 2.21× | 0.8721 | 32.102 |
 | [**FLUX.1**-dev]: 34% steps | 1264.63 | 3.13× | 0.9453 | 32.114 |
 | Chipmunk | 1505.87 | 2.47× | 0.9936 | 32.776 |
-| FORA (N=3) | 1320.07 | 2.82× | 0.9776 | 32.266 |
-| **[DBCache(F=4,B=0,W=4,MC=4)](https://github.com/vipshop/cache-dit)** | **1400.08** | **2.66×** | **1.0065** | **32.838** |
+| FORA(N=3) | 1320.07 | 2.82× | 0.9776 | 32.266 |
+| **[DBCache(F=4,B=0,W=4,MC=4)](https://github.com/vipshop/cache-dit)** | 1400.08 | **2.66×** | **1.0065** | 32.838 |
+| **[DBCache+TaylorSeer(F=1,B=0,O=1)](https://github.com/vipshop/cache-dit)** | 1153.05 | **3.23×** | **1.0221** | 32.819 |
 | DuCa(N=5) | 978.76 | 3.80× | 0.9955 | 32.241 |
 | TaylorSeer(N=4,O=2) | 1042.27 | 3.57× | 0.9857 | 32.413 |
-| **[DBCache+TaylorSeer(F=1,B=0,O=1)](https://github.com/vipshop/cache-dit)** | **1153.05** | **3.23×** | **1.0221** | **32.819** |
-| **[FoCa(N=5) arxiv.2508.16211](https://arxiv.org/pdf/2508.16211)** | **893.54** | **4.16×** | **1.0029** | **32.948** |
+| **[DBCache(F=1,B=0,W=4,MC=6)](https://github.com/vipshop/cache-dit)** | 944.75 | **3.94×** | 0.9997 | 32.849 |
+| **[DBCache+TaylorSeer(F=1,B=0,O=1)](https://github.com/vipshop/cache-dit)** | 944.75 | **3.94×** | **1.0107** | 32.865 |
+| **[FoCa(N=5): arxiv.2508.16211](https://arxiv.org/pdf/2508.16211)** | 893.54 | **4.16×** | **1.0029** | **32.948** |
+<details>
+<summary> Show all comparison </summary>
+| Method | TFLOPs(↓) | SpeedUp(↑) | ImageReward(↑) | Clip Score(↑) |
+| --- | --- | --- | --- | --- |
+| [**FLUX.1**-dev]: 50 steps | 3726.87 | 1.00× | 0.9898 | 32.404 |
+| [**FLUX.1**-dev]: 60% steps | 2231.70 | 1.67× | 0.9663 | 32.312 |
+| Δ-DiT(N=2) | 2480.01 | 1.50× | 0.9444 | 32.273 |
+| Δ-DiT(N=3) | 1686.76 | 2.21× | 0.8721 | 32.102 |
+| [**FLUX.1**-dev]: 34% steps | 1264.63 | 3.13× | 0.9453 | 32.114 |
+| Chipmunk | 1505.87 | 2.47× | 0.9936 | 32.776 |
+| FORA(N=3) | 1320.07 | 2.82× | 0.9776 | 32.266 |
+| **[DBCache(F=4,B=0,W=4,MC=4)](https://github.com/vipshop/cache-dit)** | 1400.08 | **2.66×** | **1.0065** | 32.838 |
+| DuCa(N=5) | 978.76 | 3.80× | 0.9955 | 32.241 |
+| TaylorSeer(N=4,O=2) | 1042.27 | 3.57× | 0.9857 | 32.413 |
+| **[DBCache+TaylorSeer(F=1,B=0,O=1)](https://github.com/vipshop/cache-dit)** | 1153.05 | **3.23×** | **1.0221** | 32.819 |
+| **[DBCache(F=1,B=0,W=4,MC=6)](https://github.com/vipshop/cache-dit)** | 944.75 | **3.94×** | 0.9997 | 32.849 |
+| **[DBCache+TaylorSeer(F=1,B=0,O=1)](https://github.com/vipshop/cache-dit)** | 944.75 | **3.94×** | **1.0107** | 32.865 |
+| **[FoCa(N=5): arxiv.2508.16211](https://arxiv.org/pdf/2508.16211)** | 893.54 | **4.16×** | **1.0029** | **32.948** |
 | [**FLUX.1**-dev]: 22% steps | 818.29 | 4.55× | 0.8183 | 31.772 |
 | FORA(N=4) | 967.91 | 3.84× | 0.9730 | 32.142 |
 | ToCa(N=8) | 784.54 | 4.74× | 0.9451 | 31.993 |
@@ -325,33 +337,31 @@ The comparison between **cache-dit: DBCache** and algorithms such as Δ-DiT, Chi
 | TeaCache(l=0.8) | 892.35 | 4.17× | 0.8683 | 31.704 |
 | **[DBCache(F=4,B=0,W=4,MC=10)](https://github.com/vipshop/cache-dit)** | 816.65 | 4.56x | 0.8245 | 32.191 |
 | TaylorSeer(N=5,O=2) | 893.54 | 4.16× | 0.9768 | 32.467 |
-| **[FoCa(N=7) arxiv.2508.16211](https://arxiv.org/pdf/2508.16211)** | **670.44** | **5.54×** | **0.9891** | **32.920** |
+| **[FoCa(N=7): arxiv.2508.16211](https://arxiv.org/pdf/2508.16211)** | 670.44 | **5.54×** | **0.9891** | **32.920** |
 | FORA(N=7) | 670.14 | 5.55× | 0.7418 | 31.519 |
 | ToCa(N=12) | 644.70 | 5.77× | 0.7155 | 31.808 |
 | DuCa(N=10) | 606.91 | 6.13× | 0.8382 | 31.759 |
 | TeaCache(l=1.2) | 669.27 | 5.56× | 0.7394 | 31.704 |
-| **[DBCache(F=1,B=0,W=4,MC=10)](https://github.com/vipshop/cache-dit)** | **651.90** | **5.72x** | 0.8796 | **32.318** |
+| **[DBCache(F=1,B=0,W=4,MC=10)](https://github.com/vipshop/cache-dit)** | 651.90 | **5.72x** | 0.8796 | **32.318** |
 | TaylorSeer(N=7,O=2) | 670.44 | 5.54× | 0.9128 | 32.128 |
-| **[FoCa(N=8) arxiv.2508.16211](https://arxiv.org/pdf/2508.16211)** | **596.07** | **6.24×** | **0.9502** | **32.706** |
+| **[FoCa(N=8): arxiv.2508.16211](https://arxiv.org/pdf/2508.16211)** | 596.07 | **6.24×** | **0.9502** | **32.706** |
 NOTE: Except for DBCache, other performance data are referenced from the paper [FoCa, arxiv.2508.16211](https://arxiv.org/pdf/2508.16211).
+</details>
 ### 📚Text2Image Distillation DrawBench: Qwen-Image-Lightning
 Surprisingly, cache-dit: DBCache still works in the extremely few-step distill model. For example,  **Qwen-Image-Lightning w/ 4 steps**, with the F16B16 configuration, the PSNR is 34.8163, the Clip Score is 35.6109, and the ImageReward is 1.2614. It maintained a relatively high precision.
 | Config                     |  PSNR(↑)      | Clip Score(↑) | ImageReward(↑) | TFLOPs(↓)   | SpeedUp(↑) |
 |----------------------------|-----------|------------|--------------|----------|------------|
-| [**Lightning**]: 4 steps | INF       | 35.5797    | 1.2630       | 274.33   | 1.00x       |
+| [**Lightning**]: 4 steps   | INF       | 35.5797    | 1.2630       | 274.33   | 1.00x       |
 | F24B24_W2MC1_R0.8          | 36.3242   | 35.6224    | 1.2630       | 264.74   | 1.04x       |
 | F16B16_W2MC1_R0.8          | 34.8163   | 35.6109    | 1.2614       | 244.25   | 1.12x       |
 | F12B12_W2MC1_R0.8          | 33.8953   | 35.6535    | 1.2549       | 234.63   | 1.17x       |
 | F8B8_W2MC1_R0.8            | 33.1374   | 35.7284    | 1.2517       | 224.29   | 1.22x       |
-| F48B0_W2MC1_R0.8           | 30.0533   | 35.8483    | 1.1979       | 265.56   | 1.03x       |
-| F32B0_W2MC1_R0.8           | 29.6490   | 35.7684    | 1.2302       | 261.05   | 1.05x       |
-| F24B0_W2MC1_R0.8           | 29.6081   | 35.8599    | 1.1874       | 245.54   | 1.12x       |
-| F16B0_W2MC1_R0.8           | 29.4844   | 36.0810    | 1.1586       | 227.06   | 1.21x       |
+| F1B0_W2MC1_R0.8            | 31.8317   | 35.6651    | 1.2397       | 206.90   | 1.33x       |
 ## 🎉Unified Cache APIs
@@ -569,7 +579,7 @@ cache_dit.enable_cache(
 |24.85s|15.59s|8.58s|15.41s|15.11s|17.74s|
 |<img src=https://github.com/vipshop/cache-dit/raw/main/assets/NONE_R0.08_S0.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F1B0S1_R0.08_S11.png width=105px> | <img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F1B0S1_R0.2_S19.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F8B8S1_R0.15_S15.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F12B12S4_R0.2_S16.png width=105px>|<img src=https://github.com/vipshop/cache-dit/raw/main/assets/DBCACHE_F16B16S4_R0.2_S13.png width=105px>|
-## 🔥Hybrid TaylorSeer
+## 🔥TaylorSeer Calibrator
 <div id="taylorseer"></div>
@@ -582,17 +592,22 @@ $$
 **TaylorSeer** employs a differential method to approximate the higher-order derivatives of features and predict features in future timesteps with Taylor series expansion. The TaylorSeer implemented in cache-dit supports both hidden states and residual cache types. That is $\mathcal{F}\_{\text {pred }, m}\left(x_{t-k}^l\right)$ can be a residual cache or a hidden-state cache.
 ```python
+from cache_dit import TaylorSeerCalibratorConfig
 cache_dit.enable_cache(
     pipe,
-    enable_taylorseer=True,
-    enable_encoder_taylorseer=True,
-    # Taylorseer cache type cache be hidden_states or residual.
-    taylorseer_cache_type="residual",
-    # Higher values of order will lead to longer computation time
-    taylorseer_order=1, # default is 1.
-    max_warmup_steps=3, # prefer: >= order + 1
-    residual_diff_threshold=0.12
-)s
+    # Basic DBCache w/ FnBn configurations
+    max_warmup_steps=8,  # steps do not cache
+    max_cached_steps=-1, # -1 means no limit
+    Fn_compute_blocks=8, # Fn, F8, etc.
+    Bn_compute_blocks=8, # Bn, B8, etc.
+    residual_diff_threshold=0.12,
+    # Then, you can use the TaylorSeer Calibrator to approximate
+    # the values in cached steps, taylorseer_order default is 1.
+    calibrator_config=TaylorSeerCalibratorConfig(
+        taylorseer_order=1,
+    ),
+)
 ```
 > [!Important]
@@ -715,7 +730,7 @@ The **cache-dit** codebase is adapted from FBCache. Over time its codebase diver
 ```BibTeX
 @misc{cache-dit@2025,
-  title={cache-dit: A Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers},
+  title={cache-dit: A Unified, Flexible and Training-free Cache Acceleration Framework for 🤗Diffusers.},
   url={https://github.com/vipshop/cache-dit.git},
   note={Open-source software available at https://github.com/vipshop/cache-dit.git},
   author={vipshop.com},

{cache_dit-0.2.37.dist-info → cache_dit-0.3.1.dist-info}/RECORD RENAMED Viewed

@@ -1,26 +1,36 @@
-cache_dit/__init__.py,sha256=hzaexC1VQ0TxiWY6TJ1lTm-04e65WOTNHOfYryu1vFA,1284
-cache_dit/_version.py,sha256=jVUPlUOcnlQRBFP8i5PUv2oJntFMrKgk1rs1guuDZ34,706
+cache_dit/__init__.py,sha256=Nd4a609z8PLFMSO8J0sUe2xRaFDIYK8778ff8yBU7uQ,1457
+cache_dit/_version.py,sha256=gGLpQUQx-ty9SEy9PYw9OgJWWzJLBnCpfJOfzL7SjlI,704
 cache_dit/logger.py,sha256=0zsu42hN-3-rgGC_C29ms1IvVpV4_b4_SwJCKSenxBE,4304
-cache_dit/utils.py,sha256=nuHHr6NB286qE9u6klLNfhAVRMOGipihOhM8LRqznmU,10775
+cache_dit/utils.py,sha256=bERXpCaCpOPThXB8Rkk52yAjjLrvxbt12ntpzpWdfUQ,11131
 cache_dit/cache_factory/.gitignore,sha256=5Cb-qT9wsTUoMJ7vACDF7ZcLpAXhi5v-xdcWSRit988,23
-cache_dit/cache_factory/__init__.py,sha256=Iw6-iJLFbdzCsIDZXXOw371L-HPmoeZO_P9a3sDjP5s,1103
-cache_dit/cache_factory/cache_adapters.py,sha256=OFJlxxyODhoZstN4EfPgC7tE8M1ZdQFcE25gDNrW7NA,18212
-cache_dit/cache_factory/cache_interface.py,sha256=tHQv7i8Hp6nfbjZWHwDx3nEvCfxLeBw26aMYjyu6nMw,8541
+cache_dit/cache_factory/__init__.py,sha256=Jj_Op6ACV35XilFPax3HEEsf_hOomjmogmNyWWteq_4,1539
+cache_dit/cache_factory/cache_interface.py,sha256=xpC-CWZDBfMb5BfnXnVW25xJhV8cYMRns-LKcPDksPU,9846
 cache_dit/cache_factory/cache_types.py,sha256=ooukxQRG55uTLmaZ0SKw6gIeY6SQHhMxkbv55uj2Sqk,991
 cache_dit/cache_factory/forward_pattern.py,sha256=FumlCuZ-TSmSYH0hGBHctSJ-oGLCftdZjLygqhsmdR4,2258
 cache_dit/cache_factory/utils.py,sha256=XkVM9AXcB9zYq8-S8QKAsGz80r3tA6U3lBNGDGeHOe4,1871
 cache_dit/cache_factory/block_adapters/__init__.py,sha256=33geXMz56TxFWMp0c-H4__MY5SGRzKMKj3TXnUYOMlc,17512
-cache_dit/cache_factory/block_adapters/block_adapters.py,sha256=zZbbsZYWbUClfa6He69w_Wdf8ZLhKwMAb9gURYEUmgQ,23725
+cache_dit/cache_factory/block_adapters/block_adapters.py,sha256=HlmStNIny0rZiRBYw-xdYYViVk9AEt0XlquoacEGr1U,24203
 cache_dit/cache_factory/block_adapters/block_registers.py,sha256=2L7QeM4ygnaKQpC9PoJod0QRYyxidUKU2AYpysDCUwE,2572
+cache_dit/cache_factory/cache_adapters/__init__.py,sha256=qB4bu1m3LgotOeNKluIkbQIf72PXpZWQMaSn1MOFEmY,149
+cache_dit/cache_factory/cache_adapters/cache_adapter.py,sha256=6WArUrTmtkZg147_Qef5jfzMVRg2hfYwvSB9Cvpf_HA,18297
+cache_dit/cache_factory/cache_adapters/v2/__init__.py,sha256=9PAH5YwpG_m0feE5eFQ7d2450nQR_Ctq8cd9Xu1Ldtk,96
+cache_dit/cache_factory/cache_adapters/v2/cache_adapter_v2.py,sha256=ove_pDh2QC3vjXWIYtrb8anc-NOmPIrDZN7hu16fjwU,18309
 cache_dit/cache_factory/cache_blocks/__init__.py,sha256=08Ox7kD05lkRKCOsVTdEZeKAWBheqpxfrAT1Nz7eclI,2916
 cache_dit/cache_factory/cache_blocks/pattern_0_1_2.py,sha256=ElMps6_7uI74tSF9GDR_dEI0bZEhdzcepM29xFWnYo8,428
 cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py,sha256=Bv56qETXhsREvCrNvnZpSqDIIHsi6Ze3FJW4Yk2x3uI,8597
 cache_dit/cache_factory/cache_blocks/pattern_base.py,sha256=d4H9kEB0AgnVMT8aF0Y54SUMUQUxw5HQ8gRkoCuTQ_A,14577
 cache_dit/cache_factory/cache_blocks/utils.py,sha256=dGOC1tMMOvcbvEgx44eTESKn_jsv-0RZ3tRHPa3wmQ4,1315
-cache_dit/cache_factory/cache_contexts/__init__.py,sha256=rqnJ5__zqnpVHK5A1OqWILpNh5Ss-0ZDTGgtxZMKGGo,250
+cache_dit/cache_factory/cache_contexts/__init__.py,sha256=MQRxis-5gMhdJ6ZXIVN2nZEGPZoRLy59gSLniTYrWGY,437
 cache_dit/cache_factory/cache_contexts/cache_context.py,sha256=FWdgInClWY8VZBsZIevtYk--rX-RL8c3QfNOJtqR8a4,11855
 cache_dit/cache_factory/cache_contexts/cache_manager.py,sha256=Ig5VKoQ46iG3lKmsaMulYxd2vCm__2rY8NBvERwexwM,32719
 cache_dit/cache_factory/cache_contexts/taylorseer.py,sha256=4nxgSEZvDy-w-7XuJYzsyzdtF1_uFrDwlF06XBDFVKQ,3922
+cache_dit/cache_factory/cache_contexts/v2/__init__.py,sha256=GVafOd9BUa-Tyv7FZbTSkd4bGJPpMonb1AZv78qLeHU,385
+cache_dit/cache_factory/cache_contexts/v2/cache_context_v2.py,sha256=JkMJSm-zme9ayonSFq6Y6esCb6RMuGLvhVINM-LFj2Y,11776
+cache_dit/cache_factory/cache_contexts/v2/cache_manager_v2.py,sha256=ZRTl0M7jIPTIBS9lXoSh_pY6-hNu3JJ94WShv2CPWkk,32788
+cache_dit/cache_factory/cache_contexts/v2/calibrators/__init__.py,sha256=BLCV0EtOcu30iytErL_IK6J9ZwmpE6P9ffNt4OL-IaU,2343
+cache_dit/cache_factory/cache_contexts/v2/calibrators/base.py,sha256=mn6ZBkChGpGwN5csrHTUGMoX6BBPvqHXSLbIExiW-EU,748
+cache_dit/cache_factory/cache_contexts/v2/calibrators/foca.py,sha256=jrEkoiLgDR2fiX_scIpaLIDT0pTMc9stg6L9HBkgsZw,894
+cache_dit/cache_factory/cache_contexts/v2/calibrators/taylorseer.py,sha256=q5xBmT4EmpF_b3KPAjMIangTBvovE_c8ZfFjIN_E9tg,3834
 cache_dit/cache_factory/patch_functors/__init__.py,sha256=oI6F3N9ezahRHaFUOZ1GfrAw1qFdKrxFXXmlwwehHj4,530
 cache_dit/cache_factory/patch_functors/functor_base.py,sha256=Ahk0fTfrHgNdEl-9JSkACvfyyv9G-Ei5OSz7XBIlX5o,357
 cache_dit/cache_factory/patch_functors/functor_chroma.py,sha256=xD0Q96VArp1vYBLQ0pcjRIyFB1i_Y7muZ2q07Hz8Oqs,13430
@@ -43,9 +53,9 @@ cache_dit/metrics/metrics.py,sha256=7UV-H2NRbhfr6dvrXEzU97Zy-BSQ5zEfm9CKtaK4ldg,
 cache_dit/quantize/__init__.py,sha256=kWYoMAyZgBXu9BJlZjTQ0dRffW9GqeeY9_iTkXrb70A,59
 cache_dit/quantize/quantize_ao.py,sha256=Fx1KW4l3gdEkdrcAYtPoDW7WKBJWrs3glOHiEwW_TgE,6160
 cache_dit/quantize/quantize_interface.py,sha256=2s_R7xPSKuJeFpEGeLwRxnq_CqJcBG3a3lzyW5wh-UM,1241
-cache_dit-0.2.37.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
-cache_dit-0.2.37.dist-info/METADATA,sha256=dLOxpSzGT1izGxxPdbFc7jDKtiSgl-XAl-JZXRkX138,45826
-cache_dit-0.2.37.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-cache_dit-0.2.37.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
-cache_dit-0.2.37.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
-cache_dit-0.2.37.dist-info/RECORD,,
+cache_dit-0.3.1.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
+cache_dit-0.3.1.dist-info/METADATA,sha256=I3gHe9m40_Ja0VurS7CDBYx_x_4rpra8zN245gBKv-A,46536
+cache_dit-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cache_dit-0.3.1.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
+cache_dit-0.3.1.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
+cache_dit-0.3.1.dist-info/RECORD,,

{cache_dit-0.2.37.dist-info → cache_dit-0.3.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{cache_dit-0.2.37.dist-info → cache_dit-0.3.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{cache_dit-0.2.37.dist-info → cache_dit-0.3.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{cache_dit-0.2.37.dist-info → cache_dit-0.3.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

cache-dit 0.2.37__py3-none-any.whl → 0.3.1__py3-none-any.whl

Potentially problematic release.

cache-dit 0.2.37__py3-none-any.whl → 0.3.1__py3-none-any.whl