cache-dit 0.2.21__py3-none-any.whl → 0.2.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cache_dit/__init__.py +1 -1
- cache_dit/_version.py +2 -2
- cache_dit/cache_factory/__init__.py +4 -61
- cache_dit/cache_factory/cache_adapters.py +4 -15
- cache_dit/cache_factory/cache_context.py +40 -40
- cache_dit/cache_factory/cache_interface.py +149 -0
- cache_dit/cache_factory/cache_types.py +21 -52
- cache_dit/cache_factory/taylorseer.py +0 -2
- cache_dit/cache_factory/utils.py +4 -0
- cache_dit/compile/utils.py +6 -2
- cache_dit/utils.py +33 -4
- {cache_dit-0.2.21.dist-info → cache_dit-0.2.22.dist-info}/METADATA +8 -13
- {cache_dit-0.2.21.dist-info → cache_dit-0.2.22.dist-info}/RECORD +17 -16
- {cache_dit-0.2.21.dist-info → cache_dit-0.2.22.dist-info}/WHEEL +0 -0
- {cache_dit-0.2.21.dist-info → cache_dit-0.2.22.dist-info}/entry_points.txt +0 -0
- {cache_dit-0.2.21.dist-info → cache_dit-0.2.22.dist-info}/licenses/LICENSE +0 -0
- {cache_dit-0.2.21.dist-info → cache_dit-0.2.22.dist-info}/top_level.txt +0 -0
cache_dit/__init__.py
CHANGED
@@ -7,13 +7,13 @@ except ImportError:
 from cache_dit.cache_factory import load_options
 from cache_dit.cache_factory import enable_cache
 from cache_dit.cache_factory import cache_type
-from cache_dit.cache_factory import default_options
 from cache_dit.cache_factory import block_range
 from cache_dit.cache_factory import CacheType
 from cache_dit.cache_factory import ForwardPattern
 from cache_dit.cache_factory import BlockAdapterParams
 from cache_dit.compile import set_compile_configs
 from cache_dit.utils import summary
+from cache_dit.utils import strify
 from cache_dit.logger import init_logger

 NONE = CacheType.NONE
cache_dit/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID

-__version__ = version = '0.2.21'
-__version_tuple__ = version_tuple = (0, 2, 21)
+__version__ = version = '0.2.22'
+__version_tuple__ = version_tuple = (0, 2, 22)

 __commit_id__ = commit_id = None
cache_dit/cache_factory/__init__.py
CHANGED

@@ -1,65 +1,8 @@
-from typing import Dict, List
-from diffusers import DiffusionPipeline
 from cache_dit.cache_factory.forward_pattern import ForwardPattern
 from cache_dit.cache_factory.cache_types import CacheType
+from cache_dit.cache_factory.cache_types import cache_type
+from cache_dit.cache_factory.cache_types import block_range
 from cache_dit.cache_factory.cache_adapters import BlockAdapterParams
 from cache_dit.cache_factory.cache_adapters import UnifiedCacheAdapter
-from cache_dit.cache_factory.…
-
-from cache_dit.logger import init_logger
-
-logger = init_logger(__name__)
-
-
-def load_options(path: str):
-    return load_cache_options_from_yaml(path)
-
-
-def cache_type(
-    type_hint: "CacheType | str",
-) -> CacheType:
-    return CacheType.type(cache_type=type_hint)
-
-
-def default_options(
-    cache_type: CacheType = CacheType.DBCache,
-) -> Dict:
-    return CacheType.default_options(cache_type)
-
-
-def block_range(
-    start: int,
-    end: int,
-    step: int = 1,
-) -> List[int]:
-    return CacheType.block_range(
-        start,
-        end,
-        step,
-    )
-
-
-def enable_cache(
-    pipe_or_adapter: DiffusionPipeline | BlockAdapterParams,
-    forward_pattern: ForwardPattern = ForwardPattern.Pattern_0,
-    **cache_options_kwargs,
-) -> DiffusionPipeline:
-    if isinstance(pipe_or_adapter, BlockAdapterParams):
-        return UnifiedCacheAdapter.apply(
-            pipe=None,
-            adapter_params=pipe_or_adapter,
-            forward_pattern=forward_pattern,
-            **cache_options_kwargs,
-        )
-    elif isinstance(pipe_or_adapter, DiffusionPipeline):
-        return UnifiedCacheAdapter.apply(
-            pipe=pipe_or_adapter,
-            adapter_params=None,
-            forward_pattern=forward_pattern,
-            **cache_options_kwargs,
-        )
-    else:
-        raise ValueError(
-            "Please pass DiffusionPipeline or BlockAdapterParams"
-            "(BlockAdapter) for the 1 position param: pipe_or_adapter"
-        )
+from cache_dit.cache_factory.cache_interface import enable_cache
+from cache_dit.cache_factory.utils import load_options
cache_dit/cache_factory/cache_adapters.py
CHANGED

@@ -350,7 +350,7 @@ class UnifiedCacheAdapter:
         return adapter_params.pipe

     @classmethod
-    def …
+    def has_separate_cfg(
         cls,
         pipe_or_transformer: DiffusionPipeline | Any,
     ) -> bool:

@@ -364,20 +364,9 @@ class UnifiedCacheAdapter:
     @classmethod
     def check_context_kwargs(cls, pipe, **cache_context_kwargs):
         # Check cache_context_kwargs
-        if not cache_context_kwargs:
-            …
-            …
-            cache_context_kwargs["do_separate_classifier_free_guidance"] = (
-                True
-            )
-            logger.warning(
-                "cache_context_kwargs is empty, use default "
-                f"cache options: {cache_context_kwargs}"
-            )
-        else:
-            # Allow empty cache_type, we only support DBCache now.
-            if cache_context_kwargs.get("cache_type", None):
-                cache_context_kwargs["cache_type"] = CacheType.DBCache
+        if not cache_context_kwargs["do_separate_cfg"]:
+            # Check cfg for some specific case if users don't set it as True
+            cache_context_kwargs["do_separate_cfg"] = cls.has_separate_cfg(pipe)

         if cache_type := cache_context_kwargs.pop("cache_type", None):
             assert (
cache_dit/cache_factory/cache_context.py
CHANGED

@@ -69,11 +69,11 @@ class DBCacheContext:
     taylorseer: Optional[TaylorSeer] = None
     encoder_tarlorseer: Optional[TaylorSeer] = None

-    # Support …
+    # Support do_separate_cfg, such as Wan 2.1,
     # Qwen-Image. For model that fused CFG and non-CFG into single
-    # forward step, should set …
-    # …
-    …
+    # forward step, should set do_separate_cfg as False.
+    # For example: CogVideoX, HunyuanVideo, Mochi.
+    do_separate_cfg: bool = False
     # Compute cfg forward first or not, default False, namely,
     # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
     cfg_compute_first: bool = False

@@ -97,10 +97,10 @@ class DBCacheContext:
     @torch.compiler.disable
     def __post_init__(self):
         # Some checks for settings
-        if self.…
+        if self.do_separate_cfg:
             assert self.enable_alter_cache is False, (
                 "enable_alter_cache must set as False if "
-                "…
+                "do_separate_cfg is enabled."
             )
         if self.cfg_diff_compute_separate:
             assert self.cfg_compute_first is False, (

@@ -123,12 +123,12 @@ class DBCacheContext:

         if self.enable_taylorseer:
             self.taylorseer = TaylorSeer(**self.taylorseer_kwargs)
-            if self.…
+            if self.do_separate_cfg:
                 self.cfg_taylorseer = TaylorSeer(**self.taylorseer_kwargs)

         if self.enable_encoder_taylorseer:
             self.encoder_tarlorseer = TaylorSeer(**self.taylorseer_kwargs)
-            if self.…
+            if self.do_separate_cfg:
                 self.cfg_encoder_taylorseer = TaylorSeer(
                     **self.taylorseer_kwargs
                 )

@@ -175,16 +175,16 @@ class DBCacheContext:
         # incr step: prev 0 -> 1; prev 1 -> 2
         # current step: incr step - 1
         self.transformer_executed_steps += 1
-        if not self.…
+        if not self.do_separate_cfg:
             self.executed_steps += 1
         else:
             # 0,1 -> 0 + 1, 2,3 -> 1 + 1, ...
             if not self.cfg_compute_first:
-                if not self.…
+                if not self.is_separate_cfg_step():
                     # transformer step: 0,2,4,...
                     self.executed_steps += 1
             else:
-                if self.…
+                if self.is_separate_cfg_step():
                     # transformer step: 0,2,4,...
                     self.executed_steps += 1

@@ -217,9 +217,9 @@ class DBCacheContext:

         # mark_step_begin of TaylorSeer must be called after the cache is reset.
         if self.enable_taylorseer or self.enable_encoder_taylorseer:
-            if self.…
+            if self.do_separate_cfg:
                 # Assume non-CFG steps: 0, 2, 4, 6, ...
-                if not self.…
+                if not self.is_separate_cfg_step():
                     taylorseer, encoder_taylorseer = self.get_taylorseers()
                     if taylorseer is not None:
                         taylorseer.mark_step_begin()

@@ -251,7 +251,7 @@ class DBCacheContext:
         # step: executed_steps - 1, not transformer_steps - 1
         step = str(self.get_current_step())
         # Only add the diff if it is not already recorded for this step
-        if not self.…
+        if not self.is_separate_cfg_step():
             if step not in self.residual_diffs:
                 self.residual_diffs[step] = diff
         else:

@@ -268,7 +268,7 @@ class DBCacheContext:

     @torch.compiler.disable
     def add_cached_step(self):
-        if not self.…
+        if not self.is_separate_cfg_step():
             self.cached_steps.append(self.get_current_step())
         else:
             self.cfg_cached_steps.append(self.get_current_step())

@@ -290,8 +290,8 @@ class DBCacheContext:
         return self.transformer_executed_steps - 1

     @torch.compiler.disable
-    def …
-        if not self.…
+    def is_separate_cfg_step(self):
+        if not self.do_separate_cfg:
             return False
         if self.cfg_compute_first:
             # CFG steps: 0, 2, 4, 6, ...

@@ -589,17 +589,17 @@ def Bn_compute_blocks_ids():


 @torch.compiler.disable
-def …
+def do_separate_cfg():
     cache_context = get_current_cache_context()
     assert cache_context is not None, "cache_context must be set before"
-    return cache_context.…
+    return cache_context.do_separate_cfg


 @torch.compiler.disable
-def …
+def is_separate_cfg_step():
     cache_context = get_current_cache_context()
     assert cache_context is not None, "cache_context must be set before"
-    return cache_context.…
+    return cache_context.is_separate_cfg_step()


 @torch.compiler.disable

@@ -710,8 +710,8 @@ def are_two_tensors_similar(

     if all(
         (
-            …
-            …
+            do_separate_cfg(),
+            is_separate_cfg_step(),
             not cfg_diff_compute_separate(),
             get_current_step_residual_diff() is not None,
         )

@@ -789,7 +789,7 @@ def set_Fn_buffer(buffer: torch.Tensor, prefix: str = "Fn"):
     if downsample_factor > 1:
         buffer = buffer[..., ::downsample_factor]
         buffer = buffer.contiguous()
-    if …
+    if is_separate_cfg_step():
         _debugging_set_buffer(f"{prefix}_buffer_cfg")
         set_buffer(f"{prefix}_buffer_cfg", buffer)
     else:

@@ -799,7 +799,7 @@ def set_Fn_buffer(buffer: torch.Tensor, prefix: str = "Fn"):

 @torch.compiler.disable
 def get_Fn_buffer(prefix: str = "Fn"):
-    if …
+    if is_separate_cfg_step():
         _debugging_get_buffer(f"{prefix}_buffer_cfg")
         return get_buffer(f"{prefix}_buffer_cfg")
     _debugging_get_buffer(f"{prefix}_buffer")

@@ -808,7 +808,7 @@ def get_Fn_buffer(prefix: str = "Fn"):

 @torch.compiler.disable
 def set_Fn_encoder_buffer(buffer: torch.Tensor, prefix: str = "Fn"):
-    if …
+    if is_separate_cfg_step():
         _debugging_set_buffer(f"{prefix}_encoder_buffer_cfg")
         set_buffer(f"{prefix}_encoder_buffer_cfg", buffer)
     else:

@@ -818,7 +818,7 @@ def set_Fn_encoder_buffer(buffer: torch.Tensor, prefix: str = "Fn"):

 @torch.compiler.disable
 def get_Fn_encoder_buffer(prefix: str = "Fn"):
-    if …
+    if is_separate_cfg_step():
         _debugging_get_buffer(f"{prefix}_encoder_buffer_cfg")
         return get_buffer(f"{prefix}_encoder_buffer_cfg")
     _debugging_get_buffer(f"{prefix}_encoder_buffer")

@@ -832,7 +832,7 @@ def set_Bn_buffer(buffer: torch.Tensor, prefix: str = "Bn"):
     # This buffer is use for hidden states approximation.
     if is_taylorseer_enabled():
         # taylorseer, encoder_taylorseer
-        if …
+        if is_separate_cfg_step():
             taylorseer, _ = get_cfg_taylorseers()
         else:
             taylorseer, _ = get_taylorseers()

@@ -846,14 +846,14 @@ def set_Bn_buffer(buffer: torch.Tensor, prefix: str = "Bn"):
                 "TaylorSeer is enabled but not set in the cache context. "
                 "Falling back to default buffer retrieval."
             )
-            if …
+            if is_separate_cfg_step():
                 _debugging_set_buffer(f"{prefix}_buffer_cfg")
                 set_buffer(f"{prefix}_buffer_cfg", buffer)
             else:
                 _debugging_set_buffer(f"{prefix}_buffer")
                 set_buffer(f"{prefix}_buffer", buffer)
     else:
-        if …
+        if is_separate_cfg_step():
             _debugging_set_buffer(f"{prefix}_buffer_cfg")
             set_buffer(f"{prefix}_buffer_cfg", buffer)
         else:

@@ -865,7 +865,7 @@ def set_Bn_buffer(buffer: torch.Tensor, prefix: str = "Bn"):
 def get_Bn_buffer(prefix: str = "Bn"):
     if is_taylorseer_enabled():
         # taylorseer, encoder_taylorseer
-        if …
+        if is_separate_cfg_step():
             taylorseer, _ = get_cfg_taylorseers()
         else:
             taylorseer, _ = get_taylorseers()

@@ -879,13 +879,13 @@ def get_Bn_buffer(prefix: str = "Bn"):
                 "Falling back to default buffer retrieval."
             )
             # Fallback to default buffer retrieval
-            if …
+            if is_separate_cfg_step():
                 _debugging_get_buffer(f"{prefix}_buffer_cfg")
                 return get_buffer(f"{prefix}_buffer_cfg")
             _debugging_get_buffer(f"{prefix}_buffer")
             return get_buffer(f"{prefix}_buffer")
     else:
-        if …
+        if is_separate_cfg_step():
             _debugging_get_buffer(f"{prefix}_buffer_cfg")
             return get_buffer(f"{prefix}_buffer_cfg")
             _debugging_get_buffer(f"{prefix}_buffer")

@@ -897,7 +897,7 @@ def set_Bn_encoder_buffer(buffer: torch.Tensor, prefix: str = "Bn"):
     # This buffer is use for encoder hidden states approximation.
     if is_encoder_taylorseer_enabled():
         # taylorseer, encoder_taylorseer
-        if …
+        if is_separate_cfg_step():
             _, encoder_taylorseer = get_cfg_taylorseers()
         else:
             _, encoder_taylorseer = get_taylorseers()

@@ -911,14 +911,14 @@ def set_Bn_encoder_buffer(buffer: torch.Tensor, prefix: str = "Bn"):
                 "TaylorSeer is enabled but not set in the cache context. "
                 "Falling back to default buffer retrieval."
             )
-            if …
+            if is_separate_cfg_step():
                 _debugging_set_buffer(f"{prefix}_encoder_buffer_cfg")
                 set_buffer(f"{prefix}_encoder_buffer_cfg", buffer)
             else:
                 _debugging_set_buffer(f"{prefix}_encoder_buffer")
                 set_buffer(f"{prefix}_encoder_buffer", buffer)
     else:
-        if …
+        if is_separate_cfg_step():
             _debugging_set_buffer(f"{prefix}_encoder_buffer_cfg")
             set_buffer(f"{prefix}_encoder_buffer_cfg", buffer)
         else:

@@ -929,7 +929,7 @@ def set_Bn_encoder_buffer(buffer: torch.Tensor, prefix: str = "Bn"):
 @torch.compiler.disable
 def get_Bn_encoder_buffer(prefix: str = "Bn"):
     if is_encoder_taylorseer_enabled():
-        if …
+        if is_separate_cfg_step():
             _, encoder_taylorseer = get_cfg_taylorseers()
         else:
             _, encoder_taylorseer = get_taylorseers()

@@ -944,13 +944,13 @@ def get_Bn_encoder_buffer(prefix: str = "Bn"):
                 "Falling back to default buffer retrieval."
             )
             # Fallback to default buffer retrieval
-            if …
+            if is_separate_cfg_step():
                 _debugging_get_buffer(f"{prefix}_encoder_buffer_cfg")
                 return get_buffer(f"{prefix}_encoder_buffer_cfg")
             _debugging_get_buffer(f"{prefix}_encoder_buffer")
             return get_buffer(f"{prefix}_encoder_buffer")
     else:
-        if …
+        if is_separate_cfg_step():
             _debugging_get_buffer(f"{prefix}_encoder_buffer_cfg")
             return get_buffer(f"{prefix}_encoder_buffer_cfg")
             _debugging_get_buffer(f"{prefix}_encoder_buffer")

@@ -1021,7 +1021,7 @@ def get_can_use_cache(
         return False

     max_cached_steps = get_max_cached_steps()
-    if not …
+    if not is_separate_cfg_step():
         cached_steps = get_cached_steps()
     else:
         cached_steps = get_cfg_cached_steps()
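The separate-CFG bookkeeping above boils down to a parity check over the number of transformer calls per denoising step. Here is a standalone sketch of that arithmetic, based only on the comments in the hunks above; it is not the library's internal code and the helper name is illustrative:

```python
def is_separate_cfg_call(transformer_executed_steps: int, cfg_compute_first: bool = False) -> bool:
    # With do_separate_cfg, each denoising step issues two transformer calls.
    # Default ordering: calls 0, 2, 4, ... are non-CFG and 1, 3, 5, ... are CFG;
    # cfg_compute_first flips that parity.
    current_call = transformer_executed_steps - 1
    cfg_parity = 0 if cfg_compute_first else 1
    return current_call % 2 == cfg_parity

# Calls 1..4 map to non-CFG, CFG, non-CFG, CFG with the default ordering.
assert [is_separate_cfg_call(n) for n in (1, 2, 3, 4)] == [False, True, False, True]
```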
cache_dit/cache_factory/cache_interface.py
ADDED

@@ -0,0 +1,149 @@
+from diffusers import DiffusionPipeline
+from cache_dit.cache_factory.forward_pattern import ForwardPattern
+from cache_dit.cache_factory.cache_types import CacheType
+from cache_dit.cache_factory.cache_adapters import BlockAdapterParams
+from cache_dit.cache_factory.cache_adapters import UnifiedCacheAdapter
+
+from cache_dit.logger import init_logger
+
+logger = init_logger(__name__)
+
+
+def enable_cache(
+    # BlockAdapter & forward pattern
+    pipe_or_adapter: DiffusionPipeline | BlockAdapterParams,
+    forward_pattern: ForwardPattern = ForwardPattern.Pattern_0,
+    # Cache context kwargs
+    Fn_compute_blocks: int = 8,
+    Bn_compute_blocks: int = 0,
+    warmup_steps: int = 8,
+    max_cached_steps: int = -1,
+    residual_diff_threshold: float = 0.08,
+    # Cache CFG or not
+    do_separate_cfg: bool = False,
+    cfg_compute_first: bool = False,
+    cfg_diff_compute_separate: bool = False,
+    # Hybird TaylorSeer
+    enable_taylorseer: bool = False,
+    enable_encoder_taylorseer: bool = False,
+    taylorseer_cache_type: str = "residual",
+    taylorseer_order: int = 2,
+    **other_cache_kwargs,
+) -> DiffusionPipeline:
+    r"""
+    Unified Cache API for almost Any Diffusion Transformers (with Transformer Blocks
+    that match the specific Input and Output patterns).
+
+    For a good balance between performance and precision, DBCache is configured by default
+    with F8B0, 8 warmup steps, and unlimited cached steps.
+
+    Args:
+        pipe_or_adapter (`DiffusionPipeline` or `BlockAdapterParams`, *required*):
+            The standard Diffusion Pipeline or custom BlockAdapter (from cache-dit or user-defined).
+            For example: cache_dit.enable_cache(FluxPipeline(...)). Please check https://github.com/vipshop/cache-dit/blob/main/docs/BlockAdapter.md
+            for the usgae of BlockAdapter.
+        forward_pattern (`ForwardPattern`, *required*, defaults to `ForwardPattern.Pattern_0`):
+            The forward pattern of Transformer block, please check https://github.com/vipshop/cache-dit/tree/main?tab=readme-ov-file#forward-pattern-matching
+            for more details.
+        Fn_compute_blocks (`int`, *required*, defaults to 8):
+            Specifies that `DBCache` uses the **first n** Transformer blocks to fit the information
+            at time step t, enabling the calculation of a more stable L1 diff and delivering more
+            accurate information to subsequent blocks. Please check https://github.com/vipshop/cache-dit/blob/main/docs/DBCache.md
+            for more details of DBCache.
+        Bn_compute_blocks: (`int`, *required*, defaults to 0):
+            Further fuses approximate information in the **last n** Transformer blocks to enhance
+            prediction accuracy. These blocks act as an auto-scaler for approximate hidden states
+            that use residual cache.
+        warmup_steps (`int`, *required*, defaults to 8):
+            DBCache does not apply the caching strategy when the number of running steps is less than
+            or equal to this value, ensuring the model sufficiently learns basic features during warmup.
+        max_cached_steps (`int`, *required*, defaults to -1):
+            DBCache disables the caching strategy when the previous cached steps exceed this value to
+            prevent precision degradation.
+        residual_diff_threshold (`float`, *required*, defaults to 0.08):
+            he value of residual diff threshold, a higher value leads to faster performance at the
+            cost of lower precision.
+        do_separate_cfg (`bool`, *required*, defaults to False):
+            Whether to do separate cfg or not, such as Wan 2.1, Qwen-Image. For model that fused CFG
+            and non-CFG into single forward step, should set do_separate_cfg as False, for example:
+            CogVideoX, HunyuanVideo, Mochi, etc.
+        cfg_compute_first (`bool`, *required*, defaults to False):
+            Compute cfg forward first or not, default False, namely, 0, 2, 4, ..., -> non-CFG step;
+            1, 3, 5, ... -> CFG step.
+        cfg_diff_compute_separate (`bool`, *required*, defaults to True):
+            Compute spearate diff values for CFG and non-CFG step, default True. If False, we will
+            use the computed diff from current non-CFG transformer step for current CFG step.
+        enable_taylorseer (`bool`, *required*, defaults to False):
+            Enable the hybird TaylorSeer for hidden_states or not. We have supported the
+            [TaylorSeers: From Reusing to Forecasting: Accelerating Diffusion Models with TaylorSeers](https://arxiv.org/pdf/2503.06923) algorithm
+            to further improve the precision of DBCache in cases where the cached steps are large,
+            namely, **Hybrid TaylorSeer + DBCache**. At timesteps with significant intervals,
+            the feature similarity in diffusion models decreases substantially, significantly
+            harming the generation quality.
+        enable_encoder_taylorseer (`bool`, *required*, defaults to False):
+            Enable the hybird TaylorSeer for encoder_hidden_states or not.
+        taylorseer_cache_type (`str`, *required*, defaults to `residual`):
+            The TaylorSeer implemented in cache-dit supports both `hidden_states` and `residual` as cache type.
+        taylorseer_order (`int`, *required*, defaults to 2):
+            The order of taylorseer, higher values of n_derivatives will lead to longer computation time,
+            but may improve precision significantly.
+        other_cache_kwargs: (`dict`, *optional*, defaults to {})
+            Other cache context kwargs, please check https://github.com/vipshop/cache-dit/blob/main/src/cache_dit/cache_factory/cache_context.py
+            for more details.
+
+    Examples:
+    ```py
+    >>> import cache_dit
+    >>> from diffusers import DiffusionPipeline
+    >>> pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image") # Can be any diffusion pipeline
+    >>> cache_dit.enable_cache(pipe) # One-line code with default cache options.
+    >>> output = pipe(...) # Just call the pipe as normal.
+    >>> stats = cache_dit.summary(pipe) # Then, get the summary of cache acceleration stats.
+    """
+
+    # Collect cache context kwargs
+    cache_context_kwargs = other_cache_kwargs.copy()
+    cache_context_kwargs["cache_type"] = CacheType.DBCache
+    cache_context_kwargs["Fn_compute_blocks"] = Fn_compute_blocks
+    cache_context_kwargs["Bn_compute_blocks"] = Bn_compute_blocks
+    cache_context_kwargs["warmup_steps"] = warmup_steps
+    cache_context_kwargs["max_cached_steps"] = max_cached_steps
+    cache_context_kwargs["residual_diff_threshold"] = residual_diff_threshold
+    cache_context_kwargs["do_separate_cfg"] = do_separate_cfg
+    cache_context_kwargs["cfg_compute_first"] = cfg_compute_first
+    cache_context_kwargs["cfg_diff_compute_separate"] = (
+        cfg_diff_compute_separate
+    )
+    cache_context_kwargs["enable_taylorseer"] = enable_taylorseer
+    cache_context_kwargs["enable_encoder_taylorseer"] = (
+        enable_encoder_taylorseer
+    )
+    cache_context_kwargs["taylorseer_cache_type"] = taylorseer_cache_type
+    if "taylorseer_kwargs" in cache_context_kwargs:
+        cache_context_kwargs["taylorseer_kwargs"][
+            "n_derivatives"
+        ] = taylorseer_order
+    else:
+        cache_context_kwargs["taylorseer_kwargs"] = {
+            "n_derivatives": taylorseer_order
+        }
+
+    if isinstance(pipe_or_adapter, BlockAdapterParams):
+        return UnifiedCacheAdapter.apply(
+            pipe=None,
+            adapter_params=pipe_or_adapter,
+            forward_pattern=forward_pattern,
+            **cache_context_kwargs,
+        )
+    elif isinstance(pipe_or_adapter, DiffusionPipeline):
+        return UnifiedCacheAdapter.apply(
+            pipe=pipe_or_adapter,
+            adapter_params=None,
+            forward_pattern=forward_pattern,
+            **cache_context_kwargs,
+        )
+    else:
+        raise ValueError(
+            "Please pass DiffusionPipeline or BlockAdapterParams"
+            "(BlockAdapter) for the 1 position param: pipe_or_adapter"
+        )
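For reference, a minimal sketch of calling the new keyword-based interface; the pipeline and parameter values below are illustrative, not recommendations:

```python
import cache_dit
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image")  # any supported DiT pipeline

# DBCache F8B0 with separate CFG caching and hybrid TaylorSeer enabled.
cache_dit.enable_cache(
    pipe,
    Fn_compute_blocks=8,
    Bn_compute_blocks=0,
    warmup_steps=8,
    max_cached_steps=-1,           # -1 means no limit
    residual_diff_threshold=0.08,
    do_separate_cfg=True,          # Wan 2.1 / Qwen-Image style pipelines
    enable_taylorseer=True,
    taylorseer_order=2,
)

output = pipe("a photo of a cat")  # just call the pipe as normal
```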
cache_dit/cache_factory/cache_types.py
CHANGED

@@ -9,62 +9,31 @@ class CacheType(Enum):
     DBCache = "Dual_Block_Cache"

     @staticmethod
-    def type(…
-        if isinstance(…
-            return …
-        return …
+    def type(type_hint: "CacheType | str") -> "CacheType":
+        if isinstance(type_hint, CacheType):
+            return type_hint
+        return cache_type(type_hint)

-    @staticmethod
-    def cache_type(cache_type: "CacheType | str") -> "CacheType":
-        if cache_type is None:
-            return CacheType.NONE
-
-        if isinstance(cache_type, CacheType):
-            return cache_type

-…
-…
-            "db_cache",
-            "dbcache",
-            "db",
-        ):
-            return CacheType.DBCache
+def cache_type(type_hint: "CacheType | str") -> "CacheType":
+    if type_hint is None:
         return CacheType.NONE

-…
-…
-        if start > end or end <= 0 or step <= 1:
-            return []
-        # Always compute 0 and end - 1 blocks for DB Cache
-        return list(
-            sorted(set([0] + list(range(start, end, step)) + [end - 1]))
-        )
-
-    @staticmethod
-    def default_options(cache_type: "CacheType | str") -> dict:
-        _no_options = {
-            "cache_type": CacheType.NONE,
-        }
+    if isinstance(type_hint, CacheType):
+        return type_hint

-…
-…
+    elif type_hint.lower() in (
+        "dual_block_cache",
+        "db_cache",
+        "dbcache",
+        "db",
+    ):
+        return CacheType.DBCache
+    return CacheType.NONE

-        _db_options = {
-            "cache_type": CacheType.DBCache,
-            "residual_diff_threshold": 0.12,
-            "warmup_steps": 8,
-            "max_cached_steps": -1,  # -1 means no limit
-            "Fn_compute_blocks": _Fn_compute_blocks,
-            "Bn_compute_blocks": _Bn_compute_blocks,
-            "max_Fn_compute_blocks": 16,
-            "max_Bn_compute_blocks": 16,
-            "Fn_compute_blocks_ids": [],  # 0, 1, 2, ..., 7, etc.
-            "Bn_compute_blocks_ids": [],  # 0, 1, 2, ..., 7, etc.
-        }

-…
-…
-…
-…
-…
-        raise ValueError(f"Unknown cache type: {cache_type}")
+def block_range(start: int, end: int, step: int = 1) -> list[int]:
+    if start > end or end <= 0 or step <= 1:
+        return []
+    # Always compute 0 and end - 1 blocks for DB Cache
+    return list(sorted(set([0] + list(range(start, end, step)) + [end - 1])))
cache_dit/cache_factory/utils.py
CHANGED
cache_dit/compile/utils.py
CHANGED
@@ -24,14 +24,15 @@ def epilogue_prologue_fusion_enabled(**kwargs) -> bool:


 def set_compile_configs(
+    descent_tuning: bool = True,
     cuda_graphs: bool = False,
     force_disable_compile_caches: bool = False,
     use_fast_math: bool = False,
     **kwargs, # other kwargs
 ):
     # Alway increase recompile_limit for dynamic shape compilation
-    torch._dynamo.config.recompile_limit = …
-    torch._dynamo.config.accumulated_recompile_limit = …
+    torch._dynamo.config.recompile_limit = 1024  # default is 8
+    torch._dynamo.config.accumulated_recompile_limit = 8192  # default is 256
     # Handle compiler caches
     # https://github.com/vllm-project/vllm/blob/23baa2180b0ebba5ae94073ba9b8e93f88b75486/vllm/compilation/compiler_interface.py#L270
     torch._inductor.config.fx_graph_cache = True

@@ -47,6 +48,9 @@ def set_compile_configs(
         64 if "L20" in torch.cuda.get_device_name() else 300
     )

+    if not descent_tuning:
+        return
+
     FORCE_DISABLE_CUSTOM_COMPILE_CONFIG = (
         os.environ.get("CACHE_DIT_FORCE_DISABLE_CUSTOM_COMPILE_CONFIG", "0")
         == "1"
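A short sketch of how the new `descent_tuning` flag might be used; per the hunk above, setting it to False returns before the remaining custom inductor configuration is applied, and whether that helps is workload-dependent:

```python
import cache_dit

# Raise the dynamo recompile limits and set up compiler caches, but skip the
# extra inductor tuning options that would otherwise be applied afterwards.
cache_dit.set_compile_configs(descent_tuning=False)
```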
cache_dit/utils.py
CHANGED
@@ -26,14 +26,17 @@ class CacheStats:
     )


-def summary(pipe: DiffusionPipeline, details: bool = False):
+def summary(
+    pipe: DiffusionPipeline, details: bool = False, logging: bool = True
+):
     cache_stats = CacheStats()
     pipe_cls_name = pipe.__class__.__name__

     if hasattr(pipe, "_cache_options"):
         cache_options = pipe._cache_options
         cache_stats.cache_options = cache_options
-        …
+        if logging:
+            print(f"\n🤗Cache Options: {pipe_cls_name}\n\n{cache_options}")

     if hasattr(pipe.transformer, "_cached_steps"):
         cached_steps: list[int] = pipe.transformer._cached_steps

@@ -43,7 +46,7 @@ def summary(pipe: DiffusionPipeline, details: bool = False):
         cache_stats.cached_steps = cached_steps
         cache_stats.residual_diffs = residual_diffs

-        if residual_diffs:
+        if residual_diffs and logging:
             diffs_values = list(residual_diffs.values())
             qmin = np.min(diffs_values)
             q0 = np.percentile(diffs_values, 0)

@@ -90,7 +93,7 @@ def summary(pipe: DiffusionPipeline, details: bool = False):
         cache_stats.cfg_cached_steps = cfg_cached_steps
         cache_stats.cfg_residual_diffs = cfg_residual_diffs

-        if cfg_residual_diffs:
+        if cfg_residual_diffs and logging:
             cfg_diffs_values = list(cfg_residual_diffs.values())
             qmin = np.min(cfg_diffs_values)
             q0 = np.percentile(cfg_diffs_values, 0)

@@ -130,3 +133,29 @@ def summary(pipe: DiffusionPipeline, details: bool = False):
     )

     return cache_stats
+
+
+def strify(pipe_or_stats: DiffusionPipeline | CacheStats):
+    if not isinstance(pipe_or_stats, CacheStats):
+        stats = summary(pipe_or_stats, logging=False)
+    else:
+        stats = pipe_or_stats
+
+    cache_options = stats.cache_options
+    cached_steps = len(stats.cached_steps)
+
+    if not cache_options:
+        return "NONE"
+
+    cache_type_str = (
+        f"DBCACHE_F{cache_options['Fn_compute_blocks']}"
+        f"B{cache_options['Bn_compute_blocks']}"
+        f"W{cache_options['warmup_steps']}"
+        f"M{max(0, cache_options['max_cached_steps'])}"
+        f"T{int(cache_options['enable_taylorseer'])}"
+        f"O{cache_options['taylorseer_kwargs']['n_derivatives']}_"
+        f"R{cache_options['residual_diff_threshold']}_"
+        f"S{cached_steps}"  # skiped steps
+    )
+
+    return cache_type_str
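A minimal sketch of the new `logging` flag and the `strify` helper; the exact string depends on the configured options and on how many steps were actually cached:

```python
import cache_dit

# `pipe` is a DiffusionPipeline previously passed to cache_dit.enable_cache()
# and run at least once.
stats = cache_dit.summary(pipe, logging=False)  # collect stats without printing
print(cache_dit.strify(stats))  # e.g. "DBCACHE_F8B0W8M0T0O2_R0.08_S23"
print(cache_dit.strify(pipe))   # strify also accepts the pipeline directly
```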
{cache_dit-0.2.21.dist-info → cache_dit-0.2.22.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cache_dit
-Version: 0.2.21
+Version: 0.2.22
 Summary: 🤗 CacheDiT: An Unified and Training-free Cache Acceleration Toolbox for Diffusion Transformers
 Author: DefTruth, vipshop.com, etc.
 Maintainer: DefTruth, vipshop.com, etc

@@ -179,7 +179,6 @@ cache_dit.enable_cache(
         pipe=pipe, # Qwen-Image, etc.
         transformer=pipe.transformer,
         blocks=pipe.transformer.transformer_blocks,
-        blocks_name="transformer_blocks",
     ),
     # Check `📚Forward Pattern Matching` documentation and hack the code of
     # of Qwen-Image, you will find that it has satisfied `FORWARD_PATTERN_1`.

@@ -200,7 +199,7 @@ You can set `details` param as `True` to show more details of cache stats. (mark…
 ```python
 ⚡️Cache Steps and Residual Diffs Statistics: QwenImagePipeline

-| Cache Steps | Diffs …
+| Cache Steps | Diffs Min | Diffs P25 | Diffs P50 | Diffs P75 | Diffs P95 | Diffs Max |
 |-------------|-----------|-----------|-----------|-----------|-----------|-----------|
 | 23 | 0.045 | 0.084 | 0.114 | 0.147 | 0.241 | 0.297 |
 ```

@@ -209,7 +208,7 @@

 <div id="dbcache"></div>

-…

 **DBCache**: **Dual Block Caching** for Diffusion Transformers. Different configurations of compute blocks (**F8B12**, etc.) can be customized in DBCache, enabling a balanced trade-off between performance and precision. Moreover, it can be entirely **training**-**free**. Please check [DBCache.md](./docs/DBCache.md) docs for more design details.

@@ -314,7 +313,7 @@ cache_dit.enable_cache(

 <div id="cfg"></div>

-cache-dit supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `…
+cache-dit supports caching for **CFG (classifier-free guidance)**. For models that fuse CFG and non-CFG into a single forward step, or models that do not include CFG (classifier-free guidance) in the forward step, please set `do_separate_cfg` param to **False (default)**. Otherwise, set it to True. For examples:

 ```python
 cache_dit.enable_cache(

@@ -322,10 +321,10 @@ cache_dit.enable_cache(
     ...,
     # CFG: classifier free guidance or not
     # For model that fused CFG and non-CFG into single forward step,
-    # should set …
-    # …
-    # …
-    …
+    # should set do_separate_cfg as False. For example, set it as True
+    # for Wan 2.1/Qwen-Image and set it as False for FLUX.1, HunyuanVideo,
+    # CogVideoX, Mochi, etc.
+    do_separate_cfg=True, # Wan 2.1, Qwen-Image
     # Compute cfg forward first or not, default False, namely,
     # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
     cfg_compute_first=False,

@@ -381,10 +380,6 @@ cache-dit-metrics-cli -h # show usage
 # all: PSNR, FID, SSIM, MSE, ..., etc.
 cache-dit-metrics-cli all -i1 true.png -i2 test.png # image
 cache-dit-metrics-cli all -i1 true_dir -i2 test_dir # image dir
-cache-dit-metrics-cli all -v1 true.mp4 -v2 test.mp4 # video
-cache-dit-metrics-cli all -v1 true_dir -v2 test_dir # video dir
-cache-dit-metrics-cli fid -i1 true_dir -i2 test_dir # FID
-cache-dit-metrics-cli psnr -i1 true_dir -i2 test_dir # PSNR
 ```

 ## 👋Contribute
{cache_dit-0.2.21.dist-info → cache_dit-0.2.22.dist-info}/RECORD
CHANGED

@@ -1,21 +1,22 @@
-cache_dit/__init__.py,sha256=…
-cache_dit/_version.py,sha256=…
+cache_dit/__init__.py,sha256=wVOaj_LSDsgYygL0cDdUU80_6RINh_JctQFyDalZN7k,946
+cache_dit/_version.py,sha256=I7oxlElEVr-U2wT5qgQ2G41IxS87cokjF8Z2fKVHGrc,706
 cache_dit/logger.py,sha256=0zsu42hN-3-rgGC_C29ms1IvVpV4_b4_SwJCKSenxBE,4304
 cache_dit/primitives.py,sha256=A2iG9YLot3gOsZSPp-_gyjqjLgJvWQRx8aitD4JQ23Y,3877
-cache_dit/utils.py,sha256=…
+cache_dit/utils.py,sha256=3UgVhfmTFG28w6CV-Rfxp5u1uzLrRozocHwLCTGiQ5M,5865
 cache_dit/cache_factory/.gitignore,sha256=5Cb-qT9wsTUoMJ7vACDF7ZcLpAXhi5v-xdcWSRit988,23
-cache_dit/cache_factory/__init__.py,sha256=…
-cache_dit/cache_factory/cache_adapters.py,sha256=…
+cache_dit/cache_factory/__init__.py,sha256=cXqBAVvldXNStpxAmNIJnpfJEf2miDlzzyjIqDauFI8,505
+cache_dit/cache_factory/cache_adapters.py,sha256=bNcUz4SP3XFpVbkgSlehLdAqKbEXjQJcm-5oS8pKqxg,20289
 cache_dit/cache_factory/cache_blocks.py,sha256=kMEOoNvygzeiM2yvUSAPkKpHeQTOpXQYH2qz34TqXzs,18457
-cache_dit/cache_factory/cache_context.py,sha256=…
-cache_dit/cache_factory/…
+cache_dit/cache_factory/cache_context.py,sha256=4thx9NYxVaYZ_Nr2quUVE8bsNmTsXhZK0F960rccOc8,39000
+cache_dit/cache_factory/cache_interface.py,sha256=V1FbtwI78Qj-yoDnz956o5lpnPxH8bMmiZNhiuiYLQo,8090
+cache_dit/cache_factory/cache_types.py,sha256=FIFa6ZBfvvSMMHyBBhvarvgg2Y2wbRgITcG_uGylGe0,991
 cache_dit/cache_factory/forward_pattern.py,sha256=B2YeqV2t_zo2Ar8m7qimPBjwQgoXHGp2grPZmEAhi8s,1286
-cache_dit/cache_factory/taylorseer.py,sha256=…
-cache_dit/cache_factory/utils.py,sha256=…
+cache_dit/cache_factory/taylorseer.py,sha256=WeK2WlAJa4Px_pnAKokmnZXeqQYylQkPw4-EDqBIqeQ,3770
+cache_dit/cache_factory/utils.py,sha256=YGtn02O3fVlrfQ32gGV4WAtTRvzzwSXNxzP_FmnE2Uk,1867
 cache_dit/cache_factory/patch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cache_dit/cache_factory/patch/flux.py,sha256=iNQ-1RlOgXupZ4uPiEvJ__Ro6vKT_fOKja9JrpMrO78,8998
 cache_dit/compile/__init__.py,sha256=FcTVzCeyypl-mxlc59_ehHL3lBNiDAFsXuRoJ-5Cfi0,56
-cache_dit/compile/utils.py,sha256=…
+cache_dit/compile/utils.py,sha256=53KPsMWHyGmHGtw0T4oP0VY4O60cVXOFwFGWTlZrUqI,3857
 cache_dit/custom_ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cache_dit/custom_ops/triton_taylorseer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cache_dit/metrics/__init__.py,sha256=RaUhl5dieF40RqnizGzR30qoJJ9dyMUEADwgwMaMQrE,575

@@ -24,9 +25,9 @@ cache_dit/metrics/fid.py,sha256=9Ivtazl6mW0Bon2VXa-Ia5Xj2ewxRD3V1Qkd69zYM3Y,1706…
 cache_dit/metrics/inception.py,sha256=pBVe2X6ylLPIXTG4-GWDM9DWnCviMJbJ45R3ulhktR0,12759
 cache_dit/metrics/lpips.py,sha256=I2qCNi6qJh5TRsaIsdxO0WoRX1DN7U_H3zS0oCSahYM,1032
 cache_dit/metrics/metrics.py,sha256=8jvM1sF-nDxUuwCRy44QEoo4dYVLCQVh1QyAMs4eaQY,27840
-cache_dit-0.2.21.dist-info/…
-cache_dit-0.2.21.dist-info/…
-cache_dit-0.2.21.dist-info/…
-cache_dit-0.2.21.dist-info/…
-cache_dit-0.2.21.dist-info/…
-cache_dit-0.2.21.dist-info/…
+cache_dit-0.2.22.dist-info/licenses/LICENSE,sha256=Dqb07Ik2dV41s9nIdMUbiRWEfDqo7-dQeRiY7kPO8PE,3769
+cache_dit-0.2.22.dist-info/METADATA,sha256=BABVrkyVTakN0jel9xgApSd9IzDBRLqJHLHhauqka50,19566
+cache_dit-0.2.22.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cache_dit-0.2.22.dist-info/entry_points.txt,sha256=FX2gysXaZx6NeK1iCLMcIdP8Q4_qikkIHtEmi3oWn8o,65
+cache_dit-0.2.22.dist-info/top_level.txt,sha256=ZJDydonLEhujzz0FOkVbO-BqfzO9d_VqRHmZU-3MOZo,10
+cache_dit-0.2.22.dist-info/RECORD,,
{cache_dit-0.2.21.dist-info → cache_dit-0.2.22.dist-info}/WHEEL
File without changes

{cache_dit-0.2.21.dist-info → cache_dit-0.2.22.dist-info}/entry_points.txt
File without changes

{cache_dit-0.2.21.dist-info → cache_dit-0.2.22.dist-info}/licenses/LICENSE
File without changes

{cache_dit-0.2.21.dist-info → cache_dit-0.2.22.dist-info}/top_level.txt
File without changes