cache_dit-1.0.3-py3-none-any.whl → cache_dit-1.0.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of cache-dit might be problematic.

Files changed (29)
  1. cache_dit/__init__.py +3 -0
  2. cache_dit/_version.py +2 -2
  3. cache_dit/cache_factory/__init__.py +8 -1
  4. cache_dit/cache_factory/cache_adapters/cache_adapter.py +90 -76
  5. cache_dit/cache_factory/cache_blocks/__init__.py +167 -17
  6. cache_dit/cache_factory/cache_blocks/pattern_0_1_2.py +10 -0
  7. cache_dit/cache_factory/cache_blocks/pattern_3_4_5.py +256 -24
  8. cache_dit/cache_factory/cache_blocks/pattern_base.py +273 -38
  9. cache_dit/cache_factory/cache_blocks/pattern_utils.py +55 -10
  10. cache_dit/cache_factory/cache_contexts/__init__.py +15 -2
  11. cache_dit/cache_factory/cache_contexts/cache_config.py +102 -0
  12. cache_dit/cache_factory/cache_contexts/cache_context.py +15 -93
  13. cache_dit/cache_factory/cache_contexts/cache_manager.py +7 -7
  14. cache_dit/cache_factory/cache_contexts/calibrators/taylorseer.py +78 -8
  15. cache_dit/cache_factory/cache_contexts/context_manager.py +29 -0
  16. cache_dit/cache_factory/cache_contexts/prune_config.py +69 -0
  17. cache_dit/cache_factory/cache_contexts/prune_context.py +155 -0
  18. cache_dit/cache_factory/cache_contexts/prune_manager.py +154 -0
  19. cache_dit/cache_factory/cache_interface.py +20 -14
  20. cache_dit/cache_factory/cache_types.py +19 -2
  21. cache_dit/cache_factory/params_modifier.py +7 -7
  22. cache_dit/cache_factory/utils.py +18 -7
  23. cache_dit/utils.py +191 -54
  24. {cache_dit-1.0.3.dist-info → cache_dit-1.0.4.dist-info}/METADATA +9 -9
  25. {cache_dit-1.0.3.dist-info → cache_dit-1.0.4.dist-info}/RECORD +29 -24
  26. {cache_dit-1.0.3.dist-info → cache_dit-1.0.4.dist-info}/WHEEL +0 -0
  27. {cache_dit-1.0.3.dist-info → cache_dit-1.0.4.dist-info}/entry_points.txt +0 -0
  28. {cache_dit-1.0.3.dist-info → cache_dit-1.0.4.dist-info}/licenses/LICENSE +0 -0
  29. {cache_dit-1.0.3.dist-info → cache_dit-1.0.4.dist-info}/top_level.txt +0 -0
@@ -3,11 +3,16 @@ import torch
 import torch.distributed as dist
 
 from cache_dit.cache_factory.cache_contexts.cache_context import CachedContext
+from cache_dit.cache_factory.cache_contexts.prune_context import PrunedContext
 from cache_dit.cache_factory.cache_contexts.cache_manager import (
     CachedContextManager,
-    CacheNotExistError,
+    ContextNotExistError,
+)
+from cache_dit.cache_factory.cache_contexts.prune_manager import (
+    PrunedContextManager,
 )
 from cache_dit.cache_factory import ForwardPattern
+from cache_dit.cache_factory.cache_types import CacheType
 from cache_dit.logger import init_logger
 
 logger = init_logger(__name__)
@@ -31,7 +36,8 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
         # 1. Cache context configuration
         cache_prefix: str = None,  # maybe un-need.
         cache_context: CachedContext | str = None,
-        cache_manager: CachedContextManager = None,
+        context_manager: CachedContextManager = None,
+        cache_type: CacheType = CacheType.DBCache,
         **kwargs,
     ):
         super().__init__()
@@ -45,13 +51,15 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
         # 1. Cache context configuration
         self.cache_prefix = cache_prefix
         self.cache_context = cache_context
-        self.cache_manager = cache_manager
+        self.context_manager = context_manager
+        self.cache_type = cache_type
 
         self._check_forward_pattern()
+        self._check_cache_type()
         logger.info(
-            f"Match Cached Blocks: {self.__class__.__name__}, for "
+            f"Match Blocks: {self.__class__.__name__}, for "
             f"{self.cache_prefix}, cache_context: {self.cache_context}, "
-            f"cache_manager: {self.cache_manager.name}."
+            f"context_manager: {self.context_manager.name}."
         )
 
     def _check_forward_pattern(self):
@@ -94,18 +102,25 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
                 required_param in forward_parameters
             ), f"The input parameters must contains: {required_param}."
 
+    @torch.compiler.disable
+    def _check_cache_type(self):
+        assert (
+            self.cache_type == CacheType.DBCache
+        ), f"Cache type {self.cache_type} is not supported for CachedBlocks."
+
     @torch.compiler.disable
     def _check_cache_params(self):
-        assert self.cache_manager.Fn_compute_blocks() <= len(
+        self._check_cache_type()
+        assert self.context_manager.Fn_compute_blocks() <= len(
             self.transformer_blocks
         ), (
-            f"Fn_compute_blocks {self.cache_manager.Fn_compute_blocks()} must be less than "
+            f"Fn_compute_blocks {self.context_manager.Fn_compute_blocks()} must be less than "
             f"the number of transformer blocks {len(self.transformer_blocks)}"
         )
-        assert self.cache_manager.Bn_compute_blocks() <= len(
+        assert self.context_manager.Bn_compute_blocks() <= len(
             self.transformer_blocks
         ), (
-            f"Bn_compute_blocks {self.cache_manager.Bn_compute_blocks()} must be less than "
+            f"Bn_compute_blocks {self.context_manager.Bn_compute_blocks()} must be less than "
             f"the number of transformer blocks {len(self.transformer_blocks)}"
         )
 
@@ -174,9 +189,9 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
         ):
             # Use it's own cache context.
             try:
-                self.cache_manager.set_context(self.cache_context)
+                self.context_manager.set_context(self.cache_context)
                 self._check_cache_params()
-            except CacheNotExistError as e:
+            except ContextNotExistError as e:
                 logger.warning(f"Cache context not exist: {e}, skip cache.")
                 # Call all blocks to process the hidden states.
                 hidden_states, encoder_hidden_states = self.call_blocks(
@@ -203,38 +218,38 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
         Fn_hidden_states_residual = hidden_states - original_hidden_states
         del original_hidden_states
 
-        self.cache_manager.mark_step_begin()
+        self.context_manager.mark_step_begin()
         # Residual L1 diff or Hidden States L1 diff
-        can_use_cache = self.cache_manager.can_cache(
+        can_use_cache = self.context_manager.can_cache(
             (
                 Fn_hidden_states_residual
-                if not self.cache_manager.is_l1_diff_enabled()
+                if not self.context_manager.is_l1_diff_enabled()
                 else hidden_states
             ),
             parallelized=self._is_parallelized(),
             prefix=(
                 f"{self.cache_prefix}_Fn_residual"
-                if not self.cache_manager.is_l1_diff_enabled()
+                if not self.context_manager.is_l1_diff_enabled()
                 else f"{self.cache_prefix}_Fn_hidden_states"
             ),
         )
 
         torch._dynamo.graph_break()
         if can_use_cache:
-            self.cache_manager.add_cached_step()
+            self.context_manager.add_cached_step()
             del Fn_hidden_states_residual
             hidden_states, encoder_hidden_states = (
-                self.cache_manager.apply_cache(
+                self.context_manager.apply_cache(
                     hidden_states,
                     encoder_hidden_states,
                     prefix=(
                         f"{self.cache_prefix}_Bn_residual"
-                        if self.cache_manager.is_cache_residual()
+                        if self.context_manager.is_cache_residual()
                         else f"{self.cache_prefix}_Bn_hidden_states"
                     ),
                     encoder_prefix=(
                         f"{self.cache_prefix}_Bn_residual"
-                        if self.cache_manager.is_encoder_cache_residual()
+                        if self.context_manager.is_encoder_cache_residual()
                         else f"{self.cache_prefix}_Bn_hidden_states"
                     ),
                 )
@@ -249,13 +264,13 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
                **kwargs,
            )
        else:
-            self.cache_manager.set_Fn_buffer(
+            self.context_manager.set_Fn_buffer(
                Fn_hidden_states_residual,
                prefix=f"{self.cache_prefix}_Fn_residual",
            )
-            if self.cache_manager.is_l1_diff_enabled():
+            if self.context_manager.is_l1_diff_enabled():
                # for hidden states L1 diff
-                self.cache_manager.set_Fn_buffer(
+                self.context_manager.set_Fn_buffer(
                    hidden_states,
                    f"{self.cache_prefix}_Fn_hidden_states",
                )
@@ -273,24 +288,24 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
                **kwargs,
            )
            torch._dynamo.graph_break()
-            if self.cache_manager.is_cache_residual():
-                self.cache_manager.set_Bn_buffer(
+            if self.context_manager.is_cache_residual():
+                self.context_manager.set_Bn_buffer(
                    hidden_states_residual,
                    prefix=f"{self.cache_prefix}_Bn_residual",
                )
            else:
-                self.cache_manager.set_Bn_buffer(
+                self.context_manager.set_Bn_buffer(
                    hidden_states,
                    prefix=f"{self.cache_prefix}_Bn_hidden_states",
                )
 
-            if self.cache_manager.is_encoder_cache_residual():
-                self.cache_manager.set_Bn_encoder_buffer(
+            if self.context_manager.is_encoder_cache_residual():
+                self.context_manager.set_Bn_encoder_buffer(
                    encoder_hidden_states_residual,
                    prefix=f"{self.cache_prefix}_Bn_residual",
                )
            else:
-                self.cache_manager.set_Bn_encoder_buffer(
+                self.context_manager.set_Bn_encoder_buffer(
                    encoder_hidden_states,
                    prefix=f"{self.cache_prefix}_Bn_hidden_states",
                )
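
At a high level, the renamed context_manager calls in the hunks above implement the DBCache decision: a residual L1 diff over the first Fn blocks decides whether the remaining blocks are computed or approximated from cached Bn residuals. The sketch below is a simplified, hypothetical illustration only; the method names mirror the diff, but the real signatures also take prefix/encoder/parallelized arguments, and the block grouping is handled by _Fn_blocks/_Mn_blocks/_Bn_blocks.

    # Simplified DBCache step, assuming `manager` behaves like CachedContextManager
    # and `fn_blocks` / `rest_blocks` are plain callables (illustrative only).
    import torch

    def dbcache_step(manager, fn_blocks, rest_blocks, hidden_states: torch.Tensor) -> torch.Tensor:
        fn_out = hidden_states
        for block in fn_blocks:                  # Fn: first n compute blocks
            fn_out = block(fn_out)
        fn_residual = fn_out - hidden_states     # residual used for the L1 diff

        manager.mark_step_begin()
        if manager.can_cache(fn_residual):       # diff below threshold -> reuse cache
            manager.add_cached_step()
            return manager.apply_cache(fn_out)   # add back the cached Bn residual
        manager.set_Fn_buffer(fn_residual)       # otherwise refresh the Fn buffer
        out = fn_out
        for block in rest_blocks:                # compute the Mn + Bn blocks
            out = block(out)
        manager.set_Bn_buffer(out - fn_out)      # and cache the new Bn residual
        return out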
@@ -333,11 +348,11 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
        # If so, we can skip some Bn blocks and directly
        # use the cached values.
        return (
-            self.cache_manager.get_current_step()
-            in self.cache_manager.get_cached_steps()
+            self.context_manager.get_current_step()
+            in self.context_manager.get_cached_steps()
        ) or (
-            self.cache_manager.get_current_step()
-            in self.cache_manager.get_cfg_cached_steps()
+            self.context_manager.get_current_step()
+            in self.context_manager.get_cfg_cached_steps()
        )
 
    @torch.compiler.disable
@@ -346,20 +361,20 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
        # more stable diff calculation.
        # Fn: [0,...,n-1]
        selected_Fn_blocks = self.transformer_blocks[
-            : self.cache_manager.Fn_compute_blocks()
+            : self.context_manager.Fn_compute_blocks()
        ]
        return selected_Fn_blocks
 
    @torch.compiler.disable
    def _Mn_blocks(self):  # middle blocks
        # M(N-2n): only transformer_blocks [n,...,N-n], middle
-        if self.cache_manager.Bn_compute_blocks() == 0:  # WARN: x[:-0] = []
+        if self.context_manager.Bn_compute_blocks() == 0:  # WARN: x[:-0] = []
            selected_Mn_blocks = self.transformer_blocks[
-                self.cache_manager.Fn_compute_blocks() :
+                self.context_manager.Fn_compute_blocks() :
            ]
        else:
            selected_Mn_blocks = self.transformer_blocks[
-                self.cache_manager.Fn_compute_blocks() : -self.cache_manager.Bn_compute_blocks()
+                self.context_manager.Fn_compute_blocks() : -self.context_manager.Bn_compute_blocks()
            ]
        return selected_Mn_blocks
 
@@ -367,7 +382,7 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
    def _Bn_blocks(self):
        # Bn: transformer_blocks [N-n+1,...,N-1]
        selected_Bn_blocks = self.transformer_blocks[
-            -self.cache_manager.Bn_compute_blocks() :
+            -self.context_manager.Bn_compute_blocks() :
        ]
        return selected_Bn_blocks
 
@@ -441,7 +456,7 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
        *args,
        **kwargs,
    ):
-        if self.cache_manager.Bn_compute_blocks() == 0:
+        if self.context_manager.Bn_compute_blocks() == 0:
            return hidden_states, encoder_hidden_states
 
        for block in self._Bn_blocks():
@@ -456,3 +471,223 @@ class CachedBlocks_Pattern_Base(torch.nn.Module):
            )
 
        return hidden_states, encoder_hidden_states
+
+
+class PrunedBlocks_Pattern_Base(CachedBlocks_Pattern_Base):
+    pruned_blocks_step: int = 0  # number of pruned blocks in current step
+
+    def __init__(
+        self,
+        # 0. Transformer blocks configuration
+        transformer_blocks: torch.nn.ModuleList,
+        transformer: torch.nn.Module = None,
+        forward_pattern: ForwardPattern = ForwardPattern.Pattern_0,
+        check_forward_pattern: bool = True,
+        check_num_outputs: bool = True,
+        # 1. Prune context configuration
+        cache_prefix: str = None,  # maybe un-need.
+        cache_context: PrunedContext | str = None,
+        context_manager: PrunedContextManager = None,
+        cache_type: CacheType = CacheType.DBPrune,
+        **kwargs,
+    ):
+        super().__init__(
+            # 0. Transformer blocks configuration
+            transformer_blocks,
+            transformer=transformer,
+            forward_pattern=forward_pattern,
+            check_forward_pattern=check_forward_pattern,
+            check_num_outputs=check_num_outputs,
+            # 1. Cache context configuration
+            cache_prefix=cache_prefix,
+            cache_context=cache_context,
+            context_manager=context_manager,
+            cache_type=cache_type,
+            **kwargs,
+        )
+        assert isinstance(
+            self.context_manager, PrunedContextManager
+        ), "context_manager must be PrunedContextManager for PrunedBlocks."
+        self.context_manager: PrunedContextManager = (
+            self.context_manager
+        )  # For type hint
+
+    @torch.compiler.disable
+    def _check_cache_type(self):
+        assert (
+            self.cache_type == CacheType.DBPrune
+        ), f"Cache type {self.cache_type} is not supported for PrunedBlocks."
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        encoder_hidden_states: torch.Tensor,
+        *args,
+        **kwargs,
+    ):
+        self.pruned_blocks_step: int = 0  # reset for each step
+
+        # Use it's own cache context.
+        try:
+            self.context_manager.set_context(self.cache_context)
+            self._check_cache_params()
+        except ContextNotExistError as e:
+            logger.warning(f"Cache context not exist: {e}, skip prune.")
+            # Fallback to call all blocks to process the hidden states w/o prune.
+            hidden_states, encoder_hidden_states = self.call_blocks(
+                hidden_states,
+                encoder_hidden_states,
+                *args,
+                **kwargs,
+            )
+            return self._process_forward_outputs(
+                hidden_states,
+                encoder_hidden_states,
+            )
+
+        self.context_manager.mark_step_begin()
+
+        # Call all blocks with prune strategy to process the hidden states.
+        for i, block in enumerate(self.transformer_blocks):
+            hidden_states, encoder_hidden_states = self.compute_or_prune(
+                i,
+                block,
+                hidden_states,
+                encoder_hidden_states,
+                *args,
+                **kwargs,
+            )
+
+        self.context_manager.add_pruned_block(self.pruned_blocks_step)
+        self.context_manager.add_actual_block(self.num_blocks)
+
+        return self._process_forward_outputs(
+            hidden_states,
+            encoder_hidden_states,
+        )
+
+    @property
+    @torch.compiler.disable
+    def num_blocks(self):
+        return len(self.transformer_blocks)
+
+    @torch.compiler.disable
+    def _skip_prune(self, block_id: int) -> bool:
+        # Wrap for non compiled mode.
+        return block_id in self.context_manager.get_non_prune_blocks_ids(
+            self.num_blocks
+        )
+
+    @torch.compiler.disable
+    def _maybe_prune(
+        self,
+        block_id: int,  # Block index in the transformer blocks
+        hidden_states: torch.Tensor,  # hidden_states or residual
+        prefix: str = "Bn_original",  # prev step name for single blocks
+    ):
+        # Wrap for non compiled mode.
+        can_use_prune = False
+        if not self._skip_prune(block_id):
+            can_use_prune = self.context_manager.can_prune(
+                hidden_states,  # curr step
+                parallelized=self._is_parallelized(),
+                prefix=prefix,  # prev step
+            )
+        self.pruned_blocks_step += int(can_use_prune)
+        return can_use_prune
+
+    def compute_or_prune(
+        self,
+        block_id: int,  # Block index in the transformer blocks
+        # Below are the inputs to the block
+        block,  # The transformer block to be executed
+        hidden_states: torch.Tensor,
+        encoder_hidden_states: torch.Tensor,
+        *args,
+        **kwargs,
+    ):
+        original_hidden_states = hidden_states
+        original_encoder_hidden_states = encoder_hidden_states
+
+        can_use_prune = self._maybe_prune(
+            block_id,
+            hidden_states,
+            prefix=f"{self.cache_prefix}_{block_id}_Fn_original",
+        )
+
+        # Prune steps: Prune current block and reuse the cached
+        # residuals for hidden states approximate.
+        torch._dynamo.graph_break()
+        if can_use_prune:
+            self.context_manager.add_pruned_step()
+            hidden_states, encoder_hidden_states = (
+                self.context_manager.apply_prune(
+                    hidden_states,
+                    encoder_hidden_states,
+                    prefix=(
+                        f"{self.cache_prefix}_{block_id}_Bn_residual"
+                        if self.context_manager.is_cache_residual()
+                        else f"{self.cache_prefix}_{block_id}_Bn_hidden_states"
+                    ),
+                    encoder_prefix=(
+                        f"{self.cache_prefix}_{block_id}_Bn_encoder_residual"
+                        if self.context_manager.is_encoder_cache_residual()
+                        else f"{self.cache_prefix}_{block_id}_Bn_encoder_hidden_states"
+                    ),
+                )
+            )
+            torch._dynamo.graph_break()
+        else:
+            # Normal steps: Compute the block and cache the residuals.
+            hidden_states = block(
+                hidden_states,
+                encoder_hidden_states,
+                *args,
+                **kwargs,
+            )
+            hidden_states, encoder_hidden_states = self._process_block_outputs(
+                hidden_states, encoder_hidden_states
+            )
+            if not self._skip_prune(block_id):
+                hidden_states = hidden_states.contiguous()
+                hidden_states_residual = hidden_states - original_hidden_states
+
+                if (
+                    encoder_hidden_states is not None
+                    and original_encoder_hidden_states is not None
+                ):
+                    encoder_hidden_states = encoder_hidden_states.contiguous()
+                    encoder_hidden_states_residual = (
+                        encoder_hidden_states - original_encoder_hidden_states
+                    )
+                else:
+                    encoder_hidden_states_residual = None
+
+                self.context_manager.set_Fn_buffer(
+                    original_hidden_states,
+                    prefix=f"{self.cache_prefix}_{block_id}_Fn_original",
+                )
+                if self.context_manager.is_cache_residual():
+                    self.context_manager.set_Bn_buffer(
+                        hidden_states_residual,
+                        prefix=f"{self.cache_prefix}_{block_id}_Bn_residual",
+                    )
+                else:
+                    self.context_manager.set_Bn_buffer(
+                        hidden_states,
+                        prefix=f"{self.cache_prefix}_{block_id}_Bn_hidden_states",
+                    )
+                if encoder_hidden_states_residual is not None:
+                    if self.context_manager.is_encoder_cache_residual():
+                        self.context_manager.set_Bn_encoder_buffer(
+                            encoder_hidden_states_residual,
+                            prefix=f"{self.cache_prefix}_{block_id}_Bn_encoder_residual",
+                        )
+                    else:
+                        self.context_manager.set_Bn_encoder_buffer(
+                            encoder_hidden_states_residual,
+                            prefix=f"{self.cache_prefix}_{block_id}_Bn_encoder_hidden_states",
+                        )
+            torch._dynamo.graph_break()
+
+        return hidden_states, encoder_hidden_states
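
The new PrunedBlocks_Pattern_Base class decides per block and per step whether to run the block or approximate its output from a cached residual. Below is a minimal sketch of that decision, assuming a manager object with PrunedContextManager-like semantics; names and signatures are simplified for illustration and are not the library's actual API surface.

    import torch

    def compute_or_prune_sketch(manager, block, block_id: int,
                                hidden_states: torch.Tensor) -> torch.Tensor:
        original = hidden_states
        prunable = block_id not in manager.get_non_prune_blocks_ids()
        if prunable and manager.can_prune(hidden_states):
            # Pruned: skip the block and approximate its output from the
            # residual cached on a previous, fully computed step.
            return manager.apply_prune(hidden_states)
        out = block(hidden_states)               # compute the block normally
        manager.set_Fn_buffer(original)          # input used for the next diff check
        manager.set_Bn_buffer(out - original)    # residual reused when pruning later
        return out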
@@ -3,34 +3,61 @@ import torch
 from typing import Any
 from cache_dit.cache_factory import CachedContext
 from cache_dit.cache_factory import CachedContextManager
+from cache_dit.cache_factory import PrunedContextManager
 
 
-def patch_cached_stats(
+def apply_stats(
     module: torch.nn.Module | Any,
     cache_context: CachedContext | str = None,
-    cache_manager: CachedContextManager = None,
+    context_manager: CachedContextManager | PrunedContextManager = None,
 ):
     # Patch the cached stats to the module, the cached stats
     # will be reset for each calling of pipe.__call__(**kwargs).
-    if module is None or cache_manager is None:
+    if module is None or context_manager is None:
         return
 
     if cache_context is not None:
-        cache_manager.set_context(cache_context)
+        context_manager.set_context(cache_context)
 
-    # TODO: Patch more cached stats to the module
-    module._cached_steps = cache_manager.get_cached_steps()
-    module._residual_diffs = cache_manager.get_residual_diffs()
-    module._cfg_cached_steps = cache_manager.get_cfg_cached_steps()
-    module._cfg_residual_diffs = cache_manager.get_cfg_residual_diffs()
+    # Cache stats for Dual Block Cache
+    module._cached_steps = context_manager.get_cached_steps()
+    module._residual_diffs = context_manager.get_residual_diffs()
+    module._cfg_cached_steps = context_manager.get_cfg_cached_steps()
+    module._cfg_residual_diffs = context_manager.get_cfg_residual_diffs()
+    # Pruned stats for Dynamic Block Prune
+    if not isinstance(context_manager, PrunedContextManager):
+        return
+    module._pruned_steps = context_manager.get_pruned_steps()
+    module._cfg_pruned_steps = context_manager.get_cfg_pruned_steps()
+    module._pruned_blocks = context_manager.get_pruned_blocks()
+    module._cfg_pruned_blocks = context_manager.get_cfg_pruned_blocks()
+    module._actual_blocks = context_manager.get_actual_blocks()
+    module._cfg_actual_blocks = context_manager.get_cfg_actual_blocks()
+    # Caculate pruned ratio
+    if len(module._pruned_blocks) > 0 and sum(module._actual_blocks) > 0:
+        module._pruned_ratio = sum(module._pruned_blocks) / sum(
+            module._actual_blocks
+        )
+    else:
+        module._pruned_ratio = None
+    if (
+        len(module._cfg_pruned_blocks) > 0
+        and sum(module._cfg_actual_blocks) > 0
+    ):
+        module._cfg_pruned_ratio = sum(module._cfg_pruned_blocks) / sum(
+            module._cfg_actual_blocks
+        )
+    else:
+        module._cfg_pruned_ratio = None
 
 
-def remove_cached_stats(
+def remove_stats(
     module: torch.nn.Module | Any,
 ):
     if module is None:
         return
 
+    # Dual Block Cache
     if hasattr(module, "_cached_steps"):
         del module._cached_steps
     if hasattr(module, "_residual_diffs"):
@@ -39,3 +66,21 @@ def remove_cached_stats(
         del module._cfg_cached_steps
     if hasattr(module, "_cfg_residual_diffs"):
         del module._cfg_residual_diffs
+
+    # Dynamic Block Prune
+    if hasattr(module, "_pruned_steps"):
+        del module._pruned_steps
+    if hasattr(module, "_cfg_pruned_steps"):
+        del module._cfg_pruned_steps
+    if hasattr(module, "_pruned_blocks"):
+        del module._pruned_blocks
+    if hasattr(module, "_cfg_pruned_blocks"):
+        del module._cfg_pruned_blocks
+    if hasattr(module, "_actual_blocks"):
+        del module._actual_blocks
+    if hasattr(module, "_cfg_actual_blocks"):
+        del module._cfg_actual_blocks
+    if hasattr(module, "_pruned_ratio"):
+        del module._pruned_ratio
+    if hasattr(module, "_cfg_pruned_ratio"):
+        del module._cfg_pruned_ratio
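
Assuming apply_stats() is invoked on the pipeline's transformer module (as patch_cached_stats was in earlier releases), the new prune statistics can be read back as plain attributes; per the hunk above, _pruned_ratio is simply sum(_pruned_blocks) / sum(_actual_blocks). A hypothetical usage sketch, where pipe is a diffusers pipeline:

    transformer = pipe.transformer
    if getattr(transformer, "_pruned_ratio", None) is not None:
        print(
            f"pruned blocks: {sum(transformer._pruned_blocks)} / "
            f"{sum(transformer._actual_blocks)} "
            f"({transformer._pruned_ratio:.2%})"
        )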
@@ -5,11 +5,24 @@ from cache_dit.cache_factory.cache_contexts.calibrators import (
     TaylorSeerCalibratorConfig,
     FoCaCalibratorConfig,
 )
+from cache_dit.cache_factory.cache_contexts.cache_config import (
+    BasicCacheConfig,
+    DBCacheConfig,
+)
 from cache_dit.cache_factory.cache_contexts.cache_context import (
     CachedContext,
-    BasicCacheConfig,
 )
 from cache_dit.cache_factory.cache_contexts.cache_manager import (
     CachedContextManager,
-    CacheNotExistError,
+    ContextNotExistError,
+)
+from cache_dit.cache_factory.cache_contexts.prune_config import DBPruneConfig
+from cache_dit.cache_factory.cache_contexts.prune_context import (
+    PrunedContext,
+)
+from cache_dit.cache_factory.cache_contexts.prune_manager import (
+    PrunedContextManager,
+)
+from cache_dit.cache_factory.cache_contexts.context_manager import (
+    ContextManager,
 )
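
Given the exports above, the prune-related names should be importable from the cache_contexts package next to the existing cache ones (a sketch, not verified against the installed wheel):

    from cache_dit.cache_factory.cache_contexts import (
        BasicCacheConfig,
        DBCacheConfig,
        DBPruneConfig,
        PrunedContext,
        PrunedContextManager,
        ContextNotExistError,
    )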
@@ -0,0 +1,102 @@
+import torch
+import dataclasses
+from typing import Optional, Union
+from cache_dit.cache_factory.cache_types import CacheType
+from cache_dit.logger import init_logger
+
+logger = init_logger(__name__)
+
+
+@dataclasses.dataclass
+class BasicCacheConfig:
+    # Default: Dual Block Cache with Flexible FnBn configuration.
+    cache_type: CacheType = CacheType.DBCache  # DBCache, DBPrune, NONE
+
+    # Fn_compute_blocks: (`int`, *required*, defaults to 8):
+    #     Specifies that `DBCache` uses the **first n** Transformer blocks to fit the information
+    #     at time step t, enabling the calculation of a more stable L1 diff and delivering more
+    #     accurate information to subsequent blocks. Please check https://github.com/vipshop/cache-dit/blob/main/docs/DBCache.md
+    #     for more details of DBCache.
+    Fn_compute_blocks: int = 8
+    # Bn_compute_blocks: (`int`, *required*, defaults to 0):
+    #     Further fuses approximate information in the **last n** Transformer blocks to enhance
+    #     prediction accuracy. These blocks act as an auto-scaler for approximate hidden states
+    #     that use residual cache.
+    Bn_compute_blocks: int = 0
+    # residual_diff_threshold (`float`, *required*, defaults to 0.08):
+    #     the value of residual diff threshold, a higher value leads to faster performance at the
+    #     cost of lower precision.
+    residual_diff_threshold: Union[torch.Tensor, float] = 0.08
+    # max_warmup_steps (`int`, *required*, defaults to 8):
+    #     DBCache does not apply the caching strategy when the number of running steps is less than
+    #     or equal to this value, ensuring the model sufficiently learns basic features during warmup.
+    max_warmup_steps: int = 8  # DON'T Cache in warmup steps
+    # warmup_interval (`int`, *required*, defaults to 1):
+    #     Skip interval in warmup steps, e.g., when warmup_interval is 2, only 0, 2, 4, ... steps
+    #     in warmup steps will be computed, others will use dynamic cache.
+    warmup_interval: int = 1  # skip interval in warmup steps
+    # max_cached_steps (`int`, *required*, defaults to -1):
+    #     DBCache disables the caching strategy when the previous cached steps exceed this value to
+    #     prevent precision degradation.
+    max_cached_steps: int = -1  # for both CFG and non-CFG
+    # max_continuous_cached_steps (`int`, *required*, defaults to -1):
+    #     DBCache disables the caching strategy when the previous continous cached steps exceed this value to
+    #     prevent precision degradation.
+    max_continuous_cached_steps: int = -1  # the max continuous cached steps
+    # enable_separate_cfg (`bool`, *required*, defaults to None):
+    #     Whether to do separate cfg or not, such as Wan 2.1, Qwen-Image. For model that fused CFG
+    #     and non-CFG into single forward step, should set enable_separate_cfg as False, for example:
+    #     CogVideoX, HunyuanVideo, Mochi, etc.
+    enable_separate_cfg: Optional[bool] = None
+    # cfg_compute_first (`bool`, *required*, defaults to False):
+    #     Compute cfg forward first or not, default False, namely, 0, 2, 4, ..., -> non-CFG step;
+    #     1, 3, 5, ... -> CFG step.
+    cfg_compute_first: bool = False
+    # cfg_diff_compute_separate (`bool`, *required*, defaults to True):
+    #     Compute separate diff values for CFG and non-CFG step, default True. If False, we will
+    #     use the computed diff from current non-CFG transformer step for current CFG step.
+    cfg_diff_compute_separate: bool = True
+
+    def update(self, **kwargs) -> "BasicCacheConfig":
+        for key, value in kwargs.items():
+            if hasattr(self, key):
+                setattr(self, key, value)
+        return self
+
+    def strify(self) -> str:
+        return (
+            f"{self.cache_type}_"
+            f"F{self.Fn_compute_blocks}"
+            f"B{self.Bn_compute_blocks}_"
+            f"W{self.max_warmup_steps}"
+            f"I{self.warmup_interval}"
+            f"M{max(0, self.max_cached_steps)}"
+            f"MC{max(0, self.max_continuous_cached_steps)}_"
+            f"R{self.residual_diff_threshold}"
+        )
+
+
+@dataclasses.dataclass
+class ExtraCacheConfig:
+    # Some other not very important settings for Dual Block Cache.
+    # NOTE: These flags maybe deprecated in the future and users
+    # should never use these extra configurations in their cases.
+
+    # l1_hidden_states_diff_threshold (`float`, *optional*, defaults to None):
+    #     The hidden states diff threshold for DBCache if use hidden_states as
+    #     cache (not residual).
+    l1_hidden_states_diff_threshold: float = None
+    # important_condition_threshold (`float`, *optional*, defaults to 0.0):
+    #     Only select the most important tokens while calculating the l1 diff.
+    important_condition_threshold: float = 0.0
+    # downsample_factor (`int`, *optional*, defaults to 1):
+    #     Downsample factor for Fn buffer, in order the save GPU memory.
+    downsample_factor: int = 1
+    # num_inference_steps (`int`, *optional*, defaults to -1):
+    #     num_inference_steps for DiffusionPipeline, for future use.
+    num_inference_steps: int = -1
+
+
+@dataclasses.dataclass
+class DBCacheConfig(BasicCacheConfig):
+    pass  # Just an alias for BasicCacheConfig
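
To illustrate how the new config dataclass composes, here is a small sketch based only on the definitions above; the exact string produced by strify() depends on how CacheType renders, so the printed prefix is an assumption:

    from cache_dit.cache_factory.cache_contexts.cache_config import DBCacheConfig

    cfg = DBCacheConfig().update(Fn_compute_blocks=4, residual_diff_threshold=0.12)
    # With defaults otherwise: F4, B0, 8 warmup steps, interval 1, no caching limits.
    print(cfg.strify())  # e.g. "<cache_type>_F4B0_W8I1M0MC0_R0.12"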