cache-dit 0.2.23__py3-none-any.whl → 0.2.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cache_dit/__init__.py +1 -0
- cache_dit/_version.py +2 -2
- cache_dit/cache_factory/cache_adapters.py +137 -76
- cache_dit/cache_factory/cache_context.py +112 -39
- cache_dit/cache_factory/cache_interface.py +11 -4
- cache_dit/cache_factory/taylorseer.py +5 -4
- cache_dit/cache_factory/utils.py +1 -1
- cache_dit/compile/utils.py +1 -1
- cache_dit/quantize/__init__.py +1 -0
- cache_dit/quantize/quantize_ao.py +182 -0
- cache_dit/quantize/quantize_interface.py +46 -0
- cache_dit/quantize/quantize_svdq.py +0 -0
- cache_dit/utils.py +68 -34
- {cache_dit-0.2.23.dist-info → cache_dit-0.2.25.dist-info}/METADATA +15 -15
- {cache_dit-0.2.23.dist-info → cache_dit-0.2.25.dist-info}/RECORD +19 -16
- cache_dit/primitives.py +0 -152
- {cache_dit-0.2.23.dist-info → cache_dit-0.2.25.dist-info}/WHEEL +0 -0
- {cache_dit-0.2.23.dist-info → cache_dit-0.2.25.dist-info}/entry_points.txt +0 -0
- {cache_dit-0.2.23.dist-info → cache_dit-0.2.25.dist-info}/licenses/LICENSE +0 -0
- {cache_dit-0.2.23.dist-info → cache_dit-0.2.25.dist-info}/top_level.txt +0 -0
cache_dit/__init__.py
CHANGED
@@ -12,6 +12,7 @@ from cache_dit.cache_factory import CacheType
 from cache_dit.cache_factory import BlockAdapter
 from cache_dit.cache_factory import ForwardPattern
 from cache_dit.compile import set_compile_configs
+from cache_dit.quantize import quantize
 from cache_dit.utils import summary
 from cache_dit.utils import strify
 from cache_dit.logger import init_logger
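The new top-level `quantize` export is backed by the quantize subpackage added in this release (`quantize_ao.py`, `quantize_interface.py`). A hedged usage sketch only: the diff confirms the import path but not the call signature, so the argument below is an assumption; check `cache_dit/quantize/quantize_interface.py` for the real interface.

```python
# Sketch, not the confirmed API: assumes `quantize` accepts a torch.nn.Module.
import cache_dit
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev")
cache_dit.quantize(pipe.transformer)  # presumably torchao-backed, per quantize_ao.py
```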
cache_dit/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.2.23'
-__version_tuple__ = version_tuple = (0, 2, 23)
+__version__ = version = '0.2.25'
+__version_tuple__ = version_tuple = (0, 2, 25)
 
 __commit_id__ = commit_id = None
cache_dit/cache_factory/cache_adapters.py
CHANGED

@@ -5,7 +5,7 @@ import unittest
 import functools
 import dataclasses
 
-from typing import Any, Tuple, List
+from typing import Any, Tuple, List, Optional
 from contextlib import ExitStack
 from diffusers import DiffusionPipeline
 from cache_dit.cache_factory.patch.flux import (
@@ -40,6 +40,7 @@ class BlockAdapter:
             "layers",
         ]
     )
+    check_prefixes: bool = True
    allow_suffixes: List[str] = dataclasses.field(
        default_factory=lambda: ["TransformerBlock"]
    )
@@ -48,8 +49,24 @@ class BlockAdapter:
         default="max", metadata={"allowed_values": ["max", "min"]}
     )
 
+    def __post_init__(self):
+        self.maybe_apply_patch()
+
+    def maybe_apply_patch(self):
+        # Process some specificial cases, specific for transformers
+        # that has different forward patterns between single_transformer_blocks
+        # and transformer_blocks , such as Flux (diffusers < 0.35.0).
+        if self.transformer.__class__.__name__.startswith("Flux"):
+            self.transformer = maybe_patch_flux_transformer(
+                self.transformer,
+                blocks=self.blocks,
+            )
+
     @staticmethod
-    def auto_block_adapter(adapter: "BlockAdapter") -> "BlockAdapter":
+    def auto_block_adapter(
+        adapter: "BlockAdapter",
+        forward_pattern: Optional[ForwardPattern] = None,
+    ) -> "BlockAdapter":
         assert adapter.auto, (
             "Please manually set `auto` to True, or, manually "
             "set all the transformer blocks configuration."
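For orientation, a sketch of how the `auto` path is typically driven. The `BlockAdapter` field names used here (`pipe`, `transformer`, `auto`) are the ones visible in this diff; the construction is illustrative and assumes the remaining fields have defaults:

```python
# Hedged sketch: BlockAdapter is a dataclass, so fields are passed as kwargs.
# With auto=True, auto_block_adapter() (above) locates the transformer blocks;
# __post_init__ may patch Flux-style models via maybe_apply_patch().
from cache_dit.cache_factory import BlockAdapter

adapter = BlockAdapter(
    pipe=pipe,                     # an already-loaded DiffusionPipeline
    transformer=pipe.transformer,  # the DiT module to scan
    auto=True,                     # defer block discovery to auto_block_adapter
)
```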
@@ -66,8 +83,10 @@ class BlockAdapter:
             transformer=transformer,
             allow_prefixes=adapter.allow_prefixes,
             allow_suffixes=adapter.allow_suffixes,
+            check_prefixes=adapter.check_prefixes,
             check_suffixes=adapter.check_suffixes,
             blocks_policy=adapter.blocks_policy,
+            forward_pattern=forward_pattern,
         )
 
         return BlockAdapter(
@@ -87,6 +106,8 @@ class BlockAdapter:
             and isinstance(adapter.blocks, torch.nn.ModuleList)
         ):
             return True
+
+        logger.warning("Check block adapter failed!")
         return False
 
     @staticmethod
@@ -101,24 +122,30 @@ class BlockAdapter:
         allow_suffixes: List[str] = [
             "TransformerBlock",
         ],
+        check_prefixes: bool = True,
         check_suffixes: bool = False,
         **kwargs,
     ) -> Tuple[torch.nn.ModuleList, str]:
+        # Check prefixes
+        if check_prefixes:
+            blocks_names = []
+            for attr_name in dir(transformer):
+                for prefix in allow_prefixes:
+                    if attr_name.startswith(prefix):
+                        blocks_names.append(attr_name)
+        else:
+            blocks_names = dir(transformer)
 
-        blocks_names = []
-        for attr_name in dir(transformer):
-            for prefix in allow_prefixes:
-                if attr_name.startswith(prefix):
-                    blocks_names.append(attr_name)
-
-        # Type check
+        # Check ModuleList
         valid_names = []
         valid_count = []
+        forward_pattern = kwargs.get("forward_pattern", None)
         for blocks_name in blocks_names:
             if blocks := getattr(transformer, blocks_name, None):
                 if isinstance(blocks, torch.nn.ModuleList):
                     block = blocks[0]
                     block_cls_name = block.__class__.__name__
+                    # Check suffixes
                     if isinstance(block, torch.nn.Module) and (
                         any(
                             (
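The attribute scan above is plain `dir()` reflection. A standalone illustration of the same pattern on a toy module (not cache-dit code):

```python
# Toy illustration of the prefix scan that find_blocks() now gates behind
# check_prefixes: collect ModuleList attributes whose names match a prefix.
import torch

class ToyTransformer(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.transformer_blocks = torch.nn.ModuleList([torch.nn.Linear(8, 8)])
        self.single_transformer_blocks = torch.nn.ModuleList([torch.nn.Linear(8, 8)])

model = ToyTransformer()
allow_prefixes = ["transformer", "single_transformer"]
blocks_names = [
    name for name in dir(model)
    if any(name.startswith(p) for p in allow_prefixes)
    and isinstance(getattr(model, name), torch.nn.ModuleList)
]
print(blocks_names)  # ['single_transformer_blocks', 'transformer_blocks']
```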
@@ -128,8 +155,18 @@ class BlockAdapter:
                         )
                         or (not check_suffixes)
                     ):
-                        valid_names.append(blocks_name)
-                        valid_count.append(len(blocks))
+                        # May check forward pattern
+                        if forward_pattern is not None:
+                            if BlockAdapter.match_blocks_pattern(
+                                blocks,
+                                forward_pattern,
+                                logging=False,
+                            ):
+                                valid_names.append(blocks_name)
+                                valid_count.append(len(blocks))
+                        else:
+                            valid_names.append(blocks_name)
+                            valid_count.append(len(blocks))
 
         if not valid_names:
             raise ValueError(
@@ -139,6 +176,7 @@ class BlockAdapter:
         final_name = valid_names[0]
         final_count = valid_count[0]
         block_policy = kwargs.get("blocks_policy", "max")
+
         for blocks_name, count in zip(valid_names, valid_count):
             blocks = getattr(transformer, blocks_name)
             logger.info(
@@ -165,6 +203,67 @@ class BlockAdapter:
 
         return final_blocks, final_name
 
+    @staticmethod
+    def match_block_pattern(
+        block: torch.nn.Module,
+        forward_pattern: ForwardPattern,
+    ) -> bool:
+        assert (
+            forward_pattern.Supported
+            and forward_pattern in ForwardPattern.supported_patterns()
+        ), f"Pattern {forward_pattern} is not support now!"
+
+        forward_parameters = set(
+            inspect.signature(block.forward).parameters.keys()
+        )
+        num_outputs = str(
+            inspect.signature(block.forward).return_annotation
+        ).count("torch.Tensor")
+
+        in_matched = True
+        out_matched = True
+        if num_outputs > 0 and len(forward_pattern.Out) != num_outputs:
+            # output pattern not match
+            out_matched = False
+
+        for required_param in forward_pattern.In:
+            if required_param not in forward_parameters:
+                in_matched = False
+
+        return in_matched and out_matched
+
+    @staticmethod
+    def match_blocks_pattern(
+        transformer_blocks: torch.nn.ModuleList,
+        forward_pattern: ForwardPattern,
+        logging: bool = True,
+    ) -> bool:
+        assert (
+            forward_pattern.Supported
+            and forward_pattern in ForwardPattern.supported_patterns()
+        ), f"Pattern {forward_pattern} is not support now!"
+
+        assert isinstance(transformer_blocks, torch.nn.ModuleList)
+
+        pattern_matched_states = []
+        for block in transformer_blocks:
+            pattern_matched_states.append(
+                BlockAdapter.match_block_pattern(
+                    block,
+                    forward_pattern,
+                )
+            )
+
+        pattern_matched = all(pattern_matched_states)  # all block match
+        if pattern_matched and logging:
+            block_cls_name = transformer_blocks[0].__class__.__name__
+            logger.info(
+                f"Match Block Forward Pattern: {block_cls_name}, {forward_pattern}"
+                f"\nIN:{forward_pattern.In}, OUT:{forward_pattern.Out})"
+            )
+
+        return pattern_matched
+
 
 @dataclasses.dataclass
 class UnifiedCacheParams:
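The matching relies on `inspect.signature` only. A self-contained illustration of the In/Out check on a toy block (the `required_in`/`expected_out` values stand in for a `ForwardPattern`'s `In` and `Out` lists):

```python
# Standalone illustration of the signature test used by match_block_pattern:
# a block "matches" when its forward() accepts every required input name and
# its return annotation mentions the expected number of torch.Tensor outputs.
import inspect
from typing import Tuple
import torch

class ToyBlock(torch.nn.Module):
    def forward(
        self, hidden_states: torch.Tensor, encoder_hidden_states: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        return hidden_states, encoder_hidden_states

required_in = ("hidden_states", "encoder_hidden_states")  # like pattern.In
expected_out = 2                                          # like len(pattern.Out)

sig = inspect.signature(ToyBlock.forward)
params = set(sig.parameters.keys())
num_outputs = str(sig.return_annotation).count("torch.Tensor")

in_matched = all(p in params for p in required_in)
out_matched = num_outputs == 0 or num_outputs == expected_out
print(in_matched and out_matched)  # True
```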
@@ -463,19 +562,42 @@ class UnifiedCacheAdapter:
     ) -> DiffusionPipeline:
 
         if block_adapter.auto:
-            block_adapter = BlockAdapter.auto_block_adapter(block_adapter)
+            block_adapter = BlockAdapter.auto_block_adapter(
+                block_adapter,
+                forward_pattern,
+            )
 
         if BlockAdapter.check_block_adapter(block_adapter):
-            assert isinstance(block_adapter.blocks, torch.nn.ModuleList)
             # Apply cache on pipeline: wrap cache context
-            cls.create_context(block_adapter.pipe, **cache_context_kwargs)
+            cls.create_context(
+                block_adapter.pipe,
+                **cache_context_kwargs,
+            )
             # Apply cache on transformer: mock cached transformer blocks
             cls.mock_blocks(
                 block_adapter,
                 forward_pattern=forward_pattern,
             )
+            cls.patch_params(
+                block_adapter,
+                forward_pattern=forward_pattern,
+                **cache_context_kwargs,
+            )
         return block_adapter.pipe
 
+    @classmethod
+    def patch_params(
+        cls,
+        block_adapter: BlockAdapter,
+        forward_pattern: ForwardPattern = None,
+        **cache_context_kwargs,
+    ):
+        block_adapter.transformer._forward_pattern = forward_pattern
+        block_adapter.transformer._cache_context_kwargs = cache_context_kwargs
+        block_adapter.pipe.__class__._cache_context_kwargs = (
+            cache_context_kwargs
+        )
+
     @classmethod
     def has_separate_cfg(
         cls,
@@ -534,7 +656,6 @@ class UnifiedCacheAdapter:
 
         pipe.__class__.__call__ = new_call
         pipe.__class__._is_cached = True
-        pipe.__class__._cache_options = cache_kwargs
         return pipe
 
     @classmethod
@@ -544,28 +665,11 @@ class UnifiedCacheAdapter:
         forward_pattern: ForwardPattern = ForwardPattern.Pattern_0,
     ) -> torch.nn.Module:
 
-        if (
-            block_adapter.transformer is None
-            or block_adapter.blocks_name is None
-            or block_adapter.blocks is None
-        ):
-            assert block_adapter.auto, (
-                "Please manually set `auto` to True, or, "
-                "manually set transformer blocks configuration."
-            )
-
         if getattr(block_adapter.transformer, "_is_cached", False):
             return block_adapter.transformer
 
-        # Firstly, process some specificial cases (TODO: more patches)
-        if block_adapter.transformer.__class__.__name__.startswith("Flux"):
-            block_adapter.transformer = maybe_patch_flux_transformer(
-                block_adapter.transformer,
-                blocks=block_adapter.blocks,
-            )
-
         # Check block forward pattern matching
-        assert cls.match_pattern(
+        assert BlockAdapter.match_blocks_pattern(
             block_adapter.blocks,
             forward_pattern=forward_pattern,
         ), (
@@ -615,46 +719,3 @@ class UnifiedCacheAdapter:
         block_adapter.transformer._is_cached = True
 
         return block_adapter.transformer
-
-    @classmethod
-    def match_pattern(
-        cls,
-        transformer_blocks: torch.nn.ModuleList,
-        forward_pattern: ForwardPattern = ForwardPattern.Pattern_0,
-    ) -> bool:
-        pattern_matched_states = []
-
-        assert (
-            forward_pattern.Supported
-            and forward_pattern in ForwardPattern.supported_patterns()
-        ), f"Pattern {forward_pattern} is not support now!"
-
-        for block in transformer_blocks:
-            forward_parameters = set(
-                inspect.signature(block.forward).parameters.keys()
-            )
-            num_outputs = str(
-                inspect.signature(block.forward).return_annotation
-            ).count("torch.Tensor")
-
-            in_matched = True
-            out_matched = True
-            if num_outputs > 0 and len(forward_pattern.Out) != num_outputs:
-                # output pattern not match
-                out_matched = False
-
-            for required_param in forward_pattern.In:
-                if required_param not in forward_parameters:
-                    in_matched = False
-
-            pattern_matched_states.append(in_matched and out_matched)
-
-        pattern_matched = all(pattern_matched_states)  # all block match
-        if pattern_matched:
-            block_cls_name = transformer_blocks[0].__class__.__name__
-            logger.info(
-                f"Match Block Forward Pattern: {block_cls_name}, {forward_pattern}"
-                f"\nIN:{forward_pattern.In}, OUT:{forward_pattern.Out})"
-            )
-
-        return pattern_matched
cache_dit/cache_factory/cache_context.py
CHANGED

@@ -5,8 +5,8 @@ from collections import defaultdict
 from typing import Any, DefaultDict, Dict, List, Optional, Union, Tuple
 
 import torch
+import torch.distributed as dist
 
-import cache_dit.primitives as primitives
 from cache_dit.cache_factory.taylorseer import TaylorSeer
 from cache_dit.logger import init_logger
 
@@ -47,10 +47,11 @@ class DBCacheContext:
 
     # Other settings
     downsample_factor: int = 1
-    num_inference_steps: int = -1 #
-    warmup_steps: int = 0 # DON'T Cache in warmup steps
+    num_inference_steps: int = -1 # for future use
+    max_warmup_steps: int = 0 # DON'T Cache in warmup steps
     # DON'T Cache if the number of cached steps >= max_cached_steps
     max_cached_steps: int = -1 # for both CFG and non-CFG
+    max_continuous_cached_steps: int = -1 # the max continuous cached steps
 
     # Record the steps that have been cached, both cached and non-cache
     executed_steps: int = 0 # cache + non-cache steps pippeline
@@ -89,10 +90,12 @@ class DBCacheContext:
     residual_diffs: DefaultDict[str, float] = dataclasses.field(
         default_factory=lambda: defaultdict(float),
     )
+    continuous_cached_steps: int = 0
     cfg_cached_steps: List[int] = dataclasses.field(default_factory=list)
     cfg_residual_diffs: DefaultDict[str, float] = dataclasses.field(
         default_factory=lambda: defaultdict(float),
     )
+    cfg_continuous_cached_steps: int = 0
 
     @torch.compiler.disable
     def __post_init__(self):
@@ -108,17 +111,17 @@ class DBCacheContext:
                 "cfg_diff_compute_separate is enabled."
             )
 
-        if "warmup_steps" not in self.taylorseer_kwargs:
-            # If warmup_steps is not set in taylorseer_kwargs,
-            # set the same as warmup_steps for DBCache
-            self.taylorseer_kwargs["warmup_steps"] = (
-                self.warmup_steps if self.warmup_steps > 0 else 1
+        if "max_warmup_steps" not in self.taylorseer_kwargs:
+            # If max_warmup_steps is not set in taylorseer_kwargs,
+            # set the same as max_warmup_steps for DBCache
+            self.taylorseer_kwargs["max_warmup_steps"] = (
+                self.max_warmup_steps if self.max_warmup_steps > 0 else 1
             )
 
         # Only set n_derivatives as 2 or 3, which is enough for most cases.
         if "n_derivatives" not in self.taylorseer_kwargs:
             self.taylorseer_kwargs["n_derivatives"] = max(
-                2, min(3, self.taylorseer_kwargs["warmup_steps"])
+                2, min(3, self.taylorseer_kwargs["max_warmup_steps"])
             )
 
         if self.enable_taylorseer:
@@ -268,10 +271,31 @@ class DBCacheContext:
 
     @torch.compiler.disable
     def add_cached_step(self):
+        curr_cached_step = self.get_current_step()
         if not self.is_separate_cfg_step():
-            self.cached_steps.append(self.get_current_step())
+            if self.cached_steps:
+                prev_cached_step = self.cached_steps[-1]
+                if curr_cached_step - prev_cached_step == 1:
+                    if self.continuous_cached_steps == 0:
+                        self.continuous_cached_steps += 2
+                    else:
+                        self.continuous_cached_steps += 1
+            else:
+                self.continuous_cached_steps += 1
+
+            self.cached_steps.append(curr_cached_step)
         else:
-            self.cfg_cached_steps.append(self.get_current_step())
+            if self.cfg_cached_steps:
+                prev_cfg_cached_step = self.cfg_cached_steps[-1]
+                if curr_cached_step - prev_cfg_cached_step == 1:
+                    if self.cfg_continuous_cached_steps == 0:
+                        self.cfg_continuous_cached_steps += 2
+                    else:
+                        self.cfg_continuous_cached_steps += 1
+            else:
+                self.cfg_continuous_cached_steps += 1
+
+            self.cfg_cached_steps.append(curr_cached_step)
 
     @torch.compiler.disable
     def get_cached_steps(self):
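A quick trace of the new bookkeeping, mirroring the logic above as reconstructed (illustrative only): caching at steps 3, 4, 5, then a gap, then step 7.

```python
# Mirrors add_cached_step()'s counter: a consecutive pair bumps the counter by
# 2 when it was 0 (counting both steps of the pair), by 1 afterwards; the very
# first cached step also counts as 1. A non-consecutive step leaves it as-is.
cached_steps, continuous = [], 0
for step in (3, 4, 5, 7):
    if cached_steps:
        if step - cached_steps[-1] == 1:
            continuous += 2 if continuous == 0 else 1
    else:
        continuous += 1
    cached_steps.append(step)
print(cached_steps, continuous)  # [3, 4, 5, 7] 3
```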
@@ -301,7 +325,34 @@ class DBCacheContext:
 
     @torch.compiler.disable
     def is_in_warmup(self):
-        return self.get_current_step() < self.warmup_steps
+        return self.get_current_step() < self.max_warmup_steps
+
+
+# TODO: Support context manager for different cache_context
+
+
+def create_cache_context(*args, **kwargs):
+    return DBCacheContext(*args, **kwargs)
+
+
+def get_current_cache_context():
+    return _current_cache_context
+
+
+def set_current_cache_context(cache_context=None):
+    global _current_cache_context
+    _current_cache_context = cache_context
+
+
+@contextlib.contextmanager
+def cache_context(cache_context):
+    global _current_cache_context
+    old_cache_context = _current_cache_context
+    _current_cache_context = cache_context
+    try:
+        yield
+    finally:
+        _current_cache_context = old_cache_context
 
 
 @torch.compiler.disable
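These helpers only moved earlier in the module (their old definitions are deleted below at `@@ -612,19` and `@@ -671,17`); behavior is unchanged. A hedged usage sketch, with the `DBCacheContext` kwargs taken from fields visible in this diff:

```python
# Sketch: install a context for a scope, then restore the previous one.
import cache_dit.cache_factory.cache_context as cc

ctx = cc.create_cache_context(max_warmup_steps=8, max_continuous_cached_steps=4)
with cc.cache_context(ctx):
    assert cc.get_current_cache_context() is ctx
assert cc.get_current_cache_context() is None  # previous (None) restored
```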
@@ -396,6 +447,27 @@ def get_max_cached_steps():
     return cache_context.max_cached_steps
 
 
+@torch.compiler.disable
+def get_max_continuous_cached_steps():
+    cache_context = get_current_cache_context()
+    assert cache_context is not None, "cache_context must be set before"
+    return cache_context.max_continuous_cached_steps
+
+
+@torch.compiler.disable
+def get_continuous_cached_steps():
+    cache_context = get_current_cache_context()
+    assert cache_context is not None, "cache_context must be set before"
+    return cache_context.continuous_cached_steps
+
+
+@torch.compiler.disable
+def get_cfg_continuous_cached_steps():
+    cache_context = get_current_cache_context()
+    assert cache_context is not None, "cache_context must be set before"
+    return cache_context.cfg_continuous_cached_steps
+
+
 @torch.compiler.disable
 def add_cached_step():
     cache_context = get_current_cache_context()
@@ -612,19 +684,6 @@ def cfg_diff_compute_separate():
 _current_cache_context: DBCacheContext = None
 
 
-def create_cache_context(*args, **kwargs):
-    return DBCacheContext(*args, **kwargs)
-
-
-def get_current_cache_context():
-    return _current_cache_context
-
-
-def set_current_cache_context(cache_context=None):
-    global _current_cache_context
-    _current_cache_context = cache_context
-
-
 def collect_cache_kwargs(default_attrs: dict, **kwargs):
     # NOTE: This API will split kwargs into cache_kwargs and other_kwargs
     # default_attrs: specific settings for different pipelines
@@ -671,17 +730,6 @@ def collect_cache_kwargs(default_attrs: dict, **kwargs):
     return cache_kwargs, kwargs
 
 
-@contextlib.contextmanager
-def cache_context(cache_context):
-    global _current_cache_context
-    old_cache_context = _current_cache_context
-    _current_cache_context = cache_context
-    try:
-        yield
-    finally:
-        _current_cache_context = old_cache_context
-
-
 @torch.compiler.disable
 def are_two_tensors_similar(
     t1: torch.Tensor, # prev residual R(t-1,n) = H(t-1,n) - H(t-1,0)
@@ -744,8 +792,8 @@
     mean_t1 = t1.abs().mean()
 
     if parallelized:
-
-
+        dist.all_reduce(mean_diff, op=dist.ReduceOp.AVG)
+        dist.all_reduce(mean_t1, op=dist.ReduceOp.AVG)
 
     # D = (t1 - t2) / t1 = 1 - (t2 / t1), if D = 0, then t1 = t2.
     # Futher, if we assume that (H(t, 0) - H(t-1,0)) ~ 0, then,
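The rewritten branch calls `torch.distributed` directly instead of the removed `cache_dit.primitives` helpers (the exact removed lines did not survive extraction above). A minimal standalone sketch of the same pattern; it assumes a process group is already initialized (e.g. via `torchrun`) and notes that `ReduceOp.AVG` requires the NCCL backend:

```python
import torch
import torch.distributed as dist

def reduce_mean_stats(mean_diff: torch.Tensor, mean_t1: torch.Tensor):
    # Average the per-rank scalars in place so every rank computes the same
    # relative residual diff and therefore makes the same caching decision.
    dist.all_reduce(mean_diff, op=dist.ReduceOp.AVG)
    dist.all_reduce(mean_t1, op=dist.ReduceOp.AVG)
    return mean_diff, mean_t1
```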
@@ -1020,6 +1068,7 @@
     if is_in_warmup():
         return False
 
+    # max cached steps
     max_cached_steps = get_max_cached_steps()
     if not is_separate_cfg_step():
         cached_steps = get_cached_steps()
@@ -1030,10 +1079,34 @@
         if logger.isEnabledFor(logging.DEBUG):
             logger.debug(
                 f"{prefix}, max_cached_steps reached: {max_cached_steps}, "
-                "
+                "can not use cache."
             )
         return False
 
+    # max continuous cached steps
+    max_continuous_cached_steps = get_max_continuous_cached_steps()
+    if not is_separate_cfg_step():
+        continuous_cached_steps = get_continuous_cached_steps()
+    else:
+        continuous_cached_steps = get_cfg_continuous_cached_steps()
+
+    if max_continuous_cached_steps >= 0 and (
+        continuous_cached_steps >= max_continuous_cached_steps
+    ):
+        if logger.isEnabledFor(logging.DEBUG):
+            logger.debug(
+                f"{prefix}, max_continuous_cached_steps "
+                f"reached: {max_continuous_cached_steps}, "
+                "can not use cache."
+            )
+        # reset continuous cached steps stats
+        cache_context = get_current_cache_context()
+        if not is_separate_cfg_step():
+            cache_context.continuous_cached_steps = 0
+        else:
+            cache_context.cfg_continuous_cached_steps = 0
+        return False
+
     if threshold is None or threshold <= 0.0:
         threshold = get_residual_diff_threshold()
     if threshold <= 0.0:
cache_dit/cache_factory/cache_interface.py
CHANGED

@@ -16,13 +16,13 @@ def enable_cache(
     # Cache context kwargs
     Fn_compute_blocks: int = 8,
     Bn_compute_blocks: int = 0,
-    warmup_steps: int = 8,
+    max_warmup_steps: int = 8,
     max_cached_steps: int = -1,
+    max_continuous_cached_steps: int = -1,
     residual_diff_threshold: float = 0.08,
     # Cache CFG or not
     do_separate_cfg: bool = False,
     cfg_compute_first: bool = False,
-    cfg_diff_compute_separate: bool = False,
+    cfg_diff_compute_separate: bool = True,
     # Hybird TaylorSeer
     enable_taylorseer: bool = False,
     enable_encoder_taylorseer: bool = False,
@@ -54,12 +55,15 @@
         Further fuses approximate information in the **last n** Transformer blocks to enhance
         prediction accuracy. These blocks act as an auto-scaler for approximate hidden states
         that use residual cache.
-    warmup_steps (`int`, *required*, defaults to 8):
+    max_warmup_steps (`int`, *required*, defaults to 8):
         DBCache does not apply the caching strategy when the number of running steps is less than
         or equal to this value, ensuring the model sufficiently learns basic features during warmup.
     max_cached_steps (`int`, *required*, defaults to -1):
         DBCache disables the caching strategy when the previous cached steps exceed this value to
         prevent precision degradation.
+    max_continuous_cached_steps (`int`, *required*, defaults to -1):
+        DBCache disables the caching strategy when the previous continous cached steps exceed this value to
+        prevent precision degradation.
     residual_diff_threshold (`float`, *required*, defaults to 0.08):
         he value of residual diff threshold, a higher value leads to faster performance at the
         cost of lower precision.
@@ -106,8 +110,11 @@ def enable_cache(
     cache_context_kwargs["cache_type"] = CacheType.DBCache
     cache_context_kwargs["Fn_compute_blocks"] = Fn_compute_blocks
     cache_context_kwargs["Bn_compute_blocks"] = Bn_compute_blocks
-    cache_context_kwargs["warmup_steps"] = warmup_steps
+    cache_context_kwargs["max_warmup_steps"] = max_warmup_steps
     cache_context_kwargs["max_cached_steps"] = max_cached_steps
+    cache_context_kwargs["max_continuous_cached_steps"] = (
+        max_continuous_cached_steps
+    )
     cache_context_kwargs["residual_diff_threshold"] = residual_diff_threshold
     cache_context_kwargs["do_separate_cfg"] = do_separate_cfg
     cache_context_kwargs["cfg_compute_first"] = cfg_compute_first
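Putting the renamed and new knobs together, a usage sketch (assuming `enable_cache` is re-exported at the package top level like the other interfaces; the model choice is illustrative):

```python
import cache_dit
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev")
cache_dit.enable_cache(
    pipe,
    max_warmup_steps=8,             # renamed from warmup_steps
    max_cached_steps=-1,            # unlimited total cached steps
    max_continuous_cached_steps=4,  # new: cap on consecutive cache hits
    residual_diff_threshold=0.08,
)
```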
cache_dit/cache_factory/taylorseer.py
CHANGED

@@ -6,13 +6,13 @@ class TaylorSeer:
     def __init__(
         self,
         n_derivatives=2,
-        warmup_steps=1,
+        max_warmup_steps=1,
         skip_interval_steps=1,
         compute_step_map=None,
     ):
         self.n_derivatives = n_derivatives
         self.ORDER = n_derivatives + 1
-        self.warmup_steps = warmup_steps
+        self.max_warmup_steps = max_warmup_steps
         self.skip_interval_steps = skip_interval_steps
         self.compute_step_map = compute_step_map
         self.reset_cache()
@@ -32,8 +32,9 @@ class TaylorSeer:
         if self.compute_step_map is not None:
             return self.compute_step_map[step]
         if (
-            step < self.warmup_steps
-            or (step - self.warmup_steps + 1) % self.skip_interval_steps == 0
+            step < self.max_warmup_steps
+            or (step - self.max_warmup_steps + 1) % self.skip_interval_steps
+            == 0
         ):
             return True
         return False
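A quick check of the updated schedule logic: with `max_warmup_steps=3` and `skip_interval_steps=2`, full computes happen at steps 0-2 (warmup) and then every other step.

```python
# Illustrative re-implementation of the condition above (not the class itself).
def should_compute_full(step, max_warmup_steps=3, skip_interval_steps=2):
    return (
        step < max_warmup_steps
        or (step - max_warmup_steps + 1) % skip_interval_steps == 0
    )

print([s for s in range(10) if should_compute_full(s)])  # [0, 1, 2, 4, 6, 8]
```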
cache_dit/cache_factory/utils.py
CHANGED
cache_dit/compile/utils.py
CHANGED
@@ -24,7 +24,7 @@ def epilogue_prologue_fusion_enabled(**kwargs) -> bool:
 
 
 def set_compile_configs(
-    descent_tuning: bool = True,
+    descent_tuning: bool = False,
     cuda_graphs: bool = False,
     force_disable_compile_caches: bool = False,
     use_fast_math: bool = False,
cache_dit/quantize/__init__.py
CHANGED

@@ -0,0 +1 @@
+from cache_dit.quantize.quantize_interface import quantize