cache-dit 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
Potentially problematic release: this version of cache-dit has been flagged by the registry as possibly problematic.
- cache_dit/__init__.py +1 -0
- cache_dit/_version.py +2 -2
- cache_dit/cache_factory/__init__.py +3 -6
- cache_dit/cache_factory/block_adapters/block_adapters.py +21 -64
- cache_dit/cache_factory/cache_adapters/__init__.py +0 -1
- cache_dit/cache_factory/cache_adapters/cache_adapter.py +82 -21
- cache_dit/cache_factory/cache_blocks/__init__.py +4 -0
- cache_dit/cache_factory/cache_blocks/offload_utils.py +115 -0
- cache_dit/cache_factory/cache_blocks/pattern_base.py +3 -0
- cache_dit/cache_factory/cache_contexts/__init__.py +10 -8
- cache_dit/cache_factory/cache_contexts/cache_context.py +186 -117
- cache_dit/cache_factory/cache_contexts/cache_manager.py +63 -131
- cache_dit/cache_factory/cache_contexts/calibrators/__init__.py +132 -0
- cache_dit/cache_factory/cache_contexts/{v2/calibrators → calibrators}/foca.py +1 -1
- cache_dit/cache_factory/cache_contexts/{v2/calibrators → calibrators}/taylorseer.py +7 -2
- cache_dit/cache_factory/cache_interface.py +128 -111
- cache_dit/cache_factory/params_modifier.py +87 -0
- cache_dit/metrics/__init__.py +3 -1
- cache_dit/utils.py +12 -21
- {cache_dit-0.3.1.dist-info → cache_dit-0.3.3.dist-info}/METADATA +200 -434
- {cache_dit-0.3.1.dist-info → cache_dit-0.3.3.dist-info}/RECORD +27 -31
- cache_dit/cache_factory/cache_adapters/v2/__init__.py +0 -3
- cache_dit/cache_factory/cache_adapters/v2/cache_adapter_v2.py +0 -524
- cache_dit/cache_factory/cache_contexts/taylorseer.py +0 -102
- cache_dit/cache_factory/cache_contexts/v2/__init__.py +0 -13
- cache_dit/cache_factory/cache_contexts/v2/cache_context_v2.py +0 -288
- cache_dit/cache_factory/cache_contexts/v2/cache_manager_v2.py +0 -799
- cache_dit/cache_factory/cache_contexts/v2/calibrators/__init__.py +0 -81
- /cache_dit/cache_factory/cache_blocks/{utils.py → pattern_utils.py} +0 -0
- /cache_dit/cache_factory/cache_contexts/{v2/calibrators → calibrators}/base.py +0 -0
- {cache_dit-0.3.1.dist-info → cache_dit-0.3.3.dist-info}/WHEEL +0 -0
- {cache_dit-0.3.1.dist-info → cache_dit-0.3.3.dist-info}/entry_points.txt +0 -0
- {cache_dit-0.3.1.dist-info → cache_dit-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {cache_dit-0.3.1.dist-info → cache_dit-0.3.3.dist-info}/top_level.txt +0 -0
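The rename entries above ({v2/calibrators → calibrators}) and the removal of the whole cache_contexts/v2 package below indicate that the v2 namespace was folded back into cache_contexts in 0.3.3. A minimal migration sketch in Python, assuming the names that the deleted v2 __init__.py re-exported are now re-exported from the new cache_contexts.calibrators package (the new import path is inferred from the renames, not confirmed by the diff):

    # Old import path, removed in 0.3.3 (see the deleted v2/__init__.py below):
    # from cache_dit.cache_factory.cache_contexts.v2.calibrators import (
    #     TaylorSeerCalibratorConfig,
    # )

    # Inferred new import path after the v2 -> cache_contexts consolidation:
    from cache_dit.cache_factory.cache_contexts.calibrators import (
        TaylorSeerCalibratorConfig,
    )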
@@ -1,102 +0,0 @@
-import math
-import torch
-from typing import List, Dict
-
-
-class TaylorSeer:
-    def __init__(
-        self,
-        n_derivatives=2,
-        max_warmup_steps=1,
-        skip_interval_steps=1,
-        compute_step_map=None,
-    ):
-        self.n_derivatives = n_derivatives
-        self.ORDER = n_derivatives + 1
-        self.max_warmup_steps = max_warmup_steps
-        self.skip_interval_steps = skip_interval_steps
-        self.compute_step_map = compute_step_map
-        self.reset_cache()
-
-    def reset_cache(self):
-        self.state: Dict[str, List[torch.Tensor]] = {
-            "dY_prev": [None] * self.ORDER,
-            "dY_current": [None] * self.ORDER,
-        }
-        self.current_step = -1
-        self.last_non_approximated_step = -1
-
-    def should_compute_full(self, step=None):
-        step = self.current_step if step is None else step
-        if self.compute_step_map is not None:
-            return self.compute_step_map[step]
-        if (
-            step < self.max_warmup_steps
-            or (step - self.max_warmup_steps + 1) % self.skip_interval_steps
-            == 0
-        ):
-            return True
-        return False
-
-    def approximate_derivative(self, Y: torch.Tensor) -> List[torch.Tensor]:
-        # n-th order Taylor expansion:
-        # Y(t) = Y(0) + dY(0)/dt * t + d^2Y(0)/dt^2 * t^2 / 2!
-        # + ... + d^nY(0)/dt^n * t^n / n!
-        # TODO: Custom Triton/CUDA kernel for better performance,
-        # especially for large n_derivatives.
-        dY_current: List[torch.Tensor] = [None] * self.ORDER
-        dY_current[0] = Y
-        window = self.current_step - self.last_non_approximated_step
-        if self.state["dY_prev"][0] is not None:
-            if dY_current[0].shape != self.state["dY_prev"][0].shape:
-                self.reset_cache()
-
-        for i in range(self.n_derivatives):
-            if self.state["dY_prev"][i] is not None and self.current_step > 1:
-                dY_current[i + 1] = (
-                    dY_current[i] - self.state["dY_prev"][i]
-                ) / window
-            else:
-                break
-        return dY_current
-
-    def approximate_value(self) -> torch.Tensor:
-        # TODO: Custom Triton/CUDA kernel for better performance,
-        # especially for large n_derivatives.
-        elapsed = self.current_step - self.last_non_approximated_step
-        output = 0
-        for i, derivative in enumerate(self.state["dY_current"]):
-            if derivative is not None:
-                output += (1 / math.factorial(i)) * derivative * (elapsed**i)
-            else:
-                break
-        return output
-
-    def mark_step_begin(self):
-        self.current_step += 1
-
-    def update(self, Y: torch.Tensor):
-        # Directly call this method will ingnore the warmup
-        # policy and force full computation.
-        # Assume warmup steps is 3, and n_derivatives is 3.
-        # step 0: dY_prev    = [None, None,    None,    None   ]
-        #         dY_current = [Y0,   None,    None,    None   ]
-        # step 1: dY_prev    = [Y0,   None,    None,    None   ]
-        #         dY_current = [Y1,   dY1,     None,    None   ]
-        # step 2: dY_prev    = [Y1,   dY1,     None,    None   ]
-        #         dY_current = [Y2,   dY2/Y1,  dY2/dY1, None   ]
-        # step 3: dY_prev    = [Y2,   dY2/Y1,  dY2/dY1, None   ],
-        #         dY_current = [Y3,   dY3/Y2,  dY3/dY2, dY3/dY1]
-        # step 4: dY_prev    = [Y3,   dY3/Y2,  dY3/dY2, dY3/dY1]
-        #         dY_current = [Y4,   dY4/Y3,  dY4/dY3, dY4/dY2]
-        self.state["dY_prev"] = self.state["dY_current"]
-        self.state["dY_current"] = self.approximate_derivative(Y)
-        self.last_non_approximated_step = self.current_step
-
-    def step(self, Y: torch.Tensor):
-        self.mark_step_begin()
-        if self.should_compute_full():
-            self.update(Y)
-            return Y
-        else:
-            return self.approximate_value()
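For reference, the TaylorSeer removed above exposes a small per-step API: step() advances the internal counter, runs a full update() during warmup or on scheduled compute steps, and otherwise returns a Taylor extrapolation built from cached finite-difference derivatives. A minimal usage sketch (hypothetical tensor shapes and step count; in the real cache path the expensive forward would be skipped on extrapolated steps rather than recomputed as it is here for brevity):

    import torch

    seer = TaylorSeer(n_derivatives=2, max_warmup_steps=3, skip_interval_steps=2)
    for t in range(8):
        # Stand-in for the block output that the cache would normally compute.
        hidden = torch.randn(2, 16, 64)
        # Full compute on warmup/scheduled steps; Taylor extrapolation otherwise.
        out = seer.step(hidden)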
@@ -1,13 +0,0 @@
-from cache_dit.cache_factory.cache_contexts.v2.calibrators import (
-    Calibrator,
-    CalibratorBase,
-    CalibratorConfig,
-    TaylorSeerCalibratorConfig,
-    FoCaCalibratorConfig,
-)
-from cache_dit.cache_factory.cache_contexts.v2.cache_context_v2 import (
-    CachedContextV2,
-)
-from cache_dit.cache_factory.cache_contexts.v2.cache_manager_v2 import (
-    CachedContextManagerV2,
-)
@@ -1,288 +0,0 @@
-import logging
-import dataclasses
-from collections import defaultdict
-from typing import Any, DefaultDict, Dict, List, Optional, Union, Tuple
-
-import torch
-
-from cache_dit.cache_factory.cache_contexts.v2.calibrators import (
-    Calibrator,
-    CalibratorBase,
-    CalibratorConfig,
-)
-from cache_dit.logger import init_logger
-
-logger = init_logger(__name__)
-
-
-@dataclasses.dataclass
-class CachedContextV2:  # Internal CachedContext Impl class
-    name: str = "default"
-    # Dual Block Cache with flexible FnBn configuration.
-    Fn_compute_blocks: int = 1
-    Bn_compute_blocks: int = 0
-    # non compute blocks diff threshold, we don't skip the non
-    # compute blocks if the diff >= threshold
-    non_compute_blocks_diff_threshold: float = 0.08
-    max_Fn_compute_blocks: int = -1
-    max_Bn_compute_blocks: int = -1
-    # L1 hidden states or residual diff threshold for Fn
-    residual_diff_threshold: Union[torch.Tensor, float] = 0.05
-    l1_hidden_states_diff_threshold: float = None
-    important_condition_threshold: float = 0.0
-
-    # Buffer for storing the residuals and other tensors
-    buffers: Dict[str, Any] = dataclasses.field(default_factory=dict)
-    incremental_name_counters: DefaultDict[str, int] = dataclasses.field(
-        default_factory=lambda: defaultdict(int),
-    )
-
-    # Other settings
-    downsample_factor: int = 1
-    num_inference_steps: int = -1  # for future use
-    max_warmup_steps: int = 0  # DON'T Cache in warmup steps
-    # DON'T Cache if the number of cached steps >= max_cached_steps
-    max_cached_steps: int = -1  # for both CFG and non-CFG
-    max_continuous_cached_steps: int = -1  # the max continuous cached steps
-
-    # Record the steps that have been cached, both cached and non-cache
-    executed_steps: int = 0  # cache + non-cache steps pippeline
-    # steps for transformer, for CFG, transformer_executed_steps will
-    # be double of executed_steps.
-    transformer_executed_steps: int = 0
-
-    # Support calibrators in Dual Block Cache: TaylorSeer, FoCa, etc.
-    calibrator_config: Optional[CalibratorConfig] = None
-    calibrator: Optional[CalibratorBase] = None
-    encoder_calibrator: Optional[CalibratorBase] = None
-
-    # Support enable_separate_cfg, such as Wan 2.1,
-    # Qwen-Image. For model that fused CFG and non-CFG into single
-    # forward step, should set enable_separate_cfg as False.
-    # For example: CogVideoX, HunyuanVideo, Mochi.
-    enable_separate_cfg: bool = False
-    # Compute cfg forward first or not, default False, namely,
-    # 0, 2, 4, ..., -> non-CFG step; 1, 3, 5, ... -> CFG step.
-    cfg_compute_first: bool = False
-    # Compute separate diff values for CFG and non-CFG step,
-    # default True. If False, we will use the computed diff from
-    # current non-CFG transformer step for current CFG step.
-    cfg_diff_compute_separate: bool = True
-    cfg_calibrator: Optional[CalibratorBase] = None
-    cfg_encoder_calibrator: Optional[CalibratorBase] = None
-
-    # CFG & non-CFG cached steps
-    cached_steps: List[int] = dataclasses.field(default_factory=list)
-    residual_diffs: DefaultDict[str, float] = dataclasses.field(
-        default_factory=lambda: defaultdict(float),
-    )
-    continuous_cached_steps: int = 0
-    cfg_cached_steps: List[int] = dataclasses.field(default_factory=list)
-    cfg_residual_diffs: DefaultDict[str, float] = dataclasses.field(
-        default_factory=lambda: defaultdict(float),
-    )
-    cfg_continuous_cached_steps: int = 0
-
-    def __post_init__(self):
-        if logger.isEnabledFor(logging.DEBUG):
-            logger.info(f"Created _CacheContextV2: {self.name}")
-        # Some checks for settings
-        if self.enable_separate_cfg:
-            if self.cfg_diff_compute_separate:
-                assert self.cfg_compute_first is False, (
-                    "cfg_compute_first must set as False if "
-                    "cfg_diff_compute_separate is enabled."
-                )
-
-        if self.calibrator_config.enable_calibrator:
-            self.calibrator = Calibrator(self.calibrator_config)
-            if self.enable_separate_cfg:
-                self.cfg_calibrator = Calibrator(self.calibrator_config)
-
-        if self.calibrator_config.enable_encoder_calibrator:
-            self.encoder_calibrator = Calibrator(self.calibrator_config)
-            if self.enable_separate_cfg:
-                self.cfg_encoder_calibrator = Calibrator(self.calibrator_config)
-
-    def enable_calibrator(self):
-        if self.calibrator_config is not None:
-            return self.calibrator_config.enable_calibrator
-        return False
-
-    def enable_encoder_calibrator(self):
-        if self.calibrator_config is not None:
-            return self.calibrator_config.enable_encoder_calibrator
-        return False
-
-    def calibrator_cache_type(self):
-        if self.calibrator_config is not None:
-            return self.calibrator_config.calibrator_cache_type
-        return "residual"
-
-    def get_residual_diff_threshold(self):
-        residual_diff_threshold = self.residual_diff_threshold
-        if self.l1_hidden_states_diff_threshold is not None:
-            # Use the L1 hidden states diff threshold if set
-            residual_diff_threshold = self.l1_hidden_states_diff_threshold
-        if isinstance(residual_diff_threshold, torch.Tensor):
-            residual_diff_threshold = residual_diff_threshold.item()
-        return residual_diff_threshold
-
-    def get_buffer(self, name):
-        return self.buffers.get(name)
-
-    def set_buffer(self, name, buffer):
-        self.buffers[name] = buffer
-
-    def remove_buffer(self, name):
-        if name in self.buffers:
-            del self.buffers[name]
-
-    def clear_buffers(self):
-        self.buffers.clear()
-
-    def mark_step_begin(self):
-        # Always increase transformer executed steps
-        # incr step: prev 0 -> 1; prev 1 -> 2
-        # current step: incr step - 1
-        self.transformer_executed_steps += 1
-        if not self.enable_separate_cfg:
-            self.executed_steps += 1
-        else:
-            # 0,1 -> 0 + 1, 2,3 -> 1 + 1, ...
-            if not self.cfg_compute_first:
-                if not self.is_separate_cfg_step():
-                    # transformer step: 0,2,4,...
-                    self.executed_steps += 1
-            else:
-                if self.is_separate_cfg_step():
-                    # transformer step: 0,2,4,...
-                    self.executed_steps += 1
-
-        # Reset the cached steps and residual diffs at the beginning
-        # of each inference.
-        if self.get_current_transformer_step() == 0:
-            self.cached_steps.clear()
-            self.residual_diffs.clear()
-            self.cfg_cached_steps.clear()
-            self.cfg_residual_diffs.clear()
-            # Reset the calibrators cache at the beginning of each inference.
-            # reset_cache will set the current step to -1 for calibrator,
-            if (
-                self.calibrator_config.enable_calibrator
-                or self.calibrator_config.enable_encoder_calibrator
-            ):
-                calibrator, encoder_calibrator = self.get_calibrators()
-                if calibrator is not None:
-                    calibrator.reset_cache()
-                if encoder_calibrator is not None:
-                    encoder_calibrator.reset_cache()
-                cfg_calibrator, cfg_encoder_calibrator = (
-                    self.get_cfg_calibrators()
-                )
-                if cfg_calibrator is not None:
-                    cfg_calibrator.reset_cache()
-                if cfg_encoder_calibrator is not None:
-                    cfg_encoder_calibrator.reset_cache()
-
-        # mark_step_begin of calibrator must be called after the cache is reset.
-        if (
-            self.calibrator_config.enable_calibrator
-            or self.calibrator_config.enable_encoder_calibrator
-        ):
-            if self.enable_separate_cfg:
-                # Assume non-CFG steps: 0, 2, 4, 6, ...
-                if not self.is_separate_cfg_step():
-                    calibrator, encoder_calibrator = self.get_calibrators()
-                    if calibrator is not None:
-                        calibrator.mark_step_begin()
-                    if encoder_calibrator is not None:
-                        encoder_calibrator.mark_step_begin()
-                else:
-                    cfg_calibrator, cfg_encoder_calibrator = (
-                        self.get_cfg_calibrators()
-                    )
-                    if cfg_calibrator is not None:
-                        cfg_calibrator.mark_step_begin()
-                    if cfg_encoder_calibrator is not None:
-                        cfg_encoder_calibrator.mark_step_begin()
-            else:
-                calibrator, encoder_calibrator = self.get_calibrators()
-                if calibrator is not None:
-                    calibrator.mark_step_begin()
-                if encoder_calibrator is not None:
-                    encoder_calibrator.mark_step_begin()
-
-    def get_calibrators(self) -> Tuple[CalibratorBase, CalibratorBase]:
-        return self.calibrator, self.encoder_calibrator
-
-    def get_cfg_calibrators(self) -> Tuple[CalibratorBase, CalibratorBase]:
-        return self.cfg_calibrator, self.cfg_encoder_calibrator
-
-    def add_residual_diff(self, diff):
-        # step: executed_steps - 1, not transformer_steps - 1
-        step = str(self.get_current_step())
-        # Only add the diff if it is not already recorded for this step
-        if not self.is_separate_cfg_step():
-            if step not in self.residual_diffs:
-                self.residual_diffs[step] = diff
-        else:
-            if step not in self.cfg_residual_diffs:
-                self.cfg_residual_diffs[step] = diff
-
-    def get_residual_diffs(self):
-        return self.residual_diffs.copy()
-
-    def get_cfg_residual_diffs(self):
-        return self.cfg_residual_diffs.copy()
-
-    def add_cached_step(self):
-        curr_cached_step = self.get_current_step()
-        if not self.is_separate_cfg_step():
-            if self.cached_steps:
-                prev_cached_step = self.cached_steps[-1]
-                if curr_cached_step - prev_cached_step == 1:
-                    if self.continuous_cached_steps == 0:
-                        self.continuous_cached_steps += 2
-                    else:
-                        self.continuous_cached_steps += 1
-            else:
-                self.continuous_cached_steps += 1
-
-            self.cached_steps.append(curr_cached_step)
-        else:
-            if self.cfg_cached_steps:
-                prev_cfg_cached_step = self.cfg_cached_steps[-1]
-                if curr_cached_step - prev_cfg_cached_step == 1:
-                    if self.cfg_continuous_cached_steps == 0:
-                        self.cfg_continuous_cached_steps += 2
-                    else:
-                        self.cfg_continuous_cached_steps += 1
-            else:
-                self.cfg_continuous_cached_steps += 1
-
-            self.cfg_cached_steps.append(curr_cached_step)
-
-    def get_cached_steps(self):
-        return self.cached_steps.copy()
-
-    def get_cfg_cached_steps(self):
-        return self.cfg_cached_steps.copy()
-
-    def get_current_step(self):
-        return self.executed_steps - 1
-
-    def get_current_transformer_step(self):
-        return self.transformer_executed_steps - 1
-
-    def is_separate_cfg_step(self):
-        if not self.enable_separate_cfg:
-            return False
-        if self.cfg_compute_first:
-            # CFG steps: 0, 2, 4, 6, ...
-            return self.get_current_transformer_step() % 2 == 0
-        # CFG steps: 1, 3, 5, 7, ...
-        return self.get_current_transformer_step() % 2 != 0
-
-    def is_in_warmup(self):
-        return self.get_current_step() < self.max_warmup_steps