cache-dit 1.0.8__py3-none-any.whl → 1.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- cache_dit/_version.py +2 -2
- cache_dit/cache_factory/__init__.py +1 -0
- cache_dit/cache_factory/block_adapters/__init__.py +37 -0
- cache_dit/cache_factory/block_adapters/block_adapters.py +75 -4
- cache_dit/cache_factory/block_adapters/block_registers.py +44 -17
- cache_dit/cache_factory/cache_adapters/cache_adapter.py +72 -30
- cache_dit/cache_factory/cache_contexts/cache_config.py +5 -3
- cache_dit/cache_factory/cache_contexts/cache_manager.py +125 -4
- cache_dit/cache_factory/cache_contexts/context_manager.py +9 -2
- cache_dit/cache_factory/cache_contexts/prune_manager.py +15 -2
- cache_dit/cache_factory/cache_interface.py +102 -28
- cache_dit/cache_factory/forward_pattern.py +14 -14
- cache_dit/parallelism/backends/native_diffusers/__init__.py +0 -3
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/__init__.py +95 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_plan_registers.py +74 -0
- cache_dit/parallelism/backends/native_diffusers/context_parallelism/cp_planners.py +254 -0
- cache_dit/parallelism/backends/native_diffusers/parallel_difffusers.py +17 -49
- cache_dit/parallelism/backends/native_diffusers/utils.py +11 -0
- cache_dit/parallelism/backends/native_pytorch/__init__.py +3 -0
- cache_dit/parallelism/backends/native_pytorch/parallel_torch.py +62 -0
- cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/__init__.py +48 -0
- cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_flux.py +159 -0
- cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_qwen_image.py +78 -0
- cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_registers.py +58 -0
- cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_wan.py +153 -0
- cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_planners.py +12 -0
- cache_dit/parallelism/parallel_backend.py +2 -0
- cache_dit/parallelism/parallel_config.py +10 -3
- cache_dit/parallelism/parallel_interface.py +14 -5
- cache_dit/quantize/backends/__init__.py +1 -0
- cache_dit/quantize/backends/bitsandbytes/__init__.py +0 -0
- cache_dit/quantize/backends/torchao/__init__.py +1 -0
- cache_dit/quantize/{quantize_ao.py → backends/torchao/quantize_ao.py} +28 -9
- cache_dit/quantize/quantize_backend.py +0 -0
- cache_dit/quantize/quantize_config.py +0 -0
- cache_dit/quantize/quantize_interface.py +3 -16
- cache_dit/utils.py +56 -20
- {cache_dit-1.0.8.dist-info → cache_dit-1.0.10.dist-info}/METADATA +24 -13
- {cache_dit-1.0.8.dist-info → cache_dit-1.0.10.dist-info}/RECORD +45 -29
- /cache_dit/{custom_ops → kernels}/__init__.py +0 -0
- /cache_dit/{custom_ops → kernels}/triton_taylorseer.py +0 -0
- {cache_dit-1.0.8.dist-info → cache_dit-1.0.10.dist-info}/WHEEL +0 -0
- {cache_dit-1.0.8.dist-info → cache_dit-1.0.10.dist-info}/entry_points.txt +0 -0
- {cache_dit-1.0.8.dist-info → cache_dit-1.0.10.dist-info}/licenses/LICENSE +0 -0
- {cache_dit-1.0.8.dist-info → cache_dit-1.0.10.dist-info}/top_level.txt +0 -0
cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/__init__.py
@@ -0,0 +1,48 @@
+try:
+    import einops
+except ImportError:
+    raise ImportError(
+        "Metrics functionality requires the 'parallelism' extra dependencies. "
+        "Install with:\npip install cache-dit[parallelism]"
+    )
+
+import torch
+from typing import Optional
+from diffusers.models.modeling_utils import ModelMixin
+from cache_dit.parallelism.parallel_backend import ParallelismBackend
+from cache_dit.parallelism.parallel_config import ParallelismConfig
+from cache_dit.logger import init_logger
+from .tp_planners import *
+
+logger = init_logger(__name__)
+
+
+def maybe_enable_tensor_parallelism(
+    transformer: torch.nn.Module | ModelMixin,
+    parallelism_config: Optional[ParallelismConfig],
+) -> torch.nn.Module:
+    assert isinstance(transformer, torch.nn.Module), (
+        "transformer must be an instance of torch.nn.Module, "
+        f"but got {type(transformer)}"
+    )
+    assert isinstance(transformer, ModelMixin), (
+        "transformer must be an instance of diffusers' ModelMixin, "
+        f"but got {type(transformer)}"
+    )
+    if parallelism_config is None:
+        return transformer
+
+    assert parallelism_config.backend == ParallelismBackend.NATIVE_PYTORCH, (
+        "parallelism_config.backend must be ParallelismBackend.NATIVE_PYTORCH "
+        f"but got {parallelism_config.backend}"
+    )
+
+    extra_parallel_kwargs = {}
+    if parallelism_config.parallel_kwargs is not None:
+        extra_parallel_kwargs = parallelism_config.parallel_kwargs
+
+    return TensorParallelismPlannerRegister.get_planner(transformer)().apply(
+        transformer=transformer,
+        parallelism_config=parallelism_config,
+        **extra_parallel_kwargs,
+    )
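For orientation, a minimal sketch of how this new entry point might be driven under torchrun. The pipeline choice, process-group setup, and 2-way TP size below are illustrative assumptions on my part, not part of the release:

# Hypothetical driver for the new NATIVE_PYTORCH tensor-parallel path.
# Launch with: torchrun --nproc_per_node=2 run_tp.py
import torch.distributed as dist
from diffusers import FluxPipeline  # example model choice, not mandated by the diff

from cache_dit.parallelism.parallel_backend import ParallelismBackend
from cache_dit.parallelism.parallel_config import ParallelismConfig
from cache_dit.parallelism.backends.native_pytorch.tensor_parallelism import (
    maybe_enable_tensor_parallelism,
)

dist.init_process_group(backend="nccl")

pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev")
config = ParallelismConfig(backend=ParallelismBackend.NATIVE_PYTORCH, tp_size=2)

# Resolves the "Flux" planner via TensorParallelismPlannerRegister and shards the
# transformer's attention/MLP projections across the 2-way TP mesh.
pipe.transformer = maybe_enable_tensor_parallelism(pipe.transformer, config)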
cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_flux.py
@@ -0,0 +1,159 @@
+import torch
+from diffusers.models.transformers.transformer_flux import (
+    FluxSingleTransformerBlock,
+)
+from einops import rearrange
+from torch import nn
+from torch.distributed import DeviceMesh, init_device_mesh
+from torch.distributed._tensor import Replicate
+from torch.distributed.tensor.parallel import (
+    ColwiseParallel,
+    RowwiseParallel,
+    parallelize_module,
+)
+from cache_dit.parallelism.parallel_config import ParallelismConfig
+from .tp_plan_registers import (
+    TensorParallelismPlanner,
+    TensorParallelismPlannerRegister,
+)
+
+from cache_dit.logger import init_logger
+
+logger = init_logger(__name__)
+
+
+@TensorParallelismPlannerRegister.register("Flux")
+class FluxTensorParallelismPlanner(TensorParallelismPlanner):
+    def apply(
+        self,
+        transformer: torch.nn.Module,
+        parallelism_config: ParallelismConfig,
+        **kwargs,
+    ) -> torch.nn.Module:
+        assert (
+            parallelism_config.tp_size is not None
+            and parallelism_config.tp_size > 1
+        ), (
+            "parallel_config.tp_size must be set and greater than 1 for "
+            "tensor parallelism"
+        )
+
+        device_type = torch.accelerator.current_accelerator().type
+        tp_mesh: DeviceMesh = init_device_mesh(
+            device_type=device_type,
+            mesh_shape=[parallelism_config.tp_size],
+        )
+
+        transformer = self.parallelize_transformer(
+            transformer=transformer,
+            tp_mesh=tp_mesh,
+        )
+        # TODO: Parallelize t5 text encoder via `apply_extra`
+        # abstract method and `extra_parallel_kwargs` ?
+
+        return transformer
+
+    def parallelize_t5(
+        self,
+        text_encoder: nn.Module,
+        tp_mesh: DeviceMesh,
+    ):
+        for i, block in enumerate(text_encoder.encoder.block):
+            block.layer[0].SelfAttention.n_heads //= tp_mesh.size()
+            block.layer[0].SelfAttention.inner_dim //= tp_mesh.size()
+            layer_plan = {
+                "layer.0.SelfAttention.q": ColwiseParallel(),
+                "layer.0.SelfAttention.k": ColwiseParallel(),
+                "layer.0.SelfAttention.v": ColwiseParallel(),
+                "layer.0.SelfAttention.o": RowwiseParallel(),
+                "layer.1.DenseReluDense.wi_0": ColwiseParallel(),
+                "layer.1.DenseReluDense.wi_1": ColwiseParallel(),
+                "layer.1.DenseReluDense.wo": RowwiseParallel(),
+            }
+            if i == 0:
+                layer_plan["layer.0.SelfAttention.relative_attention_bias"] = (
+                    ColwiseParallel()
+                )
+            parallelize_module(
+                module=block,
+                device_mesh=tp_mesh,
+                parallelize_plan=layer_plan,
+            )
+
+        return text_encoder
+
+    def parallelize_transformer(
+        self,
+        transformer: nn.Module,
+        tp_mesh: DeviceMesh,
+    ):
+        for _, block in transformer.transformer_blocks.named_children():
+            block.attn.heads //= tp_mesh.size()
+            layer_plan = {
+                "attn.to_q": ColwiseParallel(),
+                "attn.to_k": ColwiseParallel(),
+                "attn.to_v": ColwiseParallel(),
+                "attn.to_out.0": RowwiseParallel(),
+                "norm1.linear": ColwiseParallel(output_layouts=Replicate()),
+                "ff.net.0.proj": ColwiseParallel(),
+                "ff.net.2": RowwiseParallel(),
+                "attn.add_q_proj": ColwiseParallel(),
+                "attn.add_k_proj": ColwiseParallel(),
+                "attn.add_v_proj": ColwiseParallel(),
+                "attn.to_add_out": RowwiseParallel(),
+                "norm1_context.linear": ColwiseParallel(
+                    output_layouts=Replicate()
+                ),
+                "ff_context.net.0.proj": ColwiseParallel(),
+                "ff_context.net.2": RowwiseParallel(),
+            }
+            parallelize_module(
+                module=block,
+                device_mesh=tp_mesh,
+                parallelize_plan=layer_plan,
+            )
+
+        # NOTE: special handling for FluxSingleTransformerBlock, we have to
+        # rearrange the proj_out weight because it contains both out and down
+        # projection weights in a single matrix.
+        def rearrange_proj_out_weight(
+            single_block: FluxSingleTransformerBlock, tp_group_size
+        ):
+            # rowwise
+            hidden_dim = 3072
+            requires_grad = single_block.proj_out.weight.requires_grad
+            linear2_weight_data = (
+                single_block.proj_out.weight.data.T.detach().clone()
+            )
+            out_weight = linear2_weight_data[:hidden_dim, ...]
+            out_weight = rearrange(
+                out_weight, "(G D) C -> G D C", G=tp_group_size
+            )
+            down_weight = linear2_weight_data.data[hidden_dim:, ...]
+            down_weight = rearrange(
+                down_weight, "(G D) C -> G D C", G=tp_group_size
+            )
+            new_linear2_weight = torch.cat([out_weight, down_weight], dim=1)
+            new_linear2_weight = rearrange(
+                new_linear2_weight, "G D C -> (G D) C"
+            )
+            single_block.proj_out.weight.data.copy_(new_linear2_weight.T)
+            single_block.proj_out.weight.requires_grad_(requires_grad)
+
+        for _, block in transformer.single_transformer_blocks.named_children():
+            rearrange_proj_out_weight(block, tp_mesh.size())
+            block.attn.heads //= tp_mesh.size()
+            layer_plan = {
+                "attn.to_q": ColwiseParallel(),
+                "attn.to_k": ColwiseParallel(),
+                "attn.to_v": ColwiseParallel(),
+                "proj_mlp": ColwiseParallel(),
+                "proj_out": RowwiseParallel(),
+                "norm.linear": ColwiseParallel(output_layouts=Replicate()),
+            }
+            parallelize_module(
+                module=block,
+                device_mesh=tp_mesh,
+                parallelize_plan=layer_plan,
+            )
+        return transformer
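The proj_out handling above exists because FluxSingleTransformerBlock fuses the attention output projection and the MLP down projection into one weight matrix, while RowwiseParallel shards that matrix along its input rows. The toy sketch below (my own illustration, with tiny made-up sizes in place of Flux's hidden_dim of 3072) shows how the rearrange regroups rows so that each rank's row shard keeps the matching halves of both projections together:

import torch
from einops import rearrange

# Toy stand-in for proj_out.weight.T: rows 0..hidden-1 come from the attention
# out projection, the remaining rows from the MLP down projection.
hidden, mlp, cols, G = 4, 8, 4, 2  # hypothetical sizes, G = TP group size
w = torch.arange((hidden + mlp) * cols, dtype=torch.float32).reshape(hidden + mlp, cols)

out_w = rearrange(w[:hidden], "(G D) C -> G D C", G=G)
down_w = rearrange(w[hidden:], "(G D) C -> G D C", G=G)
regrouped = rearrange(torch.cat([out_w, down_w], dim=1), "G D C -> (G D) C")

# After regrouping, rank 0's row shard holds the first half of the out projection
# followed by the first half of the down projection, rank 1 the second halves.
rank0 = regrouped[: (hidden + mlp) // G]
print(rank0.shape)  # torch.Size([6, 4])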
cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_qwen_image.py
@@ -0,0 +1,78 @@
+import torch
+from torch import nn
+from torch.distributed import DeviceMesh, init_device_mesh
+from torch.distributed._tensor import Replicate
+from torch.distributed.tensor.parallel import (
+    ColwiseParallel,
+    RowwiseParallel,
+    parallelize_module,
+)
+from cache_dit.parallelism.parallel_config import ParallelismConfig
+from .tp_plan_registers import (
+    TensorParallelismPlanner,
+    TensorParallelismPlannerRegister,
+)
+
+from cache_dit.logger import init_logger
+
+logger = init_logger(__name__)
+
+
+@TensorParallelismPlannerRegister.register("QwenImage")
+class QwenImageTensorParallelismPlanner(TensorParallelismPlanner):
+    def apply(
+        self,
+        transformer: torch.nn.Module,
+        parallelism_config: ParallelismConfig,
+        **kwargs,
+    ) -> torch.nn.Module:
+        assert (
+            parallelism_config.tp_size is not None
+            and parallelism_config.tp_size > 1
+        ), (
+            "parallel_config.tp_size must be set and greater than 1 for "
+            "tensor parallelism"
+        )
+
+        device_type = torch.accelerator.current_accelerator().type
+        tp_mesh: DeviceMesh = init_device_mesh(
+            device_type=device_type,
+            mesh_shape=[parallelism_config.tp_size],
+        )
+
+        transformer = self.parallelize_transformer(
+            transformer=transformer,
+            tp_mesh=tp_mesh,
+        )
+
+        return transformer
+
+    def parallelize_transformer(
+        self,
+        transformer: nn.Module,
+        tp_mesh: DeviceMesh,
+    ):
+        for _, block in transformer.transformer_blocks.named_children():
+            block.attn.heads //= tp_mesh.size()
+            layer_plan = {
+                "attn.to_q": ColwiseParallel(),
+                "attn.to_k": ColwiseParallel(),
+                "attn.to_v": ColwiseParallel(),
+                "attn.to_out.0": RowwiseParallel(),
+                "img_mod.1": ColwiseParallel(output_layouts=Replicate()),
+                "img_mlp.net.0.proj": ColwiseParallel(),
+                "img_mlp.net.2": RowwiseParallel(),
+                "attn.add_q_proj": ColwiseParallel(),
+                "attn.add_k_proj": ColwiseParallel(),
+                "attn.add_v_proj": ColwiseParallel(),
+                "attn.to_add_out": RowwiseParallel(),
+                "txt_mod.1": ColwiseParallel(output_layouts=Replicate()),
+                "txt_mlp.net.0.proj": ColwiseParallel(),
+                "txt_mlp.net.2": RowwiseParallel(),
+            }
+            parallelize_module(
+                module=block,
+                device_mesh=tp_mesh,
+                parallelize_plan=layer_plan,
+            )
+        return transformer
cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_registers.py
@@ -0,0 +1,58 @@
+import torch
+import logging
+from abc import abstractmethod
+from typing import Dict
+from cache_dit.parallelism.parallel_config import ParallelismConfig
+from cache_dit.logger import init_logger
+
+logger = init_logger(__name__)
+
+
+class TensorParallelismPlanner:
+    # TODO: add `apply_extra` abstract method for extra
+    # parallelism kwargs handling
+
+    @abstractmethod
+    def apply(
+        self,
+        transformer: torch.nn.Module,
+        parallelism_config: ParallelismConfig,
+        **kwargs,
+    ) -> torch.nn.Module:
+        raise NotImplementedError(
+            "apply method must be implemented by subclasses"
+        )
+
+
+class TensorParallelismPlannerRegister:
+    _tp_planner_registry: Dict[str, TensorParallelismPlanner] = {}
+
+    @classmethod
+    def register(cls, name: str):
+        def decorator(planner_cls: type[TensorParallelismPlanner]):
+            assert (
+                name not in cls._tp_planner_registry
+            ), f"TensorParallelismPlanner with name {name} is already registered."
+            if logger.isEnabledFor(logging.DEBUG):
+                logger.debug(f"Registering TensorParallelismPlanner: {name}")
+            cls._tp_planner_registry[name] = planner_cls
+            return planner_cls
+
+        return decorator
+
+    @classmethod
+    def get_planner(
+        cls, transformer: str | torch.nn.Module
+    ) -> type[TensorParallelismPlanner]:
+        if isinstance(transformer, torch.nn.Module):
+            name = transformer.__class__.__name__
+        else:
+            name = transformer
+        planner_cls = None
+        for planner_name in cls._tp_planner_registry:
+            if name.startswith(planner_name):
+                planner_cls = cls._tp_planner_registry.get(planner_name)
+                break
+        if planner_cls is None:
+            raise ValueError(f"No planner registered under name: {name}")
+        return planner_cls
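get_planner resolves a planner by prefix-matching the transformer's class name against the registered names, so a FluxTransformer2DModel instance resolves to the planner registered as "Flux". A hedged sketch of extending the registry; the MyDiT class below is purely illustrative and not part of the release:

import torch

from cache_dit.parallelism.backends.native_pytorch.tensor_parallelism.tp_plan_registers import (
    TensorParallelismPlanner,
    TensorParallelismPlannerRegister,
)
from cache_dit.parallelism.parallel_config import ParallelismConfig


@TensorParallelismPlannerRegister.register("MyDiT")
class MyDiTTensorParallelismPlanner(TensorParallelismPlanner):
    def apply(
        self,
        transformer: torch.nn.Module,
        parallelism_config: ParallelismConfig,
        **kwargs,
    ) -> torch.nn.Module:
        # A real planner would shard the blocks here with parallelize_module(...).
        return transformer


# Lookup is by class-name prefix: a module class named "MyDiTTransformer2DModel"
# resolves to the planner registered under "MyDiT".
planner_cls = TensorParallelismPlannerRegister.get_planner("MyDiTTransformer2DModel")
assert planner_cls is MyDiTTensorParallelismPlanner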
cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_plan_wan.py
@@ -0,0 +1,153 @@
+from typing import Optional, Union
+
+import torch
+from torch import nn
+from torch.distributed import DeviceMesh, init_device_mesh
+from torch.distributed.tensor.parallel import (
+    ColwiseParallel,
+    RowwiseParallel,
+    parallelize_module,
+)
+
+from cache_dit.logger import init_logger
+from cache_dit.parallelism.parallel_config import ParallelismConfig
+
+from .tp_plan_registers import (
+    TensorParallelismPlanner,
+    TensorParallelismPlannerRegister,
+)
+
+logger = init_logger(__name__)
+
+
+class DistributedRMSNorm(nn.Module):
+    def __init__(
+        self,
+        tp_mesh: DeviceMesh,
+        normalized_shape: Union[int, list[int], torch.Size],
+        eps: Optional[float],
+        elementwise_affine: bool,
+        weight: torch.nn.parameter.Parameter,
+    ):
+        super().__init__()
+        self.tp_mesh = tp_mesh
+        self.elementwise_affine = elementwise_affine
+        self.normalized_shape = normalized_shape
+        self.eps = eps
+        if self.elementwise_affine:
+            assert weight is not None
+            self.weight = weight
+
+    @classmethod
+    def from_rmsnorm(cls, tp_mesh: DeviceMesh, rmsnorm: nn.RMSNorm):
+        if not isinstance(rmsnorm, int):
+            assert len(rmsnorm.normalized_shape) == 1
+
+        if rmsnorm.weight is not None:
+            tp_size = tp_mesh.get_group().size()
+            tp_rank = tp_mesh.get_group().rank()
+            weight = rmsnorm.weight.chunk(tp_size, dim=0)[tp_rank]
+        else:
+            weight = None
+        norm = cls(
+            tp_mesh=tp_mesh,
+            normalized_shape=rmsnorm.normalized_shape,
+            eps=rmsnorm.eps,
+            elementwise_affine=rmsnorm.elementwise_affine,
+            weight=weight,
+        )
+        return norm
+
+    def forward(self, x):
+        if self.elementwise_affine:
+            assert x.shape[-1] == self.weight.shape[0]
+        mean_square = torch.mean(x * x, dim=-1, keepdim=True)
+        torch.distributed.all_reduce(
+            mean_square,
+            op=torch.distributed.ReduceOp.AVG,
+            group=self.tp_mesh.get_group(),
+        )
+        root_mean_square = torch.sqrt(mean_square + self.eps)
+        x_normed = x / root_mean_square
+        if self.elementwise_affine:
+            x_normed = x_normed * self.weight.to(device=x.device)
+        assert x_normed.device.type != "cpu"
+        return x_normed
+
+
+@TensorParallelismPlannerRegister.register("Wan")
+class WanTensorParallelismPlanner(TensorParallelismPlanner):
+    def apply(
+        self,
+        transformer: torch.nn.Module,
+        parallelism_config: ParallelismConfig,
+        **kwargs,
+    ) -> torch.nn.Module:
+        assert (
+            parallelism_config.tp_size is not None
+            and parallelism_config.tp_size > 1
+        ), (
+            "parallel_config.tp_size must be set and greater than 1 for "
+            "tensor parallelism"
+        )
+
+        device_type = torch.accelerator.current_accelerator().type
+        tp_mesh: DeviceMesh = init_device_mesh(
+            device_type=device_type,
+            mesh_shape=[parallelism_config.tp_size],
+        )
+
+        transformer = self.parallelize_transformer(
+            transformer=transformer,
+            tp_mesh=tp_mesh,
+        )
+
+        return transformer
+
+    def parallelize_transformer(
+        self,
+        transformer: nn.Module,
+        tp_mesh: DeviceMesh,
+    ):
+        for _, block in transformer.blocks.named_children():
+            block.attn1.heads //= tp_mesh.size()
+            block.attn2.heads //= tp_mesh.size()
+            layer_plan = {
+                "attn1.to_q": ColwiseParallel(),
+                "attn1.to_k": ColwiseParallel(),
+                "attn1.to_v": ColwiseParallel(),
+                "attn1.to_out.0": RowwiseParallel(),
+                "attn2.to_q": ColwiseParallel(),
+                "attn2.to_k": ColwiseParallel(),
+                "attn2.to_v": ColwiseParallel(),
+                "attn2.to_out.0": RowwiseParallel(),
+                "ffn.net.0.proj": ColwiseParallel(),
+                "ffn.net.2": RowwiseParallel(),
+            }
+            if getattr(block.attn2, "add_k_proj", None):
+                layer_plan["attn2.add_k_proj"] = ColwiseParallel()
+            if getattr(block.attn2, "add_v_proj", None):
+                layer_plan["attn2.add_v_proj"] = ColwiseParallel()
+            parallelize_module(
+                module=block,
+                device_mesh=tp_mesh,
+                parallelize_plan=layer_plan,
+            )
+
+            block.attn1.norm_q = DistributedRMSNorm.from_rmsnorm(
+                tp_mesh, block.attn1.norm_q
+            )
+            block.attn1.norm_k = DistributedRMSNorm.from_rmsnorm(
+                tp_mesh, block.attn1.norm_k
+            )
+            block.attn2.norm_q = DistributedRMSNorm.from_rmsnorm(
+                tp_mesh, block.attn2.norm_q
+            )
+            block.attn2.norm_k = DistributedRMSNorm.from_rmsnorm(
+                tp_mesh, block.attn2.norm_k
+            )
+            if getattr(block.attn2, "norm_added_k", None):
+                block.attn2.norm_added_k = DistributedRMSNorm.from_rmsnorm(
+                    tp_mesh, block.attn2.norm_added_k
+                )
+        return transformer
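DistributedRMSNorm above normalizes query/key features that are sharded across TP ranks: each rank computes the mean square over its local slice of the feature dimension, the all-reduce with ReduceOp.AVG averages those partial statistics across the group, and the result matches the RMS statistic of the unsharded features (for equal-sized shards). A single-process sketch of that identity, my own illustration simulating two shards without torch.distributed:

import torch

torch.manual_seed(0)
x = torch.randn(2, 16)          # full hidden states, feature dim = 16
eps = 1e-6
shards = x.chunk(2, dim=-1)     # what two TP ranks would each see locally

# Reference: RMS statistic over the full feature dimension.
full_rms = torch.sqrt(x.pow(2).mean(dim=-1, keepdim=True) + eps)

# Sharded path: per-rank mean squares, then an "all-reduce AVG" across ranks.
partial = torch.stack([s.pow(2).mean(dim=-1, keepdim=True) for s in shards])
reduced_rms = torch.sqrt(partial.mean(dim=0) + eps)

assert torch.allclose(full_rms, reduced_rms, atol=1e-6)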
cache_dit/parallelism/backends/native_pytorch/tensor_parallelism/tp_planners.py
@@ -0,0 +1,12 @@
+# NOTE: must import all planner classes to register them
+from .tp_plan_registers import TensorParallelismPlannerRegister
+from .tp_plan_flux import FluxTensorParallelismPlanner
+from .tp_plan_qwen_image import QwenImageTensorParallelismPlanner
+from .tp_plan_wan import WanTensorParallelismPlanner
+
+__all__ = [
+    "TensorParallelismPlannerRegister",
+    "FluxTensorParallelismPlanner",
+    "QwenImageTensorParallelismPlanner",
+    "WanTensorParallelismPlanner",
+]
cache_dit/parallelism/parallel_backend.py
@@ -8,6 +8,8 @@ class ParallelismBackend(Enum):
 
     @classmethod
     def is_supported(cls, backend: "ParallelismBackend") -> bool:
+        if backend in [cls.NATIVE_PYTORCH]:
+            return True
         # Now, only Native_Diffuser backend is supported
         if backend in [cls.NATIVE_DIFFUSER]:
             try:
cache_dit/parallelism/parallel_config.py
@@ -23,8 +23,8 @@ class ParallelismConfig:
     tp_size: int = None
     # parallel_kwargs (`dict`, *optional*):
    # Additional kwargs for parallelism backends. For example, for
-    # NATIVE_DIFFUSER backend, it can include `cp_plan` and
-    # arguments for `Context Parallelism`.
+    # NATIVE_DIFFUSER backend, it can include `cp_plan` and
+    # `attention_backend` arguments for `Context Parallelism`.
     parallel_kwargs: Optional[Dict[str, Any]] = dataclasses.field(
         default_factory=dict
     )
@@ -34,7 +34,14 @@ class ParallelismConfig:
             f"Parallel backend {self.backend} is not supported. "
             f"Please make sure the required packages are installed."
         )
-
+
+        if self.tp_size is not None and self.tp_size > 1:
+            assert (
+                self.ulysses_size is None or self.ulysses_size == 1
+            ), "Tensor parallelism plus Ulysses parallelism is not supported right now."
+            assert (
+                self.ring_size is None or self.ring_size == 1
+            ), "Tensor parallelism plus Ring parallelism is not supported right now."
 
     def strify(self, details: bool = False) -> str:
         if details:
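The new check means tensor parallelism cannot be combined with Ulysses or Ring context parallelism in this release. A small sketch of the resulting behavior, assuming the assertions shown run during dataclass post-init and using the field names that appear in the diff:

from cache_dit.parallelism.parallel_backend import ParallelismBackend
from cache_dit.parallelism.parallel_config import ParallelismConfig

# Plain 2-way tensor parallelism on the native PyTorch backend: accepted.
ok = ParallelismConfig(backend=ParallelismBackend.NATIVE_PYTORCH, tp_size=2)

# Combining TP with Ulysses sequence parallelism now fails fast.
try:
    ParallelismConfig(
        backend=ParallelismBackend.NATIVE_PYTORCH, tp_size=2, ulysses_size=2
    )
except AssertionError as err:
    print(err)  # Tensor parallelism plus Ulysses parallelism is not supported right now.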
cache_dit/parallelism/parallel_interface.py
@@ -24,12 +24,17 @@ def enable_parallelism(
     if parallelism_config.backend == ParallelismBackend.NATIVE_DIFFUSER:
         from cache_dit.parallelism.backends.native_diffusers import (
             maybe_enable_parallelism,
-            native_diffusers_parallelism_available,
         )
 
-
-
-
+        transformer = maybe_enable_parallelism(
+            transformer,
+            parallelism_config,
+        )
+    elif parallelism_config.backend == ParallelismBackend.NATIVE_PYTORCH:
+        from cache_dit.parallelism.backends.native_pytorch import (
+            maybe_enable_parallelism,
+        )
+
         transformer = maybe_enable_parallelism(
             transformer,
             parallelism_config,
@@ -40,8 +45,12 @@ def enable_parallelism(
         )
 
     transformer._is_parallelized = True  # type: ignore[attr-defined]
+    # Use `parallelism` not `parallel` to avoid name conflict with diffusers.
     transformer._parallelism_config = parallelism_config  # type: ignore[attr-defined]
-    logger.info(
+    logger.info(
+        f"Enabled parallelism: {parallelism_config.strify(True)}, "
+        f"transformer id:{id(transformer)}"
+    )
     return transformer
 
 
cache_dit/quantize/backends/__init__.py
@@ -0,0 +1 @@
+from .torchao import quantize_ao
cache_dit/quantize/backends/bitsandbytes/__init__.py (file without changes)
cache_dit/quantize/backends/torchao/__init__.py
@@ -0,0 +1 @@
+from .quantize_ao import quantize_ao
cache_dit/quantize/{quantize_ao.py → backends/torchao/quantize_ao.py}
@@ -9,7 +9,7 @@ logger = init_logger(__name__)
 
 def quantize_ao(
     module: torch.nn.Module,
-    quant_type: str = "
+    quant_type: str = "float8_weight_only",
     exclude_layers: List[str] = [
         "embedder",
         "embed",
@@ -24,6 +24,18 @@ def quantize_ao(
     # set `exclude_layers` as `[]` if you don't want this behavior.
     assert isinstance(module, torch.nn.Module)
 
+    alias_map = {
+        "float8": "fp8_w8a8_dq",
+        "float8_weight_only": "fp8_w8a16_wo",
+        "int8": "int8_w8a8_dq",
+        "int8_weight_only": "int8_w8a16_wo",
+        "int4": "int4_w4a8_dq",
+        "int4_w4a4": "int4_w4a4_dq",
+        "int4_weight_only": "int4_w4a16_wo",
+    }
+    if quant_type.lower() in alias_map:
+        quant_type = alias_map[quant_type.lower()]
+
     quant_type = quant_type.lower()
     assert quant_type in (
         "fp8_w8a8_dq",
@@ -183,7 +195,11 @@ def quantize_ao(
         device=kwargs.get("device", None),
     )
 
-
+    maybe_empty_cache()
+
+    alias_map_rev = {v: k for k, v in alias_map.items()}
+    if quant_type in alias_map_rev:
+        quant_type = alias_map_rev[quant_type]
 
     logger.info(
         f"Quantized Module: {module.__class__.__name__:>5}\n"
@@ -199,10 +215,13 @@ def quantize_ao(
     return module
 
 
-def
-
-
-
-
-
-
+def maybe_empty_cache():
+    try:
+        time.sleep(1)
+        gc.collect()
+        torch.cuda.empty_cache()
+        time.sleep(1)
+        gc.collect()
+        torch.cuda.empty_cache()
+    except Exception:
+        pass
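With the alias map added above, user-facing names such as "float8_weight_only" are normalized to the internal torchao scheme names ("fp8_w8a16_wo" and friends) before validation, and weight-only FP8 becomes the default quant_type. A hedged usage sketch; the toy module, CUDA placement, and an installed torchao are assumptions on my part:

import torch
from cache_dit.quantize.backends.torchao import quantize_ao

# Toy stand-in for a diffusion transformer; assumes a CUDA device and torchao.
model = torch.nn.Sequential(
    torch.nn.Linear(64, 64),
    torch.nn.Linear(64, 64),
).cuda()

# "float8_weight_only" is an alias for the internal "fp8_w8a16_wo" scheme, so both
# spellings select the same weight-only FP8 quantization path.
model = quantize_ao(model, quant_type="float8_weight_only")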
cache_dit/quantize/quantize_backend.py (file without changes)
cache_dit/quantize/quantize_config.py (file without changes)