compressed-tensors 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. compressed_tensors/compressors/model_compressors/model_compressor.py +92 -18
  2. compressed_tensors/compressors/quantized_compressors/base.py +35 -5
  3. compressed_tensors/compressors/quantized_compressors/naive_quantized.py +6 -4
  4. compressed_tensors/compressors/quantized_compressors/pack_quantized.py +4 -2
  5. compressed_tensors/compressors/sparse_compressors/__init__.py +1 -0
  6. compressed_tensors/compressors/sparse_compressors/base.py +45 -7
  7. compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +238 -0
  8. compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +9 -40
  9. compressed_tensors/config/__init__.py +1 -0
  10. compressed_tensors/config/base.py +1 -0
  11. compressed_tensors/config/sparse_24_bitmask.py +40 -0
  12. compressed_tensors/linear/compressed_linear.py +3 -1
  13. compressed_tensors/quantization/lifecycle/apply.py +48 -2
  14. compressed_tensors/quantization/lifecycle/forward.py +2 -2
  15. compressed_tensors/quantization/lifecycle/initialize.py +21 -45
  16. compressed_tensors/quantization/quant_args.py +16 -3
  17. compressed_tensors/quantization/quant_config.py +3 -3
  18. compressed_tensors/quantization/quant_scheme.py +17 -24
  19. compressed_tensors/utils/helpers.py +206 -1
  20. compressed_tensors/utils/offload.py +332 -44
  21. compressed_tensors/utils/safetensors_load.py +83 -17
  22. compressed_tensors/version.py +1 -1
  23. {compressed_tensors-0.8.0.dist-info → compressed_tensors-0.9.0.dist-info}/METADATA +1 -1
  24. {compressed_tensors-0.8.0.dist-info → compressed_tensors-0.9.0.dist-info}/RECORD +27 -25
  25. {compressed_tensors-0.8.0.dist-info → compressed_tensors-0.9.0.dist-info}/WHEEL +1 -1
  26. {compressed_tensors-0.8.0.dist-info → compressed_tensors-0.9.0.dist-info}/LICENSE +0 -0
  27. {compressed_tensors-0.8.0.dist-info → compressed_tensors-0.9.0.dist-info}/top_level.txt +0 -0
compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py
@@ -0,0 +1,238 @@
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #    http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from dataclasses import dataclass
+ from typing import Dict, List, Tuple, Union
+
+ import torch
+ from compressed_tensors.compressors.base import BaseCompressor
+ from compressed_tensors.compressors.sparse_compressors.base import BaseSparseCompressor
+ from compressed_tensors.config import CompressionFormat, SparsityStructure
+ from compressed_tensors.quantization import FP8_DTYPE
+ from compressed_tensors.utils import merge_names, pack_bitmasks, unpack_bitmasks
+ from torch import Tensor
+
+
+ __all__ = [
+     "Sparse24BitMaskCompressor",
+     "Sparse24BitMaskTensor",
+     "sparse24_bitmask_compress",
+     "sparse24_bitmask_decompress",
+     "get_24_bytemasks",
+ ]
+
+
+ @BaseCompressor.register(name=CompressionFormat.sparse_24_bitmask.value)
+ class Sparse24BitMaskCompressor(BaseSparseCompressor):
+     """
+     Compression for sparse models using bitmasks. Non-zero weights are stored in a 2d
+     values tensor, with their locations stored in a 2d bitmask
+     """
+
+     COMPRESSION_PARAM_NAMES = [
+         "shape",
+         "compressed",
+         "bitmask",
+     ]
+
+     def compress_weight(self, name, value):
+         bitmask_tensor = Sparse24BitMaskTensor.from_dense(
+             value, self.config.sparsity_structure
+         )
+         bitmask_dict = bitmask_tensor.dict(name_prefix=name, device="cpu")
+         return bitmask_dict
+
+     def decompress_weight(self, weight_data):
+         data = Sparse24BitMaskTensor.from_compressed_data(**weight_data)
+         decompressed = data.decompress()
+         return decompressed
+
+
+ @dataclass
+ class Sparse24BitMaskTensor:
+     """
+     Owns compressions and decompression for a single 2:4 sparse
+     bitmask compressed tensor.
+
+     :param shape: shape of dense tensor
+     :param compressed: 2d tensor of non-zero values
+     :param bitmask: 2d bitmask of non-zero values
+     """
+
+     shape: List[int]
+     compressed: Tensor
+     bitmask: Tensor
+
+     @staticmethod
+     def from_dense(
+         tensor: Tensor,
+         sparsity_structure: Union[SparsityStructure, str] = SparsityStructure.TWO_FOUR,
+     ) -> "Sparse24BitMaskTensor":
+         """
+         :param tensor: dense tensor to compress
+         :return: instantiated compressed tensor
+         """
+         shape = list(tensor.shape)
+         compressed, bitmask = sparse24_bitmask_compress(
+             tensor.cpu(), sparsity_structure=sparsity_structure
+         )
+         return Sparse24BitMaskTensor(
+             shape=shape,
+             compressed=compressed,
+             bitmask=bitmask,
+         )
+
+     @staticmethod
+     def from_compressed_data(
+         shape: Union[List[int], Tensor], compressed: Tensor, bitmask: Tensor
+     ) -> "Sparse24BitMaskTensor":
+         """
+         :param shape: shape of the dense tensor (can be a list or a tensor)
+         :param compressed: 2d tensor of non-zero values
+         :param bitmask: 2d bitmask of non-zero values
+         :return: instantiated Sparse24BitMaskTensor
+         """
+         if isinstance(shape, Tensor):
+             shape = shape.tolist()
+         return Sparse24BitMaskTensor(
+             shape=shape, compressed=compressed, bitmask=bitmask
+         )
+
+     def decompress(self) -> Tensor:
+         """
+         :return: reconstructed dense tensor
+         """
+         return sparse24_bitmask_decompress(self.compressed, self.bitmask, self.shape)
+
+     def curr_memory_size_bytes(self) -> int:
+         """
+         :return: size in bytes required to store compressed tensor on disk
+         """
+
+         def sizeof_tensor(a: Tensor) -> int:
+             return a.element_size() * a.nelement()
+
+         return sizeof_tensor(self.compressed) + sizeof_tensor(self.bitmask)
+
+     def dict(self, name_prefix: str, device: str = "cpu") -> Dict[str, Tensor]:
+         """
+         :param name_prefix: name of original tensor to store compressed weight as
+         :return: dict of compressed data for the stored weight
+         """
+         if name_prefix.endswith(".weight"):
+             name_prefix = name_prefix[: -len(".weight")]
+         return {
+             merge_names(name_prefix, "shape"): torch.tensor(
+                 self.shape, device=device
+             ).reshape(-1, 1),
+             merge_names(name_prefix, "compressed"): self.compressed.to(device),
+             merge_names(name_prefix, "bitmask"): self.bitmask.to(device),
+         }
+
+     def __repr__(self) -> str:
+         return f"BitMaskTensor(shape={self.shape}, compressed=True)"
+
+
+ def sparse24_bitmask_compress(
+     tensor: Tensor,
+     sparsity_structure: Union[SparsityStructure, str] = SparsityStructure.TWO_FOUR,
+ ) -> Tuple[Tensor, Tensor, Tensor]:
+     """
+     Compresses a dense tensor using bitmask compression
+
+     :param tensor: dense 2D tensor to compress
+     :param sparsity_structure: structure of sparsity in the tensor, defaults
+         to unstructured, can also be set to `2:4`
+     :return: tuple of compressed data representing tensor
+     """
+     assert len(tensor.shape) == 2, "Only 2D tensors are supported"
+     assert (
+         SparsityStructure(sparsity_structure) == SparsityStructure.TWO_FOUR
+     ), "Only 2:4 sparsity is supported"
+
+     bytemasks = get_24_bytemasks(tensor=tensor)
+
+     if tensor.dtype == FP8_DTYPE:
+         # acces raw bytes of the tensor
+         tensor_view = tensor.view(torch.int8)
+         values = tensor_view[bytemasks]
+         values = values.view(FP8_DTYPE)
+     else:
+         values = tensor[bytemasks]
+
+     num_rows, num_cols = tensor.shape
+     compressed_values = values.reshape(num_rows, num_cols // 2)
+     bitmasks_packed = pack_bitmasks(bytemasks)
+     return compressed_values, bitmasks_packed
+
+
+ def sparse24_bitmask_decompress(
+     values: Tensor, bitmasks: Tensor, original_shape: torch.Size
+ ) -> Tensor:
+     """
+     Reconstructs a dense tensor from a compressed one
+
+     :param values: 1d tensor of non-zero values
+     :param bitmasks: 2d int8 tensor flagging locations of non-zero values in the
+         tensors original shape
+     :param original_shape: shape of the dense tensor
+     :return: decompressed dense tensor
+     """
+     bytemasks_unpacked = unpack_bitmasks(bitmasks, original_shape)
+
+     decompressed_tensor = torch.zeros(original_shape, dtype=values.dtype)
+     decompressed_tensor = decompressed_tensor.to(values.device)
+     values = values.flatten()
+     if decompressed_tensor.dtype == FP8_DTYPE:
+         decompressed_tensor[bytemasks_unpacked] = values
+         decompressed_tensor = decompressed_tensor.cuda()
+     else:
+         decompressed_tensor[bytemasks_unpacked] = values
+     return decompressed_tensor
+
+
+ def get_24_bytemasks(tensor):
+     """
+     Generate a 2:4 sparsity mask for the given tensor.
+
+     This function creates a mask where exactly 2 out of every 4 elements are
+     preserved based on their magnitudes. The preserved elements are the ones
+     with the highest absolute values in each group of 4 elements.
+
+     :param tensor: The input tensor for which the 2:4 sparsity mask is to be created.
+         The tensor can be of any shape but its total number of elements
+         must be a multiple of 4.
+     :return: A boolean tensor of the same shape as the input tensor, where `True`
+         indicates the preserved elements and `False` indicates the pruned elements.
+     :raises ValueError: If the total number of elements in the tensor is not a
+         multiple of 4.
+     """
+     original_dtype = tensor.dtype
+     if tensor.dtype == FP8_DTYPE:
+         tensor = tensor.view(torch.int8)
+     original_shape = tensor.shape
+     num_elements = tensor.numel()
+
+     if num_elements % 4 != 0:
+         raise ValueError("Tensor size must be a multiple of 4 for TWO_FOUR sparsity")
+
+     reshaped_tensor = tensor.view(-1, 4)
+     abs_tensor = reshaped_tensor.abs()
+     topk_indices = abs_tensor.topk(2, dim=1).indices
+     mask = torch.zeros_like(reshaped_tensor, dtype=torch.bool)
+     mask.scatter_(1, topk_indices, True)
+     mask = mask.view(original_shape)
+     tensor = tensor.view(original_dtype)
+
+     return mask
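
For orientation, here is a minimal round-trip sketch of the new module (not part of the diff): it builds a small FP16 weight that already satisfies 2:4 sparsity, compresses it with `Sparse24BitMaskTensor.from_dense`, and reconstructs it. The import path follows the new file location above; the shape comments assume a 4x8 input.

import torch
from compressed_tensors.compressors.sparse_compressors.sparse_24_bitmask import (
    Sparse24BitMaskTensor,
)

# build a 4x8 fp16 weight that already satisfies 2:4 sparsity
dense = torch.randn(4, 8, dtype=torch.float16)
groups = dense.reshape(-1, 4)
keep = groups.abs().topk(2, dim=1).indices
mask = torch.zeros_like(groups, dtype=torch.bool).scatter_(1, keep, True)
dense = (groups * mask).reshape(4, 8)

compressed = Sparse24BitMaskTensor.from_dense(dense)
print(compressed.compressed.shape)                   # torch.Size([4, 4]): two values kept per group of four
print(compressed.bitmask.shape)                      # torch.Size([4, 1]): eight mask bits packed per byte
print(torch.equal(compressed.decompress(), dense))   # True

Because `get_24_bytemasks` keeps the top-2 magnitudes in every group of four, compressing a weight that is not already 2:4 sparse is lossy: `decompress()` returns the pruned version rather than the original.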
compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py
@@ -14,12 +14,12 @@

  from typing import Dict, List, Tuple, Union

- import numpy
  import torch
  from compressed_tensors.compressors.base import BaseCompressor
  from compressed_tensors.compressors.sparse_compressors.base import BaseSparseCompressor
  from compressed_tensors.config import CompressionFormat
- from compressed_tensors.utils import merge_names
+ from compressed_tensors.quantization import FP8_DTYPE
+ from compressed_tensors.utils import merge_names, pack_bitmasks, unpack_bitmasks
  from torch import Tensor


@@ -28,8 +28,6 @@ __all__ = [
      "BitmaskTensor",
      "bitmask_compress",
      "bitmask_decompress",
-     "pack_bitmasks",
-     "unpack_bitmasks",
  ]


@@ -134,9 +132,14 @@ def bitmask_compress(tensor: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
      bytemasks = tensor != 0
      row_counts = bytemasks.sum(dim=-1)
      row_offsets = torch.cumsum(row_counts, 0) - row_counts
-     values = tensor[bytemasks]
+     if tensor.dtype == FP8_DTYPE:
+         # acces raw bytes of the tensor
+         tensor_view = tensor.view(torch.int8)
+         values = tensor_view[bytemasks]
+         values = values.view(FP8_DTYPE)
+     else:
+         values = tensor[bytemasks]
      bitmasks_packed = pack_bitmasks(bytemasks)
-
      return values, bitmasks_packed, row_offsets


@@ -158,37 +161,3 @@ def bitmask_decompress(
      decompressed_tensor[bytemasks_unpacked] = values

      return decompressed_tensor
-
-
- def pack_bitmasks(bytemasks: Tensor) -> Tensor:
-     """
-     Converts a bytemask tensor to a bitmask tensor to reduce memory. Shape RxC will be
-     compressed to R x ceil(C/8)
-     :param bytemasks: mask tensor where each byte corresponds to a weight
-     :return: mask tensor where each bit corresounds to a weight
-     """
-     packed_bits_numpy = numpy.packbits(bytemasks.numpy(), axis=-1, bitorder="little")
-     packed_bits_torch = torch.from_numpy(packed_bits_numpy)
-
-     return packed_bits_torch
-
-
- def unpack_bitmasks(packed_bitmasks: Tensor, original_shape: torch.Size) -> Tensor:
-     """
-     Converts a bitmask tensor back to a bytemask tensor for use during decompression
-
-     :param packed_bitmasks: mask tensor where each bit corresponds to a weight
-     :param original_shape: dense shape to decompress to
-     :return: boolean mask of weights in the original dense shape
-     """
-     # Unpack the bits
-     unpacked_bits = numpy.unpackbits(
-         packed_bitmasks.numpy(), axis=-1, count=original_shape[-1], bitorder="little"
-     )
-
-     # Reshape to match the original shape
-     unpacked_bitmasks_torch = torch.from_numpy(
-         unpacked_bits.reshape(original_shape).astype(bool)
-     )
-
-     return unpacked_bitmasks_torch
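
The `pack_bitmasks`/`unpack_bitmasks` helpers removed here now live in `compressed_tensors.utils` (they are imported from there at the top of this file). A quick round-trip sketch, not part of the diff:

import torch
from compressed_tensors.utils import pack_bitmasks, unpack_bitmasks

mask = torch.tensor([[True, False, True, False, True, False, False, True]])
packed = pack_bitmasks(mask)                                   # shape (1, 1): 8 mask bits per uint8 byte
restored = unpack_bitmasks(packed, original_shape=mask.shape)  # back to the (1, 8) boolean mask
print(torch.equal(mask, restored))                             # True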
compressed_tensors/compressors/sparse_compressors/__init__.py
@@ -15,4 +15,5 @@
  # flake8: noqa
  from .base import *
  from .dense import *
+ from .sparse_24_bitmask import *
  from .sparse_bitmask import *
compressed_tensors/config/base.py
@@ -26,6 +26,7 @@ __all__ = ["SparsityCompressionConfig", "CompressionFormat", "SparsityStructure"
  class CompressionFormat(Enum):
      dense = "dense"
      sparse_bitmask = "sparse-bitmask"
+     sparse_24_bitmask = "sparse-24-bitmask"
      int_quantized = "int-quantized"
      float_quantized = "float-quantized"
      naive_quantized = "naive-quantized"
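
This enum value is the sparsity `format` string written into serialized configs, and it is the registry name under which `Sparse24BitMaskCompressor` and `Sparse24BitMaskConfig` register themselves. Illustrative only:

from compressed_tensors.config import CompressionFormat

fmt = CompressionFormat("sparse-24-bitmask")
print(fmt is CompressionFormat.sparse_24_bitmask)  # True: the config string resolves to the new member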
compressed_tensors/config/sparse_24_bitmask.py
@@ -0,0 +1,40 @@
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #    http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import Optional
+
+ from compressed_tensors.config import (
+     CompressionFormat,
+     SparsityCompressionConfig,
+     SparsityStructure,
+ )
+
+
+ __all__ = ["Sparse24BitMaskConfig"]
+
+
+ @SparsityCompressionConfig.register(name=CompressionFormat.sparse_24_bitmask.value)
+ class Sparse24BitMaskConfig(SparsityCompressionConfig):
+     """
+     Configuration for storing a 24 sparse model using
+     bytemask compression
+
+     :param global_sparsity: average sparsity of the entire model
+     :param sparsity_structure: structure of the sparsity, should always be
+         "2:4" for this compression format
+     """
+
+     format: str = CompressionFormat.sparse_24_bitmask.value
+     global_sparsity: Optional[float] = 0.0
+     sparsity_structure: Optional[str] = SparsityStructure.TWO_FOUR.value
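
A small usage sketch (not from the diff), assuming the new config class is re-exported from `compressed_tensors.config` via the one-line `config/__init__.py` change listed above; otherwise import it from `compressed_tensors.config.sparse_24_bitmask`:

from compressed_tensors.config import Sparse24BitMaskConfig

cfg = Sparse24BitMaskConfig(global_sparsity=0.5)
print(cfg.format)              # "sparse-24-bitmask"
print(cfg.sparsity_structure)  # expected "2:4" (SparsityStructure.TWO_FOUR.value)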
compressed_tensors/linear/compressed_linear.py
@@ -12,6 +12,8 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

+ from typing import Dict, Tuple
+
  import torch
  from compressed_tensors.compressors.base import BaseCompressor
  from compressed_tensors.quantization import (
@@ -53,7 +55,7 @@ class CompressedLinear(Linear):
          )

          # get the shape and dtype of compressed parameters
-         compression_params = module.compressor.compression_param_info(
+         compression_params: Dict[str, Tuple] = module.compressor.compression_param_info(
              module.weight.shape, quantization_scheme.weights
          )

compressed_tensors/quantization/lifecycle/apply.py
@@ -18,7 +18,7 @@ from collections import OrderedDict, defaultdict
  from copy import deepcopy
  from typing import Dict, Iterable, List, Optional
  from typing import OrderedDict as OrderedDictType
- from typing import Union
+ from typing import Set, Union

  import torch
  from compressed_tensors.config import CompressionFormat
@@ -52,6 +52,8 @@ __all__ = [
      "apply_quantization_config",
      "apply_quantization_status",
      "find_name_or_class_matches",
+     "expand_sparse_target_names",
+     "is_sparse_target",
  ]

  from compressed_tensors.quantization.utils.helpers import is_module_quantized
@@ -106,7 +108,8 @@
      model: Module, config: Union[QuantizationConfig, None], run_compressed: bool = False
  ) -> OrderedDict:
      """
-     Initializes the model for quantization in-place based on the given config
+     Initializes the model for quantization in-place based on the given config.
+     Optionally coverts quantizable modules to compressed_linear modules

      :param model: model to apply quantization config to
      :param config: quantization config
@@ -244,6 +247,49 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
      model.apply(compress_quantized_weights)


+ def expand_sparse_target_names(
+     model: Module, targets: Iterable[str], ignore: Iterable[str]
+ ) -> Set[str]:
+     """
+     Finds all unique module names in the model that match the given
+     targets and ignore lists.
+
+     Note: Targets must be regexes, layer types, or full layer names.
+
+     :param model: model to search for targets in
+     :param targets: list of targets to search for
+     :param ignore: list of targets to ignore
+     :return: set of all targets that match the given targets and should
+         not be ignored
+     """
+     return {
+         name
+         for name, module in iter_named_leaf_modules(model)
+         if is_sparse_target(name, module, targets, ignore)
+     }
+
+
+ def is_sparse_target(
+     name: str, module: Module, targets: Iterable[str], ignore: Iterable[str]
+ ) -> bool:
+     """
+     Determines if a module should be included in the targets based on the
+     targets and ignore lists.
+
+     Note: Targets must be regexes, layer types, or full layer names.
+
+     :param name: name of the module
+     :param module: the module itself
+     :param targets: list of targets to search for
+     :param ignore: list of targets to ignore
+     :return: True if the module is a target and not ignored, False otherwise
+     """
+     return bool(
+         find_name_or_class_matches(name, module, targets)
+         and not find_name_or_class_matches(name, module, ignore or [])
+     )
+
+
  def find_name_or_class_matches(
      name: str, module: Module, targets: Iterable[str], check_contains: bool = False
  ) -> List[str]:
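
A hedged sketch of how the two new helpers are intended to be used (the toy model and target strings below are illustrative, not from the diff; per the docstrings, targets may be regexes, layer types, or full layer names):

import torch.nn as nn
from compressed_tensors.quantization.lifecycle.apply import (
    expand_sparse_target_names,
    is_sparse_target,
)

class TinyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.q_proj = nn.Linear(16, 16)
        self.k_proj = nn.Linear(16, 16)
        self.lm_head = nn.Linear(16, 32)

model = TinyModel()
# match every Linear layer by type, but leave lm_head dense
sparse_names = expand_sparse_target_names(model, targets=["Linear"], ignore=["lm_head"])
print(sparse_names)  # expected {'q_proj', 'k_proj'} (set order may vary)
print(is_sparse_target("lm_head", model.lm_head, ["Linear"], ["lm_head"]))  # False: ignored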
compressed_tensors/quantization/lifecycle/forward.py
@@ -82,8 +82,8 @@ def quantize(
  def dequantize(
      x_q: torch.Tensor,
      scale: torch.Tensor,
-     zero_point: torch.Tensor = None,
-     args: QuantizationArgs = None,
+     zero_point: Optional[torch.Tensor] = None,
+     args: Optional[QuantizationArgs] = None,
      dtype: Optional[torch.dtype] = None,
      g_idx: Optional[torch.Tensor] = None,
  ) -> torch.Tensor:
compressed_tensors/quantization/lifecycle/initialize.py
@@ -29,7 +29,11 @@ from compressed_tensors.quantization.quant_args import (
  from compressed_tensors.quantization.quant_config import QuantizationStatus
  from compressed_tensors.quantization.quant_scheme import QuantizationScheme
  from compressed_tensors.quantization.utils import is_kv_cache_quant_scheme
- from compressed_tensors.utils import get_execution_device, is_module_offloaded
+ from compressed_tensors.utils import (
+     disable_hf_hook,
+     has_offloaded_params,
+     register_offload_parameter,
+ )
  from torch.nn import Module, Parameter


@@ -112,43 +116,10 @@ def initialize_module_for_quantization(
      module.quantization_scheme = scheme
      module.quantization_status = QuantizationStatus.INITIALIZED

-     offloaded = False
-     # What is this doing/why isn't this in the attn case?
-     if is_module_offloaded(module):
-         try:
-             from accelerate.hooks import add_hook_to_module, remove_hook_from_module
-             from accelerate.utils import PrefixedDataset
-         except ModuleNotFoundError:
-             raise ModuleNotFoundError(
-                 "Offloaded model detected. To use CPU offloading with "
-                 "compressed-tensors the `accelerate` package must be installed, "
-                 "run `pip install compressed-tensors[accelerate]`"
-             )
-
-         offloaded = True
-         hook = module._hf_hook
-         prefix_dict = module._hf_hook.weights_map
-         new_prefix = {}
-
-         # recreate the prefix dict (since it is immutable)
-         # and add quantization parameters
-         for key, data in module.named_parameters():
-             if key not in prefix_dict:
-                 new_prefix[f"{prefix_dict.prefix}{key}"] = data
-             else:
-                 new_prefix[f"{prefix_dict.prefix}{key}"] = prefix_dict[key]
-         new_prefix_dict = PrefixedDataset(new_prefix, prefix_dict.prefix)
-         remove_hook_from_module(module)
-
-     # wrap forward call of module to perform
-     # quantized actions based on calltime status
-     wrap_module_forward_quantized(module, scheme)
-
-     if offloaded:
-         # we need to re-add the hook for offloading now that we've wrapped forward
-         add_hook_to_module(module, hook)
-         if prefix_dict is not None:
-             module._hf_hook.weights_map = new_prefix_dict
+     with disable_hf_hook(module):
+         # wrap forward call of module to perform
+         # quantized actions based on calltime status
+         wrap_module_forward_quantized(module, scheme)


  def is_attention_module(module: Module):
@@ -169,12 +140,17 @@ def _initialize_scale_zero_point(
      if quantization_args.dynamic:
          return

-     device = next(module.parameters()).device
-     if is_module_offloaded(module):
-         device = get_execution_device(module)
+     # begin on the same device as other parameters or cpu if offloaded.
+     # in the offloaded case, there's no point moving tensors to the execution device
+     # if they're going to be immediately offloaded by `register_offload_parameter`
+     params_device = next(module.parameters()).device
+     device = "cpu" if has_offloaded_params(module) else params_device

      # infer expected scale/zero point shape
-     expected_shape = 1 # per tensor
+     if quantization_args.strategy == QuantizationStrategy.TOKEN:
+         expected_shape = (1, 1)
+     else:
+         expected_shape = 1

      if base_name == "weight" and weight_shape is not None:
          if quantization_args.strategy == QuantizationStrategy.CHANNEL:
@@ -193,7 +169,7 @@
          torch.empty(expected_shape, dtype=scale_dtype, device=device),
          requires_grad=False,
      )
-     module.register_parameter(f"{base_name}_scale", init_scale)
+     register_offload_parameter(module, f"{base_name}_scale", init_scale)

      if force_zero_point or not quantization_args.symmetric:
          zp_dtype = quantization_args.pytorch_dtype()
@@ -201,7 +177,7 @@
              torch.zeros(expected_shape, device=device, dtype=zp_dtype),
              requires_grad=False,
          )
-         module.register_parameter(f"{base_name}_zero_point", init_zero_point)
+         register_offload_parameter(module, f"{base_name}_zero_point", init_zero_point)

      # only grouped activation ordering has g_idx
      if quantization_args.actorder == ActivationOrdering.GROUP:
@@ -211,7 +187,7 @@
              torch.full(g_idx_shape, -1, device=device, dtype=g_idx_dtype),
              requires_grad=False,
          )
-         module.register_parameter(f"{base_name}_g_idx", init_g_idx)
+         register_offload_parameter(module, f"{base_name}_g_idx", init_g_idx)


  def _initialize_attn_scales(module: Module) -> None:
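
The offload-aware helpers used above come from `compressed_tensors.utils` (see the expanded utils/offload.py in this release). A minimal sketch of the registration path, not from the diff: the assumption here is that on a plain, non-offloaded module `register_offload_parameter` reduces to an ordinary `register_parameter`, while for accelerate-offloaded modules the new tensor is also placed into the module's offload map.

import torch
from torch.nn import Linear, Parameter
from compressed_tensors.utils import has_offloaded_params, register_offload_parameter

layer = Linear(16, 16)
scale = Parameter(torch.empty(1, dtype=torch.float32), requires_grad=False)

print(has_offloaded_params(layer))               # False: no accelerate hook on this module
register_offload_parameter(layer, "weight_scale", scale)  # assumed equivalent to register_parameter here
print(layer.weight_scale.shape)                  # torch.Size([1])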
compressed_tensors/quantization/quant_args.py
@@ -17,6 +17,7 @@ from enum import Enum
  from typing import Any, Dict, Optional, Union

  import torch
+ from compressed_tensors.utils import Aliasable
  from pydantic import BaseModel, Field, field_validator, model_validator


@@ -53,17 +54,29 @@ class QuantizationStrategy(str, Enum):
      TOKEN = "token"


- class ActivationOrdering(str, Enum):
+ class ActivationOrdering(Aliasable, str, Enum):
      """
      Enum storing strategies for activation ordering

      Group: reorder groups and weight\n
-     Weight: only reorder weight, not groups. Slightly lower latency and
-         accuracy compared to group actorder\n
+     Weight: only reorder weight, not groups. Slightly lower accuracy but also lower
+         latency when compared to group actorder\n
+     Dynamic: alias for Group\n
+     Static: alias for Weight\n
      """

      GROUP = "group"
      WEIGHT = "weight"
+     # aliases
+     DYNAMIC = "dynamic"
+     STATIC = "static"
+
+     @staticmethod
+     def get_aliases() -> Dict[str, str]:
+         return {
+             "dynamic": "group",
+             "static": "weight",
+         }


  class QuantizationArgs(BaseModel, use_enum_values=True):
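
The `Aliasable` mixin is added in compressed_tensors/utils/helpers.py elsewhere in this release (not shown in this section). Assuming it makes alias members compare equal to their canonical counterparts, as the docstring above implies, configs that spell the GPTQ-style options "dynamic"/"static" resolve as follows (illustrative sketch):

from compressed_tensors.quantization.quant_args import ActivationOrdering

print(ActivationOrdering("dynamic") == ActivationOrdering.GROUP)   # expected True: "dynamic" aliases "group"
print(ActivationOrdering("static") == ActivationOrdering.WEIGHT)   # expected True: "static" aliases "weight"
print(ActivationOrdering("weight") == ActivationOrdering.WEIGHT)   # True: canonical values resolve directly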
compressed_tensors/quantization/quant_config.py
@@ -132,9 +132,9 @@ class QuantizationConfig(BaseModel):
      `k_proj` and `v_proj` in their names. If this is not the case
      and kv_cache_scheme != None, the quantization of kv cache will fail
      :global_compression_ratio: optional informational config to report the model
-     compression ratio acheived by the quantization config
+         compression ratio acheived by the quantization config
      :ignore: optional list of layers to ignore from config_groups. Layers in this list
-     are not quantized even if they match up with a target in config_groups
+         are not quantized even if they match up with a target in config_groups
      """

      config_groups: Dict[str, Union[QuantizationScheme, List[str]]]
@@ -160,7 +160,7 @@

      def to_dict(self):
          # for compatibility with HFQuantizer
-         return self.dict()
+         return self.model_dump()

      @staticmethod
      def from_pretrained(