compressed-tensors 0.4.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed-tensors-0.4.0/src/compressed_tensors.egg-info → compressed_tensors-0.5.0}/PKG-INFO +12 -2
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/setup.py +1 -1
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/base.py +1 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/compressors/__init__.py +5 -1
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/compressors/base.py +1 -1
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/compressors/dense.py +1 -1
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/compressors/marlin_24.py +11 -10
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/compressors/model_compressor.py +33 -12
- compressed-tensors-0.4.0/src/compressed_tensors/compressors/int_quantized.py → compressed_tensors-0.5.0/src/compressed_tensors/compressors/naive_quantized.py +33 -15
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/compressors/pack_quantized.py +58 -51
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/compressors/sparse_bitmask.py +1 -1
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/config/base.py +2 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/quantization/lifecycle/__init__.py +1 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/quantization/lifecycle/apply.py +161 -39
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/quantization/lifecycle/calibration.py +20 -1
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/quantization/lifecycle/forward.py +70 -25
- compressed_tensors-0.5.0/src/compressed_tensors/quantization/lifecycle/helpers.py +53 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/quantization/lifecycle/initialize.py +30 -1
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/quantization/observers/base.py +39 -0
- compressed_tensors-0.5.0/src/compressed_tensors/quantization/observers/helpers.py +111 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/quantization/quant_args.py +45 -1
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/quantization/quant_config.py +35 -2
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/quantization/quant_scheme.py +105 -4
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/quantization/utils/helpers.py +67 -1
- {compressed-tensors-0.4.0/src/compressed_tensors/compressors → compressed_tensors-0.5.0/src/compressed_tensors}/utils/__init__.py +2 -1
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/utils/helpers.py +31 -2
- compressed_tensors-0.5.0/src/compressed_tensors/utils/offload.py +104 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/version.py +1 -1
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0/src/compressed_tensors.egg-info}/PKG-INFO +12 -2
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors.egg-info/SOURCES.txt +7 -6
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors.egg-info/requires.txt +1 -0
- compressed_tensors-0.5.0/tests/test_registry.py +53 -0
- compressed-tensors-0.4.0/src/compressed_tensors/compressors/utils/helpers.py +0 -43
- compressed-tensors-0.4.0/src/compressed_tensors/quantization/observers/helpers.py +0 -58
- compressed-tensors-0.4.0/src/compressed_tensors/utils/__init__.py +0 -16
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/LICENSE +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/README.md +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/pyproject.toml +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/setup.cfg +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/__init__.py +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/config/dense.py +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/quantization/lifecycle/frozen.py +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/quantization/observers/__init__.py +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/quantization/observers/memoryless.py +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/quantization/observers/min_max.py +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed-tensors-0.4.0/src/compressed_tensors/compressors → compressed_tensors-0.5.0/src/compressed_tensors}/utils/permutations_24.py +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/utils/safetensors_load.py +0 -0
- {compressed-tensors-0.4.0/src/compressed_tensors/compressors → compressed_tensors-0.5.0/src/compressed_tensors}/utils/semi_structured_conversions.py +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors.egg-info/top_level.txt +0 -0
{compressed-tensors-0.4.0/src/compressed_tensors.egg-info → compressed_tensors-0.5.0}/PKG-INFO
RENAMED
@@ -1,14 +1,24 @@
 Metadata-Version: 2.1
 Name: compressed-tensors
-Version: 0.4.0
+Version: 0.5.0
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
 Author-email: support@neuralmagic.com
 License: Apache 2.0
 Description-Content-Type: text/markdown
-Provides-Extra: dev
 License-File: LICENSE
+Requires-Dist: torch>=1.7.0
+Requires-Dist: transformers
+Requires-Dist: accelerate
+Requires-Dist: pydantic>=2.0
+Provides-Extra: dev
+Requires-Dist: black==22.12.0; extra == "dev"
+Requires-Dist: isort==5.8.0; extra == "dev"
+Requires-Dist: wheel>=0.36.2; extra == "dev"
+Requires-Dist: flake8>=3.8.3; extra == "dev"
+Requires-Dist: pytest>=6.0.0; extra == "dev"
+Requires-Dist: nbconvert>=7.16.3; extra == "dev"

 # compressed_tensors

{compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/setup.py
RENAMED
@@ -46,7 +46,7 @@ def _setup_packages() -> List:
     )

 def _setup_install_requires() -> List:
-    return ["torch>=1.7.0", "transformers", "pydantic>=2.0"]
+    return ["torch>=1.7.0", "transformers", "accelerate", "pydantic>=2.0"]

 def _setup_extras() -> Dict:
     return {"dev": ["black==22.12.0", "isort==5.8.0", "wheel>=0.36.2", "flake8>=3.8.3", "pytest>=6.0.0", "nbconvert>=7.16.3"]}
{compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/compressors/__init__.py
RENAMED
@@ -17,8 +17,12 @@
 from .base import Compressor
 from .dense import DenseCompressor
 from .helpers import load_compressed, save_compressed, save_compressed_model
-from .int_quantized import IntQuantizationCompressor
 from .marlin_24 import Marlin24Compressor
 from .model_compressor import ModelCompressor, map_modules_to_quant_args
+from .naive_quantized import (
+    FloatQuantizationCompressor,
+    IntQuantizationCompressor,
+    QuantizationCompressor,
+)
 from .pack_quantized import PackedQuantizationCompressor
 from .sparse_bitmask import BitmaskCompressor, BitmaskTensor
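The widened export list can be exercised directly. A quick smoke test, assuming compressed-tensors 0.5.0 is installed, that the new format-specific names really are thin subclasses of the shared naive compressor (the relationship is shown later in this diff):

```python
from compressed_tensors.compressors import (
    FloatQuantizationCompressor,
    IntQuantizationCompressor,
    QuantizationCompressor,
)

# both format-specific names are aliases over the shared naive implementation
assert issubclass(IntQuantizationCompressor, QuantizationCompressor)
assert issubclass(FloatQuantizationCompressor, QuantizationCompressor)
```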
{compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/compressors/base.py
RENAMED
@@ -45,7 +45,7 @@ class Compressor(RegistryMixin):
         raise NotImplementedError()

     def decompress(
-        self, path_to_model_or_tensors: str, device: str = "cpu"
+        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
     ) -> Generator[Tuple[str, Tensor], None, None]:
         """
         Reads a compressed state dict located at path_to_model_or_tensors
{compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/compressors/dense.py
RENAMED
@@ -29,6 +29,6 @@ class DenseCompressor(Compressor):
         return model_state

     def decompress(
-        self, path_to_model_or_tensors: str, device: str = "cpu"
+        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
     ) -> Generator[Tuple[str, Tensor], None, None]:
         return iter([])
{compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/compressors/marlin_24.py
RENAMED
@@ -18,15 +18,16 @@ from typing import Dict, Generator, Tuple
 import numpy as np
 import torch
 from compressed_tensors.compressors import Compressor
-from compressed_tensors.compressors.utils import (
+from compressed_tensors.config import CompressionFormat
+from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
+from compressed_tensors.quantization.lifecycle.forward import quantize
+from compressed_tensors.utils import (
     get_permutations_24,
+    is_quantization_param,
+    merge_names,
     sparse_semi_structured_from_dense_cutlass,
     tensor_follows_mask_structure,
 )
-from compressed_tensors.config import CompressionFormat
-from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
-from compressed_tensors.quantization.lifecycle.forward import quantize
-from compressed_tensors.utils import is_quantization_param, merge_names
 from torch import Tensor
 from tqdm import tqdm

@@ -107,7 +108,7 @@ class Marlin24Compressor(Compressor):
     def compress(
         self,
         model_state: Dict[str, Tensor],
-
+        names_to_scheme: Dict[str, QuantizationArgs],
         **kwargs,
     ) -> Dict[str, Tensor]:
         """
@@ -115,11 +116,11 @@ class Marlin24Compressor(Compressor):
         with the Marlin24 kernel

         :param model_state: state dict of uncompressed model
-        :param
+        :param names_to_scheme: quantization args for each quantized weight, needed for
             quantize function to calculate bit depth
         :return: compressed state dict
         """
-        self.validate_quant_compatability(
+        self.validate_quant_compatability(names_to_scheme)

         compressed_dict = {}
         weight_suffix = ".weight"
@@ -139,7 +140,7 @@ class Marlin24Compressor(Compressor):
                 value = value.to(torch.float16)

                 # quantize weight, keeping it as a float16 for now
-                quant_args =
+                quant_args = names_to_scheme[prefix]
                 value = quantize(
                     x=value, scale=scale, zero_point=zp, args=quant_args
                 )
@@ -175,7 +176,7 @@ class Marlin24Compressor(Compressor):
         return compressed_dict

     def decompress(
-        self, path_to_model_or_tensors: str, device: str = "cpu"
+        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
     ) -> Generator[Tuple[str, Tensor], None, None]:
         raise NotImplementedError(
             "Decompression is not implemented for the Marlin24 Compressor."
{compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/compressors/model_compressor.py
RENAMED
@@ -16,9 +16,12 @@ import json
 import logging
 import operator
 import os
+import re
 from copy import deepcopy
 from typing import Any, Dict, Optional, Union

+import torch
+import transformers
 from compressed_tensors.base import (
     COMPRESSION_CONFIG_NAME,
     QUANTIZATION_CONFIG_NAME,
@@ -36,10 +39,10 @@ from compressed_tensors.quantization.utils import (
     is_module_quantized,
     iter_named_leaf_modules,
 )
-from compressed_tensors.utils import get_safetensors_folder
+from compressed_tensors.utils import get_safetensors_folder, update_parameter_data
 from compressed_tensors.utils.helpers import fix_fsdp_module_name
 from torch import Tensor
-from torch.nn import Module
+from torch.nn import Module
 from tqdm import tqdm
 from transformers import AutoConfig
 from transformers.file_utils import CONFIG_NAME
@@ -78,6 +81,7 @@ class ModelCompressor:
     def from_pretrained(
         cls,
         pretrained_model_name_or_path: str,
+        **kwargs,
     ) -> Optional["ModelCompressor"]:
         """
         Given a path to a model config, extract the sparsity and/or quantization
@@ -86,7 +90,7 @@ class ModelCompressor:
         :param pretrained_model_name_or_path: path to model config on disk or HF hub
         :return: compressor for the extracted configs
         """
-        config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
+        config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
         compression_config = getattr(config, COMPRESSION_CONFIG_NAME, None)
         return cls.from_compression_config(compression_config)

@@ -228,7 +232,7 @@ class ModelCompressor:
         quantized_modules_to_args = map_modules_to_quant_args(model)
         if self.quantization_compressor is not None:
             compressed_state_dict = self.quantization_compressor.compress(
-                state_dict,
+                state_dict, names_to_scheme=quantized_modules_to_args
             )

         if self.sparsity_compressor is not None:
@@ -236,6 +240,11 @@ class ModelCompressor:
                 compressed_state_dict
             )

+        # HACK: Override the dtype_byte_size function in transformers to
+        # support float8 types. Fix is posted upstream
+        # https://github.com/huggingface/transformers/pull/30488
+        transformers.modeling_utils.dtype_byte_size = new_dtype_byte_size
+
         return compressed_state_dict

     def decompress(self, model_path: str, model: Module):
@@ -252,9 +261,11 @@ class ModelCompressor:
             setattr(model, SPARSITY_CONFIG_NAME, self.sparsity_compressor.config)

         if self.quantization_compressor is not None:
-            apply_quantization_config(model, self.quantization_config)
+            names_to_scheme = apply_quantization_config(model, self.quantization_config)
             load_pretrained_quantization(model, model_path)
-            dense_gen = self.quantization_compressor.decompress(
+            dense_gen = self.quantization_compressor.decompress(
+                model_path, names_to_scheme=names_to_scheme
+            )
             self._replace_weights(dense_gen, model)

         def update_status(module):
@@ -296,12 +307,10 @@ class ModelCompressor:

     def _replace_weights(self, dense_weight_generator, model):
         for name, data in tqdm(dense_weight_generator, desc="Decompressing model"):
-
-
-
-
-            data_new = Parameter(data.to(model_device).to(data_dtype))
-            data_old.data = data_new.data
+            split_name = name.split(".")
+            prefix, param_name = ".".join(split_name[:-1]), split_name[-1]
+            module = operator.attrgetter(prefix)(model)
+            update_parameter_data(module, data, param_name)


 def map_modules_to_quant_args(model: Module) -> Dict:
@@ -313,3 +322,15 @@ def map_modules_to_quant_args(model: Module) -> Dict:
             quantized_modules_to_args[name] = submodule.quantization_scheme.weights

     return quantized_modules_to_args
+
+
+# HACK: Override the dtype_byte_size function in transformers to support float8 types
+# Fix is posted upstream https://github.com/huggingface/transformers/pull/30488
+def new_dtype_byte_size(dtype):
+    if dtype == torch.bool:
+        return 1 / 8
+    bit_search = re.search(r"[^\d](\d+)_?", str(dtype))
+    if bit_search is None:
+        raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
+    bit_size = int(bit_search.groups()[0])
+    return bit_size // 8
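For reference, a standalone sketch (not taken from the package) that mirrors the `new_dtype_byte_size` helper added above: the byte width is parsed out of the dtype's string name, which is what lets 1-byte float8 dtypes through where the stock `transformers` helper can fail:

```python
import re

import torch


def dtype_byte_size(dtype: torch.dtype) -> float:
    # mirrors new_dtype_byte_size from the diff above
    if dtype == torch.bool:
        return 1 / 8
    bit_search = re.search(r"[^\d](\d+)_?", str(dtype))
    if bit_search is None:
        raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
    return int(bit_search.groups()[0]) // 8


print(dtype_byte_size(torch.float16))  # 2
if hasattr(torch, "float8_e4m3fn"):  # float8 dtypes exist only in newer torch builds
    print(dtype_byte_size(torch.float8_e4m3fn))  # 1
```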
compressed-tensors-0.4.0/src/compressed_tensors/compressors/int_quantized.py → compressed_tensors-0.5.0/src/compressed_tensors/compressors/naive_quantized.py
RENAMED
@@ -27,17 +27,21 @@ from torch import Tensor
 from tqdm import tqdm


-__all__ = [
+__all__ = [
+    "QuantizationCompressor",
+    "IntQuantizationCompressor",
+    "FloatQuantizationCompressor",
+]

 _LOGGER: logging.Logger = logging.getLogger(__name__)


-@Compressor.register(name=CompressionFormat.
-class
+@Compressor.register(name=CompressionFormat.naive_quantized.value)
+class QuantizationCompressor(Compressor):
     """
-
-    converted from its original float type to the
-
+    Implements naive compression for quantized models. Weight of each
+    quantized layer is converted from its original float type to the closest Pytorch
+    type to the type specified by the layer's QuantizationArgs.
     """

     COMPRESSION_PARAM_NAMES = ["weight", "weight_scale", "weight_zero_point"]
@@ -45,14 +49,14 @@ class IntQuantizationCompressor(Compressor):
     def compress(
         self,
         model_state: Dict[str, Tensor],
-
+        names_to_scheme: Dict[str, QuantizationArgs],
         **kwargs,
     ) -> Dict[str, Tensor]:
         """
         Compresses a dense state dict

         :param model_state: state dict of uncompressed model
-        :param
+        :param names_to_scheme: quantization args for each quantized weight, needed for
             quantize function to calculate bit depth
         :return: compressed state dict
         """
@@ -69,7 +73,7 @@ class IntQuantizationCompressor(Compressor):
                 zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
                 if scale is not None and zp is not None:
                     # weight is quantized, compress it
-                    quant_args =
+                    quant_args = names_to_scheme[prefix]
                     if can_quantize(value, quant_args):
                         # only quantize if not already quantized
                         value = quantize(
@@ -77,7 +81,7 @@ class IntQuantizationCompressor(Compressor):
                             scale=scale,
                             zero_point=zp,
                             args=quant_args,
-                            dtype=
+                            dtype=quant_args.pytorch_dtype(),
                         )
                 elif name.endswith("zero_point"):
                     if torch.all(value == 0):
@@ -89,7 +93,7 @@ class IntQuantizationCompressor(Compressor):
         return compressed_dict

     def decompress(
-        self, path_to_model_or_tensors: str, device: str = "cpu"
+        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
     ) -> Generator[Tuple[str, Tensor], None, None]:
         """
         Reads a compressed state dict located at path_to_model_or_tensors
@@ -114,13 +118,27 @@ class IntQuantizationCompressor(Compressor):
             if "weight_scale" in weight_data:
                 zero_point = weight_data.get("weight_zero_point", None)
                 scale = weight_data["weight_scale"]
-                if zero_point is None:
-                    # zero_point assumed to be 0 if not included in state_dict
-                    zero_point = torch.zeros_like(scale)
-
                 decompressed = dequantize(
                     x_q=weight_data["weight"],
                     scale=scale,
                     zero_point=zero_point,
                 )
                 yield merge_names(weight_name, "weight"), decompressed
+
+
+@Compressor.register(name=CompressionFormat.int_quantized.value)
+class IntQuantizationCompressor(QuantizationCompressor):
+    """
+    Alias for integer quantized models
+    """
+
+    pass
+
+
+@Compressor.register(name=CompressionFormat.float_quantized.value)
+class FloatQuantizationCompressor(QuantizationCompressor):
+    """
+    Alias for fp quantized models
+    """
+
+    pass
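For orientation, a hedged sketch of the `names_to_scheme` mapping these compressors now accept: each quantized layer prefix maps to the `QuantizationArgs` for its weights, which is what `map_modules_to_quant_args` in model_compressor.py produces. The layer names and constructor arguments below are illustrative assumptions; only `num_bits` and `pytorch_dtype()` are taken from the diff above:

```python
from compressed_tensors.quantization import QuantizationArgs

# hypothetical layer prefixes; in practice these come from the model's quantized modules
names_to_scheme = {
    "model.layers.0.self_attn.q_proj": QuantizationArgs(num_bits=8),
    "model.layers.0.mlp.down_proj": QuantizationArgs(num_bits=8),
}

for prefix, args in names_to_scheme.items():
    # the compressor uses num_bits / pytorch_dtype() to choose the storage type per weight
    print(prefix, args.num_bits, args.pytorch_dtype())
```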
{compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/compressors/pack_quantized.py
RENAMED
@@ -29,7 +29,7 @@ from torch import Tensor
 from tqdm import tqdm


-__all__ = ["PackedQuantizationCompressor", "
+__all__ = ["PackedQuantizationCompressor", "pack_to_int32", "unpack_from_int32"]

 _LOGGER: logging.Logger = logging.getLogger(__name__)

@@ -50,14 +50,14 @@ class PackedQuantizationCompressor(Compressor):
     def compress(
         self,
         model_state: Dict[str, Tensor],
-
+        names_to_scheme: Dict[str, QuantizationArgs],
         **kwargs,
     ) -> Dict[str, Tensor]:
         """
         Compresses a dense state dict

         :param model_state: state dict of uncompressed model
-        :param
+        :param names_to_scheme: quantization args for each quantized weight, needed for
             quantize function to calculate bit depth
         :return: compressed state dict
         """
@@ -75,7 +75,7 @@ class PackedQuantizationCompressor(Compressor):
                 shape = torch.tensor(value.shape)
                 if scale is not None and zp is not None:
                     # weight is quantized, compress it
-                    quant_args =
+                    quant_args = names_to_scheme[prefix]
                     if can_quantize(value, quant_args):
                         # convert weight to an int if not already compressed
                         value = quantize(
@@ -85,7 +85,7 @@ class PackedQuantizationCompressor(Compressor):
                             args=quant_args,
                             dtype=torch.int8,
                         )
-                    value =
+                    value = pack_to_int32(value.cpu(), quant_args.num_bits)
                     compressed_dict[merge_names(prefix, "weight_shape")] = shape
                     compressed_dict[merge_names(prefix, "weight_packed")] = value
                     continue
@@ -101,7 +101,10 @@ class PackedQuantizationCompressor(Compressor):
         return compressed_dict

     def decompress(
-        self,
+        self,
+        path_to_model_or_tensors: str,
+        names_to_scheme: Dict[str, QuantizationArgs],
+        device: str = "cpu",
     ) -> Generator[Tuple[str, Tensor], None, None]:
         """
         Reads a compressed state dict located at path_to_model_or_tensors
@@ -119,6 +122,7 @@ class PackedQuantizationCompressor(Compressor):
         for weight_name in weight_mappings.keys():
             weight_data = {}
             for param_name, safe_path in weight_mappings[weight_name].items():
+                weight_data["num_bits"] = names_to_scheme.get(weight_name).num_bits
                 full_name = merge_names(weight_name, param_name)
                 with safe_open(safe_path, framework="pt", device=device) as f:
                     weight_data[param_name] = f.get_tensor(full_name)
@@ -126,13 +130,10 @@ class PackedQuantizationCompressor(Compressor):
             if "weight_scale" in weight_data:
                 zero_point = weight_data.get("weight_zero_point", None)
                 scale = weight_data["weight_scale"]
-                if zero_point is None:
-                    # zero_point assumed to be 0 if not included in state_dict
-                    zero_point = torch.zeros_like(scale)
-
                 weight = weight_data["weight_packed"]
+                num_bits = weight_data["num_bits"]
                 original_shape = torch.Size(weight_data["weight_shape"])
-                unpacked =
+                unpacked = unpack_from_int32(weight, num_bits, original_shape)
                 decompressed = dequantize(
                     x_q=unpacked,
                     scale=scale,
@@ -141,45 +142,50 @@ class PackedQuantizationCompressor(Compressor):
                 yield merge_names(weight_name, "weight"), decompressed


-def
+def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
     """
-    Packs a tensor of
+    Packs a tensor of quantized weights stored in int8 into int32s with padding

     :param value: tensor to pack
+    :param num_bits: number of bits used to store underlying data
     :returns: packed int32 tensor
     """
     if value.dtype is not torch.int8:
         raise ValueError("Tensor must be quantized to torch.int8 before packing")

-
-
-    bits = np.unpackbits(temp.numpy(), axis=-1, bitorder="little")
-    ranges = np.array([range(x, x + 4) for x in range(0, bits.shape[1], 8)]).flatten()
-    only_4_bits = bits[:, ranges]  # top 4 bits are 0 because we're really uint4
+    if num_bits > 8:
+        raise ValueError("Packing is only supported for less than 8 bits")

-    #
-
-
-
-
-    padded_bits = np.pad(
-        only_4_bits, pad_width=[(0, 0), (0, padding)], constant_values=0
-    )
+    # convert to unsigned for packing
+    offset = pow(2, num_bits) // 2
+    value = (value + offset).to(torch.uint8)
+    value = value.cpu().numpy().astype(np.uint32)
+    pack_factor = 32 // num_bits

-    #
-
-
-
+    # pad input tensor and initialize packed output
+    packed_size = math.ceil(value.shape[1] / pack_factor)
+    packed = np.zeros((value.shape[0], packed_size), dtype=np.uint32)
+    padding = packed.shape[1] * pack_factor - value.shape[1]
+    value = np.pad(value, pad_width=[(0, 0), (0, padding)], constant_values=0)

-
+    # pack values
+    for i in range(pack_factor):
+        packed |= value[:, i::pack_factor] << num_bits * i

+    # convert back to signed and torch
+    packed = np.ascontiguousarray(packed).view(np.int32)
+    return torch.from_numpy(packed)

-
+
+def unpack_from_int32(
+    value: torch.Tensor, num_bits: int, shape: torch.Size
+) -> torch.Tensor:
     """
-    Unpacks a tensor packed
-    original their
+    Unpacks a tensor of packed int32 weights into individual int8s, maintaining the
+    original their bit range

     :param value: tensor to upack
+    :param num_bits: number of bits to unpack each data point into
     :param shape: shape to unpack into, used to remove padding
     :returns: unpacked int8 tensor
     """
@@ -188,25 +194,26 @@ def unpack_4bit_ints(value: torch.Tensor, shape: torch.Size) -> torch.Tensor:
             f"Expected {torch.int32} but got {value.dtype}, Aborting unpack."
         )

-
-
-
-
-
-
+    if num_bits > 8:
+        raise ValueError("Unpacking is only supported for less than 8 bits")
+
+    # convert packed input to unsigned numpy
+    value = value.numpy().view(np.uint32)
+    pack_factor = 32 // num_bits

-    #
-
-
-
-
-    bits_as_8bit[:, ranges] = bits
+    # unpack
+    mask = pow(2, num_bits) - 1
+    unpacked = np.zeros((value.shape[0], value.shape[1] * pack_factor))
+    for i in range(pack_factor):
+        unpacked[:, i::pack_factor] = (value >> (num_bits * i)) & mask

-    #
-
+    # remove padding
+    original_row_size = int(shape[1])
+    unpacked = unpacked[:, :original_row_size]

     # bits are packed in unsigned format, reformat to signed
-    # update the value range from
-
+    # update the value range from unsigned to signed
+    offset = pow(2, num_bits) // 2
+    unpacked = (unpacked.astype(np.int16) - offset).astype(np.int8)

-    return torch.from_numpy(
+    return torch.from_numpy(unpacked)
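The helpers above generalize the old fixed 4-bit pack/unpack routines to any width up to 8 bits. A small round-trip sketch, assuming compressed-tensors 0.5.0 is installed and using the import path of the file shown above:

```python
import torch

from compressed_tensors.compressors.pack_quantized import (
    pack_to_int32,
    unpack_from_int32,
)

# fake 4-bit quantized weights, stored as int8 values in [-8, 7]
weights = torch.randint(-8, 8, (4, 10), dtype=torch.int8)

packed = pack_to_int32(weights, num_bits=4)
print(packed.dtype, tuple(packed.shape))  # torch.int32 (4, 2): eight 4-bit values per int32, padded

restored = unpack_from_int32(packed, num_bits=4, shape=weights.shape)
assert torch.equal(restored, weights)  # lossless round trip
```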
{compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/compressors/sparse_bitmask.py
RENAMED
@@ -72,7 +72,7 @@ class BitmaskCompressor(Compressor):
         return compressed_dict

     def decompress(
-        self, path_to_model_or_tensors: str, device: str = "cpu"
+        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
     ) -> Generator[Tuple[str, Tensor], None, None]:
         """
         Reads a bitmask compressed state dict located
{compressed-tensors-0.4.0 → compressed_tensors-0.5.0}/src/compressed_tensors/config/base.py
RENAMED
@@ -26,6 +26,8 @@ class CompressionFormat(Enum):
     dense = "dense"
     sparse_bitmask = "sparse-bitmask"
     int_quantized = "int-quantized"
+    float_quantized = "float-quantized"
+    naive_quantized = "naive-quantized"
     pack_quantized = "pack-quantized"
     marlin_24 = "marlin-24"

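The new enum members carry the format strings that end up in a model's compression config. A quick check, assuming the 0.5.0 package is installed:

```python
from compressed_tensors.config import CompressionFormat

# formats introduced in this release, alongside the existing int-quantized entry
print(CompressionFormat.naive_quantized.value)  # "naive-quantized"
print(CompressionFormat.float_quantized.value)  # "float-quantized"
print(CompressionFormat.int_quantized.value)    # "int-quantized"
```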