compressed-tensors 0.9.1__tar.gz → 0.9.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/PKG-INFO +23 -3
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/base.py +9 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +120 -6
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/quantized_compressors/base.py +33 -4
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +12 -6
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +36 -13
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/sparse_compressors/base.py +2 -3
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/sparse_compressors/dense.py +8 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +11 -5
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +7 -1
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +8 -2
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/linear/compressed_linear.py +25 -6
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/quantization/lifecycle/apply.py +17 -12
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/quantization/lifecycle/initialize.py +2 -3
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/quantization/quant_args.py +8 -9
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/utils/helpers.py +7 -3
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/utils/offload.py +7 -1
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/utils/safetensors_load.py +7 -5
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/version.py +1 -1
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors.egg-info/PKG-INFO +23 -3
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors.egg-info/SOURCES.txt +3 -1
- compressed_tensors-0.9.3/tests/test_registry.py +53 -0
- compressed_tensors-0.9.3/tests/testing_utils.py +144 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/LICENSE +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/README.md +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/pyproject.toml +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/setup.cfg +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/setup.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/__init__.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/base.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/config/base.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/config/dense.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/quantization/quant_config.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/utils/permute.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors.egg-info/requires.txt +0 -0
- {compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors.egg-info/top_level.txt +0 -0
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/PKG-INFO
RENAMED
@@ -1,15 +1,35 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.1
+Version: 0.9.3
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
 Author-email: support@neuralmagic.com
 License: Apache 2.0
 Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: torch>=1.7.0
+Requires-Dist: transformers
+Requires-Dist: pydantic>=2.0
 Provides-Extra: dev
+Requires-Dist: black==22.12.0; extra == "dev"
+Requires-Dist: isort==5.8.0; extra == "dev"
+Requires-Dist: wheel>=0.36.2; extra == "dev"
+Requires-Dist: flake8>=3.8.3; extra == "dev"
+Requires-Dist: pytest>=6.0.0; extra == "dev"
+Requires-Dist: nbconvert>=7.16.3; extra == "dev"
 Provides-Extra: accelerate
-
+Requires-Dist: accelerate; extra == "accelerate"
+Dynamic: author
+Dynamic: author-email
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: summary
 
 # compressed-tensors
 
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/base.py
RENAMED
@@ -77,6 +77,15 @@ class BaseCompressor(RegistryMixin, ABC):
         """
         raise NotImplementedError()
 
+    @property
+    @abstractmethod
+    def compression_param_names(self) -> Tuple[str]:
+        """
+        Returns a tuple of compression parameter names introduced by
+        the compressor during compression
+        """
+        raise NotImplementedError()
+
     @abstractmethod
     def compress(
         self,
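The `compression_param_names` property replaces the old `COMPRESSION_PARAM_NAMES` class attribute across every compressor in this release. A minimal sketch of the new contract (toy subclass, not taken from the diff):

```python
from abc import ABC, abstractmethod
from typing import Tuple


class BaseCompressor(ABC):
    """Trimmed stand-in for compressed_tensors' BaseCompressor."""

    @property
    @abstractmethod
    def compression_param_names(self) -> Tuple[str]:
        """Names of the parameters this compressor writes to the checkpoint."""
        raise NotImplementedError()


class ToyBitmaskCompressor(BaseCompressor):
    # mirrors the tuple the real sparse bitmask compressor returns below
    @property
    def compression_param_names(self) -> Tuple[str]:
        return ("shape", "compressed", "bitmask", "row_offsets")


print(ToyBitmaskCompressor().compression_param_names)
# ('shape', 'compressed', 'bitmask', 'row_offsets')
```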
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/model_compressors/model_compressor.py
RENAMED
@@ -19,7 +19,7 @@ import os
 import re
 from contextlib import contextmanager
 from copy import deepcopy
-from typing import TYPE_CHECKING, Any, Dict, Optional, Set, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, TypeVar, Union
 
 import compressed_tensors
 import torch
@@ -39,13 +39,17 @@ from compressed_tensors.quantization import (
     apply_quantization_config,
     load_pretrained_quantization,
 )
-from compressed_tensors.quantization.lifecycle import expand_sparse_target_names
+from compressed_tensors.quantization.lifecycle import expand_target_names
 from compressed_tensors.quantization.quant_args import QuantizationArgs
 from compressed_tensors.quantization.utils import (
     is_module_quantized,
     iter_named_leaf_modules,
 )
-from compressed_tensors.utils import get_safetensors_folder, update_parameter_data
+from compressed_tensors.utils import (
+    get_safetensors_folder,
+    merge_names,
+    update_parameter_data,
+)
 from compressed_tensors.utils.helpers import (
     fix_fsdp_module_name,
     is_compressed_tensors_config,
@@ -254,6 +258,107 @@ class ModelCompressor:
             quantization_config.format, config=quantization_config
         )
 
+    def get_missing_module_keys(self, model: Module) -> List[str]:
+        """
+        Identifies the expected missing weight keys in the compressed state_dict.
+
+        When a model undergoes sparsity or quantization compression, certain
+        weight tensors may be absent from the checkpoint by virtue of compression.
+        This function determines which weight keys are missing based on the
+        applied compression techniques.
+
+
+        :param model: The PyTorch model to check for missing keys.
+        :return: A list of missing keys expected in the compressed state_dict.
+        """
+        missing_keys = set()
+
+        # Determine missing keys due to sparsity compression
+        if (
+            self.sparsity_compressor
+            and self.sparsity_config.format != CompressionFormat.dense.value
+        ):
+            sparse_targets = expand_target_names(
+                model=model,
+                targets=self.sparsity_config.targets,
+                ignore=self.sparsity_config.ignore,
+            )
+            missing_keys.update(
+                merge_names(target, "weight") for target in sparse_targets
+            )
+
+        # Determine missing keys due to pack quantization
+        if (
+            self.quantization_compressor
+            and self.quantization_config.format
+            == CompressionFormat.pack_quantized.value
+        ):
+            for scheme in self.quantization_config.config_groups.values():
+                quant_targets = expand_target_names(
+                    model=model,
+                    targets=scheme.targets,
+                    ignore=self.quantization_config.ignore,
+                )
+                missing_keys.update(
+                    merge_names(target, "weight") for target in quant_targets
+                )
+
+        return list(missing_keys)
+
+    def get_unexpected_file_keys(self, model: Module) -> List[str]:
+        """
+        Identifies extra keys introduced by the compression process in the
+        compressed state_dict that are not expected by the model graph.
+
+        During sparsity or quantization compression, additional metadata or
+        auxiliary parameters may be stored in the checkpoint, which do not
+        correspond to any parameter in the original model. These keys are
+        typically introduced to support the reconstruction of compressed weights.
+
+        For example, Sparse24Bitmask compression may introduce keys such as
+        'compressed', 'bitmask', and 'shape' in the checkpoint, which are
+        not part of the original model parameters.
+
+        :param model: The PyTorch model to check for unexpected keys.
+        :return: A list of extra keys introduced by the compression process
+            that are not expected by the model.
+        """
+
+        unexpected_keys = set()
+
+        # Identify unexpected keys from sparsity compression
+        if (
+            self.sparsity_compressor
+            and self.sparsity_config.format != CompressionFormat.dense.value
+        ):
+            sparse_targets: Set[str] = expand_target_names(
+                model=model,
+                targets=self.sparsity_config.targets,
+                ignore=self.sparsity_config.ignore,
+            )
+            unexpected_keys.update(
+                merge_names(target, param)
+                for target in sparse_targets
+                for param in self.sparsity_compressor.compression_param_names
+            )
+
+        # Identify unexpected keys from quantization compression
+        if self.quantization_compressor:
+            for scheme in self.quantization_config.config_groups.values():
+                quant_targets: Set[str] = expand_target_names(
+                    model=model,
+                    targets=scheme.targets,
+                    ignore=self.quantization_config.ignore,
+                )
+                unexpected_keys.update(
+                    merge_names(target, param)
+                    for target in quant_targets
+                    for param in self.quantization_compressor.compression_param_names
+                    if param != "weight"
+                )
+
+        return list(unexpected_keys)
+
     def compress(
         self, model: Module, state_dict: Optional[Dict[str, Tensor]] = None
     ) -> Dict[str, Tensor]:
@@ -283,7 +388,7 @@ class ModelCompressor:
         )
 
         if self.sparsity_compressor is not None:
-            sparse_compression_targets: Set[str] = expand_sparse_target_names(
+            sparse_compression_targets: Set[str] = expand_target_names(
                 model=model,
                 targets=self.sparsity_config.targets,
                 ignore=self.sparsity_config.ignore,
@@ -417,10 +522,13 @@ class ModelCompressor:
             update_parameter_data(module, data, param_name)
 
 
-def map_modules_to_quant_args(model: Module) -> Dict[str, QuantizationArgs]:
+def map_modules_to_quant_args(
+    model: Module,
+) -> Dict[str, Union[QuantizationArgs, Tuple[QuantizationArgs, QuantizationArgs]]]:
     """
     Given a pytorch model, map out the submodule name (usually linear layers)
-    to the QuantizationArgs
+    to the weight QuantizationArgs. If running input activation quantization, will also
+    map to the input QuantizationArgs in a tuple.
 
     :param model: pytorch model
     """
@@ -430,6 +538,12 @@ def map_modules_to_quant_args(model: Module) -> Dict[str, QuantizationArgs]:
         if submodule.quantization_scheme.weights is not None:
             name = fix_fsdp_module_name(name)
             quantized_modules_to_args[name] = submodule.quantization_scheme.weights
+            if submodule.quantization_scheme.input_activations is not None:
+                weight_args = quantized_modules_to_args.get(name)
+                quantized_modules_to_args[name] = (
+                    weight_args,
+                    submodule.quantization_scheme.input_activations,
+                )
 
     return quantized_modules_to_args
 
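Both new helpers reduce to expanding target names and joining them with parameter names. A toy sketch of that key arithmetic (module names here are hypothetical; `merge_names` in compressed-tensors joins a module path and a parameter name with a dot):

```python
# Toy illustration, not from the diff: how target expansion plus merge_names
# yields the checkpoint keys reported as missing/unexpected.
def merge_names(parent: str, child: str) -> str:
    return f"{parent}.{child}"

sparse_targets = {"model.layers.0.self_attn.q_proj", "model.layers.0.mlp.gate_proj"}
compression_param_names = ("shape", "compressed", "bitmask")  # e.g. Sparse24Bitmask

# keys the dense graph expects but the compressed checkpoint omits
missing = {merge_names(t, "weight") for t in sparse_targets}

# keys the compressed checkpoint adds that the dense graph does not expect
unexpected = {
    merge_names(t, p) for t in sparse_targets for p in compression_param_names
}
print(sorted(missing))
print(sorted(unexpected))
```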
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/quantized_compressors/base.py
RENAMED
@@ -82,11 +82,32 @@ class BaseQuantizationCompressor(BaseCompressor):
         """
         compressed_dict = {}
         weight_suffix = ".weight"
+        input_zp_suffix = ".input_zero_point"
+        weight_zp_suffix = ".weight_zero_point"
         _LOGGER.debug(
             f"Compressing model with {len(model_state)} parameterized layers..."
         )
 
         for name, value in tqdm(model_state.items(), desc="Quantized Compression"):
+            # check if the parameter we're compressing is the weight zp
+            # or the input zp
+            is_weight_zp = name.endswith(weight_zp_suffix)
+            is_input_zp = name.endswith(input_zp_suffix)
+
+            # if we're saving the weight zp, fetch weight quant args
+            if is_weight_zp:
+                quant_args_zp = names_to_scheme.get(name[: -(len(weight_zp_suffix))])
+                if isinstance(quant_args_zp, tuple):
+                    # If tuple, first value is weight args, second is input args
+                    quant_args_zp = quant_args_zp[0]
+
+            # if we're saving the input zp, fetch input quant args
+            if is_input_zp:
+                input_args_zp = names_to_scheme.get(name[: -(len(input_zp_suffix))])
+                if isinstance(input_args_zp, tuple):
+                    # If tuple, first value is weight args, second is input args
+                    input_args_zp = input_args_zp[-1]
+
             if name.endswith(weight_suffix):
                 prefix = name[: -(len(weight_suffix))]
                 scale = model_state.get(merge_names(prefix, "weight_scale"), None)
@@ -94,7 +115,11 @@ class BaseQuantizationCompressor(BaseCompressor):
                 g_idx = model_state.get(merge_names(prefix, "weight_g_idx"), None)
                 if scale is not None:
                     # weight is quantized, compress it
-                    quant_args = names_to_scheme[prefix]
+                    if isinstance(names_to_scheme[prefix], tuple):
+                        quant_args = names_to_scheme[prefix][0]
+                    else:
+                        quant_args = names_to_scheme[prefix]
+
                     compressed_data = self.compress_weight(
                         weight=value,
                         scale=scale,
@@ -107,7 +132,11 @@ class BaseQuantizationCompressor(BaseCompressor):
                         compressed_dict[merge_names(prefix, key)] = value
                 else:
                     compressed_dict[name] = value.to("cpu")
-            elif name.endswith("zero_point") and torch.all(value == 0):
+            # only save if asym
+            elif is_weight_zp and quant_args_zp.symmetric:
+                continue
+            # only save if asym
+            elif is_input_zp and input_args_zp.symmetric:
                 continue
             elif name.endswith("g_idx") and torch.any(value <= -1):
                 continue
@@ -144,7 +173,7 @@ class BaseQuantizationCompressor(BaseCompressor):
 
     def _decompress_from_path(self, path_to_model, names_to_scheme, device):
         weight_mappings = get_nested_weight_mappings(
-            path_to_model, self.COMPRESSION_PARAM_NAMES
+            path_to_model, self.compression_param_names
         )
         for weight_name in weight_mappings.keys():
             weight_data = {}
@@ -161,7 +190,7 @@ class BaseQuantizationCompressor(BaseCompressor):
 
     def _decompress_from_state_dict(self, state_dict, names_to_scheme):
         weight_mappings = get_nested_mappings_from_state_dict(
-            state_dict, self.COMPRESSION_PARAM_NAMES
+            state_dict, self.compression_param_names
        )
         for weight_name in weight_mappings.keys():
             weight_data = {}
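With input-activation quantization, `names_to_scheme` values may now be either a bare weight `QuantizationArgs` or a `(weight_args, input_args)` tuple, which is why the compressor unpacks tuples before reading `symmetric`. A runnable sketch with stand-in args objects:

```python
# Sketch of the two shapes the updated compressor now accepts; `Args` is a
# hypothetical stand-in for QuantizationArgs so this stays self-contained.
class Args:
    def __init__(self, symmetric: bool):
        self.symmetric = symmetric

weight_args, input_args = Args(symmetric=True), Args(symmetric=False)
names_to_scheme = {
    "model.layers.0.q_proj": weight_args,                # weight-only
    "model.layers.0.k_proj": (weight_args, input_args),  # weight + input acts
}

entry = names_to_scheme["model.layers.0.k_proj"]
quant_args = entry[0] if isinstance(entry, tuple) else entry      # weight zp path
input_zp_args = entry[-1] if isinstance(entry, tuple) else entry  # input zp path
# symmetric zero points are all-zero, so the compressor skips saving them
print(quant_args.symmetric, input_zp_args.symmetric)  # True False
```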
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py
RENAMED
@@ -41,12 +41,18 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
     type to the type specified by the layer's QuantizationArgs.
     """
 
-    COMPRESSION_PARAM_NAMES = [
-        "weight",
-        "weight_scale",
-        "weight_zero_point",
-        "weight_g_idx",
-    ]
+    @property
+    def compression_param_names(self) -> Tuple[str]:
+        """
+        Returns a tuple of compression parameter names introduced by
+        the compressor during compression
+        """
+        return (
+            "weight",
+            "weight_scale",
+            "weight_zero_point",
+            "weight_g_idx",
+        )
 
     def compression_param_info(
         self,
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py
RENAMED
@@ -36,13 +36,19 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
     Compresses a quantized model by packing every eight 4-bit weights into an int32
     """
 
-    COMPRESSION_PARAM_NAMES = [
-        "weight_packed",
-        "weight_scale",
-        "weight_zero_point",
-        "weight_g_idx",
-        "weight_shape",
-    ]
+    @property
+    def compression_param_names(self) -> Tuple[str]:
+        """
+        Returns a tuple of compression parameter names introduced by
+        the compressor during compression
+        """
+        return (
+            "weight_packed",
+            "weight_scale",
+            "weight_zero_point",
+            "weight_g_idx",
+            "weight_shape",
+        )
 
     def compression_param_info(
         self,
@@ -138,8 +144,20 @@ def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
     """
     Packs a tensor of quantized weights stored in int8 into int32s with padding
 
+    Pseudocode:
+     1. Shift wrt num_bits to convert to unsigned. num_bits=8
+        [1,2] -> [129, 130]
+     2. Pad to fill in 32 bits
+        [129, 130] -> [129, 130, 0, 0]
+     3. convert to binary align in order
+        [129, 130, 0, 0] -> 00000000 00000000 10000010 10000001
+     4. convert aligned binary to number
+        00000000000000001000001010000001 -> 33409
+     5. covert back to uint32
+        33409 -> 33409
+
     :param value: tensor to pack
-    :param num_bits: number of bits used to store underlying data
+    :param num_bits: number of bits used to store underlying data, must be at least 1
     :returns: packed int32 tensor
     """
     if value.dtype is not torch.int8:
@@ -148,19 +166,22 @@ def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
     if num_bits > 8:
         raise ValueError("Packing is only supported for less than 8 bits")
 
+    if num_bits < 1:
+        raise ValueError(f"num_bits must be at least 1, got {num_bits}")
+
     # convert to unsigned for packing
-    offset = pow(2, num_bits) // 2
+    offset = 1 << (num_bits - 1)
     value = (value + offset).to(torch.uint8)
     value = value.cpu().numpy().astype(np.uint32)
     pack_factor = 32 // num_bits
 
     # pad input tensor and initialize packed output
     packed_size = math.ceil(value.shape[1] / pack_factor)
-    packed = np.zeros((value.shape[0], packed_size), dtype=np.uint32)
-    padding = packed.shape[1] * pack_factor - value.shape[1]
+    padding = packed_size * pack_factor - value.shape[1]
     value = np.pad(value, pad_width=[(0, 0), (0, padding)], constant_values=0)
 
     # pack values
+    packed = np.zeros((value.shape[0], packed_size), dtype=np.uint32)
     for i in range(pack_factor):
         packed |= value[:, i::pack_factor] << num_bits * i
 
@@ -174,7 +195,9 @@ def unpack_from_int32(
 ) -> torch.Tensor:
     """
     Unpacks a tensor of packed int32 weights into individual int8s, maintaining the
-    original bit range
+    original bit range.
+
+    Return tensors in int8
 
     :param value: tensor to upack
     :param num_bits: number of bits to unpack each data point into
@@ -192,7 +215,7 @@ def unpack_from_int32(
     pack_factor = 32 // num_bits
 
     # unpack
-    mask = pow(2, num_bits) - 1
+    mask = (1 << num_bits) - 1
     unpacked = torch.zeros(
         (value.shape[0], value.shape[1] * pack_factor),
         device=value.device,
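A round-trip sketch of the packing scheme documented in the pseudocode above, reimplemented standalone in NumPy (it mirrors the shift/pad/interleave steps but is not the library code itself):

```python
import math
import numpy as np

def pack_rows(value: np.ndarray, num_bits: int) -> np.ndarray:
    """Pack signed num_bits integers (row-wise) into uint32 words."""
    offset = 1 << (num_bits - 1)             # shift to unsigned: -8..7 -> 0..15
    unsigned = (value + offset).astype(np.uint32)
    pack_factor = 32 // num_bits             # 8 values per word for num_bits=4
    packed_size = math.ceil(value.shape[1] / pack_factor)
    padding = packed_size * pack_factor - value.shape[1]
    unsigned = np.pad(unsigned, [(0, 0), (0, padding)], constant_values=0)
    packed = np.zeros((value.shape[0], packed_size), dtype=np.uint32)
    for i in range(pack_factor):             # interleave values into each word
        packed |= unsigned[:, i::pack_factor] << (num_bits * i)
    return packed

def unpack_rows(packed: np.ndarray, num_bits: int, original_cols: int) -> np.ndarray:
    mask = (1 << num_bits) - 1
    pack_factor = 32 // num_bits
    out = np.zeros((packed.shape[0], packed.shape[1] * pack_factor), dtype=np.uint32)
    for i in range(pack_factor):
        out[:, i::pack_factor] = (packed >> (num_bits * i)) & mask
    offset = 1 << (num_bits - 1)
    return out[:, :original_cols].astype(np.int32) - offset  # back to signed

vals = np.array([[-8, -1, 0, 7, 3]], dtype=np.int8)
assert (unpack_rows(pack_rows(vals, 4), 4, 5) == vals).all()
```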
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/sparse_compressors/base.py
RENAMED
@@ -30,8 +30,7 @@ _LOGGER: logging.Logger = logging.getLogger(__name__)
 class BaseSparseCompressor(BaseCompressor):
     """
     Base class representing a sparse compression algorithm. Each child class should
-    implement compress_weight and decompress_weight; additionally, child
-    classes should also define COMPRESSION_PARAM_NAMES.
+    implement compression_param_names, compress_weight and decompress_weight;
 
     Compressors support compressing/decompressing a full module state dict or a single
     quantized PyTorch leaf module.
@@ -113,7 +112,7 @@ class BaseSparseCompressor(BaseCompressor):
         """
         weight_mappings, ignored_params = get_nested_weight_mappings(
             path_to_model_or_tensors,
-            self.COMPRESSION_PARAM_NAMES,
+            self.compression_param_names,
             return_unmatched_params=True,
         )
         for weight_name in weight_mappings.keys():
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/sparse_compressors/dense.py
RENAMED
@@ -25,6 +25,14 @@ class DenseCompressor(BaseCompressor):
     Identity compressor for dense models, returns the original state_dict
     """
 
+    @property
+    def compression_param_names(self) -> Tuple[str]:
+        """
+        Returns a tuple of compression parameter names introduced by
+        the compressor during compression
+        """
+        return ()
+
     def compress(self, model_state: Dict[str, Tensor], **kwargs) -> Dict[str, Tensor]:
         return model_state
 
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py
RENAMED
@@ -40,11 +40,17 @@ class Sparse24BitMaskCompressor(BaseSparseCompressor):
     values tensor, with their locations stored in a 2d bitmask
     """
 
-    COMPRESSION_PARAM_NAMES = [
-        "shape",
-        "compressed",
-        "bitmask",
-    ]
+    @property
+    def compression_param_names(self) -> Tuple[str]:
+        """
+        Returns a tuple of compression parameter names introduced by
+        the compressor during compression
+        """
+        return (
+            "shape",
+            "compressed",
+            "bitmask",
+        )
 
     def compress_weight(self, name, value):
         bitmask_tensor = Sparse24BitMaskTensor.from_dense(
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py
RENAMED
@@ -38,7 +38,13 @@ class BitmaskCompressor(BaseSparseCompressor):
     values tensor, with their locations stored in a 2d bitmask
     """
 
-    COMPRESSION_PARAM_NAMES = ["shape", "compressed", "bitmask", "row_offsets"]
+    @property
+    def compression_param_names(self) -> Tuple[str]:
+        """
+        Returns a tuple of compression parameter names introduced by
+        the compressor during compression
+        """
+        return ("shape", "compressed", "bitmask", "row_offsets")
 
     def compress_weight(self, name, value):
         bitmask_tensor = BitmaskTensor.from_dense(value)
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py
RENAMED
@@ -42,8 +42,6 @@ class Marlin24Compressor(BaseCompressor):
     Marlin24 kernel. Decompression is not implemented for this compressor.
     """
 
-    COMPRESSION_PARAM_NAMES = ["weight_packed", "scale_packed", "meta"]
-
     @staticmethod
     def validate_quant_compatability(
         model_quant_args: Dict[str, QuantizationArgs]
@@ -105,6 +103,14 @@ class Marlin24Compressor(BaseCompressor):
 
         return True
 
+    @property
+    def compression_param_names(self) -> Tuple[str]:
+        """
+        Returns a tuple of compression parameter names introduced by
+        the compressor during compression
+        """
+        return ("weight_packed", "scale_packed", "meta")
+
     def compress(
         self,
         model_state: Dict[str, Tensor],
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/linear/compressed_linear.py
RENAMED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import warnings
 from typing import Dict, Tuple
 
 import torch
@@ -21,6 +22,7 @@ from compressed_tensors.quantization import (
     QuantizationStatus,
     initialize_module_for_quantization,
 )
+from compressed_tensors.utils import register_offload_parameter
 from torch import Tensor
 from torch.nn import Parameter
 from torch.nn.functional import linear
@@ -32,11 +34,16 @@ class CompressedLinear(Linear):
     Wrapper module for running a compressed forward pass of a quantized Linear module.
     The wrapped layer will decompressed on each forward call.
 
-    :param module: dense linear module to replace
-    :param quantization_scheme: quantization config for the module to wrap
-    :param quantization_format: compression format module is stored as
     """
 
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        warnings.warn(
+            "CompressedLinear should not be initialized directly. "
+            "Use the from_linear method instead.",
+            UserWarning,
+        )
+
     @classmethod
     @torch.no_grad()
     def from_linear(
@@ -45,6 +52,12 @@ class CompressedLinear(Linear):
         quantization_scheme: QuantizationScheme,
         quantization_format: str,
     ):
+        """
+        :param module: dense linear module to replace
+        :param quantization_scheme: quantization config for the module to wrap
+        :param quantization_format: compression format module is stored as
+        :return: CompressedLinear module wrapping the input module
+        """
         module.__class__ = CompressedLinear
         module.compressor = BaseCompressor.load_from_registry(quantization_format)
         device = next(module.parameters()).device
@@ -68,7 +81,7 @@ class CompressedLinear(Linear):
             param = Parameter(
                 torch.empty(shape, device=device, dtype=dtype), requires_grad=False
             )
-            module.register_parameter(name, param)
+            register_offload_parameter(module, name, param)
 
         # mark module as compressed
         module.quantization_status = QuantizationStatus.COMPRESSED
@@ -85,5 +98,11 @@ class CompressedLinear(Linear):
         """
         Decompresses the weight, then runs the wrapped forward pass
         """
-        weight_data = self.compressor.decompress_module(self)
-        return linear(input, weight_data, self.bias)
+        if self.quantization_status == QuantizationStatus.COMPRESSED:
+            weight_data = self.compressor.decompress_module(self)
+            param = Parameter(weight_data, requires_grad=False)
+            register_offload_parameter(self, "weight", param)
+
+            self.quantization_status = QuantizationStatus.FROZEN
+
+        return linear(input, self.weight, self.bias)
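The reworked `forward` decompresses once, registers the result as a real `weight` parameter, and flips the status to FROZEN so later calls skip decompression. The caching pattern in isolation (toy module with a made-up int8-times-scale "decompression"; not the library class):

```python
import torch
from torch import nn
from torch.nn.functional import linear

class DecompressOnFirstForward(nn.Module):
    """Toy version of the pattern: decompress once, cache, then reuse."""
    def __init__(self, packed_weight: torch.Tensor, scale: torch.Tensor):
        super().__init__()
        self.register_buffer("packed_weight", packed_weight)  # e.g. int8 payload
        self.register_buffer("scale", scale)
        self.weight = None          # filled on first call, like FROZEN status
        self.bias = None

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.weight is None:     # mirrors the COMPRESSED -> FROZEN switch
            self.weight = self.packed_weight.float() * self.scale
        return linear(x, self.weight, self.bias)

m = DecompressOnFirstForward(
    torch.randint(-8, 8, (4, 3), dtype=torch.int8), torch.tensor(0.1)
)
print(m(torch.randn(2, 3)).shape)  # torch.Size([2, 4])
```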
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/quantization/lifecycle/apply.py
RENAMED
@@ -52,8 +52,8 @@ __all__ = [
     "apply_quantization_config",
     "apply_quantization_status",
     "find_name_or_class_matches",
-    "expand_sparse_target_names",
-    "is_sparse_target",
+    "expand_target_names",
+    "is_target",
 ]
 
 from compressed_tensors.quantization.utils.helpers import is_module_quantized
@@ -247,8 +247,10 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
         model.apply(compress_quantized_weights)
 
 
-def expand_sparse_target_names(
-    model: Module, targets: Iterable[str], ignore: Iterable[str]
+def expand_target_names(
+    model: Module,
+    targets: Optional[Iterable[str]] = None,
+    ignore: Optional[Iterable[str]] = None,
 ) -> Set[str]:
     """
     Finds all unique module names in the model that match the given
@@ -257,20 +259,23 @@ def expand_sparse_target_names(
     Note: Targets must be regexes, layer types, or full layer names.
 
     :param model: model to search for targets in
-    :param targets:
-    :param ignore:
+    :param targets: Iterable of targets to search for
+    :param ignore: Iterable of targets to ignore
     :return: set of all targets that match the given targets and should
         not be ignored
     """
     return {
         name
        for name, module in iter_named_leaf_modules(model)
-        if is_sparse_target(name, module, targets, ignore)
+        if is_target(name, module, targets, ignore)
     }
 
 
-def is_sparse_target(
-    name: str, module: Module, targets: Iterable[str], ignore: Iterable[str]
+def is_target(
+    name: str,
+    module: Module,
+    targets: Optional[Iterable[str]] = None,
+    ignore: Optional[Iterable[str]] = None,
 ) -> bool:
     """
     Determines if a module should be included in the targets based on the
@@ -280,12 +285,12 @@ def is_sparse_target(
 
     :param name: name of the module
     :param module: the module itself
-    :param targets:
-    :param ignore:
+    :param targets: Iterable of targets to search for
+    :param ignore: Iterable of targets to ignore
     :return: True if the module is a target and not ignored, False otherwise
     """
     return bool(
-        find_name_or_class_matches(name, module, targets)
+        find_name_or_class_matches(name, module, targets or [])
         and not find_name_or_class_matches(name, module, ignore or [])
     )
 
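The renamed helpers also gained `None`-tolerant defaults (`targets or []`). A simplified sketch of the matching semantics, with plain name/regex matching standing in for `find_name_or_class_matches` (which additionally matches layer types):

```python
import re
from typing import Iterable, Optional

def matches(name: str, patterns: Iterable[str]) -> bool:
    # compressed-tensors targets use a "re:" prefix for regexes
    return any(
        re.fullmatch(p.removeprefix("re:"), name) if p.startswith("re:")
        else p == name
        for p in patterns
    )

def is_target(name: str, targets: Optional[Iterable[str]] = None,
              ignore: Optional[Iterable[str]] = None) -> bool:
    return matches(name, targets or []) and not matches(name, ignore or [])

print(is_target("model.layers.0.q_proj", targets=["re:.*q_proj"]))  # True
print(is_target("lm_head", targets=["re:.*"], ignore=["lm_head"]))  # False
print(is_target("model.layers.0.q_proj"))  # False: no targets, no crash
```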
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/quantization/lifecycle/initialize.py
RENAMED
@@ -203,11 +203,10 @@ def _initialize_attn_scales(module: Module) -> None:
         torch.empty(expected_shape, dtype=scale_dtype, device=device),
         requires_grad=False,
     )
-
-    module.register_parameter(KVCacheScaleType.KEY.value, init_scale)
+    register_offload_parameter(module, KVCacheScaleType.KEY.value, init_scale)
 
     init_scale = Parameter(
         torch.empty(expected_shape, dtype=scale_dtype, device=device),
         requires_grad=False,
     )
-    module.register_parameter(KVCacheScaleType.VALUE.value, init_scale)
+    register_offload_parameter(module, KVCacheScaleType.VALUE.value, init_scale)
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/quantization/quant_args.py
RENAMED
@@ -18,6 +18,7 @@ from typing import Any, Dict, Optional, Union
 
 import torch
 from compressed_tensors.utils import Aliasable
+from compressed_tensors.utils.helpers import deprecated
 from pydantic import BaseModel, Field, field_validator, model_validator
 
 
@@ -109,10 +110,10 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
     dynamic: bool = False
     actorder: Union[ActivationOrdering, bool, None] = None
     observer: Optional[str] = Field(
-        default="minmax",
+        default=None,
         description=(
-            "The class to use to compute the quantization param - "
-            "scale and zero-point'"
+            "Determines the method of computing quantization parameters (scales and "
+            "zero-points). Defaults to min-max when not using dynamic quantization"
         ),
     )
     observer_kwargs: Dict[str, Any] = Field(
@@ -123,12 +124,6 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
         ),
     )
 
-    def get_observer(self):
-        """
-        :return: torch quantization FakeQuantize built based on these QuantizationArgs
-        """
-        return self.observer
-
     @field_validator("type", mode="before")
     def validate_type(cls, value) -> QuantizationType:
         if isinstance(value, str):
@@ -250,6 +245,10 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
         else:
             raise ValueError(f"Invalid quantization type {self.type}")
 
+    @deprecated("QuantizationArgs.observer")
+    def get_observer(self) -> str:
+        return self.observer
+
 
 def round_to_quantized_type(
     tensor: torch.Tensor, args: QuantizationArgs
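`get_observer` survives only as a deprecated alias for the `observer` field. A self-contained sketch of what the `@deprecated` wrapper plausibly does (the real decorator lives in `utils/helpers.py`; names here are simplified):

```python
import warnings
from functools import wraps

def deprecated(future_name=None):
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            warnings.warn(
                f"{func.__name__} is deprecated"
                + (f", use {future_name} instead" if future_name else ""),
                DeprecationWarning,
            )
            return func(*args, **kwargs)
        return wrapper
    return decorator

class QuantArgs:  # stand-in for QuantizationArgs
    observer = "minmax"

    @deprecated("QuantizationArgs.observer")
    def get_observer(self):
        return self.observer

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    assert QuantArgs().get_observer() == "minmax"
    print(caught[0].message)  # get_observer is deprecated, use ...
```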
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/utils/helpers.py
RENAMED
@@ -14,13 +14,17 @@
 
 import warnings
 from functools import wraps
-from typing import Any, Callable, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
 
 import numpy
 import torch
 from transformers import AutoConfig
 
 
+if TYPE_CHECKING:
+    from compressed_tensors.compressors import ModelCompressor
+
+
 __all__ = [
     "infer_compressor_from_model_config",
     "fix_fsdp_module_name",
@@ -166,8 +170,8 @@ def deprecated(future_name: Optional[str] = None, message: Optional[str] = None)
     """
     Decorator to mark functions as deprecated
 
-    :param new_function: Function called in place of
-    :param message:
+    :param new_function: Function called in place of deprecated function
+    :param message: Deprecation message, replaces default deprecation message
     """
 
     def decorator(func: Callable[[Any], Any]):
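The new `TYPE_CHECKING` block is the standard trick for type-only imports that would otherwise create an import cycle, since `utils.helpers` is itself imported by the compressors package. The pattern in isolation:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # evaluated by static type checkers only, never at runtime
    from compressed_tensors.compressors import ModelCompressor


def describe(compressor: "ModelCompressor") -> str:
    # the string annotation keeps runtime free of the circular import
    return type(compressor).__name__
```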
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/utils/offload.py
RENAMED
@@ -26,6 +26,7 @@ Utilities associated with offloading functionality provided by `accelerate`.
 """
 
 import contextlib
+import warnings
 from functools import wraps
 from typing import Any, Callable, Dict, Literal, Optional, Union
 
@@ -200,9 +201,14 @@ def update_offload_parameter(
     """
     param = getattr(module, name)
     data = data.to(param.dtype)
+    if param.data.shape != data.shape:
+        warnings.warn(
+            f"Shape of parameter being updated {param.data.shape} does not match shape "
+            f"of update data {data.shape}"
+        )
 
     # copy data into onloaded parameter if applicable
-    if param.device != "meta":
+    if param.device != torch.device("meta"):
         param.data.copy_(data)
 
     # update offload dict
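The device-check fix matters because comparing a `torch.device` against the string `"meta"` has not behaved consistently across torch versions; comparing against `torch.device("meta")` is unambiguous. A quick check:

```python
import torch

p = torch.nn.Parameter(torch.empty(2, 2, device="meta"))
# unambiguous device-to-device comparison, as in the fixed code
print(p.device != torch.device("meta"))  # False: the parameter is on meta
```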
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors/utils/safetensors_load.py
RENAMED
@@ -16,7 +16,7 @@ import json
 import os
 import re
 import struct
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Dict, Iterable, Optional, Tuple, Union
 
 from safetensors import safe_open
 from torch import Tensor
@@ -180,7 +180,9 @@ def get_weight_mappings(path_to_model_or_tensors: str) -> Dict[str, str]:
 
 
 def get_nested_weight_mappings(
-    model_path: str, params_to_nest: List[str], return_unmatched_params: bool = False
+    model_path: str,
+    params_to_nest: Iterable[str],
+    return_unmatched_params: bool = False,
 ) -> Union[NestedWeightMappingType, Tuple[NestedWeightMappingType, WeightMappingType]]:
     """
     Takes a path to a state dict saved in safetensors format and returns a nested
@@ -211,7 +213,7 @@ def get_nested_weight_mappings(
 
     :param model_path: Path to the safetensors state dict, must contain either a
         single safetensors file or multiple files with an index.
-    :param params_to_nest:
+    :param params_to_nest: Iterable of parameter names to nest.
     :param return_unmatched_params: If True, return a second dictionary containing
         the remaining parameters that were not matched to the params_to_nest.
     :return:
@@ -247,7 +249,7 @@ def get_nested_weight_mappings(
 
 
 def get_nested_mappings_from_state_dict(
-    state_dict, params_to_nest
+    state_dict, params_to_nest: Iterable[str]
 ) -> NestedWeightMappingType:
     """
     Takes a state dict and returns a nested mapping from uncompressed
@@ -262,7 +264,7 @@ def get_nested_mappings_from_state_dict(
     }
 
     :param state_dict: state dict of the model
-    :param params_to_nest:
+    :param params_to_nest: Iterable of parameter names to nest.
     :return: Nested mapping of parameterized layer names to the value of
         each layer's compression parameters.
     """
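For reference, the shape of the structures these helpers return (layer and file names below are made up; the path-based variant maps to safetensors file names, the state-dict variant maps to tensors):

```python
# Illustrative output shapes only; all names are hypothetical.
nested_weight_mappings = {
    "model.layers.0.q_proj": {
        "weight_packed": "model-00001-of-00002.safetensors",
        "weight_scale": "model-00001-of-00002.safetensors",
        "weight_shape": "model-00001-of-00002.safetensors",
    },
}
unmatched_params = {  # second return value when return_unmatched_params=True
    "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
}
```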
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors.egg-info/PKG-INFO
RENAMED
@@ -1,15 +1,35 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.1
+Version: 0.9.3
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
 Author-email: support@neuralmagic.com
 License: Apache 2.0
 Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: torch>=1.7.0
+Requires-Dist: transformers
+Requires-Dist: pydantic>=2.0
 Provides-Extra: dev
+Requires-Dist: black==22.12.0; extra == "dev"
+Requires-Dist: isort==5.8.0; extra == "dev"
+Requires-Dist: wheel>=0.36.2; extra == "dev"
+Requires-Dist: flake8>=3.8.3; extra == "dev"
+Requires-Dist: pytest>=6.0.0; extra == "dev"
+Requires-Dist: nbconvert>=7.16.3; extra == "dev"
 Provides-Extra: accelerate
-
+Requires-Dist: accelerate; extra == "accelerate"
+Dynamic: author
+Dynamic: author-email
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: summary
 
 # compressed-tensors
 
{compressed-tensors-0.9.1 → compressed_tensors-0.9.3}/src/compressed_tensors.egg-info/SOURCES.txt
RENAMED
@@ -54,4 +54,6 @@ src/compressed_tensors/utils/offload.py
 src/compressed_tensors/utils/permutations_24.py
 src/compressed_tensors/utils/permute.py
 src/compressed_tensors/utils/safetensors_load.py
-src/compressed_tensors/utils/semi_structured_conversions.py
+src/compressed_tensors/utils/semi_structured_conversions.py
+tests/test_registry.py
+tests/testing_utils.py
compressed_tensors-0.9.3/tests/test_registry.py
ADDED
@@ -0,0 +1,53 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+from compressed_tensors import (
+    BaseCompressor,
+    BitmaskCompressor,
+    BitmaskConfig,
+    CompressionFormat,
+    DenseCompressor,
+    DenseSparsityConfig,
+    SparsityCompressionConfig,
+)
+
+
+@pytest.mark.parametrize(
+    "name,type",
+    [
+        [CompressionFormat.sparse_bitmask.value, BitmaskConfig],
+        [CompressionFormat.dense.value, DenseSparsityConfig],
+    ],
+)
+def test_configs(name, type):
+    config = SparsityCompressionConfig.load_from_registry(name)
+    assert isinstance(config, type)
+    assert config.format == name
+
+
+@pytest.mark.parametrize(
+    "name,type",
+    [
+        [CompressionFormat.sparse_bitmask.value, BitmaskCompressor],
+        [CompressionFormat.dense.value, DenseCompressor],
+    ],
+)
+def test_compressors(name, type):
+    compressor = BaseCompressor.load_from_registry(
+        name, config=SparsityCompressionConfig(format="none")
+    )
+    assert isinstance(compressor, type)
+    assert isinstance(compressor.config, SparsityCompressionConfig)
+    assert compressor.config.format == "none"
compressed_tensors-0.9.3/tests/testing_utils.py
ADDED
@@ -0,0 +1,144 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# flake8: noqa
+import unittest
+
+import pytest
+
+
+def compressed_tensors_config_available():
+    try:
+        from transformers.utils.quantization_config import (  # noqa: F401
+            CompressedTensorsConfig,
+        )
+
+        return True
+    except ImportError:
+        return False
+
+
+def accelerate_availabe():
+    try:
+        import accelerate  # noqa: F401
+
+        return True
+
+    except ImportError:
+        return False
+
+
+_is_compressed_tensors_config_available = compressed_tensors_config_available()
+_is_accelerate_available = accelerate_availabe()
+
+
+def requires_hf_quantizer():
+    return pytest.mark.skipif(
+        not _is_compressed_tensors_config_available,
+        reason="requires transformers>=4.45 to support CompressedTensorsHfQuantizer",
+    )
+
+
+def requires_accelerate():
+    return pytest.mark.skipif(
+        not _is_accelerate_available,
+        reason="requires accelerate",
+    )
+
+
+def get_random_mat(M, K, dtype) -> "torch.Tensor":
+    """
+    :param M: number of rows
+    :param K: number of columns
+    :param dtype: data type of the matrix
+    :return: random matrix of shape (M, K) with non-zero values
+    """
+    import torch
+    from compressed_tensors.quantization import FP8_DTYPE
+
+    rand_tensor_dtype = dtype
+    if dtype in [torch.int8, FP8_DTYPE]:
+        rand_tensor_dtype = torch.float16
+    mat = torch.rand(M, K, dtype=rand_tensor_dtype).cuda()
+    mat = mat.masked_fill_(mat == 0, 1)
+    return mat.to(dtype)
+
+
+def generate_pruned_semi_structured_mat(M, K, dtype) -> "torch.Tensor":
+    """
+    :param M: number of rows
+    :param K: number of columns
+    :param dtype: data type of the matrix
+    :return: random matrix of shape (M, K) with 2:4 sparsity pattern
+    """
+    import torch
+    from compressed_tensors.quantization import FP8_DTYPE
+
+    mask = torch.Tensor([0, 0, 1, 1]).tile((M, K // 4)).bool()
+    rand_tensor_dtype = dtype
+    if dtype in [torch.int8, FP8_DTYPE]:
+        rand_tensor_dtype = torch.float16
+    mat = torch.rand(M, K, dtype=rand_tensor_dtype)
+    mat = mat.masked_fill_(mat == 0, 1)
+    if dtype == FP8_DTYPE:
+        # some float8_e4m3fn operations are not supported on CPU
+        mat = mat.cuda()
+        mask = mask.cuda()
+    mat = mat * mask
+    return mat.to(dtype)
+
+
+def induce_sparsity(tensor, sparsity_ratio) -> "torch.Tensor":
+    """
+    Makes a tensor sparse by zeroing out a given fraction
+    of its smallest absolute values.
+
+    :param: weight_tensor (torch.Tensor): The input weight tensor.
+    :param: sparsity_ratio (float): Fraction of weights to be zeroed
+        (0 <= sparsity_ratio <= 1).
+    :returns: torch.Tensor: Sparse version of the input tensor.
+    """
+    import torch
+
+    if not (0 <= sparsity_ratio <= 1):
+        raise ValueError("Sparsity ratio must be between 0 and 1.")
+
+    # Flatten the tensor and compute the threshold for sparsity
+    flattened = tensor.view(-1)
+    k = int(sparsity_ratio * flattened.numel())
+
+    if k > 0:
+        threshold = torch.topk(flattened.abs(), k, largest=False).values.max()
+        sparse_tensor = torch.where(
+            tensor.abs() > threshold, tensor, torch.zeros_like(tensor)
+        )
+    else:
+        sparse_tensor = tensor
+
+    return sparse_tensor
+
+
+def is_gpu_available():
+    """
+    :return: True if a GPU is available, False otherwise
+    """
+    try:
+        import torch  # noqa: F401
+
+        return torch.cuda.device_count() > 0
+    except ImportError:
+        return False
+
+
+def requires_gpu(test_case):
+    return unittest.skipUnless(is_gpu_available(), "test requires GPU")(test_case)
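The thresholding logic in `induce_sparsity` in a quick standalone run:

```python
import torch

t = torch.tensor([[0.05, -2.0, 0.3, 1.5]])
k = int(0.5 * t.numel())  # zero out the 2 smallest |values|
thr = torch.topk(t.abs().view(-1), k, largest=False).values.max()
sparse = torch.where(t.abs() > thr, t, torch.zeros_like(t))
print(sparse)  # tensor([[ 0.0000, -2.0000,  0.0000,  1.5000]])
```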