compressed-tensors 0.9.5a20250428__tar.gz → 0.9.5a20250507__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. {compressed_tensors-0.9.5a20250428/src/compressed_tensors.egg-info → compressed_tensors-0.9.5a20250507}/PKG-INFO +1 -1
  2. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +20 -38
  3. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/quantized_compressors/base.py +92 -85
  4. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +23 -11
  5. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/quantization/lifecycle/apply.py +4 -4
  6. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/quantization/lifecycle/initialize.py +2 -1
  7. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/utils/helpers.py +7 -0
  8. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/utils/safetensors_load.py +10 -10
  9. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/version.py +1 -1
  10. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  11. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +7 -7
  12. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_compressors/quantized_compressors/test_int_quant.py +9 -9
  13. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_compressors/quantized_compressors/test_pack_quant.py +17 -21
  14. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +4 -4
  15. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_quantization/lifecycle/test_initialize.py +3 -1
  16. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/.github/.gitkeep +0 -0
  17. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/.github/actions/test/action.yml +0 -0
  18. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/.github/scripts/step-status +0 -0
  19. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/.github/workflows/build-test.yml +0 -0
  20. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/.github/workflows/build.yml +0 -0
  21. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/.github/workflows/report.yml +0 -0
  22. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/.github/workflows/test-check.yaml +0 -0
  23. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/.github/workflows/test.yml +0 -0
  24. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/.github/workflows/trigger-all.yml +0 -0
  25. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/.github/workflows/upload.yml +0 -0
  26. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/.gitignore +0 -0
  27. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/LICENSE +0 -0
  28. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/Makefile +0 -0
  29. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/README.md +0 -0
  30. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  31. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/examples/bit_packing/int4_config.json +0 -0
  32. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/examples/bitmask_compression.ipynb +0 -0
  33. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  34. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  35. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/examples/llama_1.1b/example_quant_config.json +0 -0
  36. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  37. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/examples/quantize_and_pack_int4.ipynb +0 -0
  38. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/pyproject.toml +0 -0
  39. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/setup.cfg +0 -0
  40. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/setup.py +0 -0
  41. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/__init__.py +0 -0
  42. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/README.md +0 -0
  43. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/__init__.py +0 -0
  44. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/base.py +0 -0
  45. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/__init__.py +0 -0
  46. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/base.py +0 -0
  47. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/helpers.py +0 -0
  48. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  49. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  50. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  51. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  52. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  53. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  54. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  55. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  56. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  57. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  58. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/config/__init__.py +0 -0
  59. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/config/base.py +0 -0
  60. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/config/dense.py +0 -0
  61. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  62. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  63. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/linear/__init__.py +0 -0
  64. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  65. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/quantization/__init__.py +0 -0
  66. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  67. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  68. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  69. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  70. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/quantization/quant_args.py +0 -0
  71. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/quantization/quant_config.py +0 -0
  72. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  73. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  74. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  75. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/registry/__init__.py +0 -0
  76. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/registry/registry.py +0 -0
  77. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/utils/__init__.py +0 -0
  78. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/utils/offload.py +0 -0
  79. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/utils/permutations_24.py +0 -0
  80. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/utils/permute.py +0 -0
  81. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  82. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
  83. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  84. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors.egg-info/requires.txt +0 -0
  85. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  86. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/__init__.py +0 -0
  87. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/conftest.py +0 -0
  88. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_compressors/__init__.py +0 -0
  89. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_compressors/model_compressors/__init__.py +0 -0
  90. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  91. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  92. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  93. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  94. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  95. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  96. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_configs/__init__.py +0 -0
  97. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_configs/test_base.py +0 -0
  98. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  99. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_linear/__init__.py +0 -0
  100. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_linear/test_compressed_linear.py +0 -0
  101. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_quantization/__init__.py +0 -0
  102. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_quantization/lifecycle/__init__.py +0 -0
  103. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_quantization/lifecycle/conftest.py +0 -0
  104. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  105. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  106. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  107. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  108. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
  109. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  110. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_quantization/test_configs/__init__.py +0 -0
  111. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  112. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  113. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_quantization/test_quant_args.py +0 -0
  114. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_quantization/test_quant_config.py +0 -0
  115. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_quantization/test_quant_scheme.py +0 -0
  116. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  117. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_registry.py +0 -0
  118. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_utils/__init__.py +0 -0
  119. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_utils/test_helpers.py +0 -0
  120. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_utils/test_offload.py +0 -0
  121. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_utils/test_safetensors_load.py +0 -0
  122. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/testing_utils.py +0 -0
  123. {compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/utils/copyright.py +0 -0

{compressed_tensors-0.9.5a20250428/src/compressed_tensors.egg-info → compressed_tensors-0.9.5a20250507}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.5a20250428
+Version: 0.9.5a20250507
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

{compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/model_compressors/model_compressor.py
@@ -19,7 +19,7 @@ import os
 import re
 from contextlib import contextmanager
 from copy import deepcopy
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, TypeVar, Union
 
 import compressed_tensors
 import torch
@@ -36,12 +36,12 @@ from compressed_tensors.config import CompressionFormat, SparsityCompressionConf
 from compressed_tensors.quantization import (
     DEFAULT_QUANTIZATION_METHOD,
     QuantizationConfig,
+    QuantizationScheme,
     QuantizationStatus,
     apply_quantization_config,
     load_pretrained_quantization_parameters,
 )
 from compressed_tensors.quantization.lifecycle import expand_target_names
-from compressed_tensors.quantization.quant_args import QuantizationArgs
 from compressed_tensors.quantization.utils import (
     is_module_quantized,
     iter_named_leaf_modules,
@@ -64,7 +64,7 @@ from transformers import AutoConfig
 from transformers.file_utils import CONFIG_NAME
 
 
-__all__ = ["ModelCompressor", "map_modules_to_quant_args"]
+__all__ = ["ModelCompressor", "map_module_to_scheme"]
 
 _LOGGER: logging.Logger = logging.getLogger(__name__)
 
@@ -372,20 +372,17 @@ class ModelCompressor:
         :param state_dict: optional uncompressed state_dict to insert into model
         :return: compressed state dict
         """
+
         if state_dict is None:
             state_dict = model.state_dict()
 
-        compressed_state_dict = state_dict
-
-        quantized_modules_to_args: Dict[
-            str, QuantizationArgs
-        ] = map_modules_to_quant_args(model)
-
         if self.quantization_compressor is not None:
-            compressed_state_dict = self.quantization_compressor.compress(
-                state_dict, names_to_scheme=quantized_modules_to_args
+            module_to_scheme = map_module_to_scheme(model)
+            state_dict = self.quantization_compressor.compress(
+                state_dict, names_to_scheme=module_to_scheme
             )
 
+            # TODO: consider sparse compression to also be compression
             if self.quantization_config.format != CompressionFormat.dense.value:
                 self.quantization_config.quantization_status = (
                     QuantizationStatus.COMPRESSED
@@ -397,8 +394,8 @@ class ModelCompressor:
                 targets=self.sparsity_config.targets,
                 ignore=self.sparsity_config.ignore,
             )
-            compressed_state_dict = self.sparsity_compressor.compress(
-                compressed_state_dict,
+            state_dict = self.sparsity_compressor.compress(
+                state_dict,
                 compression_targets=sparse_compression_targets,
             )
 
@@ -407,7 +404,7 @@ class ModelCompressor:
         # https://github.com/huggingface/transformers/pull/30488
         transformers.modeling_utils.dtype_byte_size = new_dtype_byte_size
 
-        return compressed_state_dict
+        return state_dict
 
     def decompress(self, model_path: str, model: Module):
         """
@@ -576,8 +573,8 @@ class ModelCompressor:
         :param model: The model whose weights are to be updated.
         """
 
-        for name, data in tqdm(dense_weight_generator, desc="Decompressing model"):
-            module = operator.attrgetter(name)(model)
+        for mod_path, data in tqdm(dense_weight_generator, desc="Decompressing model"):
+            module = operator.attrgetter(mod_path)(model)
 
             params_device = next(module.parameters()).device
             device = "cpu" if has_offloaded_params(module) else params_device
@@ -605,30 +602,15 @@ class ModelCompressor:
             update_parameter_data(module, param_data, param_name)
 
 
-def map_modules_to_quant_args(
-    model: Module,
-) -> Dict[str, Union[QuantizationArgs, Tuple[QuantizationArgs, QuantizationArgs]]]:
+def map_module_to_scheme(model: Module) -> Dict[str, QuantizationScheme]:
     """
-    Given a pytorch model, map out the submodule name (usually linear layers)
-    to the weight QuantizationArgs. If running input activation quantization, will also
-    map to the input QuantizationArgs in a tuple.
-
-    :param model: pytorch model
+    Returns a dictionary which maps quantized module names to their quantization schemes
     """
-    quantized_modules_to_args = {}
-    for name, submodule in iter_named_leaf_modules(model):
-        if is_module_quantized(submodule):
-            if submodule.quantization_scheme.weights is not None:
-                name = fix_fsdp_module_name(name)
-                quantized_modules_to_args[name] = submodule.quantization_scheme.weights
-            if submodule.quantization_scheme.input_activations is not None:
-                weight_args = quantized_modules_to_args.get(name)
-                quantized_modules_to_args[name] = (
-                    weight_args,
-                    submodule.quantization_scheme.input_activations,
-                )
-
-    return quantized_modules_to_args
+    return {
+        fix_fsdp_module_name(name): module.quantization_scheme
+        for name, module in iter_named_leaf_modules(model)
+        if is_module_quantized(module)
+    }
 
 
 # HACK: Override the dtype_byte_size function in transformers to support float8 types
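
The rewritten map_module_to_scheme returns one QuantizationScheme per quantized module in place of the old QuantizationArgs-or-tuple union; callers now pull weights or input_activations off the scheme as needed. A minimal sketch of the new mapping shape, assuming a model with a single 4-bit group-quantized linear layer (the module name "decoder.0" and the scheme contents are illustrative, not from a real checkpoint):

    from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme

    # hypothetical equivalent of map_module_to_scheme(model) for one quantized layer
    module_to_scheme = {
        "decoder.0": QuantizationScheme(
            targets=["Linear"],
            weights=QuantizationArgs(
                num_bits=4, symmetric=True, strategy="group", group_size=128
            ),
        )
    }

    # compressors now read the per-tensor args off the scheme
    weight_args = module_to_scheme["decoder.0"].weights
    assert weight_args.group_size == 128
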

{compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/quantized_compressors/base.py
@@ -14,15 +14,16 @@
 
 import logging
 from pathlib import Path
-from typing import Any, Dict, Generator, Optional, Tuple, Union
+from typing import Any, Dict, Generator, Tuple, Union
 
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
-from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
+from compressed_tensors.quantization import QuantizationScheme, QuantizationStrategy
 from compressed_tensors.utils import (
     get_nested_mappings_from_state_dict,
     get_nested_weight_mappings,
     merge_names,
+    remove_suffix,
 )
 from safetensors import safe_open
 from torch import Tensor
@@ -69,7 +70,7 @@ class BaseQuantizationCompressor(BaseCompressor):
     def compress(
         self,
         model_state: Dict[str, Tensor],
-        names_to_scheme: Dict[str, QuantizationArgs],
+        names_to_scheme: Dict[str, QuantizationScheme],
         **kwargs,
     ) -> Dict[str, Tensor]:
         """
@@ -81,87 +82,87 @@ class BaseQuantizationCompressor(BaseCompressor):
         :return: compressed state dict
         """
         compressed_dict = {}
-        weight_suffix = ".weight"
-        input_zp_suffix = ".input_zero_point"
-        weight_zp_suffix = ".weight_zero_point"
-        _LOGGER.debug(
-            f"Compressing model with {len(model_state)} parameterized layers..."
-        )
+        save_device = "cpu"
+
+        uncompressed_names = list(model_state.keys())
+        for name in tqdm(uncompressed_names, desc="Compressing with quantization"):
+            value = model_state[name]
+
+            # compress weights
+            if name.endswith("weight"):
+                prefix = remove_suffix(name, "weight")
+
+                # gather qparams
+                scale = model_state.get(prefix + "weight_scale", None)
+                g_idx = model_state.get(prefix + "weight_g_idx", None)
+                zp = model_state.get(prefix + "weight_zero_point", None)
+
+                # if scale does not exist, then weight cannot be compressed
+                if scale is None:
+                    compressed_dict[name] = value.to(save_device)
+                    continue
+
+                # compress values on cpu (memory movement too expensive)
+                module_path = prefix[:-1] if prefix.endswith(".") else prefix
+                quant_args = names_to_scheme[module_path].weights
+                compressed_values = self.compress_weight(
+                    weight=value,
+                    scale=scale,
+                    zero_point=zp,
+                    g_idx=g_idx,
+                    quantization_args=quant_args,
+                    device="cpu",
+                )
+
+                # update state dict
+                for key, value in compressed_values.items():
+                    compressed_dict[prefix + key] = value.to(save_device)
 
-        for name, value in tqdm(model_state.items(), desc="Quantized Compression"):
-            # check if the parameter we're compressing is the weight zp
-            # or the input zp
-            is_weight_zp = name.endswith(weight_zp_suffix)
-            is_input_zp = name.endswith(input_zp_suffix)
-
-            # if we're saving the weight zp, fetch weight quant args
-            if is_weight_zp:
-                quant_args_zp = names_to_scheme.get(name[: -(len(weight_zp_suffix))])
-                if isinstance(quant_args_zp, tuple):
-                    # If tuple, first value is weight args, second is input args
-                    quant_args_zp = quant_args_zp[0]
-
-            # if we're saving the input zp, fetch input quant args
-            if is_input_zp:
-                input_args_zp = names_to_scheme.get(name[: -(len(input_zp_suffix))])
-                if isinstance(input_args_zp, tuple):
-                    # If tuple, first value is weight args, second is input args
-                    input_args_zp = input_args_zp[-1]
-
-            if name.endswith(weight_suffix):
-                prefix = name[: -(len(weight_suffix))]
-                scale = model_state.get(merge_names(prefix, "weight_scale"), None)
-                zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
-                g_idx = model_state.get(merge_names(prefix, "weight_g_idx"), None)
-                if scale is not None:
-                    # weight is quantized, compress it
-                    if isinstance(names_to_scheme[prefix], tuple):
-                        quant_args = names_to_scheme[prefix][0]
-                    else:
-                        quant_args = names_to_scheme[prefix]
-
-                    compressed_data = self.compress_weight(
-                        weight=value,
-                        scale=scale,
-                        zero_point=zp,
-                        g_idx=g_idx,
-                        quantization_args=quant_args,
-                        device="cpu",
-                    )
-                    for key, value in compressed_data.items():
-                        compressed_dict[merge_names(prefix, key)] = value
-                else:
-                    compressed_dict[name] = value.to("cpu")
-            # only save zp if asym and not packed zp
-            elif is_weight_zp and (
-                quant_args_zp.symmetric or self._check_if_zp_pack_quantized(quant_args)
-            ):
-                continue
-            # only save if asym
-            elif is_input_zp and input_args_zp.symmetric:
-                continue
-            elif name.endswith("g_idx") and torch.any(value <= -1):
-                continue
             else:
-                compressed_dict[name] = value.to("cpu")
+                # omit saving zero points for symmetric or packed quantization
+                if name.endswith("zero_point") and self._skip_zp(name, names_to_scheme):
+                    continue
+
+                # omit saving for g_idx if uninitialized
+                # TODO: does this case actually occur?
+                elif name.endswith("g_idx") and torch.any(value <= -1):
+                    continue
+
+                compressed_dict[name] = value.to(save_device)
 
         return compressed_dict
 
-    def _check_if_zp_pack_quantized(self, quant_args):
+    def _skip_zp(
+        self, name: str, names_to_scheme: Dict[str, QuantizationScheme]
+    ) -> bool:
         from compressed_tensors.compressors import PackedQuantizationCompressor
 
-        if isinstance(self, PackedQuantizationCompressor):
-            if not quant_args.symmetric and quant_args.strategy in [
-                QuantizationStrategy.GROUP.value,
-                QuantizationStrategy.CHANNEL.value,
-            ]:
-                return True
-        return False
+        module_name, zp_name = name.rsplit(".", 1) if "." in name else ("", name)
+        scheme = names_to_scheme[module_name]
+
+        if zp_name == "weight_zero_point":
+            args = scheme.weights
+        if zp_name == "input_zero_point":
+            args = scheme.input_activations
+        if zp_name == "output_zero_point":
+            args = scheme.output_activations
+
+        symmetric = args.symmetric
+        packable_strategies = [
+            QuantizationStrategy.GROUP.value,
+            QuantizationStrategy.CHANNEL.value,
+        ]
+        packed = (
+            isinstance(self, PackedQuantizationCompressor)
+            and args.strategy in packable_strategies
+        )
+
+        return symmetric or packed
 
     def decompress(
         self,
         path_to_model_or_tensors: Union[str, Path, Dict[str, Any]],
-        names_to_scheme: Dict[str, QuantizationArgs],
+        names_to_scheme: Dict[str, QuantizationScheme],
         device: str = "cpu",
     ) -> Generator[Tuple[str, Tensor], None, None]:
         """
@@ -170,8 +171,9 @@ class BaseQuantizationCompressor(BaseCompressor):
            dense state dict
         :param path_to_model_or_tensors: path to compressed safetensors model (directory
             with one or more safetensors files) or compressed tensors file
-        :param names_to_scheme: quantization args for each quantized weight
-        :param device: optional device to load intermediate weights into
+        :param names_to_scheme: quantization scheme for each quantized weight
+        :param device: optional device to load intermediate weights into (must be `str`,
+            not `torch.device`)
         :return: compressed state dict
         """
         if isinstance(path_to_model_or_tensors, (str, Path)):
@@ -184,37 +186,42 @@ class BaseQuantizationCompressor(BaseCompressor):
                 path_to_model_or_tensors, names_to_scheme
             )
 
-    def _decompress_from_path(self, path_to_model, names_to_scheme, device):
+    def _decompress_from_path(
+        self,
+        path_to_model: Union[str, Path, Dict[str, Any]],
+        names_to_scheme: Dict[str, QuantizationScheme],
+        device: str,
+    ):
         weight_mappings = get_nested_weight_mappings(
             path_to_model, self.compression_param_names
         )
-        for weight_name in weight_mappings.keys():
+        for module_path in weight_mappings.keys():
            weight_data = {}
-            for param_name, safe_path in weight_mappings[weight_name].items():
-                full_name = merge_names(weight_name, param_name)
+            for param_name, safe_path in weight_mappings[module_path].items():
+                full_name = merge_names(module_path, param_name)
                 with safe_open(safe_path, framework="pt", device=device) as f:
                     weight_data[param_name] = f.get_tensor(full_name)
             if "weight_scale" in weight_data:
-                quant_args = names_to_scheme[weight_name]
+                quant_args = names_to_scheme[module_path].weights
                 decompressed = self.decompress_weight(
                     compressed_data=weight_data, quantization_args=quant_args
                 )
                 weight_data["weight"] = decompressed
-            yield weight_name, weight_data
+            yield module_path, weight_data
 
     def _decompress_from_state_dict(self, state_dict, names_to_scheme):
         weight_mappings = get_nested_mappings_from_state_dict(
             state_dict, self.compression_param_names
         )
-        for weight_name in weight_mappings.keys():
+        for module_path in weight_mappings.keys():
             weight_data = {}
-            for param_name, param_value in weight_mappings[weight_name].items():
+            for param_name, param_value in weight_mappings[module_path].items():
                 weight_data[param_name] = param_value
 
             if "weight_scale" in weight_data:
-                quant_args = names_to_scheme[weight_name]
+                quant_args = names_to_scheme[module_path]
                 decompressed = self.decompress_weight(
                     compressed_data=weight_data, quantization_args=quant_args
                 )
                 weight_data["weight"] = decompressed
-            yield weight_name, weight_data
+            yield module_path, weight_data
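
The compress hunk above folds the old per-parameter zero-point bookkeeping into _skip_zp: a zero point is omitted from the saved state dict when its args are symmetric (the stored tensor would be all zeros) or when a packed compressor stores it packed alongside the weights. A rough sketch that mirrors the decision without a compressor instance (the scheme contents are illustrative):

    from compressed_tensors.quantization import (
        QuantizationArgs,
        QuantizationScheme,
        QuantizationStrategy,
    )

    scheme = QuantizationScheme(
        targets=["Linear"],
        weights=QuantizationArgs(num_bits=8, symmetric=True),
        input_activations=QuantizationArgs(num_bits=8, symmetric=False),
    )

    def would_skip_zp(args: QuantizationArgs, is_packed_compressor: bool) -> bool:
        # mirrors _skip_zp: drop symmetric zero points, and drop packed ones
        # for the group/channel strategies PackedQuantizationCompressor handles
        packable = [QuantizationStrategy.GROUP.value, QuantizationStrategy.CHANNEL.value]
        return args.symmetric or (is_packed_compressor and args.strategy in packable)

    print(would_skip_zp(scheme.weights, is_packed_compressor=False))            # True
    print(would_skip_zp(scheme.input_activations, is_packed_compressor=False))  # False
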

{compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py
@@ -19,7 +19,11 @@ import numpy as np
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
 from compressed_tensors.config import CompressionFormat
-from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
+from compressed_tensors.quantization import (
+    QuantizationArgs,
+    QuantizationScheme,
+    QuantizationStrategy,
+)
 from compressed_tensors.quantization.lifecycle.forward import quantize
 from compressed_tensors.utils import (
     get_permutations_24,
@@ -44,19 +48,25 @@ class Marlin24Compressor(BaseCompressor):
 
     @staticmethod
     def validate_quant_compatability(
-        model_quant_args: Dict[str, QuantizationArgs]
+        names_to_scheme: Dict[str, QuantizationScheme]
     ) -> bool:
         """
         Checks if every quantized module in the model is compatible with Marlin24
         compression. Quantization must be channel or group strategy with group_size
         of 128. Only symmetric quantization is supported
 
-        :param model_quant_args: dictionary of mapping module names to their
-            quantization configuration
+        :param names_to_scheme: dictionary of mapping module names to their
+            quantization schemes
         :return: True if all modules are compatible with Marlin24 compression, raises
             a ValueError otherwise
         """
-        for name, quant_args in model_quant_args.items():
+        for name, scheme in names_to_scheme.items():
+            quant_args = scheme.weights
+            if quant_args is None:
+                raise ValueError(
+                    "Marlin24 Compressor is only valid for weight quantization schemes"
+                )
+
             strategy = quant_args.strategy
             group_size = quant_args.group_size
             symmetric = quant_args.symmetric
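
With whole schemes as input, the validator can now fail fast on schemes that quantize activations but not weights. A hedged sketch of the new error path, assuming Marlin24Compressor is importable from compressed_tensors.compressors (the module name "fc1" and the scheme are made up):

    from compressed_tensors.compressors import Marlin24Compressor
    from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme

    # hypothetical activation-only scheme: weights=None, so Marlin24 cannot apply
    act_only = QuantizationScheme(
        targets=["Linear"],
        input_activations=QuantizationArgs(num_bits=8, symmetric=True),
    )

    try:
        Marlin24Compressor.validate_quant_compatability({"fc1": act_only})
    except ValueError as err:
        print(err)  # Marlin24 Compressor is only valid for weight quantization schemes
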
@@ -114,7 +124,7 @@ class Marlin24Compressor(BaseCompressor):
     def compress(
         self,
         model_state: Dict[str, Tensor],
-        names_to_scheme: Dict[str, QuantizationArgs],
+        names_to_scheme: Dict[str, QuantizationScheme],
         **kwargs,
     ) -> Dict[str, Tensor]:
         """
@@ -122,8 +132,8 @@ class Marlin24Compressor(BaseCompressor):
         with the Marlin24 kernel
 
         :param model_state: state dict of uncompressed model
-        :param names_to_scheme: quantization args for each quantized weight, needed for
-            quantize function to calculate bit depth
+        :param names_to_scheme: quantization scheme for each quantized weight, needed
+            for quantize function to calculate bit depth
         :return: compressed state dict
         """
         self.validate_quant_compatability(names_to_scheme)
@@ -146,7 +156,7 @@ class Marlin24Compressor(BaseCompressor):
                     value = value.to(torch.float16)
 
                     # quantize weight, keeping it as a float16 for now
-                    quant_args = names_to_scheme[prefix]
+                    quant_args = names_to_scheme[prefix].weights
                     value = quantize(
                         x=value, scale=scale, zero_point=zp, args=quant_args
                     )
@@ -215,7 +225,7 @@ def pack_weight_24(
     weight: Tensor,
     quantization_args: QuantizationArgs,
     tile: int = 16,
-):
+) -> torch.Tensor:
     size_k = weight.shape[0]
     size_n = weight.shape[1]
     num_bits = quantization_args.num_bits
@@ -236,7 +246,9 @@ def pack_weight_24(
     return q_packed
 
 
-def pack_scales_24(scales, quantization_args, w_shape):
+def pack_scales_24(
+    scales: torch.Tensor, quantization_args: QuantizationArgs, w_shape: torch.Size
+) -> torch.Tensor:
     size_k = w_shape[0]
     size_n = w_shape[1]
     num_bits = quantization_args.num_bits

{compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/quantization/lifecycle/apply.py
@@ -119,7 +119,7 @@ def load_pretrained_quantization_parameters(
 
 def apply_quantization_config(
     model: Module, config: Union[QuantizationConfig, None], run_compressed: bool = False
-) -> OrderedDict:
+) -> Dict[str, QuantizationScheme]:
     """
     Initializes the model for quantization in-place based on the given config.
     Optionally converts quantizable modules to compressed_linear modules
@@ -131,7 +131,7 @@ def apply_quantization_config(
     """
     # Workaround for when HF Quantizer passes None, see PR #180
     if config is None:
-        return OrderedDict()
+        return dict()
 
     # remove reference to the original `config`
     # argument. This function can mutate it, and we'd
@@ -141,7 +141,7 @@ def apply_quantization_config(
     # use ordered dict to preserve target ordering in config
     target_to_scheme = OrderedDict()
     config = process_quantization_config(config)
-    names_to_scheme = OrderedDict()
+    names_to_scheme = dict()
     for scheme in config.config_groups.values():
         for target in scheme.targets:
             target_to_scheme[target] = scheme
@@ -187,7 +187,7 @@ def apply_quantization_config(
                 target_to_scheme, targets, name
             )
 
-            names_to_scheme[name] = submodule.quantization_scheme.weights
+            names_to_scheme[name] = submodule.quantization_scheme
 
     if config.ignore is not None and ignored_submodules is not None:
         if set(config.ignore) - set(ignored_submodules):

{compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/quantization/lifecycle/initialize.py
@@ -14,6 +14,7 @@
 
 
 import logging
+import math
 from enum import Enum
 from typing import Optional
 
@@ -162,7 +163,7 @@ def _initialize_scale_zero_point(
         # (output_channels, 1)
         expected_shape = (weight_shape[0], 1)
     elif quantization_args.strategy == QuantizationStrategy.GROUP:
-        num_groups = weight_shape[1] // quantization_args.group_size
+        num_groups = math.ceil(weight_shape[1] / quantization_args.group_size)
         expected_shape = (weight_shape[0], max(num_groups, 1))
 
     scale_dtype = scale_dtype if scale_dtype is not None else module.weight.dtype
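
The math.ceil fix matters whenever the number of input columns is not an exact multiple of group_size: the trailing partial group still needs its own column in the scale and zero-point tensors, which floor division silently dropped. A quick worked example with made-up shapes:

    import math

    columns, group_size = 320, 128  # hypothetical weight_shape[1] and group size
    print(columns // group_size)            # 2: old shape, last 64 columns got no scale
    print(math.ceil(columns / group_size))  # 3: new shape covers all 320 columns
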

{compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/utils/helpers.py
@@ -38,6 +38,7 @@ __all__ = [
     "shard_tensor",
     "pack_bitmasks",
     "unpack_bitmasks",
+    "remove_suffix",
 ]
 
 FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
@@ -328,3 +329,9 @@ def unpack_bitmasks(
     )
 
     return unpacked_bitmasks_torch
+
+
+def remove_suffix(value: str, suffix: str) -> str:
+    # can replace with str.removesuffix in python3.9+
+    assert value.endswith(suffix)
+    return value[: -len(suffix)]
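
As its comment says, remove_suffix is a stand-in for str.removesuffix (Python 3.9+), with one difference: the assert makes a missing suffix an error rather than a silent no-op. For example:

    from compressed_tensors.utils import remove_suffix

    print(remove_suffix("decoder.0.weight", "weight"))  # "decoder.0."
    # str.removesuffix returns the input unchanged on a mismatch;
    # remove_suffix("decoder.0.bias", "weight") raises AssertionError instead
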

{compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/utils/safetensors_load.py
@@ -234,11 +234,11 @@ def get_nested_weight_mappings(
     for key, file_location in weight_mappings.items():
         matched = False
         for param_name in params_to_nest:
-            dense_param = match_param_name(key, param_name)
-            if dense_param:
-                if dense_param not in nested_weight_mappings:
-                    nested_weight_mappings[dense_param] = {}
-                nested_weight_mappings[dense_param][param_name] = file_location
+            module_path = match_param_name(key, param_name)
+            if module_path:
+                if module_path not in nested_weight_mappings:
+                    nested_weight_mappings[module_path] = {}
+                nested_weight_mappings[module_path][param_name] = file_location
                 matched = True
         if return_unmatched_params and not matched:
             unmatched_params[key] = file_location
@@ -271,11 +271,11 @@ def get_nested_mappings_from_state_dict(
     nested_weight_mappings = {}
     for key in state_dict.keys():
         for param_name in params_to_nest:
-            dense_param = match_param_name(key, param_name)
-            if dense_param:
-                if dense_param not in nested_weight_mappings:
-                    nested_weight_mappings[dense_param] = {}
-                nested_weight_mappings[dense_param][param_name] = state_dict[key]
+            module_path = match_param_name(key, param_name)
+            if module_path:
+                if module_path not in nested_weight_mappings:
+                    nested_weight_mappings[module_path] = {}
+                nested_weight_mappings[module_path][param_name] = state_dict[key]
     return nested_weight_mappings
 
 

{compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/src/compressed_tensors/version.py
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.9.5.a20250428'
+__version__ = version = '0.9.5.a20250507'
 __version_tuple__ = version_tuple = (0, 9, 5)

{compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507/src/compressed_tensors.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.5a20250428
+Version: 0.9.5a20250507
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

{compressed_tensors-0.9.5a20250428 → compressed_tensors-0.9.5a20250507}/tests/test_compressors/quantized_compressors/test_fp8_quant.py
@@ -84,9 +84,9 @@ def test_quant_format(strategy, group_size, sc, zp):
     quant_config = get_dummy_quant_config(strategy=strategy, group_size=group_size)
 
     compressor = FloatQuantizationCompressor(config=quant_config)
-    quantized_modules_to_args = {"dummy": quant_config.config_groups["group_1"].weights}
+    module_name_to_scheme = {"dummy": quant_config.config_groups["group_1"]}
     compressed_state_dict = compressor.compress(
-        dense_state_dict, names_to_scheme=quantized_modules_to_args
+        dense_state_dict, names_to_scheme=module_name_to_scheme
     )
 
     # state_dict params should be the same, minus the zero_point if symmetric
@@ -140,15 +140,15 @@ def test_reload_match(
     )
 
     compressor = FloatQuantizationCompressor(config=quant_config)
-    quantized_modules_to_args = {
-        "dummy": quant_config.config_groups["group_1"].weights,
+    module_name_to_scheme = {
+        "dummy": quant_config.config_groups["group_1"],
     }
     compressed_state_dict = compressor.compress(
-        model.state_dict(), names_to_scheme=quantized_modules_to_args
+        model.state_dict(), names_to_scheme=module_name_to_scheme
     )
     save_file(compressed_state_dict, tmp_path / "model.safetensors")
     reconstructed_dense_gen = compressor.decompress(
-        tmp_path, names_to_scheme=quantized_modules_to_args
+        tmp_path, names_to_scheme=module_name_to_scheme
     )
     reconstructed_dense = {}
     for name, value in reconstructed_dense_gen:
@@ -158,7 +158,7 @@
         model.dummy.weight,
         scale=model.dummy.weight_scale,
         zero_point=model.dummy.weight_zero_point,
-        args=quantized_modules_to_args["dummy"],
+        args=module_name_to_scheme["dummy"].weights,
     )
     assert torch.equal(fake_quant_dummy, reconstructed_dense["dummy"].get("weight"))