compressed-tensors 0.9.5a20250513__tar.gz → 0.9.5a20250514__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed_tensors-0.9.5a20250513/src/compressed_tensors.egg-info → compressed_tensors-0.9.5a20250514}/PKG-INFO +1 -1
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +131 -3
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/quantized_compressors/base.py +14 -7
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/sparse_compressors/base.py +44 -6
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/sparse_compressors/dense.py +7 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +2 -6
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +5 -1
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/linear/compressed_linear.py +3 -2
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/utils/helpers.py +0 -7
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/utils/safetensors_load.py +16 -3
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/version.py +1 -1
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_compressors/model_compressors/test_model_compressor.py +113 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +2 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/.github/.gitkeep +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/.github/actions/test/action.yml +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/.github/scripts/step-status +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/.github/workflows/build-test.yml +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/.github/workflows/build.yml +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/.github/workflows/report.yml +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/.github/workflows/test-check.yaml +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/.github/workflows/test.yml +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/.github/workflows/trigger-all.yml +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/.github/workflows/upload.yml +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/.gitignore +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/LICENSE +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/Makefile +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/README.md +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/examples/bit_packing/int4_config.json +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/examples/bitmask_compression.ipynb +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/examples/llama_1.1b/ex_config_quantization.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/examples/llama_1.1b/example_quant_config.json +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/examples/quantize_and_pack_int4.ipynb +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/pyproject.toml +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/setup.cfg +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/setup.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/README.md +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/base.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/base.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/config/base.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/config/dense.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/quantization/quant_args.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/quantization/quant_config.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/utils/offload.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/utils/permute.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors.egg-info/requires.txt +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors.egg-info/top_level.txt +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/conftest.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_configs/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_configs/test_base.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_linear/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_linear/test_compressed_linear.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_quantization/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_quantization/lifecycle/conftest.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_quantization/lifecycle/test_apply.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_quantization/lifecycle/test_forward.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_quantization/test_configs/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_quantization/test_configs/test_strategies.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_quantization/test_quant_args.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_quantization/test_quant_config.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_quantization/test_quant_scheme.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_quantization/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_registry.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_utils/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_utils/test_offload.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_utils/test_safetensors_load.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/testing_utils.py +0 -0
- {compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/utils/copyright.py +0 -0
{compressed_tensors-0.9.5a20250513/src/compressed_tensors.egg-info → compressed_tensors-0.9.5a20250514}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.5a20250513
+Version: 0.9.5a20250514
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
{compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/model_compressors/model_compressor.py

@@ -47,6 +47,9 @@ from compressed_tensors.quantization.utils import (
     iter_named_leaf_modules,
 )
 from compressed_tensors.utils import (
+    align_module_device,
+    delete_offload_parameter,
+    get_execution_device,
     get_safetensors_folder,
     has_offloaded_params,
     merge_names,
@@ -98,6 +101,9 @@ class ModelCompressor:
     :param quantization_config: config specifying quantization compression parameters
     """

+    sparsity_config: Optional[SparsityCompressionConfig] = None
+    quantization_config: Optional[QuantizationConfig] = None
+
     @classmethod
     def from_pretrained(
         cls,
@@ -261,6 +267,8 @@ class ModelCompressor:
             quantization_config.format, config=quantization_config
         )

+    # ----- used by hf quantizer ----- #
+
     def get_missing_module_keys(self, model: Module) -> List[str]:
         """
         Identifies the expected missing weight keys in the compressed state_dict.
@@ -270,7 +278,6 @@ class ModelCompressor:
         This function determines which weight keys are missing based on the
         applied compression techniques.

-
         :param model: The PyTorch model to check for missing keys.
         :return: A list of missing keys expected in the compressed state_dict.
         """
@@ -362,8 +369,124 @@ class ModelCompressor:

         return list(unexpected_keys)

+    # ----- model memory compression/decompression pathways ----- #
+
+    def compress_model(self, model: Module):
+        """
+        Compress a model in memory. Because the model structure is modified in place,
+        this method is more memory-efficient than `self.compress`
+
+        :param model: model containing parameters to compress
+        """
+        module_to_scheme = map_module_to_scheme(model)
+        sparse_compression_targets: Set[str] = expand_target_names(
+            model=model,
+            targets=self.sparsity_config.targets if self.sparsity_config else [],
+            ignore=self.sparsity_config.ignore if self.sparsity_config else [],
+        )
+
+        for prefix, module in tqdm(model.named_modules(), desc="Compressing model"):
+            if prefix in module_to_scheme or prefix in sparse_compression_targets:
+                # in the future, support compression on same device
+                with align_module_device(module, execution_device="cpu"):
+                    state_dict = module.state_dict(prefix=f"{prefix}.")
+
+                # quantization first
+                if prefix in module_to_scheme:
+                    state_dict = self.quantization_compressor.compress(
+                        state_dict,
+                        names_to_scheme=module_to_scheme,
+                        show_progress=False,
+                    )
+
+                # sparsity second
+                if prefix in sparse_compression_targets:
+                    state_dict = self.sparsity_compressor.compress(
+                        state_dict,
+                        compression_targets=sparse_compression_targets,
+                        show_progress=False,
+                    )
+
+                # remove any existing parameters
+                device = get_execution_device(module)
+                for name, _ in list(module.named_parameters()):
+                    delattr(module, name)
+
+                # replace with compressed parameters
+                for name, value in state_dict.items():
+                    name = name.removeprefix(f"{prefix}.")
+                    value = value.to(device)
+                    param = torch.nn.Parameter(value, requires_grad=False)
+                    register_offload_parameter(module, name, param)
+
+                module.quantization_status = QuantizationStatus.COMPRESSED
+
+    def decompress_model(self, model: Module):
+        """
+        Decompress a model in memory. Because the model structure is modified in place,
+        this method does not require loading some compression parameters from disk
+
+        :param model: model containing parameters to compress
+        """
+        module_to_scheme = map_module_to_scheme(model)
+        sparse_compression_targets: Set[str] = expand_target_names(
+            model=model,
+            targets=self.sparsity_config.targets if self.sparsity_config else [],
+            ignore=self.sparsity_config.ignore if self.sparsity_config else [],
+        )
+
+        for prefix, module in tqdm(model.named_modules(), desc="Decompressing model"):
+            if prefix in module_to_scheme or prefix in sparse_compression_targets:
+                # in the future, support decompression on same device
+                with align_module_device(module, execution_device="cpu"):
+                    state_dict = module.state_dict(prefix=f"{prefix}.")
+
+                # sparsity first
+                if prefix in sparse_compression_targets:
+                    # sparse_compression_targets are automatically inferred by this fn
+                    generator = self.sparsity_compressor.decompress_from_state_dict(
+                        state_dict,
+                    )
+                    # generates (param_path, param_val)
+                    # of compressed and unused params
+                    state_dict = {key: value for key, value in generator}
+
+                # quantization second
+                if prefix in module_to_scheme:
+                    generator = self.quantization_compressor.decompress_from_state_dict(
+                        state_dict,
+                        names_to_scheme=module_to_scheme,
+                    )
+                    # generates (mod_path, {param_name, param_val})
+                    # of compressed params and used params, but not unused params
+                    # some used params are removed by get_unexpected_file_keys
+                    state_dict = {
+                        merge_names(module_path, param_name): param_value
+                        for module_path, compressed_data in generator
+                        for param_name, param_value in compressed_data.items()
+                    }
+
+                # remove any existing parameters
+                device = get_execution_device(module)
+                for name, _ in list(module.named_parameters()):
+                    delete_offload_parameter(module, name)
+
+                # replace with decompressed parameters
+                for name, value in state_dict.items():
+                    name = name.removeprefix(f"{prefix}.")
+                    value = value.to(device)
+                    param = torch.nn.Parameter(value, requires_grad=False)
+                    register_offload_parameter(module, name, param)
+
+                module.quantization_status = QuantizationStatus.FROZEN
+
+    # ----- state dict compression pathways ----- #
+
     def compress(
-        self, model: Module, state_dict: Optional[Dict[str, Tensor]] = None
+        self,
+        model: Module,
+        state_dict: Optional[Dict[str, Tensor]] = None,
+        show_progress: bool = False,
     ) -> Dict[str, Tensor]:
         """
         Compresses a dense state dict or model with sparsity and/or quantization
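The new in-memory pathway compresses each module's state dict and swaps its parameters in place, so a full uncompressed copy of the model never has to coexist with the compressed one. A minimal usage sketch, mirroring the tests added at the bottom of this diff (the model stub and format string are illustrative, and the import path is an assumption):

    import torch
    from transformers import AutoModelForCausalLM
    from compressed_tensors.compressors import ModelCompressor

    model = AutoModelForCausalLM.from_pretrained(
        "nm-testing/llama2.c-stories42M-gsm8k-quantized-only-uncompressed",
        torch_dtype=torch.float32,
    )
    # positional args: sparsity config (None here), then quantization format
    compressor = ModelCompressor.from_pretrained_model(model, None, "float-quantized")

    compressor.compress_model(model)    # parameters replaced with compressed ones
    compressor.decompress_model(model)  # dense parameters restored in place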
@@ -379,7 +502,9 @@ class ModelCompressor:
         if self.quantization_compressor is not None:
             module_to_scheme = map_module_to_scheme(model)
             state_dict = self.quantization_compressor.compress(
-                state_dict, names_to_scheme=module_to_scheme
+                state_dict,
+                names_to_scheme=module_to_scheme,
+                show_progress=show_progress,
             )

         # TODO: consider sparse compression to also be compression
@@ -397,6 +522,7 @@ class ModelCompressor:
             state_dict = self.sparsity_compressor.compress(
                 state_dict,
                 compression_targets=sparse_compression_targets,
+                show_progress=show_progress,
             )

         # HACK: Override the dtype_byte_size function in transformers to
@@ -406,6 +532,8 @@ class ModelCompressor:

         return state_dict

+    # ----- disk decompression pathways ----- #
+
     def decompress(self, model_path: str, model: Module):
         """
         Overwrites the weights in model with weights decompressed from model_path
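The eager state-dict pathway keeps a backwards-compatible default of `show_progress=False`, so existing callers are unaffected; opting in looks roughly like this (the `model` and `compressor` variables are assumed from the sketch above):

    # progress bars are shown for both the quantization and sparsity passes
    compressed_state_dict = compressor.compress(model, show_progress=True)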
{compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/quantized_compressors/base.py

@@ -23,7 +23,6 @@ from compressed_tensors.utils import (
     get_nested_mappings_from_state_dict,
     get_nested_weight_mappings,
     merge_names,
-    remove_suffix,
 )
 from safetensors import safe_open
 from torch import Tensor
@@ -71,6 +70,7 @@ class BaseQuantizationCompressor(BaseCompressor):
         self,
         model_state: Dict[str, Tensor],
         names_to_scheme: Dict[str, QuantizationScheme],
+        show_progress: bool = False,
         **kwargs,
     ) -> Dict[str, Tensor]:
         """
@@ -79,18 +79,21 @@ class BaseQuantizationCompressor(BaseCompressor):
         :param model_state: state dict of uncompressed model
         :param names_to_scheme: quantization args for each quantized weight, needed for
             quantize function to calculate bit depth
+        :param show_progress: whether to show tqdm progress
         :return: compressed state dict
         """
+        uncompressed_names = list(model_state.keys())
         compressed_dict = {}
         save_device = "cpu"

-        uncompressed_names = list(model_state.keys())
-        for name in tqdm(uncompressed_names, desc="Quantized Compression"):
+        # compress values
+        desc = "Compressing with quantization"
+        for name in tqdm(uncompressed_names, desc=desc, disable=(not show_progress)):
             value = model_state[name]

             # compress weights
             if name.endswith("weight"):
-                prefix = remove_suffix(name, "weight")
+                prefix = name.removesuffix("weight")

                 # gather qparams
                 scale = model_state.get(prefix + "weight_scale", None)
@@ -182,7 +185,7 @@ class BaseQuantizationCompressor(BaseCompressor):
             )

         else:
-            yield from self._decompress_from_state_dict(
+            yield from self.decompress_from_state_dict(
                 path_to_model_or_tensors, names_to_scheme
             )

@@ -209,7 +212,11 @@ class BaseQuantizationCompressor(BaseCompressor):
             weight_data["weight"] = decompressed
             yield module_path, weight_data

-    def _decompress_from_state_dict(self, state_dict, names_to_scheme):
+    def decompress_from_state_dict(
+        self,
+        state_dict: Dict[str, torch.Tensor],
+        names_to_scheme: Dict[str, QuantizationScheme],
+    ) -> Generator[Tuple[str, Dict[str, torch.Tensor]], None, None]:
         weight_mappings = get_nested_mappings_from_state_dict(
             state_dict, self.compression_param_names
         )
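The former private helper is now part of the public surface with a typed generator signature, yielding `(module_path, weight_data)` pairs; roughly (`compressor`, `state_dict`, and `names_to_scheme` are assumed to exist):

    for module_path, weight_data in compressor.decompress_from_state_dict(
        state_dict, names_to_scheme
    ):
        # weight_data holds the decompressed "weight" plus any remaining qparams
        print(module_path, weight_data["weight"].shape)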
@@ -219,7 +226,7 @@ class BaseQuantizationCompressor(BaseCompressor):
                 weight_data[param_name] = param_value

             if "weight_scale" in weight_data:
-                quant_args = names_to_scheme[module_path]
+                quant_args = names_to_scheme[module_path].weights
                 decompressed = self.decompress_weight(
                     compressed_data=weight_data, quantization_args=quant_args
                 )
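This fixes the lookup for the state-dict pathway: `names_to_scheme` maps module paths to whole `QuantizationScheme` objects, so the weight quantization args sit one attribute deeper:

    scheme = names_to_scheme[module_path]  # QuantizationScheme
    quant_args = scheme.weights            # QuantizationArgs used for decompression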
{compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/sparse_compressors/base.py

@@ -16,7 +16,11 @@ import logging
 from typing import Dict, Generator, Optional, Set, Tuple

 from compressed_tensors.compressors.base import BaseCompressor
-from compressed_tensors.utils import get_nested_weight_mappings, merge_names
+from compressed_tensors.utils import (
+    get_nested_mappings_from_state_dict,
+    get_nested_weight_mappings,
+    merge_names,
+)
 from safetensors import safe_open
 from torch import Tensor
 from tqdm import tqdm
@@ -63,6 +67,7 @@ class BaseSparseCompressor(BaseCompressor):
         self,
         model_state: Dict[str, Tensor],
         compression_targets: Optional[Set[str]] = None,
+        show_progress: bool = False,
     ) -> Dict[str, Tensor]:
         """
         Compresses a dense state dict using bitmask compression
@@ -76,7 +81,11 @@ class BaseSparseCompressor(BaseCompressor):
         _LOGGER.debug(
             f"Compressing model with {len(model_state)} parameterized layers..."
         )
-        for name, value in tqdm(model_state.items(), desc="Compressing model"):
+        for name, value in tqdm(
+            model_state.items(),
+            desc="Compressing with sparsity",
+            disable=(not show_progress),
+        ):
             if not self.should_compress(name, compression_targets):
                 compressed_dict[name] = value
                 continue
@@ -124,15 +133,15 @@ class BaseSparseCompressor(BaseCompressor):
             self.compression_param_names,
             return_unmatched_params=True,
         )
-        for weight_name in weight_mappings.keys():
+        for module_path in weight_mappings.keys():
             weight_data = {}
-            for param_name, safe_path in weight_mappings[weight_name].items():
-                full_name = merge_names(weight_name, param_name)
+            for param_name, safe_path in weight_mappings[module_path].items():
+                full_name = merge_names(module_path, param_name)
                 with safe_open(safe_path, framework="pt", device=device) as f:
                     weight_data[param_name] = f.get_tensor(full_name)

             decompressed = self.decompress_weight(weight_data)
-            yield merge_names(weight_name, "weight"), decompressed
+            yield merge_names(module_path, "weight"), decompressed

         for ignored_param_name, safe_path in ignored_params.items():
             should_skip = False
@@ -146,6 +155,35 @@ class BaseSparseCompressor(BaseCompressor):
                 value = f.get_tensor(ignored_param_name)
                 yield ignored_param_name, value

+    def decompress_from_state_dict(
+        self,
+        state_dict: Dict[str, Tensor],
+    ) -> Generator[Tuple[str, Dict[str, Tensor]], None, None]:
+        """
+        Decompress the state dict of a module (or model)
+
+        Unlike `self.decompress`, this function does not need to explicitly skip params
+        via params_to_skip_load because it is more convenient for its only caller
+        (ModelCompressor.decompress_model) to retrieve all unused param keys
+
+        :param state_dict: state dict containing parameters to decompress
+        :return: Generator of (param_path, param_val)
+        """
+        weight_mappings, ignored_params = get_nested_mappings_from_state_dict(
+            state_dict, self.compression_param_names, return_unmatched_params=True
+        )
+
+        for module_path in weight_mappings.keys():
+            weight_data = {}
+            for param_name, param_value in weight_mappings[module_path].items():
+                weight_data[param_name] = param_value
+
+            decompressed = self.decompress_weight(weight_data)
+            yield merge_names(module_path, "weight"), decompressed
+
+        for ignored_param_path, ignored_param_value in ignored_params.items():
+            yield ignored_param_path, ignored_param_value
+
     @staticmethod
     def should_compress(name: str, expanded_targets: Optional[Set[str]] = None) -> bool:
         """
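Because unmatched parameters are re-yielded at the end, the only caller (`ModelCompressor.decompress_model`, shown earlier in this diff) can rebuild a module's full state dict in one expression:

    state_dict = dict(self.sparsity_compressor.decompress_from_state_dict(state_dict))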
{compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/sparse_compressors/dense.py

@@ -40,3 +40,10 @@ class DenseCompressor(BaseCompressor):
         self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
     ) -> Generator[Tuple[str, Tensor], None, None]:
         return iter([])
+
+    def decompress_from_state_dict(
+        self,
+        state_dict: Dict[str, Tensor],
+    ) -> Generator[Tuple[str, Dict[str, Tensor]], None, None]:
+        for key, value in state_dict.items():
+            yield key, value
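For the dense (uncompressed) format this is an identity pass-through, which keeps `decompress_model` uniform across formats; sketched with an assumed `dense_compressor` instance:

    # every (key, value) pair is yielded back unchanged
    for key, value in dense_compressor.decompress_from_state_dict(state_dict):
        assert value is state_dict[key]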
{compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py

@@ -13,7 +13,7 @@
 # limitations under the License.

 from dataclasses import dataclass
-from typing import Dict, List, Tuple, Union
+from typing import Dict, Generator, List, Tuple, Union

 import torch
 from compressed_tensors.compressors.base import BaseCompressor
@@ -202,11 +202,7 @@ def sparse24_bitmask_decompress(
     decompressed_tensor = torch.zeros(original_shape, dtype=values.dtype)
     decompressed_tensor = decompressed_tensor.to(values.device)
     values = values.flatten()
-    if decompressed_tensor.dtype == FP8_DTYPE:
-        decompressed_tensor[bytemasks_unpacked] = values
-        decompressed_tensor = decompressed_tensor.cuda()
-    else:
-        decompressed_tensor[bytemasks_unpacked] = values
+    decompressed_tensor[bytemasks_unpacked] = values
     return decompressed_tensor

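Both branches of the removed conditional wrote the same values; the only behavioral difference was an implicit `.cuda()` move for FP8 outputs, which now becomes the caller's responsibility (see the test change at the bottom of this diff). A sketch of the resulting contract, assuming the public three-argument signature:

    decompressed = sparse24_bitmask_decompress(values, bitmasks, original_shape)
    assert decompressed.device == values.device  # no implicit FP8 .cuda() move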
{compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py

@@ -125,6 +125,7 @@ class Marlin24Compressor(BaseCompressor):
         self,
         model_state: Dict[str, Tensor],
         names_to_scheme: Dict[str, QuantizationScheme],
+        show_progress: bool = False,
         **kwargs,
     ) -> Dict[str, Tensor]:
         """
@@ -134,6 +135,7 @@ class Marlin24Compressor(BaseCompressor):
         :param model_state: state dict of uncompressed model
         :param names_to_scheme: quantization scheme for each quantized weight, needed
             for quantize function to calculate bit depth
+        :param show_progress: whether to show tqdm progress
         :return: compressed state dict
         """
         self.validate_quant_compatability(names_to_scheme)
@@ -144,7 +146,9 @@ class Marlin24Compressor(BaseCompressor):
             f"Compressing model with {len(model_state)} parameterized layers..."
         )

-        for name, value in tqdm(model_state.items(), desc="Compressing model"):
+        for name, value in tqdm(
+            model_state.items(), desc="Compressing model", disable=(not show_progress)
+        ):
             if name.endswith(weight_suffix):
                 prefix = name[: -(len(weight_suffix))]
                 scale = model_state.get(merge_names(prefix, "weight_scale"), None)
{compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/linear/compressed_linear.py

@@ -23,6 +23,7 @@ from compressed_tensors.quantization import (
     initialize_module_for_quantization,
 )
 from compressed_tensors.utils import register_offload_parameter
+from compressed_tensors.utils.offload import get_execution_device
 from torch import Tensor
 from torch.nn import Parameter
 from torch.nn.functional import linear
@@ -60,7 +61,7 @@ class CompressedLinear(Linear):
         """
         module.__class__ = CompressedLinear
         module.compressor = BaseCompressor.load_from_registry(quantization_format)
-        device = next(module.parameters()).device
+        init_device = get_execution_device(module)

         # this will initialize all the scales and zero points
         initialize_module_for_quantization(
@@ -79,7 +80,7 @@ class CompressedLinear(Linear):
         # populate compressed weights and quantization parameters
         for name, (shape, dtype) in compression_params.items():
             param = Parameter(
-                torch.empty(shape, device=device, dtype=dtype), requires_grad=False
+                torch.empty(shape, device=init_device, dtype=dtype), requires_grad=False
             )
             register_offload_parameter(module, name, param)

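`get_execution_device` resolves the device a module actually executes on, which matters for offloaded modules whose materialized parameters can sit on the `meta` device; a rough contrast, assuming an accelerate-offloaded module:

    next(module.parameters()).device  # may be device("meta") when offloaded
    get_execution_device(module)      # the onload/execution device, e.g. cuda:0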
{compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/utils/helpers.py

@@ -38,7 +38,6 @@ __all__ = [
     "shard_tensor",
     "pack_bitmasks",
     "unpack_bitmasks",
-    "remove_suffix",
 ]

 FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
@@ -329,9 +328,3 @@ def unpack_bitmasks(
     )

     return unpacked_bitmasks_torch
-
-
-def remove_suffix(value: str, suffix: str) -> str:
-    # can replace with str.removesuffix in python3.9+
-    assert value.endswith(suffix)
-    return value[: -len(suffix)]
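With Python 3.9 as the floor, the built-in `str.removesuffix` is a drop-in replacement, except that it returns the string unchanged rather than asserting when the suffix is absent:

    >>> "decoder.layers.0.weight".removesuffix("weight")
    'decoder.layers.0.'
    >>> "decoder.layers.0.bias".removesuffix("weight")  # old helper would assert
    'decoder.layers.0.bias'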
{compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/src/compressed_tensors/utils/safetensors_load.py

@@ -35,6 +35,7 @@ __all__ = [
     "is_quantization_param",
 ]

+NestedStateDictType = Dict[str, Dict[str, Tensor]]
 WeightMappingType = Dict[str, str]
 NestedWeightMappingType = Dict[str, WeightMappingType]

@@ -249,8 +250,10 @@ def get_nested_weight_mappings(


 def get_nested_mappings_from_state_dict(
-    state_dict, params_to_nest
-) -> NestedWeightMappingType:
+    state_dict: Dict[str, Tensor],
+    params_to_nest: Iterable[str],
+    return_unmatched_params: bool = False,
+) -> Union[NestedStateDictType, Tuple[NestedStateDictType, Dict[str, Tensor]]]:
     """
     Takes a state dict and returns a nested mapping from uncompressed
     parameterized layer names to the value of
@@ -266,16 +269,26 @@ def get_nested_mappings_from_state_dict(
     :param state_dict: state dict of the model
     :param params_to_nest: Iterable of parameter names to nest.
     :return: Nested mapping of parameterized layer names to the value of
-        each layer's compression parameters.
+        each layer's compression parameters. If `return_unmatched_params`, then
+        also return a dictionary mapping unused parameter names to their values
     """
     nested_weight_mappings = {}
+    unmatched_params = {}
+
     for key in state_dict.keys():
+        matched = False
         for param_name in params_to_nest:
             module_path = match_param_name(key, param_name)
             if module_path:
                 if module_path not in nested_weight_mappings:
                     nested_weight_mappings[module_path] = {}
                 nested_weight_mappings[module_path][param_name] = state_dict[key]
+                matched = True
+        if return_unmatched_params and not matched:
+            unmatched_params[key] = state_dict[key]
+
+    if return_unmatched_params:
+        return nested_weight_mappings, unmatched_params
     return nested_weight_mappings

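With the flag set, callers get both the nested mapping and every key that matched no nested param name; a small worked example (the tensor variables `w`, `s`, `b` are hypothetical):

    state_dict = {"layer.weight": w, "layer.weight_scale": s, "layer.bias": b}
    nested, unmatched = get_nested_mappings_from_state_dict(
        state_dict, ["weight", "weight_scale"], return_unmatched_params=True
    )
    # nested    == {"layer": {"weight": w, "weight_scale": s}}
    # unmatched == {"layer.bias": b}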
{compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514/src/compressed_tensors.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.5a20250513
+Version: 0.9.5a20250514
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
{compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_compressors/model_compressors/test_model_compressor.py

@@ -24,6 +24,7 @@ from compressed_tensors.config import SparsityCompressionConfig
 from compressed_tensors.quantization import QuantizationConfig
 from safetensors.torch import save_file
 from tests.testing_utils import induce_sparsity, requires_hf_quantizer
+from transformers import AutoModelForCausalLM


 def sparsity_config():
@@ -365,3 +366,115 @@ def _get_combined_config(s_config, q_config):
     combined["sparsity_config"] = s_config

     return combined
+
+
+@pytest.mark.parametrize(
+    "model_stub,q_format,s_config",
+    [
+        (
+            "nm-testing/llama2.c-stories42M-gsm8k-quantized-only-uncompressed",
+            "float-quantized",
+            None,
+        ),
+        (
+            "nm-testing/llama2.c-stories42M-gsm8k-sparse-only-uncompressed",
+            None,
+            "sparse-24-bitmask",
+        ),
+        (
+            "nm-testing/llama2.c-stories42M-gsm8k-stacked-uncompressed",
+            "float-quantized",
+            "sparse-24-bitmask",
+        ),
+        (
+            "nm-testing/llama2.c-stories15M-ultrachat-mixed-uncompressed",
+            "pack-quantized",
+            None,
+        ),
+    ],
+)
+def test_compress_model(model_stub, q_format, s_config, tmpdir):
+    model = AutoModelForCausalLM.from_pretrained(model_stub, torch_dtype=torch.float32)
+    compressor = ModelCompressor.from_pretrained_model(model, s_config, q_format)
+
+    # compress model by eagerly compressing state dict
+    true_compressed = dict(compressor.compress(model))
+    true_compressed = {key: value.clone() for key, value in true_compressed.items()}
+
+    # compress model directly
+    compressor.compress_model(model)
+    compressed = dict(model.state_dict())
+
+    # equivalent to eagerly compressing state dict
+    assert compressed.keys() == true_compressed.keys()
+    for key in compressed.keys():
+        assert compressed[key].dtype == true_compressed[key].dtype
+        assert torch.all(compressed[key] == true_compressed[key]), f"{key}"
+
+
+@pytest.mark.parametrize(
+    "model_stub,comp_stub",
+    [
+        (
+            "nm-testing/llama2.c-stories42M-gsm8k-quantized-only-uncompressed",
+            "nm-testing/llama2.c-stories42M-gsm8k-quantized-only-compressed",
+        ),
+        (
+            "nm-testing/llama2.c-stories42M-gsm8k-sparse-only-uncompressed",
+            "nm-testing/llama2.c-stories42M-gsm8k-sparse-only-compressed",
+        ),
+        (
+            "nm-testing/llama2.c-stories42M-gsm8k-stacked-uncompressed",
+            "nm-testing/llama2.c-stories42M-gsm8k-stacked-compressed",
+        ),
+        (
+            "nm-testing/llama2.c-stories15M-ultrachat-mixed-uncompressed",
+            "nm-testing/llama2.c-stories15M-ultrachat-mixed-compressed",
+        ),
+    ],
+)
+def test_decompress_model(model_stub, comp_stub):
+    from transformers.utils.quantization_config import CompressedTensorsConfig
+
+    # decompress from disk
+    # NOTE: transformers adds extra zero points if run_compressed=False or w/ sparsity
+    # https://github.com/huggingface/transformers/blob/main/src/transformers/quantizers/quantizer_compressed_tensors.py#L131-L133
+    # however, decompression does not add zero points in non-asymmetric cases
+    # in order to normalize for this effect in this test, we remove empty weight zps
+    true_decompressed_model = AutoModelForCausalLM.from_pretrained(
+        comp_stub,
+        quantization_config=CompressedTensorsConfig(run_compressed=False),
+        torch_dtype=torch.float32,
+    )
+    true_decompressed = dict(true_decompressed_model.state_dict())
+    true_decompressed = remove_empty_weight_zero_points(true_decompressed)  # see above
+
+    # decompress from memory
+    # NOTE there is no other way to load a compressed model into memory, since
+    # there is no way to turn off decompression for sparse models
+    # https://github.com/huggingface/transformers/blob/main/src/transformers/quantizers/quantizer_compressed_tensors.py#L133
+    model = AutoModelForCausalLM.from_pretrained(model_stub, torch_dtype=torch.float32)
+    compressor = ModelCompressor.from_pretrained(comp_stub)
+    compressor.compress_model(model)
+    compressor.decompress_model(model)
+    decompressed = dict(model.state_dict())
+
+    # remove keys not in model definition
+    # NOTE it would be better if compressors only returned keys to keep, rather than
+    # relying on the model structure + missing keys to catch and remove them later
+    model_keys = true_decompressed_model.state_dict().keys()
+    decompressed = {key: val for key, val in decompressed.items() if key in model_keys}
+
+    # equivalent to decompressing from disk
+    assert decompressed.keys() == true_decompressed.keys()
+    for key in decompressed.keys():
+        assert decompressed[key].dtype == true_decompressed[key].dtype
+        assert torch.all(decompressed[key] == true_decompressed[key]), f"{key}"
+
+
+def remove_empty_weight_zero_points(state_dict):
+    return {
+        name: value
+        for name, value in state_dict.items()
+        if not (name.endswith("weight_zero_point") and torch.all(value == 0))
+    }
{compressed_tensors-0.9.5a20250513 → compressed_tensors-0.9.5a20250514}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py

@@ -47,6 +47,8 @@ def shard_validation():

 def validate_compression(dense_matrix, decompressed_tensor):
     """Validate that the decompressed tensor matches the original dense matrix."""
+    if decompressed_tensor.dtype == FP8_DTYPE:
+        decompressed_tensor = decompressed_tensor.to("cuda")
     dense_matrix = dense_matrix.to(decompressed_tensor.device)
     assert dense_matrix.dtype == decompressed_tensor.dtype, "Dtype mismatch"
     assert dense_matrix.shape == decompressed_tensor.shape, "Shape mismatch"