PyPI - compressed-tensors-nightly - Versions diffs - 0.9.1.20250203__tar.gz → 0.9.1.20250205__tar.gz - Mend

compressed-tensors-nightly 0.9.1.20250203__tar.gz → 0.9.1.20250205__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

{compressed-tensors-nightly-0.9.1.20250203/src/compressed_tensors_nightly.egg-info → compressed_tensors_nightly-0.9.1.20250205}/PKG-INFO RENAMED Viewed

@@ -1,15 +1,34 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: compressed-tensors-nightly
-Version: 0.9.1.20250203
+Version: 0.9.1.20250205
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
 Author-email: support@neuralmagic.com
 License: Apache 2.0
 Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: torch>=1.7.0
+Requires-Dist: transformers
+Requires-Dist: pydantic>=2.0
 Provides-Extra: dev
+Requires-Dist: black==22.12.0; extra == "dev"
+Requires-Dist: isort==5.8.0; extra == "dev"
+Requires-Dist: wheel>=0.36.2; extra == "dev"
+Requires-Dist: flake8>=3.8.3; extra == "dev"
+Requires-Dist: pytest>=6.0.0; extra == "dev"
+Requires-Dist: nbconvert>=7.16.3; extra == "dev"
 Provides-Extra: accelerate
-License-File: LICENSE
+Requires-Dist: accelerate; extra == "accelerate"
+Dynamic: author
+Dynamic: author-email
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: summary
 # compressed-tensors

{compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/base.py RENAMED Viewed

@@ -77,6 +77,15 @@ class BaseCompressor(RegistryMixin, ABC):
         """
         raise NotImplementedError()
+    @property
+    @abstractmethod
+    def compression_param_names(self) -> Tuple[str, ...]:
+        """
+        Returns a tuple of compression parameter names introduced by
+        the compressor during compression.
+        Declared abstract, so every concrete compressor must override it;
+        this base body only raises.
+        :raises NotImplementedError: always, when the base body is reached
+        """
+        raise NotImplementedError()
     @abstractmethod
     def compress(
         self,

{compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/quantized_compressors/base.py RENAMED Viewed

@@ -144,7 +144,7 @@ class BaseQuantizationCompressor(BaseCompressor):
     def _decompress_from_path(self, path_to_model, names_to_scheme, device):
         weight_mappings = get_nested_weight_mappings(
-            path_to_model, self.COMPRESSION_PARAM_NAMES
+            path_to_model, self.compression_param_names
         )
         for weight_name in weight_mappings.keys():
             weight_data = {}
@@ -161,7 +161,7 @@ class BaseQuantizationCompressor(BaseCompressor):
     def _decompress_from_state_dict(self, state_dict, names_to_scheme):
         weight_mappings = get_nested_mappings_from_state_dict(
-            state_dict, self.COMPRESSION_PARAM_NAMES
+            state_dict, self.compression_param_names
         )
         for weight_name in weight_mappings.keys():
             weight_data = {}

{compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py RENAMED Viewed

@@ -41,12 +41,18 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
     type to the type specified by the layer's QuantizationArgs.
     """
-    COMPRESSION_PARAM_NAMES = [
-        "weight",
-        "weight_scale",
-        "weight_zero_point",
-        "weight_g_idx",
-    ]
+    @property
+    def compression_param_names(self) -> Tuple[str, ...]:
+        """
+        Returns a tuple of compression parameter names introduced by
+        the compressor during compression
+        :return: ("weight", "weight_scale", "weight_zero_point",
+            "weight_g_idx")
+        """
+        # BaseQuantizationCompressor passes this tuple to
+        # get_nested_weight_mappings / get_nested_mappings_from_state_dict
+        # when decompressing
+        return (
+            "weight",
+            "weight_scale",
+            "weight_zero_point",
+            "weight_g_idx",
+        )
     def compression_param_info(
         self,

{compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py RENAMED Viewed

@@ -36,13 +36,19 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
     Compresses a quantized model by packing every eight 4-bit weights into an int32
     """
-    COMPRESSION_PARAM_NAMES = [
-        "weight_packed",
-        "weight_scale",
-        "weight_zero_point",
-        "weight_g_idx",
-        "weight_shape",
-    ]
+    @property
+    def compression_param_names(self) -> Tuple[str, ...]:
+        """
+        Returns a tuple of compression parameter names introduced by
+        the compressor during compression
+        :return: ("weight_packed", "weight_scale", "weight_zero_point",
+            "weight_g_idx", "weight_shape")
+        """
+        # BaseQuantizationCompressor passes this tuple to
+        # get_nested_weight_mappings / get_nested_mappings_from_state_dict
+        # when decompressing
+        return (
+            "weight_packed",
+            "weight_scale",
+            "weight_zero_point",
+            "weight_g_idx",
+            "weight_shape",
+        )
     def compression_param_info(
         self,

{compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_compressors/base.py RENAMED Viewed

@@ -30,8 +30,7 @@ _LOGGER: logging.Logger = logging.getLogger(__name__)
 class BaseSparseCompressor(BaseCompressor):
     """
     Base class representing a sparse compression algorithm. Each child class should
-    implement compression_param_info, compress_weight and decompress_weight; child
-    classes should also define COMPRESSION_PARAM_NAMES.
+    implement compression_param_names, compress_weight and decompress_weight;
     Compressors support compressing/decompressing a full module state dict or a single
     quantized PyTorch leaf module.
@@ -113,7 +112,7 @@ class BaseSparseCompressor(BaseCompressor):
         """
         weight_mappings, ignored_params = get_nested_weight_mappings(
             path_to_model_or_tensors,
-            self.COMPRESSION_PARAM_NAMES,
+            self.compression_param_names,
             return_unmatched_params=True,
         )
         for weight_name in weight_mappings.keys():

{compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_compressors/dense.py RENAMED Viewed

@@ -25,6 +25,14 @@ class DenseCompressor(BaseCompressor):
     Identity compressor for dense models, returns the original state_dict
     """
+    @property
+    def compression_param_names(self) -> Tuple[str, ...]:
+        """
+        Returns a tuple of compression parameter names introduced by
+        the compressor during compression
+        :return: an empty tuple
+        """
+        # compress() returns the state dict it was given, so there are no
+        # compressor-specific parameter names to report
+        return ()
     def compress(self, model_state: Dict[str, Tensor], **kwargs) -> Dict[str, Tensor]:
         return model_state

{compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py RENAMED Viewed

@@ -40,11 +40,17 @@ class Sparse24BitMaskCompressor(BaseSparseCompressor):
     values tensor, with their locations stored in a 2d bitmask
     """
-    COMPRESSION_PARAM_NAMES = [
-        "shape",
-        "compressed",
-        "bitmask",
-    ]
+    @property
+    def compression_param_names(self) -> Tuple[str, ...]:
+        """
+        Returns a tuple of compression parameter names introduced by
+        the compressor during compression
+        :return: ("shape", "compressed", "bitmask")
+        """
+        # BaseSparseCompressor passes this tuple to
+        # get_nested_weight_mappings when decompressing
+        return (
+            "shape",
+            "compressed",
+            "bitmask",
+        )
     def compress_weight(self, name, value):
         bitmask_tensor = Sparse24BitMaskTensor.from_dense(

{compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py RENAMED Viewed

@@ -38,7 +38,13 @@ class BitmaskCompressor(BaseSparseCompressor):
     values tensor, with their locations stored in a 2d bitmask
     """
-    COMPRESSION_PARAM_NAMES = ["shape", "compressed", "bitmask", "row_offsets"]
+    @property
+    def compression_param_names(self) -> Tuple[str, ...]:
+        """
+        Returns a tuple of compression parameter names introduced by
+        the compressor during compression
+        :return: ("shape", "compressed", "bitmask", "row_offsets")
+        """
+        # BaseSparseCompressor passes this tuple to
+        # get_nested_weight_mappings when decompressing
+        return ("shape", "compressed", "bitmask", "row_offsets")
     def compress_weight(self, name, value):
         bitmask_tensor = BitmaskTensor.from_dense(value)

{compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py RENAMED Viewed

@@ -42,8 +42,6 @@ class Marlin24Compressor(BaseCompressor):
     Marlin24 kernel. Decompression is not implemented for this compressor.
     """
-    COMPRESSION_PARAM_NAMES = ["weight_packed", "scale_packed", "meta"]
     @staticmethod
     def validate_quant_compatability(
         model_quant_args: Dict[str, QuantizationArgs]
@@ -105,6 +103,14 @@ class Marlin24Compressor(BaseCompressor):
         return True
+    @property
+    def compression_param_names(self) -> Tuple[str, ...]:
+        """
+        Returns a tuple of compression parameter names introduced by
+        the compressor during compression
+        :return: ("weight_packed", "scale_packed", "meta")
+        """
+        return ("weight_packed", "scale_packed", "meta")
     def compress(
         self,
         model_state: Dict[str, Tensor],

{compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors/utils/safetensors_load.py RENAMED Viewed

@@ -16,7 +16,7 @@ import json
 import os
 import re
 import struct
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Dict, Iterable, Optional, Tuple, Union
 from safetensors import safe_open
 from torch import Tensor
@@ -180,7 +180,9 @@ def get_weight_mappings(path_to_model_or_tensors: str) -> Dict[str, str]:
 def get_nested_weight_mappings(
-    model_path: str, params_to_nest: List[str], return_unmatched_params: bool = False
+    model_path: str,
+    params_to_nest: Iterable[str],
+    return_unmatched_params: bool = False,
 ) -> Union[NestedWeightMappingType, Tuple[NestedWeightMappingType, WeightMappingType]]:
     """
     Takes a path to a state dict saved in safetensors format and returns a nested
@@ -211,7 +213,7 @@ def get_nested_weight_mappings(
     :param model_path: Path to the safetensors state dict, must contain either a
         single safetensors file or multiple files with an index.
-    :param params_to_nest: List of parameter names to nest.
+    :param params_to_nest: Iterable of parameter names to nest.
     :param return_unmatched_params: If True, return a second dictionary containing
         the remaining parameters that were not matched to the params_to_nest.
     :return:
@@ -247,7 +249,7 @@ def get_nested_weight_mappings(
 def get_nested_mappings_from_state_dict(
-    state_dict, params_to_nest
+    state_dict, params_to_nest: Iterable[str]
 ) -> NestedWeightMappingType:
     """
     Takes a state dict and returns a nested mapping from uncompressed
@@ -262,7 +264,7 @@ def get_nested_mappings_from_state_dict(
     }
     :param state_dict: state dict of the model
-    :param params_to_nest: List of parameter names to nest.
+    :param params_to_nest: Iterable of parameter names to nest.
     :return: Nested mapping of parameterized layer names to the value of
         each layer's compression parameters.
     """

{compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205/src/compressed_tensors_nightly.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,15 +1,34 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: compressed-tensors-nightly
-Version: 0.9.1.20250203
+Version: 0.9.1.20250205
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
 Author-email: support@neuralmagic.com
 License: Apache 2.0
 Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: torch>=1.7.0
+Requires-Dist: transformers
+Requires-Dist: pydantic>=2.0
 Provides-Extra: dev
+Requires-Dist: black==22.12.0; extra == "dev"
+Requires-Dist: isort==5.8.0; extra == "dev"
+Requires-Dist: wheel>=0.36.2; extra == "dev"
+Requires-Dist: flake8>=3.8.3; extra == "dev"
+Requires-Dist: pytest>=6.0.0; extra == "dev"
+Requires-Dist: nbconvert>=7.16.3; extra == "dev"
 Provides-Extra: accelerate
-License-File: LICENSE
+Requires-Dist: accelerate; extra == "accelerate"
+Dynamic: author
+Dynamic: author-email
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: summary
 # compressed-tensors

{compressed-tensors-nightly-0.9.1.20250203 → compressed_tensors_nightly-0.9.1.20250205}/src/compressed_tensors_nightly.egg-info/SOURCES.txt RENAMED Viewed

@@ -54,4 +54,6 @@ src/compressed_tensors_nightly.egg-info/PKG-INFO
 src/compressed_tensors_nightly.egg-info/SOURCES.txt
 src/compressed_tensors_nightly.egg-info/dependency_links.txt
 src/compressed_tensors_nightly.egg-info/requires.txt
-src/compressed_tensors_nightly.egg-info/top_level.txt
+src/compressed_tensors_nightly.egg-info/top_level.txt
+tests/test_registry.py
+tests/testing_utils.py

compressed_tensors_nightly-0.9.1.20250205/tests/test_registry.py ADDED Viewed

@@ -0,0 +1,53 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import pytest
+from compressed_tensors import (
+    BaseCompressor,
+    BitmaskCompressor,
+    BitmaskConfig,
+    CompressionFormat,
+    DenseCompressor,
+    DenseSparsityConfig,
+    SparsityCompressionConfig,
+)
+@pytest.mark.parametrize(
+    "name,type",
+    [
+        [CompressionFormat.sparse_bitmask.value, BitmaskConfig],
+        [CompressionFormat.dense.value, DenseSparsityConfig],
+    ],
+)
+def test_configs(name, type):
+    """
+    Checks that loading a sparsity config from the registry by format name
+    yields an instance of the expected config class whose ``format``
+    equals that name.
+    """
+    # NOTE: the ``type`` argument shadows the builtin within this test
+    config = SparsityCompressionConfig.load_from_registry(name)
+    assert isinstance(config, type)
+    assert config.format == name
+@pytest.mark.parametrize(
+    "name,type",
+    [
+        [CompressionFormat.sparse_bitmask.value, BitmaskCompressor],
+        [CompressionFormat.dense.value, DenseCompressor],
+    ],
+)
+def test_compressors(name, type):
+    """
+    Checks that loading a compressor from the registry by format name
+    yields an instance of the expected compressor class and that its
+    ``config`` reflects the config passed at load time.
+    """
+    # NOTE: the ``type`` argument shadows the builtin within this test
+    # the config is built with format="none" (not ``name``); the final
+    # assertion checks that this is the format the compressor reports
+    compressor = BaseCompressor.load_from_registry(
+        name, config=SparsityCompressionConfig(format="none")
+    )
+    assert isinstance(compressor, type)
+    assert isinstance(compressor.config, SparsityCompressionConfig)
+    assert compressor.config.format == "none"

compressed_tensors_nightly-0.9.1.20250205/tests/testing_utils.py ADDED Viewed

@@ -0,0 +1,144 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# flake8: noqa
+import unittest
+import pytest
+def compressed_tensors_config_available():
+    """
+    :return: True if CompressedTensorsConfig can be imported from
+        transformers.utils.quantization_config, False if that import
+        raises ImportError
+    """
+    try:
+        from transformers.utils.quantization_config import (  # noqa: F401
+            CompressedTensorsConfig,
+        )
+        return True
+    except ImportError:
+        return False
+def accelerate_availabe():
+    """
+    :return: True if the accelerate package can be imported,
+        False if the import raises ImportError
+    """
+    # NOTE(review): the name is spelled "availabe"; the module-level flag
+    # assignment calls it under this spelling, so a rename must update
+    # that call as well
+    try:
+        import accelerate  # noqa: F401
+        return True
+    except ImportError:
+        return False
+# Availability flags, evaluated once when this module is imported and read
+# by the requires_hf_quantizer / requires_accelerate skip markers
+_is_compressed_tensors_config_available = compressed_tensors_config_available()
+_is_accelerate_available = accelerate_availabe()
+def requires_hf_quantizer():
+    """
+    :return: a pytest skipif marker that skips the decorated test when
+        the module-level flag says CompressedTensorsConfig could not be
+        imported from transformers
+    """
+    return pytest.mark.skipif(
+        not _is_compressed_tensors_config_available,
+        reason="requires transformers>=4.45 to support CompressedTensorsHfQuantizer",
+    )
+def requires_accelerate():
+    """
+    :return: a pytest skipif marker that skips the decorated test when
+        the module-level flag says accelerate could not be imported
+    """
+    return pytest.mark.skipif(
+        not _is_accelerate_available,
+        reason="requires accelerate",
+    )
+def get_random_mat(M, K, dtype) -> "torch.Tensor":
+    """
+    Builds a random (M, K) matrix, moves it to the CUDA device and casts
+    it to ``dtype``.
+    :param M: number of rows
+    :param K: number of columns
+    :param dtype: data type of the matrix
+    :return: random matrix of shape (M, K) with non-zero values
+    """
+    # imported lazily so that importing this module does not require torch
+    import torch
+    from compressed_tensors.quantization import FP8_DTYPE
+    # int8 and FP8 matrices are sampled in float16 and cast at the end
+    # (presumably because torch.rand cannot sample those dtypes directly)
+    rand_tensor_dtype = dtype
+    if dtype in [torch.int8, FP8_DTYPE]:
+        rand_tensor_dtype = torch.float16
+    # NOTE: .cuda() is unconditional, so this helper needs a CUDA device
+    mat = torch.rand(M, K, dtype=rand_tensor_dtype).cuda()
+    # replace any exact zeros with 1 before the final cast
+    mat = mat.masked_fill_(mat == 0, 1)
+    # NOTE(review): torch.rand samples from [0, 1), so the cast to an integer
+    # dtype such as torch.int8 presumably truncates most entries to 0 --
+    # confirm whether the "non-zero values" guarantee is meant to hold there
+    return mat.to(dtype)
+def generate_pruned_semi_structured_mat(M, K, dtype) -> "torch.Tensor":
+    """
+    Builds a random matrix and zeroes the first two entries of every group
+    of four consecutive columns.
+    :param M: number of rows
+    :param K: number of columns
+    :param dtype: data type of the matrix
+    :return: random matrix of shape (M, K) with 2:4 sparsity pattern
+    """
+    # imported lazily so that importing this module does not require torch
+    import torch
+    from compressed_tensors.quantization import FP8_DTYPE
+    # the [0, 0, 1, 1] pattern is tiled into a mask with 4 * (K // 4)
+    # columns, so K is presumably expected to be a multiple of 4
+    mask = torch.Tensor([0, 0, 1, 1]).tile((M, K // 4)).bool()
+    # int8 and FP8 matrices are sampled in float16 and cast at the end
+    rand_tensor_dtype = dtype
+    if dtype in [torch.int8, FP8_DTYPE]:
+        rand_tensor_dtype = torch.float16
+    mat = torch.rand(M, K, dtype=rand_tensor_dtype)
+    # replace any exact zeros with 1 so only masked entries end up zero
+    mat = mat.masked_fill_(mat == 0, 1)
+    if dtype == FP8_DTYPE:
+        # some float8_e4m3fn operations are not supported on CPU
+        mat = mat.cuda()
+        mask = mask.cuda()
+    mat = mat * mask
+    return mat.to(dtype)
+def induce_sparsity(tensor, sparsity_ratio) -> "torch.Tensor":
+    """
+    Makes a tensor sparse by zeroing out a given fraction
+    of its smallest absolute values.
+    :param tensor: (torch.Tensor) The input weight tensor.
+    :param sparsity_ratio: (float) Fraction of weights to be zeroed
+        (0 <= sparsity_ratio <= 1).
+    :returns: torch.Tensor: Sparse version of the input tensor; the input
+        tensor itself is returned when no element is to be zeroed.
+    :raises ValueError: if sparsity_ratio is outside [0, 1]
+    """
+    # imported lazily so that importing this module does not require torch
+    import torch
+    if not (0 <= sparsity_ratio <= 1):
+        raise ValueError("Sparsity ratio must be between 0 and 1.")
+    # Flatten the tensor and compute the threshold for sparsity
+    flattened = tensor.view(-1)
+    # number of elements to zero, rounded down
+    k = int(sparsity_ratio * flattened.numel())
+    if k > 0:
+        # threshold is the largest of the k smallest absolute values
+        threshold = torch.topk(flattened.abs(), k, largest=False).values.max()
+        # strict ">" keeps only values above the threshold, so every element
+        # whose magnitude ties with the threshold is zeroed as well
+        sparse_tensor = torch.where(
+            tensor.abs() > threshold, tensor, torch.zeros_like(tensor)
+        )
+    else:
+        # nothing to zero: hand back the same tensor object, not a copy
+        sparse_tensor = tensor
+    return sparse_tensor
+def is_gpu_available():
+    """
+    :return: True if a GPU is available, False otherwise
+    """
+    # "available" means torch reports at least one CUDA device; a missing
+    # torch installation is treated as "no GPU" rather than an error
+    try:
+        import torch  # noqa: F401
+        return torch.cuda.device_count() > 0
+    except ImportError:
+        return False
+def requires_gpu(test_case):
+    """
+    Decorator that skips ``test_case`` unless is_gpu_available() is True.
+    :param test_case: the test function or class to wrap
+    :return: ``test_case`` wrapped by unittest.skipUnless with the reason
+        "test requires GPU"
+    """
+    return unittest.skipUnless(is_gpu_available(), "test requires GPU")(test_case)