compressed-tensors 0.8.0__tar.gz → 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/PKG-INFO +1 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/setup.py +32 -6
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +92 -18
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/quantized_compressors/base.py +35 -5
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +6 -4
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +4 -2
- {compressed-tensors-0.8.0/src/compressed_tensors/config → compressed-tensors-0.9.0/src/compressed_tensors/compressors/sparse_compressors}/__init__.py +2 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/sparse_compressors/base.py +45 -7
- compressed-tensors-0.9.0/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +238 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +9 -40
- {compressed-tensors-0.8.0/src/compressed_tensors/compressors/sparse_compressors → compressed-tensors-0.9.0/src/compressed_tensors/config}/__init__.py +2 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/config/base.py +1 -0
- compressed-tensors-0.9.0/src/compressed_tensors/config/sparse_24_bitmask.py +40 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/linear/compressed_linear.py +3 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/quantization/lifecycle/apply.py +48 -2
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/quantization/lifecycle/forward.py +2 -2
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/quantization/lifecycle/initialize.py +21 -45
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/quantization/quant_args.py +16 -3
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/quantization/quant_config.py +3 -3
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/quantization/quant_scheme.py +17 -24
- compressed-tensors-0.9.0/src/compressed_tensors/utils/helpers.py +326 -0
- compressed-tensors-0.9.0/src/compressed_tensors/utils/offload.py +404 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/utils/safetensors_load.py +83 -17
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/version.py +1 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors.egg-info/PKG-INFO +1 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors.egg-info/SOURCES.txt +2 -0
- compressed-tensors-0.8.0/src/compressed_tensors/utils/helpers.py +0 -121
- compressed-tensors-0.8.0/src/compressed_tensors/utils/offload.py +0 -116
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/LICENSE +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/README.md +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/pyproject.toml +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/setup.cfg +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/base.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/base.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/config/dense.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/utils/permute.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors.egg-info/requires.txt +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors.egg-info/top_level.txt +0 -0
{compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/setup.py

```diff
@@ -1,11 +1,11 @@
 # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
-#
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-#
+#
 # http://www.apache.org/licenses/LICENSE-2.0
-#
+#
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -15,7 +15,33 @@
 import os
 from setuptools import setup, find_packages
 from typing import List, Dict, Tuple
-
+
+
+def get_release_and_version(package_path: str) -> Tuple[bool, bool, str, str, str, str]:
+    """
+    Load version and release info from compressed-tensors package
+    """
+    # compressed-tensors/src/compressed_tensors/version.py always exists, default source of truth
+    version_path = os.path.join(package_path, "version.py")
+
+    # exec() cannot set local variables so need to manually
+    locals_dict = {}
+    exec(open(version_path).read(), globals(), locals_dict)
+    is_release = locals_dict.get("is_release", False)
+    version = locals_dict.get("version", "unknown")
+    version_major = locals_dict.get("version_major", "unknown")
+    version_minor = locals_dict.get("version_minor", "unknown")
+    version_bug = locals_dict.get("version_bug", "unknown")
+
+    print(f"Loaded version {version} from {version_path}")
+
+    return (
+        is_release,
+        version,
+        version_major,
+        version_minor,
+        version_bug,
+    )
 
 
 package_path = os.path.join(
@@ -35,7 +61,7 @@ if is_release:
     _PACKAGE_NAME = "compressed-tensors"
 else:
     _PACKAGE_NAME = "compressed-tensors-nightly"
-
+
 
 def _setup_long_description() -> Tuple[str, str]:
     return open("README.md", "r", encoding="utf-8").read(), "text/markdown"
@@ -44,7 +70,7 @@ def _setup_packages() -> List:
     return find_packages(
         "src", include=["compressed_tensors", "compressed_tensors.*"], exclude=["*.__pycache__.*"]
    )
-
+
 def _setup_install_requires() -> List:
    return ["torch>=1.7.0", "transformers", "pydantic>=2.0"]
 
```
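Taken together, the setup.py hunks replace hard-coded version handling with a `get_release_and_version` helper whose `is_release` flag selects between the `compressed-tensors` and `compressed-tensors-nightly` package names. A minimal standalone sketch of the pattern (the `version_file` path follows the comment in the hunk; `package_name` is illustrative, not a variable from setup.py):

```python
# Sketch of the version-loading pattern added to setup.py (not the file itself).
version_file = "src/compressed_tensors/version.py"  # assumed location

# exec() cannot rebind true local variables, so results are collected in a dict
locals_dict = {}
exec(open(version_file).read(), globals(), locals_dict)

is_release = locals_dict.get("is_release", False)
version = locals_dict.get("version", "unknown")

# mirrors the _PACKAGE_NAME selection in the @@ -35,7 +61,7 @@ hunk
package_name = "compressed-tensors" if is_release else "compressed-tensors-nightly"
print(package_name, version)
```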
{compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/model_compressors/model_compressor.py

```diff
@@ -17,14 +17,14 @@ import logging
 import operator
 import os
 import re
+from contextlib import contextmanager
 from copy import deepcopy
-from typing import TYPE_CHECKING, Any, Dict, Optional, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, Set, TypeVar, Union
 
 import compressed_tensors
 import torch
 import transformers
 from compressed_tensors.base import (
-    COMPRESSION_CONFIG_NAME,
     COMPRESSION_VERSION_NAME,
     QUANTIZATION_CONFIG_NAME,
     QUANTIZATION_METHOD_NAME,
@@ -39,6 +39,8 @@ from compressed_tensors.quantization import (
     apply_quantization_config,
     load_pretrained_quantization,
 )
+from compressed_tensors.quantization.lifecycle import expand_sparse_target_names
+from compressed_tensors.quantization.quant_args import QuantizationArgs
 from compressed_tensors.quantization.utils import (
     is_module_quantized,
     iter_named_leaf_modules,
@@ -103,12 +105,13 @@ class ModelCompressor:
         :return: compressor for the configs, or None if model is not compressed
         """
         config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
-        compression_config = getattr(config, COMPRESSION_CONFIG_NAME, None)
+        compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)
         return cls.from_compression_config(compression_config)
 
     @classmethod
     def from_compression_config(
-        cls, compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"]
+        cls,
+        compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"],
     ):
         """
         :param compression_config:
@@ -135,7 +138,7 @@ class ModelCompressor:
             format, **sparsity_config
         )
         if quantization_config is not None:
-            quantization_config = QuantizationConfig.parse_obj(quantization_config)
+            quantization_config = QuantizationConfig.model_validate(quantization_config)
 
         return cls(
             sparsity_config=sparsity_config, quantization_config=quantization_config
@@ -191,7 +194,7 @@ class ModelCompressor:
 
         if is_compressed_tensors_config(compression_config):
             s_config = compression_config.sparsity_config
-            return s_config.dict() if s_config is not None else None
+            return s_config.model_dump() if s_config is not None else None
 
         return compression_config.get(SPARSITY_CONFIG_NAME, None)
 
@@ -212,7 +215,7 @@ class ModelCompressor:
 
         if is_compressed_tensors_config(compression_config):
             q_config = compression_config.quantization_config
-            return q_config.dict() if q_config is not None else None
+            return q_config.model_dump() if q_config is not None else None
 
         quantization_config = deepcopy(compression_config)
         quantization_config.pop(SPARSITY_CONFIG_NAME, None)
```
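The last three hunks are a pydantic v2 migration: configs are now validated with `model_validate` and serialized with `model_dump`, the v2 replacements for v1's `parse_obj` and `dict`. A self-contained illustration with a stand-in model (the class and fields below are hypothetical, not from the codebase):

```python
from typing import List

from pydantic import BaseModel


class SparsityConfigSketch(BaseModel):  # hypothetical stand-in config class
    format: str = "dense"
    targets: List[str] = []


data = {"format": "sparse-bitmask", "targets": ["Linear"]}
config = SparsityConfigSketch.model_validate(data)   # v2 replacement for parse_obj()
assert config.model_dump()["targets"] == ["Linear"]  # v2 replacement for dict()
```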
{compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/model_compressors/model_compressor.py (continued)

```diff
@@ -265,7 +268,11 @@ class ModelCompressor:
         state_dict = model.state_dict()
 
         compressed_state_dict = state_dict
-
+
+        quantized_modules_to_args: Dict[
+            str, QuantizationArgs
+        ] = map_modules_to_quant_args(model)
+
         if self.quantization_compressor is not None:
             compressed_state_dict = self.quantization_compressor.compress(
                 state_dict, names_to_scheme=quantized_modules_to_args
@@ -276,8 +283,14 @@ class ModelCompressor:
             )
 
         if self.sparsity_compressor is not None:
+            sparse_compression_targets: Set[str] = expand_sparse_target_names(
+                model=model,
+                targets=self.sparsity_config.targets,
+                ignore=self.sparsity_config.ignore,
+            )
             compressed_state_dict = self.sparsity_compressor.compress(
-                compressed_state_dict
+                compressed_state_dict,
+                compression_targets=sparse_compression_targets,
             )
 
         # HACK: Override the dtype_byte_size function in transformers to
@@ -295,23 +308,44 @@ class ModelCompressor:
         :param model: pytorch model to load decompressed weights into
         """
         model_path = get_safetensors_folder(model_path)
-        if self.sparsity_compressor is not None:
+        sparse_decompressed = False
+
+        if (
+            self.sparsity_compressor is not None
+            and self.sparsity_config.format != CompressionFormat.dense.value
+        ):
+            # Sparse decompression is applied on the model_path
             dense_gen = self.sparsity_compressor.decompress(model_path)
             self._replace_weights(dense_gen, model)
             setattr(model, SPARSITY_CONFIG_NAME, self.sparsity_compressor.config)
+            sparse_decompressed = True
 
         if self.quantization_compressor is not None:
-            names_to_scheme = apply_quantization_config(model, self.quantization_config)
-            load_pretrained_quantization(model, model_path)
+            # Temporarily set quantization status to FROZEN to prevent
+            # quantization during apply_quantization_config. This ensures
+            # that the dtypes of the weights are not unintentionally updated.
+            # The status is restored after quantization params are loaded.
+            with override_quantization_status(
+                self.quantization_config, QuantizationStatus.FROZEN
+            ):
+                names_to_scheme = apply_quantization_config(
+                    model, self.quantization_config
+                )
+                load_pretrained_quantization(model, model_path)
+
+            model_path_or_state_dict = (
+                model.state_dict() if sparse_decompressed else model_path
+            )
+
             dense_gen = self.quantization_compressor.decompress(
-                model_path, names_to_scheme=names_to_scheme
+                model_path_or_state_dict, names_to_scheme=names_to_scheme
             )
             self._replace_weights(dense_gen, model)
 
-            def update_status(module):
+            def freeze_quantization_status(module):
                 module.quantization_status = QuantizationStatus.FROZEN
 
-            model.apply(update_status)
+            model.apply(freeze_quantization_status)
             setattr(model, QUANTIZATION_CONFIG_NAME, self.quantization_config)
@@ -361,15 +395,35 @@ class ModelCompressor:
         with open(config_file_path, "w") as config_file:
             json.dump(config_data, config_file, indent=2, sort_keys=True)
 
-    def _replace_weights(self, dense_weight_generator, model):
+    def _replace_weights(self, dense_weight_generator, model: Module):
+        """
+        Replace the weights of the model with the
+        provided dense weights.
+
+        This method iterates over the dense_weight_generator and
+        updates the corresponding weights in the model. If a parameter
+        name does not exist in the model, it will be skipped.
+
+        :param dense_weight_generator (generator): A generator that yields
+            tuples of (name, data), where 'name' is the parameter name and
+            'data' is the updated param data
+        :param model: The model whose weights are to be updated.
+        """
         for name, data in tqdm(dense_weight_generator, desc="Decompressing model"):
             split_name = name.split(".")
             prefix, param_name = ".".join(split_name[:-1]), split_name[-1]
             module = operator.attrgetter(prefix)(model)
-            update_parameter_data(module, data, param_name)
+            if hasattr(module, param_name):
+                update_parameter_data(module, data, param_name)
 
 
-def map_modules_to_quant_args(model: Module) -> Dict:
+def map_modules_to_quant_args(model: Module) -> Dict[str, QuantizationArgs]:
+    """
+    Given a pytorch model, map out the submodule name (usually linear layers)
+    to the QuantizationArgs
+
+    :param model: pytorch model
+    """
     quantized_modules_to_args = {}
     for name, submodule in iter_named_leaf_modules(model):
         if is_module_quantized(submodule):
```
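`_replace_weights` resolves dotted parameter names with `operator.attrgetter` and, new in 0.9.0, skips names the target module does not actually own. The lookup can be seen in isolation below (the `Sequential` model and parameter name are illustrative):

```python
import operator

import torch

# illustrative model; real names look like "model.layers.0.self_attn.q_proj.weight"
model = torch.nn.Sequential(torch.nn.Linear(4, 4))
name = "0.weight"

split_name = name.split(".")
prefix, param_name = ".".join(split_name[:-1]), split_name[-1]
module = operator.attrgetter(prefix)(model)  # resolves "0" to the Linear layer

# the guard added in 0.9.0: only update parameters that exist on the module
if hasattr(module, param_name):
    print(f"would update {name}:", getattr(module, param_name).shape)
```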
{compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/model_compressors/model_compressor.py (continued)

```diff
@@ -390,3 +444,23 @@ def new_dtype_byte_size(dtype):
         raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
     bit_size = int(bit_search.groups()[0])
     return bit_size // 8
+
+
+@contextmanager
+def override_quantization_status(
+    config: QuantizationConfig, status: QuantizationStatus
+):
+    """
+    Within this context, the quantization status will be set to the
+    supplied status. After the context exits, the original status
+    will be restored.
+
+    :param config: the quantization config to override
+    :param status: the status to temporarily set
+    """
+    original_status = config.quantization_status
+    config.quantization_status = status
+    try:
+        yield
+    finally:
+        config.quantization_status = original_status
```
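The `try`/`finally` in `override_quantization_status` guarantees the original status comes back even if the body raises. A runnable sketch of the same behavior against a stand-in config object (`DummyConfig` is hypothetical; the real argument is a `QuantizationConfig`):

```python
from contextlib import contextmanager


class DummyConfig:  # hypothetical stand-in for QuantizationConfig
    quantization_status = "initialized"


@contextmanager
def override_quantization_status(config, status):
    original_status = config.quantization_status
    config.quantization_status = status
    try:
        yield
    finally:
        config.quantization_status = original_status  # restored even on error


cfg = DummyConfig()
with override_quantization_status(cfg, "frozen"):
    assert cfg.quantization_status == "frozen"
assert cfg.quantization_status == "initialized"
```

This is what lets `ModelCompressor.decompress` run `apply_quantization_config` without permanently flipping the model into a quantizing state.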
{compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/quantized_compressors/base.py

```diff
@@ -13,12 +13,17 @@
 # limitations under the License.
 
 import logging
-from typing import Dict, Generator, Tuple
+from pathlib import Path
+from typing import Any, Dict, Generator, Tuple, Union
 
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
 from compressed_tensors.quantization import QuantizationArgs
-from compressed_tensors.utils import get_nested_weight_mappings, merge_names
+from compressed_tensors.utils import (
+    get_nested_mappings_from_state_dict,
+    get_nested_weight_mappings,
+    merge_names,
+)
 from safetensors import safe_open
 from torch import Tensor
 from tqdm import tqdm
@@ -113,7 +118,7 @@ class BaseQuantizationCompressor(BaseCompressor):
 
     def decompress(
         self,
-        path_to_model_or_tensors: str,
+        path_to_model_or_tensors: Union[str, Path, Dict[str, Any]],
         names_to_scheme: Dict[str, QuantizationArgs],
         device: str = "cpu",
     ) -> Generator[Tuple[str, Tensor], None, None]:
@@ -121,15 +126,25 @@ class BaseQuantizationCompressor(BaseCompressor):
         Reads a compressed state dict located at path_to_model_or_tensors
         and returns a generator for sequentially decompressing back to a
         dense state dict
-
         :param path_to_model_or_tensors: path to compressed safetensors model (directory
             with one or more safetensors files) or compressed tensors file
         :param names_to_scheme: quantization args for each quantized weight
         :param device: optional device to load intermediate weights into
         :return: compressed state dict
         """
+        if isinstance(path_to_model_or_tensors, (str, Path)):
+            yield from self._decompress_from_path(
+                path_to_model_or_tensors, names_to_scheme, device
+            )
+
+        else:
+            yield from self._decompress_from_state_dict(
+                path_to_model_or_tensors, names_to_scheme
+            )
+
+    def _decompress_from_path(self, path_to_model, names_to_scheme, device):
         weight_mappings = get_nested_weight_mappings(
-            path_to_model_or_tensors, self.COMPRESSION_PARAM_NAMES
+            path_to_model, self.COMPRESSION_PARAM_NAMES
         )
         for weight_name in weight_mappings.keys():
             weight_data = {}
@@ -137,6 +152,21 @@ class BaseQuantizationCompressor(BaseCompressor):
                 full_name = merge_names(weight_name, param_name)
                 with safe_open(safe_path, framework="pt", device=device) as f:
                     weight_data[param_name] = f.get_tensor(full_name)
+            if "weight_scale" in weight_data:
+                quant_args = names_to_scheme[weight_name]
+                decompressed = self.decompress_weight(
+                    compressed_data=weight_data, quantization_args=quant_args
+                )
+                yield merge_names(weight_name, "weight"), decompressed
+
+    def _decompress_from_state_dict(self, state_dict, names_to_scheme):
+        weight_mappings = get_nested_mappings_from_state_dict(
+            state_dict, self.COMPRESSION_PARAM_NAMES
+        )
+        for weight_name in weight_mappings.keys():
+            weight_data = {}
+            for param_name, param_value in weight_mappings[weight_name].items():
+                weight_data[param_name] = param_value
 
             if "weight_scale" in weight_data:
                 quant_args = names_to_scheme[weight_name]
```
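With these hunks, `decompress` accepts either a filesystem path or an in-memory state dict and dispatches on type; this is what allows `ModelCompressor.decompress` to feed the sparse-decompressed `model.state_dict()` straight into the quantization compressor. A self-contained sketch of the dispatch shape (the helper bodies here are placeholders, not the real implementations):

```python
from pathlib import Path
from typing import Any, Dict, Iterator, Union


def decompress(src: Union[str, Path, Dict[str, Any]]) -> Iterator[str]:
    # mirrors the isinstance dispatch added in 0.9.0
    if isinstance(src, (str, Path)):
        yield from _decompress_from_path(src)
    else:
        yield from _decompress_from_state_dict(src)


def _decompress_from_path(path) -> Iterator[str]:
    yield f"streamed from disk: {path}"  # placeholder body


def _decompress_from_state_dict(state_dict) -> Iterator[str]:
    for key in state_dict:
        yield f"read from memory: {key}"  # placeholder body


print(list(decompress("some/model/dir")))
print(list(decompress({"layer.weight_packed": 0, "layer.weight_scale": 1})))
```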
{compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py

```diff
@@ -68,9 +68,9 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
         self,
         weight: Tensor,
         scale: Tensor,
+        quantization_args: QuantizationArgs,
         zero_point: Optional[Tensor] = None,
         g_idx: Optional[torch.Tensor] = None,
-        quantization_args: Optional[QuantizationArgs] = None,
         device: Optional[torch.device] = None,
     ) -> Dict[str, torch.Tensor]:
         """
@@ -78,9 +78,9 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
 
         :param weight: uncompressed weight tensor
         :param scale: quantization scale for weight
+        :param quantization_args: quantization parameters for weight
         :param zero_point: quantization zero point for weight
         :param g_idx: optional mapping from column index to group index
-        :param quantization_args: quantization parameters for weight
         :param device: optional device to move compressed output to
         :return: dictionary of compressed weight data
         """
@@ -93,9 +93,11 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
             args=quantization_args,
             dtype=quantization_args.pytorch_dtype(),
         )
+        else:
+            quantized_weight = weight
 
-        if device is not None:
-            quantized_weight = quantized_weight.to(device)
+        if device is not None:
+            quantized_weight = quantized_weight.to(device)
 
         return {"weight": quantized_weight}
 
```

{compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py

```diff
@@ -68,9 +68,9 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
         self,
         weight: Tensor,
         scale: Tensor,
+        quantization_args: QuantizationArgs,
         zero_point: Optional[Tensor] = None,
         g_idx: Optional[torch.Tensor] = None,
-        quantization_args: Optional[QuantizationArgs] = None,
         device: Optional[torch.device] = None,
     ) -> Dict[str, torch.Tensor]:
         """
@@ -78,9 +78,9 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
 
         :param weight: uncompressed weight tensor
         :param scale: quantization scale for weight
+        :param quantization_args: quantization parameters for weight
         :param zero_point: quantization zero point for weight
         :param g_idx: optional mapping from column index to group index
-        :param quantization_args: quantization parameters for weight
         :param device: optional device to move compressed output to
         :return: dictionary of compressed weight data
         """
@@ -94,6 +94,8 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
             args=quantization_args,
             dtype=torch.int8,
         )
+        else:
+            quantized_weight = weight
 
         packed_weight = pack_to_int32(quantized_weight, quantization_args.num_bits)
         weight_shape = torch.tensor(weight.shape)
```
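Two things change in both compressors: `quantization_args` becomes a required positional parameter (keyword call sites are unaffected, but positional callers must be updated), and the new `else` branch passes the weight through untouched when the quantize step is skipped; without it, Python would leave `quantized_weight` unbound. A toy illustration of the second point (`round` stands in for the real quantize call):

```python
# Why the added `else: quantized_weight = weight` matters: a name assigned only
# in a skipped `if` body is undefined afterwards, raising UnboundLocalError.
def compress_weight_sketch(weight: float, can_quantize: bool) -> dict:
    if can_quantize:                      # stand-in for the real eligibility check
        quantized_weight = round(weight)  # stand-in for the real quantize() call
    else:
        quantized_weight = weight         # 0.9.0: fall through unchanged
    return {"weight": quantized_weight}


print(compress_weight_sketch(1.7, can_quantize=True))   # {'weight': 2}
print(compress_weight_sketch(1.7, can_quantize=False))  # {'weight': 1.7}
```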
{compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/sparse_compressors/__init__.py

```diff
@@ -11,8 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 # flake8: noqa
+
 from .base import *
 from .dense import *
+from .sparse_24_bitmask import *
 from .sparse_bitmask import *
```
{compressed-tensors-0.8.0 → compressed-tensors-0.9.0}/src/compressed_tensors/compressors/sparse_compressors/base.py

```diff
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import logging
-from typing import Dict, Generator, Tuple
+from typing import Dict, Generator, Optional, Set, Tuple
 
 from compressed_tensors.compressors.base import BaseCompressor
 from compressed_tensors.utils import get_nested_weight_mappings, merge_names
@@ -30,7 +30,8 @@ _LOGGER: logging.Logger = logging.getLogger(__name__)
 class BaseSparseCompressor(BaseCompressor):
     """
     Base class representing a sparse compression algorithm. Each child class should
-    implement compression_param_info, compress_weight and decompress_weight
+    implement compression_param_info, compress_weight and decompress_weight; child
+    classes should also define COMPRESSION_PARAM_NAMES.
 
     Compressors support compressing/decompressing a full module state dict or a single
     quantized PyTorch leaf module.
@@ -59,11 +60,17 @@ class BaseSparseCompressor(BaseCompressor):
     :param config: config specifying compression parameters
     """
 
-    def compress(self, model_state: Dict[str, Tensor]) -> Dict[str, Tensor]:
+    def compress(
+        self,
+        model_state: Dict[str, Tensor],
+        compression_targets: Optional[Set[str]] = None,
+    ) -> Dict[str, Tensor]:
         """
         Compresses a dense state dict using bitmask compression
 
         :param model_state: state dict of uncompressed model
+        :param compression_targets: optional set of layer prefixes to compress,
+            otherwise compress all layers (for backwards compatibility)
         :return: compressed state dict
         """
         compressed_dict = {}
@@ -71,7 +78,14 @@ class BaseSparseCompressor(BaseCompressor):
             f"Compressing model with {len(model_state)} parameterized layers..."
         )
         for name, value in tqdm(model_state.items(), desc="Compressing model"):
-            compression_data = self.compress_weight(name, value)
+            if not self.should_compress(name, compression_targets):
+                compressed_dict[name] = value
+                continue
+            prefix = name
+            if prefix.endswith(".weight"):
+                prefix = prefix[: -(len(".weight"))]
+
+            compression_data = self.compress_weight(prefix, value)
             for key in compression_data.keys():
                 if key in compressed_dict:
                     _LOGGER.warn(
@@ -97,8 +111,10 @@ class BaseSparseCompressor(BaseCompressor):
         :param device: device to load decompressed weights onto
         :return: iterator for generating decompressed weights
         """
-        weight_mappings = get_nested_weight_mappings(
-            path_to_model_or_tensors, self.COMPRESSION_PARAM_NAMES
+        weight_mappings, ignored_params = get_nested_weight_mappings(
+            path_to_model_or_tensors,
+            self.COMPRESSION_PARAM_NAMES,
+            return_unmatched_params=True,
         )
         for weight_name in weight_mappings.keys():
             weight_data = {}
@@ -107,4 +123,26 @@ class BaseSparseCompressor(BaseCompressor):
                 with safe_open(safe_path, framework="pt", device=device) as f:
                     weight_data[param_name] = f.get_tensor(full_name)
             decompressed = self.decompress_weight(weight_data)
-            yield weight_name, decompressed
+            yield merge_names(weight_name, "weight"), decompressed
+
+        for ignored_param_name, safe_path in ignored_params.items():
+            with safe_open(safe_path, framework="pt", device=device) as f:
+                value = f.get_tensor(ignored_param_name)
+            yield ignored_param_name, value
+
+    @staticmethod
+    def should_compress(name: str, expanded_targets: Optional[Set[str]] = None) -> bool:
+        """
+        Check if a parameter should be compressed.
+        Currently, this only returns True for weight parameters.
+
+        :param name: name of the parameter
+        :param expanded_targets: set of layer prefixes to compress
+        :return: whether or not the parameter should be compressed
+        """
+        if expanded_targets is None:
+            return name.endswith(".weight")
+
+        return (
+            name.endswith(".weight") and name[: -(len(".weight"))] in expanded_targets
+        )
```