compressed-tensors-nightly 0.6.0.20240929__py3-none-any.whl → 0.6.0.20241004__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27):
  1. compressed_tensors/base.py +1 -0
  2. compressed_tensors/compressors/__init__.py +6 -12
  3. compressed_tensors/compressors/base.py +38 -102
  4. compressed_tensors/compressors/helpers.py +6 -6
  5. compressed_tensors/compressors/model_compressors/__init__.py +17 -0
  6. compressed_tensors/compressors/{model_compressor.py → model_compressors/model_compressor.py} +91 -53
  7. compressed_tensors/compressors/quantized_compressors/__init__.py +18 -0
  8. compressed_tensors/compressors/quantized_compressors/base.py +146 -0
  9. compressed_tensors/compressors/{naive_quantized.py → quantized_compressors/naive_quantized.py} +11 -11
  10. compressed_tensors/compressors/{pack_quantized.py → quantized_compressors/pack_quantized.py} +6 -3
  11. compressed_tensors/compressors/sparse_compressors/__init__.py +18 -0
  12. compressed_tensors/compressors/sparse_compressors/base.py +110 -0
  13. compressed_tensors/compressors/{dense.py → sparse_compressors/dense.py} +3 -3
  14. compressed_tensors/compressors/{sparse_bitmask.py → sparse_compressors/sparse_bitmask.py} +14 -59
  15. compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +16 -0
  16. compressed_tensors/compressors/{marlin_24.py → sparse_quantized_compressors/marlin_24.py} +3 -3
  17. compressed_tensors/linear/compressed_linear.py +2 -2
  18. compressed_tensors/quantization/lifecycle/calibration.py +2 -3
  19. compressed_tensors/quantization/lifecycle/initialize.py +2 -1
  20. compressed_tensors/quantization/quant_config.py +7 -0
  21. compressed_tensors/quantization/quant_scheme.py +1 -1
  22. compressed_tensors/utils/helpers.py +17 -1
  23. {compressed_tensors_nightly-0.6.0.20240929.dist-info → compressed_tensors_nightly-0.6.0.20241004.dist-info}/METADATA +1 -1
  24. {compressed_tensors_nightly-0.6.0.20240929.dist-info → compressed_tensors_nightly-0.6.0.20241004.dist-info}/RECORD +27 -21
  25. {compressed_tensors_nightly-0.6.0.20240929.dist-info → compressed_tensors_nightly-0.6.0.20241004.dist-info}/LICENSE +0 -0
  26. {compressed_tensors_nightly-0.6.0.20240929.dist-info → compressed_tensors_nightly-0.6.0.20241004.dist-info}/WHEEL +0 -0
  27. {compressed_tensors_nightly-0.6.0.20240929.dist-info → compressed_tensors_nightly-0.6.0.20241004.dist-info}/top_level.txt +0 -0
compressed_tensors/base.py
@@ -17,3 +17,4 @@ QUANTIZATION_CONFIG_NAME = "quantization_config"
 COMPRESSION_CONFIG_NAME = "compression_config"
 KV_CACHE_SCHEME_NAME = "kv_cache_scheme"
 COMPRESSION_VERSION_NAME = "version"
+QUANTIZATION_METHOD_NAME = "quant_method"
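The added `QUANTIZATION_METHOD_NAME` constant names the `quant_method` key that now accompanies the `quantization_config` block in a saved config.json. A minimal sketch of reading it back, assuming a locally saved model directory (the "my_model" path is hypothetical):

    import json

    from compressed_tensors.base import (
        QUANTIZATION_CONFIG_NAME,
        QUANTIZATION_METHOD_NAME,
    )

    # "my_model" stands in for any directory written by ModelCompressor.update_config
    with open("my_model/config.json") as f:
        config = json.load(f)

    quant_section = config.get(QUANTIZATION_CONFIG_NAME, {})
    print(quant_section.get(QUANTIZATION_METHOD_NAME))  # e.g. "compressed-tensors"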
compressed_tensors/compressors/__init__.py
@@ -14,15 +14,9 @@
 
 # flake8: noqa
 
-from .base import Compressor
-from .dense import DenseCompressor
-from .helpers import load_compressed, save_compressed, save_compressed_model
-from .marlin_24 import Marlin24Compressor
-from .model_compressor import ModelCompressor, map_modules_to_quant_args
-from .naive_quantized import (
-    FloatQuantizationCompressor,
-    IntQuantizationCompressor,
-    QuantizationCompressor,
-)
-from .pack_quantized import PackedQuantizationCompressor
-from .sparse_bitmask import BitmaskCompressor, BitmaskTensor
+from .base import *
+from .helpers import *
+from .model_compressors import *
+from .quantized_compressors import *
+from .sparse_compressors import *
+from .sparse_quantized_compressors import *
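The explicit imports give way to star imports from the new subpackages, so public names keep resolving from the package root; each submodule now controls its surface through `__all__` (e.g. `__all__ = ["BaseCompressor"]` in the rewritten base module below). A quick smoke test, assuming these classes remain exported:

    # downstream imports keep working after the reorganization
    from compressed_tensors.compressors import (
        BaseCompressor,  # renamed from Compressor in this release
        ModelCompressor,
        PackedQuantizationCompressor,
    )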
compressed_tensors/compressors/base.py
@@ -12,26 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import logging
+from abc import ABC, abstractmethod
 from typing import Dict, Generator, Optional, Tuple, Union
 
 import torch
 from compressed_tensors.config import SparsityCompressionConfig
 from compressed_tensors.quantization import QuantizationArgs, QuantizationConfig
 from compressed_tensors.registry import RegistryMixin
-from compressed_tensors.utils import get_nested_weight_mappings, merge_names
-from safetensors import safe_open
 from torch import Tensor
-from torch.nn.modules import Module
-from tqdm import tqdm
+from torch.nn import Module
 
 
-_LOGGER: logging.Logger = logging.getLogger(__name__)
+__all__ = ["BaseCompressor"]
 
-__all__ = ["Compressor"]
 
-
-class Compressor(RegistryMixin):
+class BaseCompressor(RegistryMixin, ABC):
     """
     Base class representing a model compression algorithm. Each child class should
     implement compression_param_info, compress_weight and decompress_weight.
@@ -42,19 +37,18 @@ class Compressor(RegistryMixin):
     Model Load Lifecycle (run_compressed=False):
         - ModelCompressor.decompress()
             - apply_quantization_config()
-            - Compressor.decompress()
-                - Compressor.decompress_weight()
+            - BaseCompressor.decompress()
 
     Model Save Lifecycle:
         - ModelCompressor.compress()
-            - Compressor.compress()
-                - Compressor.compress_weight()
+            - BaseCompressor.compress()
+
 
     Module Lifecycle (run_compressed=True):
         - apply_quantization_config()
         - compressed_module = CompressedLinear(module)
         - initialize_module_for_quantization()
-        - Compressor.compression_param_info()
+        - BaseCompressor.compression_param_info()
         - register_parameters()
        - compressed_module.forward()
        -compressed_module.decompress()
@@ -83,61 +77,27 @@ class Compressor(RegistryMixin):
         """
         raise NotImplementedError()
 
+    @abstractmethod
     def compress(
         self,
         model_state: Dict[str, Tensor],
-        names_to_scheme: Dict[str, QuantizationArgs],
         **kwargs,
     ) -> Dict[str, Tensor]:
         """
         Compresses a dense state dict
 
         :param model_state: state dict of uncompressed model
-        :param names_to_scheme: quantization args for each quantized weight, needed for
-            quantize function to calculate bit depth
+        :param kwargs: additional arguments for compression
         :return: compressed state dict
         """
-        compressed_dict = {}
-        weight_suffix = ".weight"
-        _LOGGER.debug(
-            f"Compressing model with {len(model_state)} parameterized layers..."
-        )
-
-        for name, value in tqdm(model_state.items(), desc="Compressing model"):
-            if name.endswith(weight_suffix):
-                prefix = name[: -(len(weight_suffix))]
-                scale = model_state.get(merge_names(prefix, "weight_scale"), None)
-                zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
-                g_idx = model_state.get(merge_names(prefix, "weight_g_idx"), None)
-                if scale is not None:
-                    # weight is quantized, compress it
-                    quant_args = names_to_scheme[prefix]
-                    compressed_data = self.compress_weight(
-                        weight=value,
-                        scale=scale,
-                        zero_point=zp,
-                        g_idx=g_idx,
-                        quantization_args=quant_args,
-                        device="cpu",
-                    )
-                    for key, value in compressed_data.items():
-                        compressed_dict[merge_names(prefix, key)] = value
-                else:
-                    compressed_dict[name] = value.to("cpu")
-            elif name.endswith("zero_point") and torch.all(value == 0):
-                continue
-            elif name.endswith("g_idx") and torch.any(value <= -1):
-                continue
-            else:
-                compressed_dict[name] = value.to("cpu")
-
-        return compressed_dict
+        raise NotImplementedError()
 
+    @abstractmethod
     def decompress(
         self,
         path_to_model_or_tensors: str,
-        names_to_scheme: Dict[str, QuantizationArgs],
         device: str = "cpu",
+        **kwargs,
     ) -> Generator[Tuple[str, Tensor], None, None]:
         """
         Reads a compressed state dict located at path_to_model_or_tensors
@@ -150,55 +110,6 @@ class Compressor(RegistryMixin):
         :param device: optional device to load intermediate weights into
         :return: compressed state dict
         """
-        weight_mappings = get_nested_weight_mappings(
-            path_to_model_or_tensors, self.COMPRESSION_PARAM_NAMES
-        )
-        for weight_name in weight_mappings.keys():
-            weight_data = {}
-            for param_name, safe_path in weight_mappings[weight_name].items():
-                full_name = merge_names(weight_name, param_name)
-                with safe_open(safe_path, framework="pt", device=device) as f:
-                    weight_data[param_name] = f.get_tensor(full_name)
-
-            if "weight_scale" in weight_data:
-                quant_args = names_to_scheme[weight_name]
-                decompressed = self.decompress_weight(
-                    compressed_data=weight_data, quantization_args=quant_args
-                )
-                yield merge_names(weight_name, "weight"), decompressed
-
-    def compress_weight(
-        self,
-        weight: Tensor,
-        scale: Tensor,
-        zero_point: Optional[Tensor] = None,
-        g_idx: Optional[torch.Tensor] = None,
-        quantization_args: Optional[QuantizationArgs] = None,
-    ) -> Dict[str, torch.Tensor]:
-        """
-        Compresses a single uncompressed weight
-
-        :param weight: uncompressed weight tensor
-        :param scale: quantization scale for weight
-        :param zero_point: quantization zero point for weight
-        :param g_idx: optional mapping from column index to group index
-        :param quantization_args: quantization parameters for weight
-        :return: dictionary of compressed weight data
-        """
-        raise NotImplementedError()
-
-    def decompress_weight(
-        self,
-        compressed_data: Dict[str, Tensor],
-        quantization_args: Optional[QuantizationArgs] = None,
-    ) -> torch.Tensor:
-        """
-        Decompresses a single compressed weight
-
-        :param compressed_data: dictionary of data needed for decompression
-        :param quantization_args: quantization parameters for the weight
-        :return: tensor of the decompressed weight
-        """
         raise NotImplementedError()
 
     def compress_module(self, module: Module) -> Optional[Dict[str, torch.Tensor]]:
@@ -228,6 +139,19 @@ class Compressor(RegistryMixin):
             quantization_args=quantization_args,
         )
 
+    def compress_weight(
+        self,
+        weight: Tensor,
+        **kwargs,
+    ) -> Dict[str, torch.Tensor]:
+        """
+        Compresses a single uncompressed weight
+
+        :param weight: uncompressed weight tensor
+        :param kwargs: additional arguments for compression
+        """
+        raise NotImplementedError()
+
     def decompress_module(self, module: Module):
         """
         Decompresses a single compressed leaf PyTorch module. If the module is not
@@ -250,3 +174,15 @@ class Compressor(RegistryMixin):
         return self.decompress_weight(
             compressed_data=compressed_data, quantization_args=quantization_args
         )
+
+    def decompress_weight(
+        self, compressed_data: Dict[str, Tensor], **kwargs
+    ) -> torch.Tensor:
+        """
+        Decompresses a single compressed weight
+
+        :param compressed_data: dictionary of data needed for decompression
+        :param kwargs: additional arguments for decompression
+        :return: tensor of the decompressed weight
+        """
+        raise NotImplementedError()
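With `compress` and `decompress` now marked `@abstractmethod`, a subclass must implement both before it can be instantiated; `compress_weight`/`decompress_weight` remain optional overrides. A minimal sketch of a custom compressor against the new interface; the "passthrough" registry name and the no-op behavior are illustrative, not part of the library:

    from typing import Dict, Generator, Tuple

    from compressed_tensors.compressors import BaseCompressor
    from torch import Tensor


    @BaseCompressor.register(name="passthrough")  # registration via RegistryMixin
    class PassthroughCompressor(BaseCompressor):
        def compress(
            self, model_state: Dict[str, Tensor], **kwargs
        ) -> Dict[str, Tensor]:
            # required override: move tensors to CPU, compress nothing
            return {name: value.to("cpu") for name, value in model_state.items()}

        def decompress(
            self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
        ) -> Generator[Tuple[str, Tensor], None, None]:
            # required override: nothing was compressed, so nothing to yield
            yield from ()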
compressed_tensors/compressors/helpers.py
@@ -16,7 +16,7 @@ from pathlib import Path
 from typing import Dict, Generator, Optional, Tuple, Union
 
 import torch
-from compressed_tensors.compressors import Compressor
+from compressed_tensors.compressors import BaseCompressor
 from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig
 from compressed_tensors.utils.safetensors_load import get_weight_mappings
 from safetensors import safe_open
@@ -52,16 +52,16 @@ def save_compressed(
     compression_format = compression_format or CompressionFormat.dense.value
 
     if not (
-        compression_format in Compressor.registered_names()
-        or compression_format in Compressor.registered_aliases()
+        compression_format in BaseCompressor.registered_names()
+        or compression_format in BaseCompressor.registered_aliases()
     ):
         raise ValueError(
             f"Unknown compression format: {compression_format}. "
-            f"Must be one of {set(Compressor.registered_names() + Compressor.registered_aliases())}"  # noqa E501
+            f"Must be one of {set(BaseCompressor.registered_names() + BaseCompressor.registered_aliases())}"  # noqa E501
         )
 
     # compress
-    compressor = Compressor.load_from_registry(compression_format)
+    compressor = BaseCompressor.load_from_registry(compression_format)
     # save compressed tensors
     compressed_tensors = compressor.compress(tensors)
     save_file(compressed_tensors, save_path)
@@ -102,7 +102,7 @@ def load_compressed(
     else:
         # decompress tensors
         compression_format = compression_config.format
-        compressor = Compressor.load_from_registry(
+        compressor = BaseCompressor.load_from_registry(
            compression_format, config=compression_config
        )
        yield from compressor.decompress(compressed_tensors, device=device)
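`save_compressed` and `load_compressed` are otherwise unchanged; only the registry lookups now go through `BaseCompressor`. A usage sketch with made-up tensors and file name, using the built-in dense format:

    import torch

    from compressed_tensors.compressors import load_compressed, save_compressed

    tensors = {"layer.weight": torch.randn(8, 8)}
    save_compressed(tensors, "sample.safetensors", compression_format="dense")

    # dense-saved tensors round-trip without a compression config
    for name, tensor in load_compressed("sample.safetensors"):
        print(name, tuple(tensor.shape))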
compressed_tensors/compressors/model_compressors/__init__.py (new file)
@@ -0,0 +1,17 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# flake8: noqa
+
+
+from .model_compressor import *
compressed_tensors/compressors/{model_compressor.py → model_compressors/model_compressor.py}
@@ -18,20 +18,22 @@ import operator
 import os
 import re
 from copy import deepcopy
-from typing import Any, Dict, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, TypeVar, Union
 
+import compressed_tensors
 import torch
 import transformers
-import compressed_tensors
 from compressed_tensors.base import (
     COMPRESSION_CONFIG_NAME,
     COMPRESSION_VERSION_NAME,
     QUANTIZATION_CONFIG_NAME,
+    QUANTIZATION_METHOD_NAME,
     SPARSITY_CONFIG_NAME,
 )
-from compressed_tensors.compressors import Compressor
+from compressed_tensors.compressors.base import BaseCompressor
 from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig
 from compressed_tensors.quantization import (
+    DEFAULT_QUANTIZATION_METHOD,
     QuantizationConfig,
     QuantizationStatus,
     apply_quantization_config,
@@ -42,7 +44,10 @@ from compressed_tensors.quantization.utils import (
     iter_named_leaf_modules,
 )
 from compressed_tensors.utils import get_safetensors_folder, update_parameter_data
-from compressed_tensors.utils.helpers import fix_fsdp_module_name
+from compressed_tensors.utils.helpers import (
+    fix_fsdp_module_name,
+    is_compressed_tensors_config,
+)
 from torch import Tensor
 from torch.nn import Module
 from tqdm import tqdm
@@ -55,6 +60,11 @@ __all__ = ["ModelCompressor", "map_modules_to_quant_args"]
 _LOGGER: logging.Logger = logging.getLogger(__name__)
 
 
+if TYPE_CHECKING:
+    # dummy type if not available from transformers
+    CompressedTensorsConfig = TypeVar("CompressedTensorsConfig")
+
+
 class ModelCompressor:
     """
     Handles compression and decompression of a model with a sparsity config and/or
@@ -90,45 +100,41 @@ class ModelCompressor:
         configs and load a ModelCompressor
 
         :param pretrained_model_name_or_path: path to model config on disk or HF hub
-        :return: compressor for the extracted configs
+        :return: compressor for the configs, or None if model is not compressed
         """
         config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
         compression_config = getattr(config, COMPRESSION_CONFIG_NAME, None)
         return cls.from_compression_config(compression_config)
 
     @classmethod
-    def from_compression_config(cls, compression_config: Dict[str, Any]):
+    def from_compression_config(
+        cls, compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"]
+    ):
         """
-        :param compression_config: compression/quantization config dictionary
-            found under key "quantization_config" in HF model config
-        :return: compressor for the extracted configs
+        :param compression_config:
+            A compression or quantization config
+
+            The type is one of the following:
+            1. A Dict found under either "quantization_config" or "compression_config"
+                keys in the config.json
+            2. A CompressedTensorsConfig found under key "quantization_config" in HF
+                model config
+        :return: compressor for the configs, or None if model is not compressed
         """
         if compression_config is None:
             return None
 
-        try:
-            from transformers.utils.quantization_config import CompressedTensorsConfig
-
-            if isinstance(compression_config, CompressedTensorsConfig):
-                compression_config = compression_config.to_dict()
-        except ImportError:
-            pass
-
         sparsity_config = cls.parse_sparsity_config(compression_config)
         quantization_config = cls.parse_quantization_config(compression_config)
         if sparsity_config is None and quantization_config is None:
             return None
 
-        if sparsity_config is not None and not isinstance(
-            sparsity_config, SparsityCompressionConfig
-        ):
+        if sparsity_config is not None:
             format = sparsity_config.get("format")
             sparsity_config = SparsityCompressionConfig.load_from_registry(
                 format, **sparsity_config
             )
-        if quantization_config is not None and not isinstance(
-            quantization_config, QuantizationConfig
-        ):
+        if quantization_config is not None:
             quantization_config = QuantizationConfig.parse_obj(quantization_config)
 
         return cls(
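`from_compression_config` now accepts either a plain dict or a `transformers` `CompressedTensorsConfig`; the type dispatch moves into the parse helpers (via `is_compressed_tensors_config`) instead of an inline try/except import. A sketch with a deliberately minimal dict; the scheme details are illustrative, not taken from a real checkpoint:

    from compressed_tensors.compressors import ModelCompressor

    config = {
        "quant_method": "compressed-tensors",
        "format": "dense",
        "config_groups": {
            "group_0": {"targets": ["Linear"], "weights": {"num_bits": 8}},
        },
    }
    compressor = ModelCompressor.from_compression_config(config)
    print(compressor.quantization_compressor)  # resolved via the registry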
@@ -151,7 +157,7 @@ class ModelCompressor:
             to a sparsity compression algorithm
         :param quantization_format: string corresponding to a quantization compression
             algorithm
-        :return: compressor for the extracted configs
+        :return: compressor for the configs, or None if model is not compressed
         """
         quantization_config = QuantizationConfig.from_pretrained(
             model, format=quantization_format
@@ -170,40 +176,60 @@ class ModelCompressor:
         )
 
     @staticmethod
-    def parse_sparsity_config(compression_config: Dict) -> Union[Dict, None]:
+    def parse_sparsity_config(
+        compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"]
+    ) -> Union[Dict[str, Any], None]:
+        """
+        Parse sparsity config from quantization/compression config. Sparsity
+        config is nested inside q/c config
+
+        :param compression_config: quantization/compression config
+        :return: sparsity config
+        """
         if compression_config is None:
             return None
-        if SPARSITY_CONFIG_NAME not in compression_config:
-            return None
-        if hasattr(compression_config, SPARSITY_CONFIG_NAME):
-            # for loaded HFQuantizer config
-            return getattr(compression_config, SPARSITY_CONFIG_NAME)
-        if SPARSITY_CONFIG_NAME in compression_config:
-            # for loaded HFQuantizer config from dict
-            return compression_config[SPARSITY_CONFIG_NAME]
-
-        # SparseAutoModel format
+
+        if is_compressed_tensors_config(compression_config):
+            s_config = compression_config.sparsity_config
+            return s_config.dict() if s_config is not None else None
+
         return compression_config.get(SPARSITY_CONFIG_NAME, None)
 
     @staticmethod
-    def parse_quantization_config(compression_config: Dict) -> Union[Dict, None]:
+    def parse_quantization_config(
+        compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"]
+    ) -> Union[Dict[str, Any], None]:
+        """
+        Parse quantization config from quantization/compression config. The
+        quantization are all the fields that are not the sparsity config or
+        metadata fields
+
+        :param compression_config: quantization/compression config
+        :return: quantization config without sparsity config or metadata fields
+        """
         if compression_config is None:
             return None
 
-        if hasattr(compression_config, QUANTIZATION_CONFIG_NAME):
-            # for loaded HFQuantizer config
-            return getattr(compression_config, QUANTIZATION_CONFIG_NAME)
+        if is_compressed_tensors_config(compression_config):
+            q_config = compression_config.quantization_config
+            return q_config.dict() if q_config is not None else None
 
-        if QUANTIZATION_CONFIG_NAME in compression_config:
-            # for loaded HFQuantizer config from dict
-            return compression_config[QUANTIZATION_CONFIG_NAME]
-
-        # SparseAutoModel format
         quantization_config = deepcopy(compression_config)
         quantization_config.pop(SPARSITY_CONFIG_NAME, None)
-        quantization_config.pop(COMPRESSION_VERSION_NAME, None)
+
+        # some fields are required, even if a qconfig is not present
+        # pop them off and if nothing remains, then there is no qconfig
+        quant_method = quantization_config.pop(QUANTIZATION_METHOD_NAME, None)
+        _ = quantization_config.pop(COMPRESSION_VERSION_NAME, None)
+
         if len(quantization_config) == 0:
-            quantization_config = None
+            return None
+
+        # replace popped off values
+        # note that version is discarded for now
+        if quant_method is not None:
+            quantization_config[QUANTIZATION_METHOD_NAME] = quant_method
+
         return quantization_config
 
     def __init__(
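A behavioral consequence of the new metadata handling: a `quantization_config` block that carries only the bookkeeping keys (`quant_method`, `version`), as written for sparsity-only checkpoints, now parses to `None` rather than to an empty quantization config. A sketch, with made-up dict values:

    from compressed_tensors.compressors import ModelCompressor

    # metadata-only block, e.g. from a sparsity-only checkpoint
    metadata_only = {"quant_method": "compressed-tensors", "version": "0.6.0"}
    assert ModelCompressor.parse_quantization_config(metadata_only) is None

    # any remaining field keeps the config, with quant_method restored
    real_config = {"quant_method": "compressed-tensors", "format": "pack-quantized"}
    parsed = ModelCompressor.parse_quantization_config(real_config)
    assert parsed["quant_method"] == "compressed-tensors"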
@@ -216,17 +242,16 @@ class ModelCompressor:
         self.sparsity_compressor = None
         self.quantization_compressor = None
 
-
         if sparsity_config and sparsity_config.format == CompressionFormat.dense.value:
             # ignore dense sparsity config
             self.sparsity_config = None
 
         if sparsity_config is not None:
-            self.sparsity_compressor = Compressor.load_from_registry(
+            self.sparsity_compressor = BaseCompressor.load_from_registry(
                 sparsity_config.format, config=sparsity_config
             )
         if quantization_config is not None:
-            self.quantization_compressor = Compressor.load_from_registry(
+            self.quantization_compressor = BaseCompressor.load_from_registry(
                 quantization_config.format, config=quantization_config
             )
 
@@ -237,7 +262,7 @@
         Compresses a dense state dict or model with sparsity and/or quantization
 
         :param model: uncompressed model to compress
-        :param model_state: optional uncompressed state_dict to insert into model
+        :param state_dict: optional uncompressed state_dict to insert into model
         :return: compressed state dict
         """
         if state_dict is None:
@@ -300,6 +325,9 @@
 
         :param save_directory: path to a folder containing a HF model config
         """
+        if self.quantization_config is None and self.sparsity_config is None:
+            return
+
         config_file_path = os.path.join(save_directory, CONFIG_NAME)
         if not os.path.exists(config_file_path):
             _LOGGER.warning(
@@ -311,7 +339,20 @@
         with open(config_file_path, "r") as config_file:
             config_data = json.load(config_file)
 
+        # required metadata whenever a quantization or sparsity config is present
+        # overwrite previous config and version if already existing
         config_data[QUANTIZATION_CONFIG_NAME] = {}
+        config_data[QUANTIZATION_CONFIG_NAME][
+            COMPRESSION_VERSION_NAME
+        ] = compressed_tensors.__version__
+        if self.quantization_config is not None:
+            self.quantization_config.quant_method = DEFAULT_QUANTIZATION_METHOD
+        else:
+            config_data[QUANTIZATION_CONFIG_NAME][
+                QUANTIZATION_METHOD_NAME
+            ] = DEFAULT_QUANTIZATION_METHOD
+
+        # quantization and sparsity configs
         if self.quantization_config is not None:
             quant_config_data = self.quantization_config.model_dump()
             config_data[QUANTIZATION_CONFIG_NAME] = quant_config_data
@@ -320,9 +361,6 @@
             config_data[QUANTIZATION_CONFIG_NAME][
                 SPARSITY_CONFIG_NAME
             ] = sparsity_config_data
-        config_data[QUANTIZATION_CONFIG_NAME][
-            COMPRESSION_VERSION_NAME
-        ] = compressed_tensors.__version__
 
         with open(config_file_path, "w") as config_file:
             json.dump(config_data, config_file, indent=2, sort_keys=True)
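Net effect of the `update_config` changes: the written `quantization_config` block always carries `quant_method` (and, for sparsity-only models, the `version` stamp), so loaders can identify a compressed checkpoint even when no quantization config exists. Roughly the shape written for a sparsity-only model; the sparsity fields are abbreviated and illustrative:

    # illustrative only; real sparsity configs carry more fields
    expected_block = {
        "quantization_config": {
            "quant_method": "compressed-tensors",
            "version": "0.6.0.20241004",
            "sparsity_config": {"format": "sparse-bitmask"},
        }
    }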
compressed_tensors/compressors/quantized_compressors/__init__.py (new file)
@@ -0,0 +1,18 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# flake8: noqa
+
+from .base import *
+from .naive_quantized import *
+from .pack_quantized import *