compressed-tensors-nightly 0.3.3.20240612__py3-none-any.whl → 0.4.0.20240614__py3-none-any.whl
This diff represents the changes between two publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- compressed_tensors/compressors/model_compressor.py +39 -4
- compressed_tensors/quantization/lifecycle/apply.py +18 -10
- compressed_tensors/quantization/lifecycle/forward.py +1 -1
- compressed_tensors/quantization/lifecycle/initialize.py +31 -5
- compressed_tensors/quantization/observers/helpers.py +4 -0
- compressed_tensors/quantization/quant_config.py +4 -0
- compressed_tensors/version.py +1 -1
- {compressed_tensors_nightly-0.3.3.20240612.dist-info → compressed_tensors_nightly-0.4.0.20240614.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.3.3.20240612.dist-info → compressed_tensors_nightly-0.4.0.20240614.dist-info}/RECORD +12 -12
- {compressed_tensors_nightly-0.3.3.20240612.dist-info → compressed_tensors_nightly-0.4.0.20240614.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.3.3.20240612.dist-info → compressed_tensors_nightly-0.4.0.20240614.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.3.3.20240612.dist-info → compressed_tensors_nightly-0.4.0.20240614.dist-info}/top_level.txt +0 -0
compressed_tensors/compressors/model_compressor.py CHANGED

```diff
@@ -17,7 +17,7 @@ import logging
 import operator
 import os
 from copy import deepcopy
-from typing import Dict, Optional, Union
+from typing import Any, Dict, Optional, Union
 
 from compressed_tensors.base import (
     COMPRESSION_CONFIG_NAME,
```
```diff
@@ -88,20 +88,41 @@ class ModelCompressor:
         """
         config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
         compression_config = getattr(config, COMPRESSION_CONFIG_NAME, None)
+        return cls.from_compression_config(compression_config)
+
+    @classmethod
+    def from_compression_config(cls, compression_config: Dict[str, Any]):
+        """
+        :param compression_config: compression/quantization config dictionary
+            found under key "quantization_config" in HF model config
+        :return: compressor for the extracted configs
+        """
         if compression_config is None:
             return None
 
+        try:
+            from transformers.utils.quantization_config import CompressedTensorsConfig
+
+            if isinstance(compression_config, CompressedTensorsConfig):
+                compression_config = compression_config.to_dict()
+        except ImportError:
+            pass
+
         sparsity_config = cls.parse_sparsity_config(compression_config)
         quantization_config = cls.parse_quantization_config(compression_config)
         if sparsity_config is None and quantization_config is None:
             return None
 
-        if sparsity_config is not None:
+        if sparsity_config is not None and not isinstance(
+            sparsity_config, SparsityCompressionConfig
+        ):
             format = sparsity_config.get("format")
             sparsity_config = SparsityCompressionConfig.load_from_registry(
                 format, **sparsity_config
             )
-        if quantization_config is not None:
+        if quantization_config is not None and not isinstance(
+            quantization_config, QuantizationConfig
+        ):
             quantization_config = QuantizationConfig.parse_obj(quantization_config)
 
         return cls(
```
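The new classmethod gives callers that already hold a config object or dict a direct entry point; `from_pretrained` now simply extracts the config and delegates. A minimal usage sketch, assuming `ModelCompressor` is exported from `compressed_tensors.compressors` and using a placeholder model id:

```python
from transformers import AutoConfig

from compressed_tensors.compressors import ModelCompressor

# Placeholder model id; any checkpoint whose config carries a
# "quantization_config" entry would do.
config = AutoConfig.from_pretrained("some-org/some-quantized-model")
compression_config = getattr(config, "quantization_config", None)

# Accepts a plain dict or a transformers CompressedTensorsConfig;
# returns None when no compression config is found.
compressor = ModelCompressor.from_compression_config(compression_config)
```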
```diff
@@ -146,15 +167,29 @@ class ModelCompressor:
     def parse_sparsity_config(compression_config: Dict) -> Union[Dict, None]:
         if compression_config is None:
             return None
+        if SPARSITY_CONFIG_NAME not in compression_config:
+            return None
+        if hasattr(compression_config, SPARSITY_CONFIG_NAME):
+            # for loaded HFQuantizer config
+            return getattr(compression_config, SPARSITY_CONFIG_NAME)
+
+        # SparseAutoModel format
         return compression_config.get(SPARSITY_CONFIG_NAME, None)
 
     @staticmethod
     def parse_quantization_config(compression_config: Dict) -> Union[Dict, None]:
+        if compression_config is None:
+            return None
+
+        if hasattr(compression_config, QUANTIZATION_CONFIG_NAME):
+            # for loaded HFQuantizer config
+            return getattr(compression_config, QUANTIZATION_CONFIG_NAME)
+
+        # SparseAutoModel format
         quantization_config = deepcopy(compression_config)
         quantization_config.pop(SPARSITY_CONFIG_NAME, None)
         if len(quantization_config) == 0:
             quantization_config = None
-
         return quantization_config
 
     def __init__(
```
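Both parsers now accept either an attribute-bearing config object (the HFQuantizer path) or the flat SparseAutoModel dict, where the sparsity entry is split off and whatever remains is treated as the quantization config. A pure-dict sketch of that split; the key names come from the diff, the values are invented:

```python
from copy import deepcopy

SPARSITY_CONFIG_NAME = "sparsity_config"

compression_config = {
    "format": "int-quantized",  # invented example values
    "config_groups": {"group_0": {"targets": ["Linear"]}},
    "sparsity_config": {"format": "sparse-bitmask"},
}

# sparsity: pulled straight from its key
sparsity_config = compression_config.get(SPARSITY_CONFIG_NAME)

# quantization: everything except the sparsity entry
quantization_config = deepcopy(compression_config)
quantization_config.pop(SPARSITY_CONFIG_NAME, None)
if len(quantization_config) == 0:
    quantization_config = None  # only sparsity info was present

print(sparsity_config)      # {'format': 'sparse-bitmask'}
print(quantization_config)  # {'format': 'int-quantized', 'config_groups': ...}
```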
compressed_tensors/quantization/lifecycle/apply.py CHANGED

```diff
@@ -123,11 +123,14 @@ def apply_quantization_config(model: Module, config: QuantizationConfig):
         if target is not None:
             # target matched - add layer and scheme to target list
             submodule.quantization_scheme = target_to_scheme[target]
-    if set(config.ignore) - set(ignored_submodules):
-        _LOGGER.warning(
-            "Some layers that were to be ignored were not found in the model: "
-            f"{set(config.ignore) - set(ignored_submodules)}"
-        )
+
+    if config.ignore is not None and ignored_submodules is not None:
+        if set(config.ignore) - set(ignored_submodules):
+            _LOGGER.warning(
+                "Some layers that were to be ignored were "
+                "not found in the model: "
+                f"{set(config.ignore) - set(ignored_submodules)}"
+            )
     # apply current quantization status across all targeted layers
     apply_quantization_status(model, config.quantization_status)
 
```
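The added `None` checks matter because `config.ignore` is optional; the warning itself is a plain set difference between the configured ignore list and the modules that were actually matched. A standalone illustration with invented layer names:

```python
import logging

logging.basicConfig(level=logging.WARNING)
_LOGGER = logging.getLogger(__name__)

config_ignore = ["lm_head", "model.layers.0.mlp.gate"]  # invented names
ignored_submodules = ["lm_head"]  # what the target matching actually skipped

if config_ignore is not None and ignored_submodules is not None:
    missing = set(config_ignore) - set(ignored_submodules)
    if missing:
        _LOGGER.warning(
            "Some layers that were to be ignored were "
            f"not found in the model: {missing}"
        )
# warns about model.layers.0.mlp.gate only
```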
```diff
@@ -146,7 +149,6 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
 
     if current_status < status >= QuantizationStatus.CALIBRATION > current_status:
         model.apply(set_module_for_calibration)
-
     if current_status < status >= QuantizationStatus.FROZEN > current_status:
         model.apply(freeze_module_quantization)
 
```
```diff
@@ -160,9 +162,10 @@ def find_first_name_or_class_match(
     # first element of targets that matches the given name
     # if no name matches returns first target that matches the class name
     # returns None otherwise
-    return _find_first_match(name, targets) or _find_first_match(
-        module.__class__.__name__, targets, check_contains
-    )
+    if isinstance(targets, Iterable):
+        return _find_first_match(name, targets) or _find_first_match(
+            module.__class__.__name__, targets, check_contains
+        )
 
 
 def _find_first_match(
```
```diff
@@ -212,7 +215,12 @@ def _load_quant_args_from_state_dict(
     scale = getattr(module, scale_name, None)
     zp = getattr(module, zp_name, None)
     if scale is not None:
-        scale.data = state_dict[f"{module_name}.{scale_name}"].to(device)
+        state_dict_scale = state_dict.get(f"{module_name}.{scale_name}")
+        if state_dict_scale is not None:
+            scale.data = state_dict_scale.to(device).to(scale.dtype)
+        else:
+            scale.data = scale.data.to(device)
+
     if zp is not None:
         zp_from_state = state_dict.get(f"{module_name}.{zp_name}", None)
         if zp_from_state is not None:  # load the non-zero zero points
```
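The rewritten branch loads the checkpoint scale when present, casting it to the parameter's dtype, and otherwise just moves the existing placeholder to the right device. A self-contained sketch with an invented parameter and state dict (the key layout mirrors the diff):

```python
import torch

module_name, scale_name = "model.layers.0.q_proj", "weight_scale"  # invented
device = "cpu"

# bf16 placeholder parameter, fp32 scale in the checkpoint
scale = torch.nn.Parameter(torch.empty(1, dtype=torch.bfloat16), requires_grad=False)
state_dict = {f"{module_name}.{scale_name}": torch.tensor([0.02])}

state_dict_scale = state_dict.get(f"{module_name}.{scale_name}")
if state_dict_scale is not None:
    # cast so an fp32 checkpoint loads cleanly into a bf16 module
    scale.data = state_dict_scale.to(device).to(scale.dtype)
else:
    scale.data = scale.data.to(device)

print(scale.dtype)  # torch.bfloat16
```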
compressed_tensors/quantization/lifecycle/forward.py CHANGED

```diff
@@ -94,7 +94,7 @@ def dequantize(
     :return: dequantized float tensor
     """
     if args is None:
-        if scale.ndim == 0:
+        if scale.ndim == 0 or scale.ndim == 1:
            args = QuantizationArgs(strategy=QuantizationStrategy.TENSOR)
        elif scale.ndim == 2:
            if scale.shape[1] == 1:
```
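A 1-dim scale (as `calculate_qparams` now returns for per-tensor stats, see the helpers.py hunk below) is treated the same as a scalar. A sketch of the shape-based dispatch, assuming the 2-dim branches select the channel and group strategies as the surrounding code suggests:

```python
import torch

def infer_strategy(scale: torch.Tensor) -> str:
    # mirrors the dispatch in dequantize() when args is None
    if scale.ndim == 0 or scale.ndim == 1:
        return "tensor"
    if scale.ndim == 2:
        # assumed mapping: one scale per row -> channel, several -> group
        return "channel" if scale.shape[1] == 1 else "group"
    raise ValueError(f"unsupported scale shape: {tuple(scale.shape)}")

print(infer_strategy(torch.tensor(0.02)))    # tensor (0-dim)
print(infer_strategy(torch.tensor([0.02])))  # tensor (1-dim, newly accepted)
print(infer_strategy(torch.rand(256, 1)))    # channel
print(infer_strategy(torch.rand(256, 8)))    # group
```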
compressed_tensors/quantization/lifecycle/initialize.py CHANGED

```diff
@@ -20,7 +20,10 @@ import torch
 from compressed_tensors.quantization.lifecycle.forward import (
     wrap_module_forward_quantized,
 )
-from compressed_tensors.quantization.quant_args import QuantizationArgs
+from compressed_tensors.quantization.quant_args import (
+    QuantizationArgs,
+    QuantizationStrategy,
+)
 from compressed_tensors.quantization.quant_config import QuantizationStatus
 from compressed_tensors.quantization.quant_scheme import QuantizationScheme
 from torch.nn import Module, Parameter
```
```diff
@@ -58,7 +61,12 @@ def initialize_module_for_quantization(
         _initialize_scale_zero_point_observer(module, "input", scheme.input_activations)
     if scheme.weights is not None:
         if hasattr(module, "weight"):
-            _initialize_scale_zero_point_observer(module, "weight", scheme.weights)
+            weight_shape = None
+            if isinstance(module, torch.nn.Linear):
+                weight_shape = module.weight.shape
+            _initialize_scale_zero_point_observer(
+                module, "weight", scheme.weights, weight_shape=weight_shape
+            )
         else:
             _LOGGER.warning(
                 f"module type {type(module)} targeted for weight quantization but "
```
```diff
@@ -78,7 +86,10 @@ def initialize_module_for_quantization(
 
 
 def _initialize_scale_zero_point_observer(
-    module: Module, base_name: str, quantization_args: QuantizationArgs
+    module: Module,
+    base_name: str,
+    quantization_args: QuantizationArgs,
+    weight_shape: Optional[torch.Size] = None,
 ):
     # initialize observer module and attach as submodule
     observer = quantization_args.get_observer()
```
```diff
@@ -89,13 +100,28 @@ def _initialize_scale_zero_point_observer(
 
     device = next(module.parameters()).device
 
+    # infer expected scale/zero point shape
+    expected_shape = 1  # per tensor
+
+    if base_name == "weight" and weight_shape is not None:
+        if quantization_args.strategy == QuantizationStrategy.CHANNEL:
+            # (output_channels, 1)
+            expected_shape = (weight_shape[0], 1)
+        elif quantization_args.strategy == QuantizationStrategy.GROUP:
+            expected_shape = (
+                weight_shape[0],
+                weight_shape[1] // quantization_args.group_size,
+            )
+
     # initializes empty scale and zero point parameters for the module
     init_scale = Parameter(
-        torch.empty(
+        torch.empty(expected_shape, dtype=module.weight.dtype, device=device),
+        requires_grad=False,
     )
     module.register_parameter(f"{base_name}_scale", init_scale)
 
     init_zero_point = Parameter(
-        torch.empty(
+        torch.empty(expected_shape, device=device, dtype=int),
+        requires_grad=False,
     )
     module.register_parameter(f"{base_name}_zero_point", init_zero_point)
```
compressed_tensors/quantization/observers/helpers.py CHANGED

```diff
@@ -51,4 +51,8 @@ def calculate_qparams(
     zero_points = bit_min - torch.round(min_vals / scales)
     zero_points = torch.clamp(zero_points, bit_min, bit_max).to(torch.int8)
 
+    if scales.ndim == 0:
+        scales = scales.reshape(1)
+        zero_points = zero_points.reshape(1)
+
     return scales, zero_points
```
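The reshape guarantees per-tensor qparams are at least 1-dim, matching the widened `scale.ndim == 0 or scale.ndim == 1` check in `dequantize` above. A self-contained sketch of the tail of `calculate_qparams`; the scale formula is the standard asymmetric one and an assumption here, while the zero-point lines are taken from the diff:

```python
import torch

min_vals, max_vals = torch.tensor(-0.8), torch.tensor(1.2)  # invented stats
bit_min, bit_max = -128, 127  # int8 range

scales = (max_vals - min_vals) / (bit_max - bit_min)  # assumed observer math
zero_points = bit_min - torch.round(min_vals / scales)
zero_points = torch.clamp(zero_points, bit_min, bit_max).to(torch.int8)

if scales.ndim == 0:  # per-tensor stats produce 0-dim tensors
    scales = scales.reshape(1)
    zero_points = zero_points.reshape(1)

print(scales.shape, zero_points.shape)  # torch.Size([1]) torch.Size([1])
```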
|
@@ -144,6 +144,10 @@ class QuantizationConfig(BaseModel):
|
|
144
144
|
targets=targets_or_scheme,
|
145
145
|
)
|
146
146
|
|
147
|
+
def to_dict(self):
|
148
|
+
# for compatibility with HFQuantizer
|
149
|
+
return self.dict()
|
150
|
+
|
147
151
|
@staticmethod
|
148
152
|
def from_pretrained(
|
149
153
|
model: Module, format: Optional[str] = None
|
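`to_dict` is a thin compatibility alias: HFQuantizer-side code expects quantization configs to expose `to_dict()` (as transformers' own config classes do), while pydantic models serialize via `.dict()`. A minimal stand-in showing the pattern; the field names are invented:

```python
from pydantic import BaseModel

class ExampleQuantizationConfig(BaseModel):  # stand-in for QuantizationConfig
    quant_method: str = "compressed-tensors"
    format: str = "int-quantized"

    def to_dict(self):
        # for compatibility with callers expecting a to_dict() method
        return self.dict()  # pydantic v1-style serialization

print(ExampleQuantizationConfig().to_dict())
# {'quant_method': 'compressed-tensors', 'format': 'int-quantized'}
```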
compressed_tensors/version.py CHANGED

{compressed_tensors_nightly-0.3.3.20240612.dist-info → compressed_tensors_nightly-0.4.0.20240614.dist-info}/METADATA CHANGED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.3.3.20240612
+Version: 0.4.0.20240614
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
```
{compressed_tensors_nightly-0.3.3.20240612.dist-info → compressed_tensors_nightly-0.4.0.20240614.dist-info}/RECORD CHANGED

```diff
@@ -1,13 +1,13 @@
 compressed_tensors/__init__.py,sha256=SV1csvHUVCd8kHXz6UDZim1HZ_fAVG3vfk-j_4Bb6hY,789
 compressed_tensors/base.py,sha256=OA2TOLP1gP3LSH7gp508eqr2ZtDQ-pqRHElCp-aB0vs,755
-compressed_tensors/version.py,sha256=
+compressed_tensors/version.py,sha256=7shEvInzCEXAScJ2akpiQpgv_IjveX6mAfvi2D_wQDE,1512
 compressed_tensors/compressors/__init__.py,sha256=rhqPp3YXFxCJRLZs1KRNSHTIxK2rNU--sYwDI8MW47w,1061
 compressed_tensors/compressors/base.py,sha256=LWEgbpgTxzmoqQ7Xhq2OQszUgWoDtFuGCiV1Y8nlBGw,2134
 compressed_tensors/compressors/dense.py,sha256=G_XHbvuENyupIKlXSITOQgvPkNkcMEOLcLWQr70V9EE,1257
 compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
 compressed_tensors/compressors/int_quantized.py,sha256=Ct2vCK0yoPm6vkIFlzDMGQ7m14xT1GyURsSwH9DP770,5242
 compressed_tensors/compressors/marlin_24.py,sha256=X_BjtFB3Mn0hqiLz56UM3jGX2eNmGLnvEIPfbg7di6U,9444
-compressed_tensors/compressors/model_compressor.py,sha256=
+compressed_tensors/compressors/model_compressor.py,sha256=h3ixQtfzt6HxSNtdnB9OVdpCucTmIo4paDoaM7XYZXE,12559
 compressed_tensors/compressors/pack_quantized.py,sha256=VPiLlgJlDgARrn7YmiQoLqUfxErKBfj54epMYWRsF8k,8451
 compressed_tensors/compressors/sparse_bitmask.py,sha256=H9oZSTYI1oRCzAMbd4zThUnZd1h2rfs8DmA3tPcvuNE,8637
 compressed_tensors/compressors/utils/__init__.py,sha256=-mbGDZh1hd9T6u62Ht_iBIK255UmMg0f5bLkSs1f9Cc,731
```
```diff
@@ -20,18 +20,18 @@ compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74j
 compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=Z9Zu20ooAwEWlliAdUw1f1zwSrheuD6vqm3YXgJ1Lws,4388
-compressed_tensors/quantization/quant_config.py,sha256=
+compressed_tensors/quantization/quant_config.py,sha256=hL42sXp1wAZxyrkHarw7tAMRcwSVEr0MT3wmrmL3NhE,8285
 compressed_tensors/quantization/quant_scheme.py,sha256=-hAK1-C67_wJl10eaVLUvbslPBTV04WyzL_J-u9f1ck,3571
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcgTVX3axnS2xV6rc5YvdzK7fSg,798
-compressed_tensors/quantization/lifecycle/apply.py,sha256=
+compressed_tensors/quantization/lifecycle/apply.py,sha256=aZrglJ5mR3Xaxwj51-1BVVB1JGVkKQEeHxGfBaVmsHI,8881
 compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
-compressed_tensors/quantization/lifecycle/forward.py,sha256=
+compressed_tensors/quantization/lifecycle/forward.py,sha256=0T817yzYqFR1wUjk2XCtOISwr4u7cdkKqAv13jjfu24,11113
 compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=9xgPzHejQUO_AkZcc_SH5kqFeieG-9uo0fMRYV51i7Y,4577
 compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
 compressed_tensors/quantization/observers/base.py,sha256=z_JC-CRz-PY7WlpSoyOoSQQWz5ekTEd5LbXt0iHQRes,5239
-compressed_tensors/quantization/observers/helpers.py,sha256=
+compressed_tensors/quantization/observers/helpers.py,sha256=FUyYUNd-3LbXt0-8Lwr7EPI2m-LXXBTXW1l5iOajNhA,2272
 compressed_tensors/quantization/observers/memoryless.py,sha256=jH_c6K3gxf4W3VNXQ7tbnP-J_86QTrEfjBn6Kh1C-H8,2165
 compressed_tensors/quantization/observers/min_max.py,sha256=UK7zCMzxv9GGn6BflBxdajV20RiWaCY2RHcvZodCP1w,3669
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
```
```diff
@@ -41,8 +41,8 @@ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85S
 compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
 compressed_tensors/utils/helpers.py,sha256=5ull5yFT31M2zVxKeFvpvvlvX5f1Sk1LGuj_wrfZWCY,2267
 compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
-compressed_tensors_nightly-0.
-compressed_tensors_nightly-0.
-compressed_tensors_nightly-0.
-compressed_tensors_nightly-0.
-compressed_tensors_nightly-0.
+compressed_tensors_nightly-0.4.0.20240614.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.4.0.20240614.dist-info/METADATA,sha256=IO0LSwjqxSO7pLA8kiT29rfSn18c7ZfVr_rI6iunZtc,5668
+compressed_tensors_nightly-0.4.0.20240614.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+compressed_tensors_nightly-0.4.0.20240614.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.4.0.20240614.dist-info/RECORD,,
```