compressed-tensors-nightly 0.3.3.20240523__py3-none-any.whl → 0.3.3.20240525__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/compressed_tensors/compressors/int_quantized.py
+++ b/compressed_tensors/compressors/int_quantized.py
@@ -78,7 +78,11 @@ class IntQuantizationCompressor(Compressor):
                         args=quant_args,
                         dtype=torch.int8,
                     )
-
+            elif name.endswith("zero_point"):
+                if torch.all(value == 0):
+                    # all zero_points are 0, no need to include in
+                    # compressed state_dict
+                    continue
             compressed_dict[name] = value.to("cpu")
 
         return compressed_dict
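
The compress-side change above drops zero points that are entirely zero (the common case for symmetric quantization) from the compressed state dict. A minimal sketch of the filtering idea using only plain torch; the toy state dict and tensor names are illustrative, not the library's API:

import torch

state_dict = {
    "layer.weight": torch.randint(-8, 8, (4, 4), dtype=torch.int8),
    "layer.weight_scale": torch.tensor([0.1]),
    # symmetric quantization typically yields an all-zero zero point
    "layer.weight_zero_point": torch.zeros(1, dtype=torch.int8),
}

compressed = {}
for name, value in state_dict.items():
    if name.endswith("zero_point") and torch.all(value == 0):
        # all-zero zero points carry no information, so skip them
        continue
    compressed[name] = value.to("cpu")

assert "layer.weight_zero_point" not in compressed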
@@ -106,10 +110,16 @@ class IntQuantizationCompressor(Compressor):
                 with safe_open(safe_path, framework="pt", device=device) as f:
                     weight_data[param_name] = f.get_tensor(full_name)
 
-            if len(weight_data) == len(self.COMPRESSION_PARAM_NAMES):
+            if "weight_scale" in weight_data:
+                zero_point = weight_data.get("weight_zero_point", None)
+                scale = weight_data["weight_scale"]
+                if zero_point is None:
+                    # zero_point assumed to be 0 if not included in state_dict
+                    zero_point = torch.zeros_like(scale)
+
                 decompressed = dequantize(
                     x_q=weight_data["weight"],
-                    scale=weight_data["weight_scale"],
-                    zero_point=weight_data["weight_zero_point"],
+                    scale=scale,
+                    zero_point=zero_point,
                 )
                 yield merge_names(weight_name, "weight"), decompressed
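
On decompression the zero point is now optional: when the compressed state dict omits it, a zero tensor shaped like the scale is assumed. A hedged sketch of that fallback, with a simplified elementwise stand-in for the library's dequantize call:

import torch

weight_data = {
    "weight": torch.randint(-8, 8, (4, 4), dtype=torch.int8),
    "weight_scale": torch.tensor([0.05]),
    # no "weight_zero_point" entry, as produced by the new compressors
}

if "weight_scale" in weight_data:
    scale = weight_data["weight_scale"]
    zero_point = weight_data.get("weight_zero_point")
    if zero_point is None:
        # assumed to be zero when it was skipped at compression time
        zero_point = torch.zeros_like(scale)
    # simplified stand-in for dequantize(x_q=..., scale=..., zero_point=...)
    decompressed = (weight_data["weight"] - zero_point) * scale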
--- a/compressed_tensors/compressors/model_compressor.py
+++ b/compressed_tensors/compressors/model_compressor.py
@@ -249,8 +249,9 @@ class ModelCompressor:
         for name, data in tqdm(dense_weight_generator, desc="Decompressing model"):
             # loading the decompressed weights into the model
             model_device = operator.attrgetter(name)(model).device
-            data_new = Parameter(data.to(model_device))
             data_old = operator.attrgetter(name)(model)
+            data_dtype = data_old.dtype
+            data_new = Parameter(data.to(model_device).to(data_dtype))
             data_old.data = data_new.data
 
 
--- a/compressed_tensors/compressors/pack_quantized.py
+++ b/compressed_tensors/compressors/pack_quantized.py
@@ -87,7 +87,11 @@ class PackedQuantizationCompressor(Compressor):
                     )
                 value = pack_4bit_ints(value.cpu())
                 compressed_dict[merge_names(prefix, "weight_shape")] = shape
-
+            elif name.endswith("zero_point"):
+                if torch.all(value == 0):
+                    # all zero_points are 0, no need to include in
+                    # compressed state_dict
+                    continue
             compressed_dict[name] = value.to("cpu")
 
         return compressed_dict
@@ -115,14 +119,20 @@ class PackedQuantizationCompressor(Compressor):
                 with safe_open(safe_path, framework="pt", device=device) as f:
                     weight_data[param_name] = f.get_tensor(full_name)
 
-            if len(weight_data) == len(self.COMPRESSION_PARAM_NAMES):
+            if "weight_scale" in weight_data:
+                zero_point = weight_data.get("weight_zero_point", None)
+                scale = weight_data["weight_scale"]
+                if zero_point is None:
+                    # zero_point assumed to be 0 if not included in state_dict
+                    zero_point = torch.zeros_like(scale)
+
                 weight = weight_data["weight"]
                 original_shape = torch.Size(weight_data["weight_shape"])
                 unpacked = unpack_4bit_ints(weight, original_shape)
                 decompressed = dequantize(
                     x_q=unpacked,
-                    scale=weight_data["weight_scale"],
-                    zero_point=weight_data["weight_zero_point"],
+                    scale=scale,
+                    zero_point=zero_point,
                 )
                 yield merge_names(weight_name, "weight"), decompressed
 
--- a/compressed_tensors/quantization/lifecycle/apply.py
+++ b/compressed_tensors/quantization/lifecycle/apply.py
@@ -16,6 +16,7 @@ import re
 from collections import OrderedDict
 from typing import Dict, Iterable, Optional
 
+import torch
 from compressed_tensors.quantization.lifecycle.calibration import (
     set_module_for_calibration,
 )
@@ -193,7 +194,13 @@ def _load_quant_args_from_state_dict(
     zp_name = f"{base_name}_zero_point"
     device = next(module.parameters()).device
 
-    scale = getattr(module, scale_name)
-    zp = getattr(module, zp_name)
-    scale.data = state_dict[f"{module_name}.{scale_name}"].to(device)
-    zp.data = state_dict[f"{module_name}.{zp_name}"].to(device)
+    scale = getattr(module, scale_name, None)
+    zp = getattr(module, zp_name, None)
+    if scale is not None:
+        scale.data = state_dict[f"{module_name}.{scale_name}"].to(device)
+    if zp is not None:
+        zp_from_state = state_dict.get(f"{module_name}.{zp_name}", None)
+        if zp_from_state is not None:  # load the non-zero zero points
+            zp.data = state_dict[f"{module_name}.{zp_name}"].to(device)
+        else:  # fill with zeros matching scale shape
+            zp.data = torch.zeros_like(scale, dtype=torch.int8).to(device)
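
_load_quant_args_from_state_dict now tolerates checkpoints that omitted the all-zero zero point: the scale is loaded as before, and a missing zero point is filled with int8 zeros shaped like the scale. A hedged, self-contained sketch of that loading pattern; the module, module_name, and base_name below are invented for illustration:

import torch
from torch.nn import Linear, Parameter

module, module_name, base_name = Linear(4, 4), "model.layers.0.fc", "weight"
module.register_parameter(
    f"{base_name}_scale", Parameter(torch.empty(0), requires_grad=False)
)
module.register_parameter(
    f"{base_name}_zero_point",
    Parameter(torch.empty(0, dtype=torch.int8), requires_grad=False),
)

# a compressed checkpoint that dropped the all-zero zero point
state_dict = {f"{module_name}.{base_name}_scale": torch.tensor([0.02])}

scale = getattr(module, f"{base_name}_scale")
zp = getattr(module, f"{base_name}_zero_point")
scale.data = state_dict[f"{module_name}.{base_name}_scale"]
zp_from_state = state_dict.get(f"{module_name}.{base_name}_zero_point")
zp.data = (
    zp_from_state
    if zp_from_state is not None
    else torch.zeros_like(scale, dtype=torch.int8)  # fill missing zero points
)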
--- a/compressed_tensors/quantization/lifecycle/initialize.py
+++ b/compressed_tensors/quantization/lifecycle/initialize.py
@@ -90,7 +90,9 @@ def _initialize_scale_zero_point_observer(
     device = next(module.parameters()).device
 
     # initializes empty scale and zero point parameters for the module
-    init_scale = Parameter(torch.empty(0, device=device), requires_grad=False)
+    init_scale = Parameter(
+        torch.empty(0, dtype=torch.float16, device=device), requires_grad=False
+    )
     module.register_parameter(f"{base_name}_scale", init_scale)
 
     init_zero_point = Parameter(
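
The placeholder scale is now created in half precision instead of the default float32; calibration or a loaded state dict is expected to overwrite its data later. A two-line sketch of the new initialization:

import torch
from torch.nn import Parameter

# empty placeholder now carries float16 until real scales are written in
init_scale = Parameter(torch.empty(0, dtype=torch.float16), requires_grad=False)
assert init_scale.dtype == torch.float16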
--- a/compressed_tensors/quantization/quant_scheme.py
+++ b/compressed_tensors/quantization/quant_scheme.py
@@ -22,6 +22,7 @@ from pydantic import BaseModel
 __all__ = [
     "QuantizationScheme",
     "preset_name_to_scheme",
+    "is_preset_scheme",
 ]
 
 
@@ -98,6 +99,14 @@ def preset_name_to_scheme(name: str, targets: List[str]) -> QuantizationScheme:
     )
 
 
+def is_preset_scheme(name: str) -> bool:
+    """
+    :param name: preset quantization settings name
+    :return: True if the name is a preset scheme name
+    """
+    return name.upper() in PRESET_SCHEMES
+
+
 W8A8 = dict(
     weights=QuantizationArgs(), input_activations=QuantizationArgs(symmetric=False)
 )
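
The new is_preset_scheme helper checks a name against PRESET_SCHEMES case-insensitively (via name.upper()) before it is resolved. A hedged usage sketch, assuming the helper is re-exported from compressed_tensors.quantization alongside the other quant_scheme symbols and that the W8A8 preset shown above is registered:

from compressed_tensors.quantization import is_preset_scheme, preset_name_to_scheme

if is_preset_scheme("w8a8"):  # matched case-insensitively against PRESET_SCHEMES
    scheme = preset_name_to_scheme("w8a8", targets=["Linear"])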
--- a/compressed_tensors_nightly-0.3.3.20240523.dist-info/METADATA
+++ b/compressed_tensors_nightly-0.3.3.20240525.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.3.3.20240523
+Version: 0.3.3.20240525
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
--- a/compressed_tensors_nightly-0.3.3.20240523.dist-info/RECORD
+++ b/compressed_tensors_nightly-0.3.3.20240525.dist-info/RECORD
@@ -5,9 +5,9 @@ compressed_tensors/compressors/__init__.py,sha256=3yyoNICHll3F4HS6Yu-cgNZpDhfuob
 compressed_tensors/compressors/base.py,sha256=LWEgbpgTxzmoqQ7Xhq2OQszUgWoDtFuGCiV1Y8nlBGw,2134
 compressed_tensors/compressors/dense.py,sha256=G_XHbvuENyupIKlXSITOQgvPkNkcMEOLcLWQr70V9EE,1257
 compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
-compressed_tensors/compressors/int_quantized.py,sha256=I0FqnjtwCiJvQxi9YyfA8aBeaR5csqtq1bOrVvRqJ1I,4744
-compressed_tensors/compressors/model_compressor.py,sha256=teohd0xTbcIDIuEfZrH-bZyAzHn2UZH2KJXT-7Gk3sw,10426
-compressed_tensors/compressors/pack_quantized.py,sha256=K03l8kFqejpapgcMU5hMm1-JIX1cUVvU-VybGSN6RWA,7885
+compressed_tensors/compressors/int_quantized.py,sha256=bPi62n1MjySOeBat_yWMyc_LvDNDeSihu1gxzo_YrNY,5203
+compressed_tensors/compressors/model_compressor.py,sha256=gHD2VMbXkXaZiJu3ibOaWiYb4oJDz2hxX03wDuu1yhI,10481
+compressed_tensors/compressors/pack_quantized.py,sha256=VFaHQU-f1QuXuTyOtn19p015KHveXe-NeNJ97ATuOR8,8344
 compressed_tensors/compressors/sparse_bitmask.py,sha256=H9oZSTYI1oRCzAMbd4zThUnZd1h2rfs8DmA3tPcvuNE,8637
 compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
 compressed_tensors/config/base.py,sha256=grf5tDaLep8i2-W_p7H-fW9DOGXDi4Zz7su7zjs1Qqc,1454
@@ -16,14 +16,14 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=A6b2V8lhsM8Ho8RjlPBQdxRUDNWhqq-ie5E3RR2_GNg,4360
 compressed_tensors/quantization/quant_config.py,sha256=3BcbQ8-Ah7LbTDSSkRu29Yiid33xo0C1ki6NVhxLiaY,8727
-compressed_tensors/quantization/quant_scheme.py,sha256=QwZsCo8QR9ISB_d58WhIngk2gsMM8ooX-LcRPR-JDRw,3341
+compressed_tensors/quantization/quant_scheme.py,sha256=-hAK1-C67_wJl10eaVLUvbslPBTV04WyzL_J-u9f1ck,3571
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcgTVX3axnS2xV6rc5YvdzK7fSg,798
-compressed_tensors/quantization/lifecycle/apply.py,sha256=whKfNGC_EZm0BC23AP7qWfjRe5OJVWmcZOpX7lryZZc,7625
+compressed_tensors/quantization/lifecycle/apply.py,sha256=yLTDT1zkJp1Nti-aKZGOMW8-TELanF8dXiqDvAkVUQo,7984
 compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
 compressed_tensors/quantization/lifecycle/forward.py,sha256=x9JaIX3TK7cb_-0aCOTTYtA4At9l6v5YOY_70GzIeFU,10520
 compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=U6g9qifSF6pagQZQZEwd-rwWC6uQ_dZXn1wg6nr1Abg,3697
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=pFfcu-pxdQKzlnn-18-RlkEktt2yDi6woNXJsiv1A2c,3732
 compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
 compressed_tensors/quantization/observers/base.py,sha256=kywLVwycFvGxuZMU2cy8-KYyNrZCHkinN6YzCL7boLE,5121
 compressed_tensors/quantization/observers/helpers.py,sha256=JwALNfBYY9Eyl8Q180t0lGh8szumQj8TygfNl-isErs,2166
@@ -36,8 +36,8 @@ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85S
 compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
 compressed_tensors/utils/helpers.py,sha256=h0jfl9drs5FAx40tCHRcVtJqXixB5hT5yq_IG2aY_-w,1735
 compressed_tensors/utils/safetensors_load.py,sha256=wo9UirGrGlenBqZeqotvpCT7D5MEdjCo2J3HeRaIFoU,8502
-compressed_tensors_nightly-0.3.3.20240523.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors_nightly-0.3.3.20240523.dist-info/METADATA,sha256=_c67GXEm0cMZ_AGWhcLqsMZ3hSbFB4KdQ3lL9Dg7M8M,5633
-compressed_tensors_nightly-0.3.3.20240523.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-compressed_tensors_nightly-0.3.3.20240523.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors_nightly-0.3.3.20240523.dist-info/RECORD,,
+compressed_tensors_nightly-0.3.3.20240525.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.3.3.20240525.dist-info/METADATA,sha256=px9rAQu0vrnWEBFzu_I7Rfjq6AlXs8K6lSwPvT5SmrM,5633
+compressed_tensors_nightly-0.3.3.20240525.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+compressed_tensors_nightly-0.3.3.20240525.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.3.3.20240525.dist-info/RECORD,,