PyPI - compressed-tensors-nightly - Versions diffs - 0.3.3.20240529__py3-none-any.whl → 0.3.3.20240531__py3-none-any.whl - Mend

compressed-tensors-nightly 0.3.3.20240529__py3-none-any.whl → 0.3.3.20240531__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

compressed_tensors/compressors/int_quantized.py CHANGED Viewed

@@ -57,13 +57,14 @@ class IntQuantizationCompressor(Compressor):
         :return: compressed state dict
         """
         compressed_dict = {}
+        weight_suffix = ".weight"
         _LOGGER.debug(
             f"Compressing model with {len(model_state)} parameterized layers..."
         )
         for name, value in tqdm(model_state.items(), desc="Compressing model"):
-            if name.endswith(".weight"):
-                prefix = name.removesuffix(".weight")
+            if name.endswith(weight_suffix):
+                prefix = name[: -(len(weight_suffix))]
                 scale = model_state.get(merge_names(prefix, "weight_scale"), None)
                 zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
                 if scale is not None and zp is not None:

compressed_tensors/compressors/pack_quantized.py CHANGED Viewed

@@ -62,13 +62,14 @@ class PackedQuantizationCompressor(Compressor):
         :return: compressed state dict
         """
         compressed_dict = {}
+        weight_suffix = ".weight"
         _LOGGER.debug(
             f"Compressing model with {len(model_state)} parameterized layers..."
         )
         for name, value in tqdm(model_state.items(), desc="Compressing model"):
-            if name.endswith(".weight"):
-                prefix = name.removesuffix(".weight")
+            if name.endswith(weight_suffix):
+                prefix = name[: -(len(weight_suffix))]
                 scale = model_state.get(merge_names(prefix, "weight_scale"), None)
                 zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
                 shape = torch.tensor(value.shape)

compressed_tensors/quantization/lifecycle/forward.py CHANGED Viewed

@@ -89,11 +89,17 @@ def dequantize(
         if scale.ndim == 0:
             args = QuantizationArgs(strategy=QuantizationStrategy.TENSOR)
         elif scale.ndim == 2:
-            args = QuantizationArgs(strategy=QuantizationStrategy.CHANNEL)
-        elif scale.ndim == 3:
-            group_size = int(x_q.shape[1] / scale.shape[1])
-            args = QuantizationArgs(
-                strategy=QuantizationStrategy.GROUP, group_size=group_size
+            if scale.shape[1] == 1:
+                args = QuantizationArgs(strategy=QuantizationStrategy.CHANNEL)
+            else:
+                group_size = int(x_q.shape[1] / scale.shape[1])
+                args = QuantizationArgs(
+                    strategy=QuantizationStrategy.GROUP, group_size=group_size
+                )
+        else:
+            raise ValueError(
+                f"Could not infer a quantization strategy from scale with {scale.ndim} "
+                "dimmensions. Expected 0-2 dimmensions."
             )
     return _process_quantization(
         x=x_q,
@@ -152,7 +158,8 @@ def _process_quantization(
     if args.strategy == QuantizationStrategy.GROUP:
-        if do_dequantize:  # if dequantizing the output should be a fp type
+        if do_dequantize and not do_quantize:
+            # if dequantizing a quantized type infer the output type from the scale
             output = torch.zeros_like(x, dtype=scale.dtype)
         else:
             output_dtype = dtype if dtype is not None else x.dtype

{compressed_tensors_nightly-0.3.3.20240529.dist-info → compressed_tensors_nightly-0.3.3.20240531.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.3.3.20240529
+Version: 0.3.3.20240531
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -89,7 +89,7 @@ from compressed_tensors import save_compressed_model, load_compressed, BitmaskCo
 from transformers import AutoModelForCausalLM
 model_name = "neuralmagic/llama2.c-stories110M-pruned50"
-model = AutoModelForCausalLM.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")
 original_state_dict = model.state_dict()
@@ -111,7 +111,7 @@ We can use compressed-tensors to run basic post training quantization (PTQ) and
 ```python
 model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
-model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cuda:0")
+model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cuda:0", torch_dtype="auto")
 config = QuantizationConfig.parse_file("./examples/bit_packing/int4_config.json")
 config.quantization_status = QuantizationStatus.CALIBRATION

{compressed_tensors_nightly-0.3.3.20240529.dist-info → compressed_tensors_nightly-0.3.3.20240531.dist-info}/RECORD RENAMED Viewed

@@ -5,9 +5,9 @@ compressed_tensors/compressors/__init__.py,sha256=3yyoNICHll3F4HS6Yu-cgNZpDhfuob
 compressed_tensors/compressors/base.py,sha256=LWEgbpgTxzmoqQ7Xhq2OQszUgWoDtFuGCiV1Y8nlBGw,2134
 compressed_tensors/compressors/dense.py,sha256=G_XHbvuENyupIKlXSITOQgvPkNkcMEOLcLWQr70V9EE,1257
 compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
-compressed_tensors/compressors/int_quantized.py,sha256=bPi62n1MjySOeBat_yWMyc_LvDNDeSihu1gxzo_YrNY,5203
+compressed_tensors/compressors/int_quantized.py,sha256=Ct2vCK0yoPm6vkIFlzDMGQ7m14xT1GyURsSwH9DP770,5242
 compressed_tensors/compressors/model_compressor.py,sha256=gHD2VMbXkXaZiJu3ibOaWiYb4oJDz2hxX03wDuu1yhI,10481
-compressed_tensors/compressors/pack_quantized.py,sha256=zNQnnefK3qWpIzeGTBldQrRc8LM0p-nfSAN9Q9zUAIE,8412
+compressed_tensors/compressors/pack_quantized.py,sha256=VPiLlgJlDgARrn7YmiQoLqUfxErKBfj54epMYWRsF8k,8451
 compressed_tensors/compressors/sparse_bitmask.py,sha256=H9oZSTYI1oRCzAMbd4zThUnZd1h2rfs8DmA3tPcvuNE,8637
 compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
 compressed_tensors/config/base.py,sha256=grf5tDaLep8i2-W_p7H-fW9DOGXDi4Zz7su7zjs1Qqc,1454
@@ -21,7 +21,7 @@ compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcg
 compressed_tensors/quantization/lifecycle/apply.py,sha256=yLTDT1zkJp1Nti-aKZGOMW8-TELanF8dXiqDvAkVUQo,7984
 compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
-compressed_tensors/quantization/lifecycle/forward.py,sha256=x9JaIX3TK7cb_-0aCOTTYtA4At9l6v5YOY_70GzIeFU,10520
+compressed_tensors/quantization/lifecycle/forward.py,sha256=xeHaUbFxcUyqHffhCBZiRk-ObxjAF99rTnPR1Cweym0,10822
 compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
 compressed_tensors/quantization/lifecycle/initialize.py,sha256=pFfcu-pxdQKzlnn-18-RlkEktt2yDi6woNXJsiv1A2c,3732
 compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
@@ -36,8 +36,8 @@ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85S
 compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
 compressed_tensors/utils/helpers.py,sha256=h0jfl9drs5FAx40tCHRcVtJqXixB5hT5yq_IG2aY_-w,1735
 compressed_tensors/utils/safetensors_load.py,sha256=wo9UirGrGlenBqZeqotvpCT7D5MEdjCo2J3HeRaIFoU,8502
-compressed_tensors_nightly-0.3.3.20240529.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors_nightly-0.3.3.20240529.dist-info/METADATA,sha256=VB3YwuyR3rzfenxxfK36EogLrAMlnWu9YrGIhFce0t0,5633
-compressed_tensors_nightly-0.3.3.20240529.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-compressed_tensors_nightly-0.3.3.20240529.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors_nightly-0.3.3.20240529.dist-info/RECORD,,
+compressed_tensors_nightly-0.3.3.20240531.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.3.3.20240531.dist-info/METADATA,sha256=C02ZlsO9SJ14Zly_UFEtYDmiOdXVb-0upcrn_sgM1QM,5673
+compressed_tensors_nightly-0.3.3.20240531.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+compressed_tensors_nightly-0.3.3.20240531.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.3.3.20240531.dist-info/RECORD,,

{compressed_tensors_nightly-0.3.3.20240529.dist-info → compressed_tensors_nightly-0.3.3.20240531.dist-info}/LICENSE RENAMED Viewed

File without changes

{compressed_tensors_nightly-0.3.3.20240529.dist-info → compressed_tensors_nightly-0.3.3.20240531.dist-info}/WHEEL RENAMED Viewed

File without changes

{compressed_tensors_nightly-0.3.3.20240529.dist-info → compressed_tensors_nightly-0.3.3.20240531.dist-info}/top_level.txt RENAMED Viewed

File without changes

compressed-tensors-nightly 0.3.3.20240529__py3-none-any.whl → 0.3.3.20240531__py3-none-any.whl

compressed-tensors-nightly 0.3.3.20240529__py3-none-any.whl → 0.3.3.20240531__py3-none-any.whl