PyPI - compressed-tensors - Versions diffs - 0.12.3a20251008__py3-none-any.whl → 0.12.3a20251010__py3-none-any.whl - Mend

compressed-tensors 0.12.3a20251008__py3-none-any.whl → 0.12.3a20251010__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

compressed_tensors/base.py CHANGED Viewed

@@ -20,6 +20,3 @@ TRANSFORM_CONFIG_NAME = "transform_config"
 # required fields
 COMPRESSION_VERSION_NAME = "version"
 QUANTIZATION_METHOD_NAME = "quant_method"
-# auxillary configs
-KV_CACHE_SCHEME_NAME = "kv_cache_scheme"

compressed_tensors/quantization/lifecycle/forward.py CHANGED Viewed

@@ -330,7 +330,7 @@ def _process_quantization(
             inv_perm = torch.argsort(perm)
             output = output.index_select(-1, inv_perm)
-    else:  # covers channel, token and tensor strategies
+    else:  # covers tensor, channel, token, and attn_head strategies
         if do_quantize:
             output = _quantize(
                 x=x,

compressed_tensors/quantization/lifecycle/initialize.py CHANGED Viewed

@@ -14,7 +14,7 @@
 import logging
-from typing import Optional, Tuple
+from typing import Optional, Tuple, Union
 import torch
 from compressed_tensors.quantization import (
@@ -152,7 +152,7 @@ def initialize_qparams(
     module: Module,
     base_name: str,
     quantization_args: QuantizationArgs,
-    observed_shape: Tuple[int],
+    observed_shape: Tuple[Union[int, None]],
     observed_dtype: torch.dtype,
     force_zero_point: bool = True,
 ):
@@ -199,7 +199,7 @@ def initialize_qparams(
         expected_shape = (1,)
     elif strategy == QuantizationStrategy.TOKEN:
-        expected_shape = (1, 1)
+        raise ValueError("Cannot perform static token quantization")
     elif strategy == QuantizationStrategy.CHANNEL:
         if len(observed_shape) < 2:
@@ -234,6 +234,13 @@ def initialize_qparams(
         num_cols = strategy_cdiv(observed_shape[-1], block_structure[-1], strategy)
         expected_shape = (num_rows, num_cols)
+    elif strategy == QuantizationStrategy.ATTN_HEAD:
+        # (batch_size, num_attention_heads, seq_len, head_dim)
+        if len(observed_shape) < 3:
+            raise ValueError("Attention quant requires at least 3 observed dimensions")
+        expected_shape = (observed_shape[-3], 1, 1)
     else:
         assert False, f"Unknown strategy {strategy}"

compressed_tensors/quantization/quant_args.py CHANGED Viewed

@@ -101,6 +101,7 @@ class QuantizationStrategy(str, Enum):
     BLOCK = "block"
     TOKEN = "token"
     TENSOR_GROUP = "tensor_group"
+    ATTN_HEAD = "attn_head"
 class DynamicType(str, Enum):
@@ -263,6 +264,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
         actorder = model.actorder
         dynamic = model.dynamic
         observer = model.observer
+        dynamic = model.dynamic
         # infer strategy
         if strategy is None:
@@ -278,6 +280,12 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
                     "strategy='group' and group_size = -1 for 'channel'"
                 )
+        # validate token strategy
+        if strategy == QuantizationStrategy.TOKEN and not dynamic:
+            raise ValueError(
+                "Cannot perform static token quantization, please use `dynamic=True`"
+            )
         # validate group strategy
         if strategy == QuantizationStrategy.GROUP:
             if group_size is None or group_size <= 0:

compressed_tensors/quantization/quant_scheme.py CHANGED Viewed

@@ -65,6 +65,7 @@ class QuantizationScheme(BaseModel):
                 QuantizationStrategy.TENSOR,
                 QuantizationStrategy.GROUP,
                 QuantizationStrategy.TENSOR_GROUP,
+                QuantizationStrategy.ATTN_HEAD,
             ):
                 if (
                     inputs.strategy == QuantizationStrategy.GROUP

compressed_tensors/version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.12.3.a20251008'
+__version__ = version = '0.12.3.a20251010'
 __version_tuple__ = version_tuple = (0, 12, 3)

{compressed_tensors-0.12.3a20251008.dist-info → compressed_tensors-0.12.3a20251010.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.12.3a20251008
+Version: 0.12.3a20251010
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

{compressed_tensors-0.12.3a20251008.dist-info → compressed_tensors-0.12.3a20251010.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
 compressed_tensors/__init__.py,sha256=SRqNYFVvxAaLa4SImhoiIBKfoOSj7EUdx0CxXjGC2PA,884
-compressed_tensors/base.py,sha256=-gxWvDF4LCkyeDP8YlGzvBBKxo4Dk9h4NINPD61drFU,921
+compressed_tensors/base.py,sha256=dKAVgQAp9GPH6YspvF_cbGXCrbiqAeLEIPydYAO40WE,859
 compressed_tensors/logger.py,sha256=sTm1Od1cV0aDxBm3YN-PPvsOATxY_2tBV62TQE4HiPw,4032
-compressed_tensors/version.py,sha256=_il8gmSl9gH7iYuhwiaqtDtTWGrILZboW5GnnNo-IxY,523
+compressed_tensors/version.py,sha256=VeZYNg68bJw1mFe9aePi3jBTYPJ7_EUXH4lnNFLZ-GE,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -28,16 +28,16 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
 compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
 compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn43Mn4CoV-q4JlTJ_o,4052
 compressed_tensors/quantization/__init__.py,sha256=ifNRE2rJNILOWKA3jkPBGwXEXXvaKkn4lRMcxaVlkW0,790
-compressed_tensors/quantization/quant_args.py,sha256=prkBGBg8TbDK0QdMuFwZdiY8M831w_scD0Y4rEGN40I,13403
+compressed_tensors/quantization/quant_args.py,sha256=Cin8MfRrVYG4Ay9RToG4u1n-RfdPr72kYFwND6W5sO8,13695
 compressed_tensors/quantization/quant_config.py,sha256=Y_OgLId65ajdfupXuOrKSAArrvKicMeA8DHdzRt3J6o,10687
 compressed_tensors/quantization/quant_metadata.py,sha256=yudYWXRYYSqgRhoUA-RIu2LI14NFchOyPUUuz7bPqJE,1950
-compressed_tensors/quantization/quant_scheme.py,sha256=EG86Bq5c8q1O4fJL_o3s7gOu1S5SrcLjfNYOPDn414A,9673
+compressed_tensors/quantization/quant_scheme.py,sha256=ge_YQxeFRPdcZyfbdbLv2emtxCgkY1cd4nLmxsUDJ8c,9721
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
 compressed_tensors/quantization/lifecycle/apply.py,sha256=1zRc7tQbE5OAVJ5VRgU9FZPnMiusef84HluTORSYC2I,13108
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=_gTH0CnLe8MxkTY1hrCCeSYAMzuvIwoCTT4hxW1TPk4,2354
-compressed_tensors/quantization/lifecycle/forward.py,sha256=MAw049L4a9ha4P5D4MjOMoIcSwv9_ZXizahYzHJaaQI,17550
+compressed_tensors/quantization/lifecycle/forward.py,sha256=vVh9JiF2hd9l6B7Wa1zFfYreM0dP3gKX4XghYbV-vEo,17562
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=xebqRiQz3hiSTYwCQQsovg-IKJtHkAbuj6eWygf5yKY,10259
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=0Ju-TiFHcPnr9jKdOIUtYAqLm8C6d_YzABcVF-BxueA,10610
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
 compressed_tensors/quantization/utils/helpers.py,sha256=BA-twfAKk-HMBr_OZHZnSQN7F1a0l5zB1kJhml6j-cI,17146
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
@@ -65,8 +65,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
 compressed_tensors/utils/safetensors_load.py,sha256=Vql34aCTDHwmTZXJHzCyBISJo7iA7EQ78LdTlMjdpZo,12023
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
 compressed_tensors/utils/type.py,sha256=bNwoo_FWlvLuDpYAGGzZJITRg0JA_Ngk9LGPo-kvjeU,2554
-compressed_tensors-0.12.3a20251008.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.12.3a20251008.dist-info/METADATA,sha256=lZoGp9KeOZZp6Ms-Sf2PjBgJGQ78DUGc4wcti7K9E-I,7027
-compressed_tensors-0.12.3a20251008.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.12.3a20251008.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.12.3a20251008.dist-info/RECORD,,
+compressed_tensors-0.12.3a20251010.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.12.3a20251010.dist-info/METADATA,sha256=wLNtu8ihyyONOIb05OUKqwF9QekLsSp3u_p_5GcdZSM,7027
+compressed_tensors-0.12.3a20251010.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.12.3a20251010.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.12.3a20251010.dist-info/RECORD,,

{compressed_tensors-0.12.3a20251008.dist-info → compressed_tensors-0.12.3a20251010.dist-info}/WHEEL RENAMED Viewed

File without changes

{compressed_tensors-0.12.3a20251008.dist-info → compressed_tensors-0.12.3a20251010.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{compressed_tensors-0.12.3a20251008.dist-info → compressed_tensors-0.12.3a20251010.dist-info}/top_level.txt RENAMED Viewed

File without changes

compressed-tensors 0.12.3a20251008__py3-none-any.whl → 0.12.3a20251010__py3-none-any.whl

compressed-tensors 0.12.3a20251008__py3-none-any.whl → 0.12.3a20251010__py3-none-any.whl