PyPI - compressed-tensors - Versions diffs - 0.12.3a20251007__py3-none-any.whl → 0.12.3a20251009__py3-none-any.whl - Mend

compressed-tensors 0.12.3a20251007__py3-none-any.whl → 0.12.3a20251009__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

compressed_tensors/quantization/lifecycle/forward.py CHANGED Viewed

@@ -330,7 +330,7 @@ def _process_quantization(
             inv_perm = torch.argsort(perm)
             output = output.index_select(-1, inv_perm)
-    else:  # covers channel, token and tensor strategies
+    else:  # covers tensor, channel, token, and attn_head strategies
         if do_quantize:
             output = _quantize(
                 x=x,

compressed_tensors/quantization/lifecycle/initialize.py CHANGED Viewed

@@ -14,7 +14,7 @@
 import logging
-from typing import Optional, Tuple
+from typing import Optional, Tuple, Union
 import torch
 from compressed_tensors.quantization import (
@@ -152,7 +152,7 @@ def initialize_qparams(
     module: Module,
     base_name: str,
     quantization_args: QuantizationArgs,
-    observed_shape: Tuple[int],
+    observed_shape: Tuple[Union[int, None]],
     observed_dtype: torch.dtype,
     force_zero_point: bool = True,
 ):
@@ -234,6 +234,13 @@ def initialize_qparams(
         num_cols = strategy_cdiv(observed_shape[-1], block_structure[-1], strategy)
         expected_shape = (num_rows, num_cols)
+    elif strategy == QuantizationStrategy.ATTN_HEAD:
+        # (batch_size, num_attention_heads, seq_len, head_dim)
+        if len(observed_shape) < 3:
+            raise ValueError("Attention quant requires at least 3 observed dimensions")
+        expected_shape = (observed_shape[-3], 1, 1)
     else:
         assert False, f"Unknown strategy {strategy}"

compressed_tensors/quantization/quant_args.py CHANGED Viewed

@@ -101,6 +101,7 @@ class QuantizationStrategy(str, Enum):
     BLOCK = "block"
     TOKEN = "token"
     TENSOR_GROUP = "tensor_group"
+    ATTN_HEAD = "attn_head"
 class DynamicType(str, Enum):
@@ -259,6 +260,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
         # extract user-passed values from dictionary
         strategy = model.strategy
         group_size = model.group_size
+        block_structure = model.block_structure
         actorder = model.actorder
         dynamic = model.dynamic
         observer = model.observer
@@ -277,7 +279,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
                     "strategy='group' and group_size = -1 for 'channel'"
                 )
-        # validate strategy and group
+        # validate group strategy
         if strategy == QuantizationStrategy.GROUP:
             if group_size is None or group_size <= 0:
                 raise ValueError(
@@ -292,6 +294,14 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
         ):
             raise ValueError("group_size requires strategy to be set to 'group'")
+        # validate block strategy
+        has_block_strategy = strategy == QuantizationStrategy.BLOCK
+        has_block_structure = block_structure is not None
+        if has_block_strategy and not has_block_structure:
+            raise ValueError(f"Block strategy requires block structure\n{model}")
+        if has_block_structure and not has_block_strategy:
+            raise ValueError(f"Block structure requires block strategy\n{model}")
         # validate activation ordering and strategy
         if actorder is not None and strategy != QuantizationStrategy.GROUP:
             raise ValueError(

compressed_tensors/quantization/quant_scheme.py CHANGED Viewed

@@ -65,6 +65,7 @@ class QuantizationScheme(BaseModel):
                 QuantizationStrategy.TENSOR,
                 QuantizationStrategy.GROUP,
                 QuantizationStrategy.TENSOR_GROUP,
+                QuantizationStrategy.ATTN_HEAD,
             ):
                 if (
                     inputs.strategy == QuantizationStrategy.GROUP

compressed_tensors/version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.12.3.a20251007'
+__version__ = version = '0.12.3.a20251009'
 __version_tuple__ = version_tuple = (0, 12, 3)

{compressed_tensors-0.12.3a20251007.dist-info → compressed_tensors-0.12.3a20251009.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.12.3a20251007
+Version: 0.12.3a20251009
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

{compressed_tensors-0.12.3a20251007.dist-info → compressed_tensors-0.12.3a20251009.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
 compressed_tensors/__init__.py,sha256=SRqNYFVvxAaLa4SImhoiIBKfoOSj7EUdx0CxXjGC2PA,884
 compressed_tensors/base.py,sha256=-gxWvDF4LCkyeDP8YlGzvBBKxo4Dk9h4NINPD61drFU,921
 compressed_tensors/logger.py,sha256=sTm1Od1cV0aDxBm3YN-PPvsOATxY_2tBV62TQE4HiPw,4032
-compressed_tensors/version.py,sha256=wUAf4k-SKBfmo_lva-t_YeLxw6mVgmYfFIXdh6YQLP4,523
+compressed_tensors/version.py,sha256=p1gc603nDCOmpKHv3ByvOa_-mIzcmf3lWif35Bc9Lo8,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -28,16 +28,16 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
 compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
 compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn43Mn4CoV-q4JlTJ_o,4052
 compressed_tensors/quantization/__init__.py,sha256=ifNRE2rJNILOWKA3jkPBGwXEXXvaKkn4lRMcxaVlkW0,790
-compressed_tensors/quantization/quant_args.py,sha256=5AxYKqCSlg7CDgz2N8G4ZRVIiSUKvIm-SCQa-Bq_SF0,12916
+compressed_tensors/quantization/quant_args.py,sha256=MUDEEokFH2AWRhJHu-32_JiamMr5K8ifSuEWKbg2jfE,13431
 compressed_tensors/quantization/quant_config.py,sha256=Y_OgLId65ajdfupXuOrKSAArrvKicMeA8DHdzRt3J6o,10687
 compressed_tensors/quantization/quant_metadata.py,sha256=yudYWXRYYSqgRhoUA-RIu2LI14NFchOyPUUuz7bPqJE,1950
-compressed_tensors/quantization/quant_scheme.py,sha256=EG86Bq5c8q1O4fJL_o3s7gOu1S5SrcLjfNYOPDn414A,9673
+compressed_tensors/quantization/quant_scheme.py,sha256=ge_YQxeFRPdcZyfbdbLv2emtxCgkY1cd4nLmxsUDJ8c,9721
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
 compressed_tensors/quantization/lifecycle/apply.py,sha256=1zRc7tQbE5OAVJ5VRgU9FZPnMiusef84HluTORSYC2I,13108
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=_gTH0CnLe8MxkTY1hrCCeSYAMzuvIwoCTT4hxW1TPk4,2354
-compressed_tensors/quantization/lifecycle/forward.py,sha256=MAw049L4a9ha4P5D4MjOMoIcSwv9_ZXizahYzHJaaQI,17550
+compressed_tensors/quantization/lifecycle/forward.py,sha256=vVh9JiF2hd9l6B7Wa1zFfYreM0dP3gKX4XghYbV-vEo,17562
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=xebqRiQz3hiSTYwCQQsovg-IKJtHkAbuj6eWygf5yKY,10259
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=JMpcsAmLrOMPb3PC4asyo7lce3BkLd8H6iVdnI72K2Q,10573
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
 compressed_tensors/quantization/utils/helpers.py,sha256=BA-twfAKk-HMBr_OZHZnSQN7F1a0l5zB1kJhml6j-cI,17146
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
@@ -65,8 +65,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
 compressed_tensors/utils/safetensors_load.py,sha256=Vql34aCTDHwmTZXJHzCyBISJo7iA7EQ78LdTlMjdpZo,12023
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
 compressed_tensors/utils/type.py,sha256=bNwoo_FWlvLuDpYAGGzZJITRg0JA_Ngk9LGPo-kvjeU,2554
-compressed_tensors-0.12.3a20251007.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.12.3a20251007.dist-info/METADATA,sha256=8uUWt8bF7sZhbMQd2Llj4PDLC7I4ALKJE_eFNa8DBWI,7027
-compressed_tensors-0.12.3a20251007.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.12.3a20251007.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.12.3a20251007.dist-info/RECORD,,
+compressed_tensors-0.12.3a20251009.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.12.3a20251009.dist-info/METADATA,sha256=H2QGZBR6fGYaw7TSQY8VbPgJBffvl_5qkFl6UTLL5Nk,7027
+compressed_tensors-0.12.3a20251009.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.12.3a20251009.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.12.3a20251009.dist-info/RECORD,,

{compressed_tensors-0.12.3a20251007.dist-info → compressed_tensors-0.12.3a20251009.dist-info}/WHEEL RENAMED Viewed

File without changes

{compressed_tensors-0.12.3a20251007.dist-info → compressed_tensors-0.12.3a20251009.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{compressed_tensors-0.12.3a20251007.dist-info → compressed_tensors-0.12.3a20251009.dist-info}/top_level.txt RENAMED Viewed

File without changes

compressed-tensors 0.12.3a20251007__py3-none-any.whl → 0.12.3a20251009__py3-none-any.whl

compressed-tensors 0.12.3a20251007__py3-none-any.whl → 0.12.3a20251009__py3-none-any.whl