compressed-tensors 0.12.3a20251007__py3-none-any.whl → 0.12.3a20251009__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -330,7 +330,7 @@ def _process_quantization(
330
330
  inv_perm = torch.argsort(perm)
331
331
  output = output.index_select(-1, inv_perm)
332
332
 
333
- else: # covers channel, token and tensor strategies
333
+ else: # covers tensor, channel, token, and attn_head strategies
334
334
  if do_quantize:
335
335
  output = _quantize(
336
336
  x=x,
@@ -14,7 +14,7 @@
14
14
 
15
15
 
16
16
  import logging
17
- from typing import Optional, Tuple
17
+ from typing import Optional, Tuple, Union
18
18
 
19
19
  import torch
20
20
  from compressed_tensors.quantization import (
@@ -152,7 +152,7 @@ def initialize_qparams(
152
152
  module: Module,
153
153
  base_name: str,
154
154
  quantization_args: QuantizationArgs,
155
- observed_shape: Tuple[int],
155
+ observed_shape: Tuple[Union[int, None]],
156
156
  observed_dtype: torch.dtype,
157
157
  force_zero_point: bool = True,
158
158
  ):
@@ -234,6 +234,13 @@ def initialize_qparams(
234
234
  num_cols = strategy_cdiv(observed_shape[-1], block_structure[-1], strategy)
235
235
  expected_shape = (num_rows, num_cols)
236
236
 
237
+ elif strategy == QuantizationStrategy.ATTN_HEAD:
238
+ # (batch_size, num_attention_heads, seq_len, head_dim)
239
+ if len(observed_shape) < 3:
240
+ raise ValueError("Attention quant requires at least 3 observed dimensions")
241
+
242
+ expected_shape = (observed_shape[-3], 1, 1)
243
+
237
244
  else:
238
245
  assert False, f"Unknown strategy {strategy}"
239
246
 
@@ -101,6 +101,7 @@ class QuantizationStrategy(str, Enum):
101
101
  BLOCK = "block"
102
102
  TOKEN = "token"
103
103
  TENSOR_GROUP = "tensor_group"
104
+ ATTN_HEAD = "attn_head"
104
105
 
105
106
 
106
107
  class DynamicType(str, Enum):
@@ -259,6 +260,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
259
260
  # extract user-passed values from dictionary
260
261
  strategy = model.strategy
261
262
  group_size = model.group_size
263
+ block_structure = model.block_structure
262
264
  actorder = model.actorder
263
265
  dynamic = model.dynamic
264
266
  observer = model.observer
@@ -277,7 +279,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
277
279
  "strategy='group' and group_size = -1 for 'channel'"
278
280
  )
279
281
 
280
- # validate strategy and group
282
+ # validate group strategy
281
283
  if strategy == QuantizationStrategy.GROUP:
282
284
  if group_size is None or group_size <= 0:
283
285
  raise ValueError(
@@ -292,6 +294,14 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
292
294
  ):
293
295
  raise ValueError("group_size requires strategy to be set to 'group'")
294
296
 
297
+ # validate block strategy
298
+ has_block_strategy = strategy == QuantizationStrategy.BLOCK
299
+ has_block_structure = block_structure is not None
300
+ if has_block_strategy and not has_block_structure:
301
+ raise ValueError(f"Block strategy requires block structure\n{model}")
302
+ if has_block_structure and not has_block_strategy:
303
+ raise ValueError(f"Block structure requires block strategy\n{model}")
304
+
295
305
  # validate activation ordering and strategy
296
306
  if actorder is not None and strategy != QuantizationStrategy.GROUP:
297
307
  raise ValueError(
@@ -65,6 +65,7 @@ class QuantizationScheme(BaseModel):
65
65
  QuantizationStrategy.TENSOR,
66
66
  QuantizationStrategy.GROUP,
67
67
  QuantizationStrategy.TENSOR_GROUP,
68
+ QuantizationStrategy.ATTN_HEAD,
68
69
  ):
69
70
  if (
70
71
  inputs.strategy == QuantizationStrategy.GROUP
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.12.3.a20251007'
20
+ __version__ = version = '0.12.3.a20251009'
21
21
  __version_tuple__ = version_tuple = (0, 12, 3)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.12.3a20251007
3
+ Version: 0.12.3a20251009
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -1,7 +1,7 @@
1
1
  compressed_tensors/__init__.py,sha256=SRqNYFVvxAaLa4SImhoiIBKfoOSj7EUdx0CxXjGC2PA,884
2
2
  compressed_tensors/base.py,sha256=-gxWvDF4LCkyeDP8YlGzvBBKxo4Dk9h4NINPD61drFU,921
3
3
  compressed_tensors/logger.py,sha256=sTm1Od1cV0aDxBm3YN-PPvsOATxY_2tBV62TQE4HiPw,4032
4
- compressed_tensors/version.py,sha256=wUAf4k-SKBfmo_lva-t_YeLxw6mVgmYfFIXdh6YQLP4,523
4
+ compressed_tensors/version.py,sha256=p1gc603nDCOmpKHv3ByvOa_-mIzcmf3lWif35Bc9Lo8,523
5
5
  compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
6
6
  compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
7
7
  compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -28,16 +28,16 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
28
28
  compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
29
29
  compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn43Mn4CoV-q4JlTJ_o,4052
30
30
  compressed_tensors/quantization/__init__.py,sha256=ifNRE2rJNILOWKA3jkPBGwXEXXvaKkn4lRMcxaVlkW0,790
31
- compressed_tensors/quantization/quant_args.py,sha256=5AxYKqCSlg7CDgz2N8G4ZRVIiSUKvIm-SCQa-Bq_SF0,12916
31
+ compressed_tensors/quantization/quant_args.py,sha256=MUDEEokFH2AWRhJHu-32_JiamMr5K8ifSuEWKbg2jfE,13431
32
32
  compressed_tensors/quantization/quant_config.py,sha256=Y_OgLId65ajdfupXuOrKSAArrvKicMeA8DHdzRt3J6o,10687
33
33
  compressed_tensors/quantization/quant_metadata.py,sha256=yudYWXRYYSqgRhoUA-RIu2LI14NFchOyPUUuz7bPqJE,1950
34
- compressed_tensors/quantization/quant_scheme.py,sha256=EG86Bq5c8q1O4fJL_o3s7gOu1S5SrcLjfNYOPDn414A,9673
34
+ compressed_tensors/quantization/quant_scheme.py,sha256=ge_YQxeFRPdcZyfbdbLv2emtxCgkY1cd4nLmxsUDJ8c,9721
35
35
  compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
36
36
  compressed_tensors/quantization/lifecycle/apply.py,sha256=1zRc7tQbE5OAVJ5VRgU9FZPnMiusef84HluTORSYC2I,13108
37
37
  compressed_tensors/quantization/lifecycle/compressed.py,sha256=_gTH0CnLe8MxkTY1hrCCeSYAMzuvIwoCTT4hxW1TPk4,2354
38
- compressed_tensors/quantization/lifecycle/forward.py,sha256=MAw049L4a9ha4P5D4MjOMoIcSwv9_ZXizahYzHJaaQI,17550
38
+ compressed_tensors/quantization/lifecycle/forward.py,sha256=vVh9JiF2hd9l6B7Wa1zFfYreM0dP3gKX4XghYbV-vEo,17562
39
39
  compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
40
- compressed_tensors/quantization/lifecycle/initialize.py,sha256=xebqRiQz3hiSTYwCQQsovg-IKJtHkAbuj6eWygf5yKY,10259
40
+ compressed_tensors/quantization/lifecycle/initialize.py,sha256=JMpcsAmLrOMPb3PC4asyo7lce3BkLd8H6iVdnI72K2Q,10573
41
41
  compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
42
42
  compressed_tensors/quantization/utils/helpers.py,sha256=BA-twfAKk-HMBr_OZHZnSQN7F1a0l5zB1kJhml6j-cI,17146
43
43
  compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
@@ -65,8 +65,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
65
65
  compressed_tensors/utils/safetensors_load.py,sha256=Vql34aCTDHwmTZXJHzCyBISJo7iA7EQ78LdTlMjdpZo,12023
66
66
  compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
67
67
  compressed_tensors/utils/type.py,sha256=bNwoo_FWlvLuDpYAGGzZJITRg0JA_Ngk9LGPo-kvjeU,2554
68
- compressed_tensors-0.12.3a20251007.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
69
- compressed_tensors-0.12.3a20251007.dist-info/METADATA,sha256=8uUWt8bF7sZhbMQd2Llj4PDLC7I4ALKJE_eFNa8DBWI,7027
70
- compressed_tensors-0.12.3a20251007.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
71
- compressed_tensors-0.12.3a20251007.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
72
- compressed_tensors-0.12.3a20251007.dist-info/RECORD,,
68
+ compressed_tensors-0.12.3a20251009.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
69
+ compressed_tensors-0.12.3a20251009.dist-info/METADATA,sha256=H2QGZBR6fGYaw7TSQY8VbPgJBffvl_5qkFl6UTLL5Nk,7027
70
+ compressed_tensors-0.12.3a20251009.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
71
+ compressed_tensors-0.12.3a20251009.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
72
+ compressed_tensors-0.12.3a20251009.dist-info/RECORD,,