compressed-tensors 0.12.3a20251008__py3-none-any.whl → 0.12.3a20251010__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -20,6 +20,3 @@ TRANSFORM_CONFIG_NAME = "transform_config"
20
20
  # required fields
21
21
  COMPRESSION_VERSION_NAME = "version"
22
22
  QUANTIZATION_METHOD_NAME = "quant_method"
23
-
24
- # auxillary configs
25
- KV_CACHE_SCHEME_NAME = "kv_cache_scheme"
@@ -330,7 +330,7 @@ def _process_quantization(
330
330
  inv_perm = torch.argsort(perm)
331
331
  output = output.index_select(-1, inv_perm)
332
332
 
333
- else: # covers channel, token and tensor strategies
333
+ else: # covers tensor, channel, token, and attn_head strategies
334
334
  if do_quantize:
335
335
  output = _quantize(
336
336
  x=x,
@@ -14,7 +14,7 @@
14
14
 
15
15
 
16
16
  import logging
17
- from typing import Optional, Tuple
17
+ from typing import Optional, Tuple, Union
18
18
 
19
19
  import torch
20
20
  from compressed_tensors.quantization import (
@@ -152,7 +152,7 @@ def initialize_qparams(
152
152
  module: Module,
153
153
  base_name: str,
154
154
  quantization_args: QuantizationArgs,
155
- observed_shape: Tuple[int],
155
+ observed_shape: Tuple[Union[int, None]],
156
156
  observed_dtype: torch.dtype,
157
157
  force_zero_point: bool = True,
158
158
  ):
@@ -199,7 +199,7 @@ def initialize_qparams(
199
199
  expected_shape = (1,)
200
200
 
201
201
  elif strategy == QuantizationStrategy.TOKEN:
202
- expected_shape = (1, 1)
202
+ raise ValueError("Cannot perform static token quantization")
203
203
 
204
204
  elif strategy == QuantizationStrategy.CHANNEL:
205
205
  if len(observed_shape) < 2:
@@ -234,6 +234,13 @@ def initialize_qparams(
234
234
  num_cols = strategy_cdiv(observed_shape[-1], block_structure[-1], strategy)
235
235
  expected_shape = (num_rows, num_cols)
236
236
 
237
+ elif strategy == QuantizationStrategy.ATTN_HEAD:
238
+ # (batch_size, num_attention_heads, seq_len, head_dim)
239
+ if len(observed_shape) < 3:
240
+ raise ValueError("Attention quant requires at least 3 observed dimensions")
241
+
242
+ expected_shape = (observed_shape[-3], 1, 1)
243
+
237
244
  else:
238
245
  assert False, f"Unknown strategy {strategy}"
239
246
 
@@ -101,6 +101,7 @@ class QuantizationStrategy(str, Enum):
101
101
  BLOCK = "block"
102
102
  TOKEN = "token"
103
103
  TENSOR_GROUP = "tensor_group"
104
+ ATTN_HEAD = "attn_head"
104
105
 
105
106
 
106
107
  class DynamicType(str, Enum):
@@ -263,6 +264,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
263
264
  actorder = model.actorder
264
265
  dynamic = model.dynamic
265
266
  observer = model.observer
267
+ dynamic = model.dynamic
266
268
 
267
269
  # infer strategy
268
270
  if strategy is None:
@@ -278,6 +280,12 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
278
280
  "strategy='group' and group_size = -1 for 'channel'"
279
281
  )
280
282
 
283
+ # validate token strategy
284
+ if strategy == QuantizationStrategy.TOKEN and not dynamic:
285
+ raise ValueError(
286
+ "Cannot perform static token quantization, please use `dynamic=True`"
287
+ )
288
+
281
289
  # validate group strategy
282
290
  if strategy == QuantizationStrategy.GROUP:
283
291
  if group_size is None or group_size <= 0:
@@ -65,6 +65,7 @@ class QuantizationScheme(BaseModel):
65
65
  QuantizationStrategy.TENSOR,
66
66
  QuantizationStrategy.GROUP,
67
67
  QuantizationStrategy.TENSOR_GROUP,
68
+ QuantizationStrategy.ATTN_HEAD,
68
69
  ):
69
70
  if (
70
71
  inputs.strategy == QuantizationStrategy.GROUP
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.12.3.a20251008'
20
+ __version__ = version = '0.12.3.a20251010'
21
21
  __version_tuple__ = version_tuple = (0, 12, 3)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.12.3a20251008
3
+ Version: 0.12.3a20251010
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -1,7 +1,7 @@
1
1
  compressed_tensors/__init__.py,sha256=SRqNYFVvxAaLa4SImhoiIBKfoOSj7EUdx0CxXjGC2PA,884
2
- compressed_tensors/base.py,sha256=-gxWvDF4LCkyeDP8YlGzvBBKxo4Dk9h4NINPD61drFU,921
2
+ compressed_tensors/base.py,sha256=dKAVgQAp9GPH6YspvF_cbGXCrbiqAeLEIPydYAO40WE,859
3
3
  compressed_tensors/logger.py,sha256=sTm1Od1cV0aDxBm3YN-PPvsOATxY_2tBV62TQE4HiPw,4032
4
- compressed_tensors/version.py,sha256=_il8gmSl9gH7iYuhwiaqtDtTWGrILZboW5GnnNo-IxY,523
4
+ compressed_tensors/version.py,sha256=VeZYNg68bJw1mFe9aePi3jBTYPJ7_EUXH4lnNFLZ-GE,523
5
5
  compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
6
6
  compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
7
7
  compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -28,16 +28,16 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
28
28
  compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
29
29
  compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn43Mn4CoV-q4JlTJ_o,4052
30
30
  compressed_tensors/quantization/__init__.py,sha256=ifNRE2rJNILOWKA3jkPBGwXEXXvaKkn4lRMcxaVlkW0,790
31
- compressed_tensors/quantization/quant_args.py,sha256=prkBGBg8TbDK0QdMuFwZdiY8M831w_scD0Y4rEGN40I,13403
31
+ compressed_tensors/quantization/quant_args.py,sha256=Cin8MfRrVYG4Ay9RToG4u1n-RfdPr72kYFwND6W5sO8,13695
32
32
  compressed_tensors/quantization/quant_config.py,sha256=Y_OgLId65ajdfupXuOrKSAArrvKicMeA8DHdzRt3J6o,10687
33
33
  compressed_tensors/quantization/quant_metadata.py,sha256=yudYWXRYYSqgRhoUA-RIu2LI14NFchOyPUUuz7bPqJE,1950
34
- compressed_tensors/quantization/quant_scheme.py,sha256=EG86Bq5c8q1O4fJL_o3s7gOu1S5SrcLjfNYOPDn414A,9673
34
+ compressed_tensors/quantization/quant_scheme.py,sha256=ge_YQxeFRPdcZyfbdbLv2emtxCgkY1cd4nLmxsUDJ8c,9721
35
35
  compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
36
36
  compressed_tensors/quantization/lifecycle/apply.py,sha256=1zRc7tQbE5OAVJ5VRgU9FZPnMiusef84HluTORSYC2I,13108
37
37
  compressed_tensors/quantization/lifecycle/compressed.py,sha256=_gTH0CnLe8MxkTY1hrCCeSYAMzuvIwoCTT4hxW1TPk4,2354
38
- compressed_tensors/quantization/lifecycle/forward.py,sha256=MAw049L4a9ha4P5D4MjOMoIcSwv9_ZXizahYzHJaaQI,17550
38
+ compressed_tensors/quantization/lifecycle/forward.py,sha256=vVh9JiF2hd9l6B7Wa1zFfYreM0dP3gKX4XghYbV-vEo,17562
39
39
  compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
40
- compressed_tensors/quantization/lifecycle/initialize.py,sha256=xebqRiQz3hiSTYwCQQsovg-IKJtHkAbuj6eWygf5yKY,10259
40
+ compressed_tensors/quantization/lifecycle/initialize.py,sha256=0Ju-TiFHcPnr9jKdOIUtYAqLm8C6d_YzABcVF-BxueA,10610
41
41
  compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
42
42
  compressed_tensors/quantization/utils/helpers.py,sha256=BA-twfAKk-HMBr_OZHZnSQN7F1a0l5zB1kJhml6j-cI,17146
43
43
  compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
@@ -65,8 +65,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
65
65
  compressed_tensors/utils/safetensors_load.py,sha256=Vql34aCTDHwmTZXJHzCyBISJo7iA7EQ78LdTlMjdpZo,12023
66
66
  compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
67
67
  compressed_tensors/utils/type.py,sha256=bNwoo_FWlvLuDpYAGGzZJITRg0JA_Ngk9LGPo-kvjeU,2554
68
- compressed_tensors-0.12.3a20251008.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
69
- compressed_tensors-0.12.3a20251008.dist-info/METADATA,sha256=lZoGp9KeOZZp6Ms-Sf2PjBgJGQ78DUGc4wcti7K9E-I,7027
70
- compressed_tensors-0.12.3a20251008.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
71
- compressed_tensors-0.12.3a20251008.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
72
- compressed_tensors-0.12.3a20251008.dist-info/RECORD,,
68
+ compressed_tensors-0.12.3a20251010.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
69
+ compressed_tensors-0.12.3a20251010.dist-info/METADATA,sha256=wLNtu8ihyyONOIb05OUKqwF9QekLsSp3u_p_5GcdZSM,7027
70
+ compressed_tensors-0.12.3a20251010.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
71
+ compressed_tensors-0.12.3a20251010.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
72
+ compressed_tensors-0.12.3a20251010.dist-info/RECORD,,