compressed-tensors 0.12.3a20251007__py3-none-any.whl → 0.12.3a20251009__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/quantization/lifecycle/forward.py +1 -1
- compressed_tensors/quantization/lifecycle/initialize.py +9 -2
- compressed_tensors/quantization/quant_args.py +11 -1
- compressed_tensors/quantization/quant_scheme.py +1 -0
- compressed_tensors/version.py +1 -1
- {compressed_tensors-0.12.3a20251007.dist-info → compressed_tensors-0.12.3a20251009.dist-info}/METADATA +1 -1
- {compressed_tensors-0.12.3a20251007.dist-info → compressed_tensors-0.12.3a20251009.dist-info}/RECORD +10 -10
- {compressed_tensors-0.12.3a20251007.dist-info → compressed_tensors-0.12.3a20251009.dist-info}/WHEEL +0 -0
- {compressed_tensors-0.12.3a20251007.dist-info → compressed_tensors-0.12.3a20251009.dist-info}/licenses/LICENSE +0 -0
- {compressed_tensors-0.12.3a20251007.dist-info → compressed_tensors-0.12.3a20251009.dist-info}/top_level.txt +0 -0
|
@@ -330,7 +330,7 @@ def _process_quantization(
|
|
|
330
330
|
inv_perm = torch.argsort(perm)
|
|
331
331
|
output = output.index_select(-1, inv_perm)
|
|
332
332
|
|
|
333
|
-
else: # covers channel, token and tensor strategies
|
|
333
|
+
else: # covers tensor, channel, token, and attn_head strategies
|
|
334
334
|
if do_quantize:
|
|
335
335
|
output = _quantize(
|
|
336
336
|
x=x,
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
import logging
|
|
17
|
-
from typing import Optional, Tuple
|
|
17
|
+
from typing import Optional, Tuple, Union
|
|
18
18
|
|
|
19
19
|
import torch
|
|
20
20
|
from compressed_tensors.quantization import (
|
|
@@ -152,7 +152,7 @@ def initialize_qparams(
|
|
|
152
152
|
module: Module,
|
|
153
153
|
base_name: str,
|
|
154
154
|
quantization_args: QuantizationArgs,
|
|
155
|
-
observed_shape: Tuple[int],
|
|
155
|
+
observed_shape: Tuple[Union[int, None]],
|
|
156
156
|
observed_dtype: torch.dtype,
|
|
157
157
|
force_zero_point: bool = True,
|
|
158
158
|
):
|
|
@@ -234,6 +234,13 @@ def initialize_qparams(
|
|
|
234
234
|
num_cols = strategy_cdiv(observed_shape[-1], block_structure[-1], strategy)
|
|
235
235
|
expected_shape = (num_rows, num_cols)
|
|
236
236
|
|
|
237
|
+
elif strategy == QuantizationStrategy.ATTN_HEAD:
|
|
238
|
+
# (batch_size, num_attention_heads, seq_len, head_dim)
|
|
239
|
+
if len(observed_shape) < 3:
|
|
240
|
+
raise ValueError("Attention quant requires at least 3 observed dimensions")
|
|
241
|
+
|
|
242
|
+
expected_shape = (observed_shape[-3], 1, 1)
|
|
243
|
+
|
|
237
244
|
else:
|
|
238
245
|
assert False, f"Unknown strategy {strategy}"
|
|
239
246
|
|
|
@@ -101,6 +101,7 @@ class QuantizationStrategy(str, Enum):
|
|
|
101
101
|
BLOCK = "block"
|
|
102
102
|
TOKEN = "token"
|
|
103
103
|
TENSOR_GROUP = "tensor_group"
|
|
104
|
+
ATTN_HEAD = "attn_head"
|
|
104
105
|
|
|
105
106
|
|
|
106
107
|
class DynamicType(str, Enum):
|
|
@@ -259,6 +260,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
|
|
|
259
260
|
# extract user-passed values from dictionary
|
|
260
261
|
strategy = model.strategy
|
|
261
262
|
group_size = model.group_size
|
|
263
|
+
block_structure = model.block_structure
|
|
262
264
|
actorder = model.actorder
|
|
263
265
|
dynamic = model.dynamic
|
|
264
266
|
observer = model.observer
|
|
@@ -277,7 +279,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
|
|
|
277
279
|
"strategy='group' and group_size = -1 for 'channel'"
|
|
278
280
|
)
|
|
279
281
|
|
|
280
|
-
# validate strategy
|
|
282
|
+
# validate group strategy
|
|
281
283
|
if strategy == QuantizationStrategy.GROUP:
|
|
282
284
|
if group_size is None or group_size <= 0:
|
|
283
285
|
raise ValueError(
|
|
@@ -292,6 +294,14 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
|
|
|
292
294
|
):
|
|
293
295
|
raise ValueError("group_size requires strategy to be set to 'group'")
|
|
294
296
|
|
|
297
|
+
# validate block strategy
|
|
298
|
+
has_block_strategy = strategy == QuantizationStrategy.BLOCK
|
|
299
|
+
has_block_structure = block_structure is not None
|
|
300
|
+
if has_block_strategy and not has_block_structure:
|
|
301
|
+
raise ValueError(f"Block strategy requires block structure\n{model}")
|
|
302
|
+
if has_block_structure and not has_block_strategy:
|
|
303
|
+
raise ValueError(f"Block structure requires block strategy\n{model}")
|
|
304
|
+
|
|
295
305
|
# validate activation ordering and strategy
|
|
296
306
|
if actorder is not None and strategy != QuantizationStrategy.GROUP:
|
|
297
307
|
raise ValueError(
|
compressed_tensors/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: compressed-tensors
|
|
3
|
-
Version: 0.12.3a20251007
|
|
3
|
+
Version: 0.12.3a20251009
|
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
|
6
6
|
Author: Neuralmagic, Inc.
|
{compressed_tensors-0.12.3a20251007.dist-info → compressed_tensors-0.12.3a20251009.dist-info}/RECORD
RENAMED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
compressed_tensors/__init__.py,sha256=SRqNYFVvxAaLa4SImhoiIBKfoOSj7EUdx0CxXjGC2PA,884
|
|
2
2
|
compressed_tensors/base.py,sha256=-gxWvDF4LCkyeDP8YlGzvBBKxo4Dk9h4NINPD61drFU,921
|
|
3
3
|
compressed_tensors/logger.py,sha256=sTm1Od1cV0aDxBm3YN-PPvsOATxY_2tBV62TQE4HiPw,4032
|
|
4
|
-
compressed_tensors/version.py,sha256=
|
|
4
|
+
compressed_tensors/version.py,sha256=p1gc603nDCOmpKHv3ByvOa_-mIzcmf3lWif35Bc9Lo8,523
|
|
5
5
|
compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
|
|
6
6
|
compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
|
|
7
7
|
compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
|
|
@@ -28,16 +28,16 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
|
|
|
28
28
|
compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
|
|
29
29
|
compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn43Mn4CoV-q4JlTJ_o,4052
|
|
30
30
|
compressed_tensors/quantization/__init__.py,sha256=ifNRE2rJNILOWKA3jkPBGwXEXXvaKkn4lRMcxaVlkW0,790
|
|
31
|
-
compressed_tensors/quantization/quant_args.py,sha256=
|
|
31
|
+
compressed_tensors/quantization/quant_args.py,sha256=MUDEEokFH2AWRhJHu-32_JiamMr5K8ifSuEWKbg2jfE,13431
|
|
32
32
|
compressed_tensors/quantization/quant_config.py,sha256=Y_OgLId65ajdfupXuOrKSAArrvKicMeA8DHdzRt3J6o,10687
|
|
33
33
|
compressed_tensors/quantization/quant_metadata.py,sha256=yudYWXRYYSqgRhoUA-RIu2LI14NFchOyPUUuz7bPqJE,1950
|
|
34
|
-
compressed_tensors/quantization/quant_scheme.py,sha256=
|
|
34
|
+
compressed_tensors/quantization/quant_scheme.py,sha256=ge_YQxeFRPdcZyfbdbLv2emtxCgkY1cd4nLmxsUDJ8c,9721
|
|
35
35
|
compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
|
|
36
36
|
compressed_tensors/quantization/lifecycle/apply.py,sha256=1zRc7tQbE5OAVJ5VRgU9FZPnMiusef84HluTORSYC2I,13108
|
|
37
37
|
compressed_tensors/quantization/lifecycle/compressed.py,sha256=_gTH0CnLe8MxkTY1hrCCeSYAMzuvIwoCTT4hxW1TPk4,2354
|
|
38
|
-
compressed_tensors/quantization/lifecycle/forward.py,sha256=
|
|
38
|
+
compressed_tensors/quantization/lifecycle/forward.py,sha256=vVh9JiF2hd9l6B7Wa1zFfYreM0dP3gKX4XghYbV-vEo,17562
|
|
39
39
|
compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
|
|
40
|
-
compressed_tensors/quantization/lifecycle/initialize.py,sha256=
|
|
40
|
+
compressed_tensors/quantization/lifecycle/initialize.py,sha256=JMpcsAmLrOMPb3PC4asyo7lce3BkLd8H6iVdnI72K2Q,10573
|
|
41
41
|
compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
|
|
42
42
|
compressed_tensors/quantization/utils/helpers.py,sha256=BA-twfAKk-HMBr_OZHZnSQN7F1a0l5zB1kJhml6j-cI,17146
|
|
43
43
|
compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
|
|
@@ -65,8 +65,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
|
|
|
65
65
|
compressed_tensors/utils/safetensors_load.py,sha256=Vql34aCTDHwmTZXJHzCyBISJo7iA7EQ78LdTlMjdpZo,12023
|
|
66
66
|
compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
|
|
67
67
|
compressed_tensors/utils/type.py,sha256=bNwoo_FWlvLuDpYAGGzZJITRg0JA_Ngk9LGPo-kvjeU,2554
|
|
68
|
-
compressed_tensors-0.12.3a20251007.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
69
|
-
compressed_tensors-0.12.
|
|
70
|
-
compressed_tensors-0.12.3a20251007.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
71
|
-
compressed_tensors-0.12.3a20251007.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
|
72
|
-
compressed_tensors-0.12.3a20251007.dist-info/RECORD,,
|
|
68
|
+
compressed_tensors-0.12.3a20251009.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
69
|
+
compressed_tensors-0.12.3a20251009.dist-info/METADATA,sha256=H2QGZBR6fGYaw7TSQY8VbPgJBffvl_5qkFl6UTLL5Nk,7027
|
|
70
|
+
compressed_tensors-0.12.3a20251009.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
71
|
+
compressed_tensors-0.12.3a20251009.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
|
72
|
+
compressed_tensors-0.12.3a20251009.dist-info/RECORD,,
|
{compressed_tensors-0.12.3a20251007.dist-info → compressed_tensors-0.12.3a20251009.dist-info}/WHEEL
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|