compressed-tensors-nightly 0.5.0.20240908__py3-none-any.whl → 0.5.0.20240910__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/base.py +2 -2
- compressed_tensors/quantization/lifecycle/compressed.py +2 -0
- compressed_tensors/quantization/quant_args.py +6 -0
- {compressed_tensors_nightly-0.5.0.20240908.dist-info → compressed_tensors_nightly-0.5.0.20240910.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.5.0.20240908.dist-info → compressed_tensors_nightly-0.5.0.20240910.dist-info}/RECORD +8 -8
- {compressed_tensors_nightly-0.5.0.20240908.dist-info → compressed_tensors_nightly-0.5.0.20240910.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.5.0.20240908.dist-info → compressed_tensors_nightly-0.5.0.20240910.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.5.0.20240908.dist-info → compressed_tensors_nightly-0.5.0.20240910.dist-info}/top_level.txt +0 -0
@@ -125,8 +125,8 @@ class Compressor(RegistryMixin):
|
|
125
125
|
else:
|
126
126
|
compressed_dict[name] = value.to("cpu")
|
127
127
|
elif name.endswith("zero_point") and torch.all(value == 0):
|
128
|
-
|
129
|
-
|
128
|
+
continue
|
129
|
+
elif name.endswith("g_idx") and torch.any(value <= -1):
|
130
130
|
continue
|
131
131
|
else:
|
132
132
|
compressed_dict[name] = value.to("cpu")
|
@@ -49,6 +49,7 @@ def compress_quantized_weights(module: Module):
|
|
49
49
|
weight = getattr(module, "weight", None)
|
50
50
|
scale = getattr(module, "weight_scale", None)
|
51
51
|
zero_point = getattr(module, "weight_zero_point", None)
|
52
|
+
g_idx = getattr(module, "weight_g_idx", None)
|
52
53
|
|
53
54
|
if weight is None or scale is None:
|
54
55
|
# no weight, scale, or ZP, nothing to do
|
@@ -62,6 +63,7 @@ def compress_quantized_weights(module: Module):
|
|
62
63
|
x=weight,
|
63
64
|
scale=scale,
|
64
65
|
zero_point=zero_point,
|
66
|
+
g_idx=g_idx,
|
65
67
|
args=scheme.weights,
|
66
68
|
dtype=torch.int8,
|
67
69
|
)
|
@@ -187,6 +187,12 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
|
|
187
187
|
f"strategy {strategy} requires group_size to be "
|
188
188
|
"set to a positive value"
|
189
189
|
)
|
190
|
+
if (
|
191
|
+
group_size is not None
|
192
|
+
and group_size > 0
|
193
|
+
and strategy != QuantizationStrategy.GROUP
|
194
|
+
):
|
195
|
+
raise ValueError("group_size requires strategy to be set to 'group'")
|
190
196
|
|
191
197
|
# validate activation ordering and strategy
|
192
198
|
if actorder is not None and strategy != QuantizationStrategy.GROUP:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: compressed-tensors-nightly
|
3
|
-
Version: 0.5.0.20240908
|
3
|
+
Version: 0.5.0.20240910
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
@@ -2,7 +2,7 @@ compressed_tensors/__init__.py,sha256=SV1csvHUVCd8kHXz6UDZim1HZ_fAVG3vfk-j_4Bb6h
|
|
2
2
|
compressed_tensors/base.py,sha256=Mq4mfVQcJhNpha-BXzpOfpmFIdl01o09BJE7D2oQ_00,796
|
3
3
|
compressed_tensors/version.py,sha256=DdMT4o5D6_t26gTuvhF1Q9HPeXY6vV5g7XMprWuHLdI,1586
|
4
4
|
compressed_tensors/compressors/__init__.py,sha256=wmX4VnkUTS63xBwK5-6w8FP78bNZpcdcqvf2KOEC5E4,1133
|
5
|
-
compressed_tensors/compressors/base.py,sha256=
|
5
|
+
compressed_tensors/compressors/base.py,sha256=NfVkhq6PRiq2cvAXaUXLoqC_nVYWdSrkE12c9AXYSMo,9956
|
6
6
|
compressed_tensors/compressors/dense.py,sha256=xcWECjcRY4INN6jC7vHx5wvUX3NmnKlxA9SVE1A6m2Q,1267
|
7
7
|
compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
|
8
8
|
compressed_tensors/compressors/marlin_24.py,sha256=e7fGUyZbjUpA5VUMCPxqcYPGNiwoDKupHJaXWCoVKRw,9410
|
@@ -17,13 +17,13 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
|
|
17
17
|
compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
|
18
18
|
compressed_tensors/linear/compressed_linear.py,sha256=G0gEFfxLAUsgRcnfSV-PKz1ZBNTVokOauOoup7SE1mw,3210
|
19
19
|
compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
|
20
|
-
compressed_tensors/quantization/quant_args.py,sha256=
|
20
|
+
compressed_tensors/quantization/quant_args.py,sha256=CmyVtjJeHlqCW-7R5Z7tIw6lXUrzCX6Y9bwgmMxEudY,8069
|
21
21
|
compressed_tensors/quantization/quant_config.py,sha256=NpVu8YJ4Xw2pIQW_PGaNaml8kx1bUnxkvb0jBYWbKdE,9971
|
22
22
|
compressed_tensors/quantization/quant_scheme.py,sha256=VRvWweqwlhjYMrKf62fXKQTeoJGhjJa3tXnE-TuFdFA,6093
|
23
23
|
compressed_tensors/quantization/lifecycle/__init__.py,sha256=MXE2E7GfIfRRfhrdGy2Og3AZOz5N59B0ZGFcsD89y6c,821
|
24
24
|
compressed_tensors/quantization/lifecycle/apply.py,sha256=uftWFunr_CpCZM_qWfo2O1USXKB2qSYD1pBJsO8BuCU,15285
|
25
25
|
compressed_tensors/quantization/lifecycle/calibration.py,sha256=PlS_EqCOPqJD3QKuLPXO9AOtDzXtQWvEBTynFv-FFVw,2698
|
26
|
-
compressed_tensors/quantization/lifecycle/compressed.py,sha256=
|
26
|
+
compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
|
27
27
|
compressed_tensors/quantization/lifecycle/forward.py,sha256=PljD9pzATILEOiC3ZdHUTsfSbZdAa6iSIxWmvAHLG9I,13688
|
28
28
|
compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
|
29
29
|
compressed_tensors/quantization/lifecycle/helpers.py,sha256=TmLY_G5VP_Fg2Ywio_dxoHRTxOKZdT7_aG5S9WtD4zI,2424
|
@@ -45,8 +45,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
|
|
45
45
|
compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
|
46
46
|
compressed_tensors/utils/safetensors_load.py,sha256=m08ANVuTBxQdoa6LufDgcNJ7wCLDJolyZljB8VEybAU,8578
|
47
47
|
compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
|
48
|
-
compressed_tensors_nightly-0.5.0.20240908.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
49
|
-
compressed_tensors_nightly-0.5.0.20240908.dist-info/METADATA,sha256=
|
50
|
-
compressed_tensors_nightly-0.5.0.20240908.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
51
|
-
compressed_tensors_nightly-0.5.0.20240908.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
52
|
-
compressed_tensors_nightly-0.5.0.20240908.dist-info/RECORD,,
|
48
|
+
compressed_tensors_nightly-0.5.0.20240910.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
49
|
+
compressed_tensors_nightly-0.5.0.20240910.dist-info/METADATA,sha256=Apu9Dh6ncAK3McUXRhB2RPOVw-bYYKz2s-f82M4tb_Y,6799
|
50
|
+
compressed_tensors_nightly-0.5.0.20240910.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
51
|
+
compressed_tensors_nightly-0.5.0.20240910.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
52
|
+
compressed_tensors_nightly-0.5.0.20240910.dist-info/RECORD,,
|
File without changes
|
File without changes
|