compressed-tensors-nightly 0.6.0.20240924__py3-none-any.whl → 0.6.0.20240925__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/__init__.py +1 -0
- compressed_tensors/compressors/model_compressor.py +2 -0
- compressed_tensors/quantization/quant_scheme.py +5 -4
- {compressed_tensors_nightly-0.6.0.20240924.dist-info → compressed_tensors_nightly-0.6.0.20240925.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.6.0.20240924.dist-info → compressed_tensors_nightly-0.6.0.20240925.dist-info}/RECORD +8 -8
- {compressed_tensors_nightly-0.6.0.20240924.dist-info → compressed_tensors_nightly-0.6.0.20240925.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.6.0.20240924.dist-info → compressed_tensors_nightly-0.6.0.20240925.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.6.0.20240924.dist-info → compressed_tensors_nightly-0.6.0.20240925.dist-info}/top_level.txt +0 -0
compressed_tensors/__init__.py
CHANGED
compressed_tensors/compressors/model_compressor.py
CHANGED
@@ -22,6 +22,7 @@ from typing import Any, Dict, Optional, Union
|
|
22
22
|
|
23
23
|
import torch
|
24
24
|
import transformers
|
25
|
+
import compressed_tensors
|
25
26
|
from compressed_tensors.base import (
|
26
27
|
COMPRESSION_CONFIG_NAME,
|
27
28
|
QUANTIZATION_CONFIG_NAME,
|
@@ -368,6 +369,7 @@ class ModelCompressor:
|
|
368
369
|
config_data[COMPRESSION_CONFIG_NAME][
|
369
370
|
SPARSITY_CONFIG_NAME
|
370
371
|
] = sparsity_config_data
|
372
|
+
config_data[COMPRESSION_CONFIG_NAME]["version"] = compressed_tensors.__version__
|
371
373
|
|
372
374
|
with open(config_file_path, "w") as config_file:
|
373
375
|
json.dump(config_data, config_file, indent=2, sort_keys=True)
|
compressed_tensors/quantization/quant_scheme.py
CHANGED
@@ -108,7 +108,7 @@ def is_preset_scheme(name: str) -> bool:
|
|
108
108
|
UNQUANTIZED = dict()
|
109
109
|
|
110
110
|
# 8 bit integer weights and 8 bit activations quantization
|
111
|
-
|
111
|
+
INT8_W8A8 = dict(
|
112
112
|
weights=QuantizationArgs(
|
113
113
|
num_bits=8,
|
114
114
|
type=QuantizationType.INT,
|
@@ -149,7 +149,7 @@ W4A16 = dict(
|
|
149
149
|
)
|
150
150
|
|
151
151
|
# 4 bit integer weights and 8 bit activations quantization
|
152
|
-
|
152
|
+
INT8_W4A8 = dict(
|
153
153
|
weights=QuantizationArgs(
|
154
154
|
num_bits=4,
|
155
155
|
type=QuantizationType.INT,
|
@@ -210,8 +210,9 @@ PRESET_SCHEMES = {
|
|
210
210
|
"W8A16": W8A16,
|
211
211
|
"W4A16": W4A16,
|
212
212
|
# Integer weight and activation schemes
|
213
|
-
"W8A8":
|
214
|
-
"
|
213
|
+
"W8A8": INT8_W8A8,
|
214
|
+
"INT8": INT8_W8A8, # alias for W8A8
|
215
|
+
"W4A8": INT8_W4A8,
|
215
216
|
# Float weight and activation schemes
|
216
217
|
"FP8": FP8,
|
217
218
|
"FP8_DYNAMIC": FP8_DYNAMIC,
|
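{compressed_tensors_nightly-0.6.0.20240924.dist-info → compressed_tensors_nightly-0.6.0.20240925.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@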
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: compressed-tensors-nightly
|
3
|
-
Version: 0.6.0.20240924
|
3
|
+
Version: 0.6.0.20240925
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
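{compressed_tensors_nightly-0.6.0.20240924.dist-info → compressed_tensors_nightly-0.6.0.20240925.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@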
|
|
1
|
-
compressed_tensors/__init__.py,sha256=
|
1
|
+
compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
|
2
2
|
compressed_tensors/base.py,sha256=Mq4mfVQcJhNpha-BXzpOfpmFIdl01o09BJE7D2oQ_00,796
|
3
3
|
compressed_tensors/version.py,sha256=83tBdwNu2sUhiLPvv6tRNh4Y7u70sZ1TFy3ydWctVL8,1586
|
4
4
|
compressed_tensors/compressors/__init__.py,sha256=wmX4VnkUTS63xBwK5-6w8FP78bNZpcdcqvf2KOEC5E4,1133
|
@@ -6,7 +6,7 @@ compressed_tensors/compressors/base.py,sha256=NfVkhq6PRiq2cvAXaUXLoqC_nVYWdSrkE1
|
|
6
6
|
compressed_tensors/compressors/dense.py,sha256=xcWECjcRY4INN6jC7vHx5wvUX3NmnKlxA9SVE1A6m2Q,1267
|
7
7
|
compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
|
8
8
|
compressed_tensors/compressors/marlin_24.py,sha256=e7fGUyZbjUpA5VUMCPxqcYPGNiwoDKupHJaXWCoVKRw,9410
|
9
|
-
compressed_tensors/compressors/model_compressor.py,sha256=
|
9
|
+
compressed_tensors/compressors/model_compressor.py,sha256=Wq-NbjtaVOEElDpcjEYun6QFvAIZee8ZAw_wbifuTDA,16793
|
10
10
|
compressed_tensors/compressors/naive_quantized.py,sha256=z3h3ca5xKCN69mahutxcbzdv-OysiaxaM8P-Qum6zUQ,4823
|
11
11
|
compressed_tensors/compressors/pack_quantized.py,sha256=27RVmJ2wg2dvCoawj407HSmKT3VPGJ6ujAMHlT26WlI,7571
|
12
12
|
compressed_tensors/compressors/sparse_bitmask.py,sha256=kiDwBlFV0sJGLcIdDYxIiuF64ccgwDfqq1hWRQThYDc,8647
|
@@ -19,7 +19,7 @@ compressed_tensors/linear/compressed_linear.py,sha256=G0gEFfxLAUsgRcnfSV-PKz1ZBN
|
|
19
19
|
compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
|
20
20
|
compressed_tensors/quantization/quant_args.py,sha256=CmyVtjJeHlqCW-7R5Z7tIw6lXUrzCX6Y9bwgmMxEudY,8069
|
21
21
|
compressed_tensors/quantization/quant_config.py,sha256=NpVu8YJ4Xw2pIQW_PGaNaml8kx1bUnxkvb0jBYWbKdE,9971
|
22
|
-
compressed_tensors/quantization/quant_scheme.py,sha256=
|
22
|
+
compressed_tensors/quantization/quant_scheme.py,sha256=2ITawuNf76E1CDYBWrfpMP8tyZFykzwU99-eD-WggsM,5930
|
23
23
|
compressed_tensors/quantization/lifecycle/__init__.py,sha256=MXE2E7GfIfRRfhrdGy2Og3AZOz5N59B0ZGFcsD89y6c,821
|
24
24
|
compressed_tensors/quantization/lifecycle/apply.py,sha256=uftWFunr_CpCZM_qWfo2O1USXKB2qSYD1pBJsO8BuCU,15285
|
25
25
|
compressed_tensors/quantization/lifecycle/calibration.py,sha256=PlS_EqCOPqJD3QKuLPXO9AOtDzXtQWvEBTynFv-FFVw,2698
|
@@ -45,8 +45,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
|
|
45
45
|
compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
|
46
46
|
compressed_tensors/utils/safetensors_load.py,sha256=m08ANVuTBxQdoa6LufDgcNJ7wCLDJolyZljB8VEybAU,8578
|
47
47
|
compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
|
48
|
-
compressed_tensors_nightly-0.6.0.
|
49
|
-
compressed_tensors_nightly-0.6.0.
|
50
|
-
compressed_tensors_nightly-0.6.0.
|
51
|
-
compressed_tensors_nightly-0.6.0.
|
52
|
-
compressed_tensors_nightly-0.6.0.
|
48
|
+
compressed_tensors_nightly-0.6.0.20240925.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
49
|
+
compressed_tensors_nightly-0.6.0.20240925.dist-info/METADATA,sha256=AHeC-ko08CtK8_xQUnuNlWNQIhmDcKzDpihAiMBHjR8,6799
|
50
|
+
compressed_tensors_nightly-0.6.0.20240925.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
51
|
+
compressed_tensors_nightly-0.6.0.20240925.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
52
|
+
compressed_tensors_nightly-0.6.0.20240925.dist-info/RECORD,,
|
File without changes
|
File without changes
|