compressed-tensors-nightly 0.3.3.20240530__py3-none-any.whl → 0.3.3.20240601__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/int_quantized.py +3 -2
- compressed_tensors/compressors/pack_quantized.py +3 -2
- {compressed_tensors_nightly-0.3.3.20240530.dist-info → compressed_tensors_nightly-0.3.3.20240601.dist-info}/METADATA +3 -3
- {compressed_tensors_nightly-0.3.3.20240530.dist-info → compressed_tensors_nightly-0.3.3.20240601.dist-info}/RECORD +7 -7
- {compressed_tensors_nightly-0.3.3.20240530.dist-info → compressed_tensors_nightly-0.3.3.20240601.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.3.3.20240530.dist-info → compressed_tensors_nightly-0.3.3.20240601.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.3.3.20240530.dist-info → compressed_tensors_nightly-0.3.3.20240601.dist-info}/top_level.txt +0 -0
@@ -57,13 +57,14 @@ class IntQuantizationCompressor(Compressor):
|
|
57
57
|
:return: compressed state dict
|
58
58
|
"""
|
59
59
|
compressed_dict = {}
|
60
|
+
weight_suffix = ".weight"
|
60
61
|
_LOGGER.debug(
|
61
62
|
f"Compressing model with {len(model_state)} parameterized layers..."
|
62
63
|
)
|
63
64
|
|
64
65
|
for name, value in tqdm(model_state.items(), desc="Compressing model"):
|
65
|
-
if name.endswith(".weight"):
|
66
|
-
prefix = name.removesuffix(".weight")
|
66
|
+
if name.endswith(weight_suffix):
|
67
|
+
prefix = name[: -(len(weight_suffix))]
|
67
68
|
scale = model_state.get(merge_names(prefix, "weight_scale"), None)
|
68
69
|
zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
|
69
70
|
if scale is not None and zp is not None:
|
@@ -62,13 +62,14 @@ class PackedQuantizationCompressor(Compressor):
|
|
62
62
|
:return: compressed state dict
|
63
63
|
"""
|
64
64
|
compressed_dict = {}
|
65
|
+
weight_suffix = ".weight"
|
65
66
|
_LOGGER.debug(
|
66
67
|
f"Compressing model with {len(model_state)} parameterized layers..."
|
67
68
|
)
|
68
69
|
|
69
70
|
for name, value in tqdm(model_state.items(), desc="Compressing model"):
|
70
|
-
if name.endswith(".weight"):
|
71
|
-
prefix = name.removesuffix(".weight")
|
71
|
+
if name.endswith(weight_suffix):
|
72
|
+
prefix = name[: -(len(weight_suffix))]
|
72
73
|
scale = model_state.get(merge_names(prefix, "weight_scale"), None)
|
73
74
|
zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
|
74
75
|
shape = torch.tensor(value.shape)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: compressed-tensors-nightly
|
3
|
-
Version: 0.3.3.20240530
|
3
|
+
Version: 0.3.3.20240601
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
@@ -89,7 +89,7 @@ from compressed_tensors import save_compressed_model, load_compressed, BitmaskCo
|
|
89
89
|
from transformers import AutoModelForCausalLM
|
90
90
|
|
91
91
|
model_name = "neuralmagic/llama2.c-stories110M-pruned50"
|
92
|
-
model = AutoModelForCausalLM.from_pretrained(model_name)
|
92
|
+
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")
|
93
93
|
|
94
94
|
original_state_dict = model.state_dict()
|
95
95
|
|
@@ -111,7 +111,7 @@ We can use compressed-tensors to run basic post training quantization (PTQ) and
|
|
111
111
|
|
112
112
|
```python
|
113
113
|
model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
|
114
|
-
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cuda:0")
|
114
|
+
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cuda:0", torch_dtype="auto")
|
115
115
|
|
116
116
|
config = QuantizationConfig.parse_file("./examples/bit_packing/int4_config.json")
|
117
117
|
config.quantization_status = QuantizationStatus.CALIBRATION
|
@@ -5,9 +5,9 @@ compressed_tensors/compressors/__init__.py,sha256=3yyoNICHll3F4HS6Yu-cgNZpDhfuob
|
|
5
5
|
compressed_tensors/compressors/base.py,sha256=LWEgbpgTxzmoqQ7Xhq2OQszUgWoDtFuGCiV1Y8nlBGw,2134
|
6
6
|
compressed_tensors/compressors/dense.py,sha256=G_XHbvuENyupIKlXSITOQgvPkNkcMEOLcLWQr70V9EE,1257
|
7
7
|
compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
|
8
|
-
compressed_tensors/compressors/int_quantized.py,sha256=
|
8
|
+
compressed_tensors/compressors/int_quantized.py,sha256=Ct2vCK0yoPm6vkIFlzDMGQ7m14xT1GyURsSwH9DP770,5242
|
9
9
|
compressed_tensors/compressors/model_compressor.py,sha256=gHD2VMbXkXaZiJu3ibOaWiYb4oJDz2hxX03wDuu1yhI,10481
|
10
|
-
compressed_tensors/compressors/pack_quantized.py,sha256=
|
10
|
+
compressed_tensors/compressors/pack_quantized.py,sha256=VPiLlgJlDgARrn7YmiQoLqUfxErKBfj54epMYWRsF8k,8451
|
11
11
|
compressed_tensors/compressors/sparse_bitmask.py,sha256=H9oZSTYI1oRCzAMbd4zThUnZd1h2rfs8DmA3tPcvuNE,8637
|
12
12
|
compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
|
13
13
|
compressed_tensors/config/base.py,sha256=grf5tDaLep8i2-W_p7H-fW9DOGXDi4Zz7su7zjs1Qqc,1454
|
@@ -36,8 +36,8 @@ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85S
|
|
36
36
|
compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
|
37
37
|
compressed_tensors/utils/helpers.py,sha256=h0jfl9drs5FAx40tCHRcVtJqXixB5hT5yq_IG2aY_-w,1735
|
38
38
|
compressed_tensors/utils/safetensors_load.py,sha256=wo9UirGrGlenBqZeqotvpCT7D5MEdjCo2J3HeRaIFoU,8502
|
39
|
-
compressed_tensors_nightly-0.3.3.
|
40
|
-
compressed_tensors_nightly-0.3.3.
|
41
|
-
compressed_tensors_nightly-0.3.3.
|
42
|
-
compressed_tensors_nightly-0.3.3.
|
43
|
-
compressed_tensors_nightly-0.3.3.
|
39
|
+
compressed_tensors_nightly-0.3.3.20240601.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
40
|
+
compressed_tensors_nightly-0.3.3.20240601.dist-info/METADATA,sha256=myHAvn_PdIn9sInGNjfo8CwIObcM_GpTj74SqvCMZSU,5673
|
41
|
+
compressed_tensors_nightly-0.3.3.20240601.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
42
|
+
compressed_tensors_nightly-0.3.3.20240601.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
43
|
+
compressed_tensors_nightly-0.3.3.20240601.dist-info/RECORD,,
|
File without changes
|
File without changes
|