compressed-tensors-nightly 0.3.3.20240523__py3-none-any.whl → 0.3.3.20240524__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/int_quantized.py +14 -4
- compressed_tensors/compressors/model_compressor.py +2 -1
- compressed_tensors/compressors/pack_quantized.py +14 -4
- compressed_tensors/quantization/lifecycle/apply.py +11 -4
- compressed_tensors/quantization/lifecycle/initialize.py +3 -1
- compressed_tensors/quantization/quant_scheme.py +9 -0
- {compressed_tensors_nightly-0.3.3.20240523.dist-info → compressed_tensors_nightly-0.3.3.20240524.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.3.3.20240523.dist-info → compressed_tensors_nightly-0.3.3.20240524.dist-info}/RECORD +11 -11
- {compressed_tensors_nightly-0.3.3.20240523.dist-info → compressed_tensors_nightly-0.3.3.20240524.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.3.3.20240523.dist-info → compressed_tensors_nightly-0.3.3.20240524.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.3.3.20240523.dist-info → compressed_tensors_nightly-0.3.3.20240524.dist-info}/top_level.txt +0 -0
@@ -78,7 +78,11 @@ class IntQuantizationCompressor(Compressor):
|
|
78
78
|
args=quant_args,
|
79
79
|
dtype=torch.int8,
|
80
80
|
)
|
81
|
-
|
81
|
+
elif name.endswith("zero_point"):
|
82
|
+
if torch.all(value == 0):
|
83
|
+
# all zero_points are 0, no need to include in
|
84
|
+
# compressed state_dict
|
85
|
+
continue
|
82
86
|
compressed_dict[name] = value.to("cpu")
|
83
87
|
|
84
88
|
return compressed_dict
|
@@ -106,10 +110,16 @@ class IntQuantizationCompressor(Compressor):
|
|
106
110
|
with safe_open(safe_path, framework="pt", device=device) as f:
|
107
111
|
weight_data[param_name] = f.get_tensor(full_name)
|
108
112
|
|
109
|
-
if
|
113
|
+
if "weight_scale" in weight_data:
|
114
|
+
zero_point = weight_data.get("weight_zero_point", None)
|
115
|
+
scale = weight_data["weight_scale"]
|
116
|
+
if zero_point is None:
|
117
|
+
# zero_point assumed to be 0 if not included in state_dict
|
118
|
+
zero_point = torch.zeros_like(scale)
|
119
|
+
|
110
120
|
decompressed = dequantize(
|
111
121
|
x_q=weight_data["weight"],
|
112
|
-
scale=
|
113
|
-
zero_point=
|
122
|
+
scale=scale,
|
123
|
+
zero_point=zero_point,
|
114
124
|
)
|
115
125
|
yield merge_names(weight_name, "weight"), decompressed
|
@@ -249,8 +249,9 @@ class ModelCompressor:
|
|
249
249
|
for name, data in tqdm(dense_weight_generator, desc="Decompressing model"):
|
250
250
|
# loading the decompressed weights into the model
|
251
251
|
model_device = operator.attrgetter(name)(model).device
|
252
|
-
data_new = Parameter(data.to(model_device))
|
253
252
|
data_old = operator.attrgetter(name)(model)
|
253
|
+
data_dtype = data_old.dtype
|
254
|
+
data_new = Parameter(data.to(model_device).to(data_dtype))
|
254
255
|
data_old.data = data_new.data
|
255
256
|
|
256
257
|
|
@@ -87,7 +87,11 @@ class PackedQuantizationCompressor(Compressor):
|
|
87
87
|
)
|
88
88
|
value = pack_4bit_ints(value.cpu())
|
89
89
|
compressed_dict[merge_names(prefix, "weight_shape")] = shape
|
90
|
-
|
90
|
+
elif name.endswith("zero_point"):
|
91
|
+
if torch.all(value == 0):
|
92
|
+
# all zero_points are 0, no need to include in
|
93
|
+
# compressed state_dict
|
94
|
+
continue
|
91
95
|
compressed_dict[name] = value.to("cpu")
|
92
96
|
|
93
97
|
return compressed_dict
|
@@ -115,14 +119,20 @@ class PackedQuantizationCompressor(Compressor):
|
|
115
119
|
with safe_open(safe_path, framework="pt", device=device) as f:
|
116
120
|
weight_data[param_name] = f.get_tensor(full_name)
|
117
121
|
|
118
|
-
if
|
122
|
+
if "weight_scale" in weight_data:
|
123
|
+
zero_point = weight_data.get("weight_zero_point", None)
|
124
|
+
scale = weight_data["weight_scale"]
|
125
|
+
if zero_point is None:
|
126
|
+
# zero_point assumed to be 0 if not included in state_dict
|
127
|
+
zero_point = torch.zeros_like(scale)
|
128
|
+
|
119
129
|
weight = weight_data["weight"]
|
120
130
|
original_shape = torch.Size(weight_data["weight_shape"])
|
121
131
|
unpacked = unpack_4bit_ints(weight, original_shape)
|
122
132
|
decompressed = dequantize(
|
123
133
|
x_q=unpacked,
|
124
|
-
scale=
|
125
|
-
zero_point=
|
134
|
+
scale=scale,
|
135
|
+
zero_point=zero_point,
|
126
136
|
)
|
127
137
|
yield merge_names(weight_name, "weight"), decompressed
|
128
138
|
|
@@ -16,6 +16,7 @@ import re
|
|
16
16
|
from collections import OrderedDict
|
17
17
|
from typing import Dict, Iterable, Optional
|
18
18
|
|
19
|
+
import torch
|
19
20
|
from compressed_tensors.quantization.lifecycle.calibration import (
|
20
21
|
set_module_for_calibration,
|
21
22
|
)
|
@@ -193,7 +194,13 @@ def _load_quant_args_from_state_dict(
|
|
193
194
|
zp_name = f"{base_name}_zero_point"
|
194
195
|
device = next(module.parameters()).device
|
195
196
|
|
196
|
-
scale = getattr(module, scale_name)
|
197
|
-
zp = getattr(module, zp_name)
|
198
|
-
scale
|
199
|
-
|
197
|
+
scale = getattr(module, scale_name, None)
|
198
|
+
zp = getattr(module, zp_name, None)
|
199
|
+
if scale is not None:
|
200
|
+
scale.data = state_dict[f"{module_name}.{scale_name}"].to(device)
|
201
|
+
if zp is not None:
|
202
|
+
zp_from_state = state_dict.get(f"{module_name}.{zp_name}", None)
|
203
|
+
if zp_from_state is not None: # load the non-zero zero points
|
204
|
+
zp.data = state_dict[f"{module_name}.{zp_name}"].to(device)
|
205
|
+
else: # fill with zeros matching scale shape
|
206
|
+
zp.data = torch.zeros_like(scale, dtype=torch.int8).to(device)
|
@@ -90,7 +90,9 @@ def _initialize_scale_zero_point_observer(
|
|
90
90
|
device = next(module.parameters()).device
|
91
91
|
|
92
92
|
# initializes empty scale and zero point parameters for the module
|
93
|
-
init_scale = Parameter(
|
93
|
+
init_scale = Parameter(
|
94
|
+
torch.empty(0, dtype=torch.float16, device=device), requires_grad=False
|
95
|
+
)
|
94
96
|
module.register_parameter(f"{base_name}_scale", init_scale)
|
95
97
|
|
96
98
|
init_zero_point = Parameter(
|
@@ -22,6 +22,7 @@ from pydantic import BaseModel
|
|
22
22
|
__all__ = [
|
23
23
|
"QuantizationScheme",
|
24
24
|
"preset_name_to_scheme",
|
25
|
+
"is_preset_scheme",
|
25
26
|
]
|
26
27
|
|
27
28
|
|
@@ -98,6 +99,14 @@ def preset_name_to_scheme(name: str, targets: List[str]) -> QuantizationScheme:
|
|
98
99
|
)
|
99
100
|
|
100
101
|
|
102
|
+
def is_preset_scheme(name: str) -> bool:
|
103
|
+
"""
|
104
|
+
:param name: preset quantization settings name
|
105
|
+
:return: True if the name is a preset scheme name
|
106
|
+
"""
|
107
|
+
return name.upper() in PRESET_SCHEMES
|
108
|
+
|
109
|
+
|
101
110
|
W8A8 = dict(
|
102
111
|
weights=QuantizationArgs(), input_activations=QuantizationArgs(symmetric=False)
|
103
112
|
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: compressed-tensors-nightly
|
3
|
-
Version: 0.3.3.
|
3
|
+
Version: 0.3.3.20240524
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
@@ -5,9 +5,9 @@ compressed_tensors/compressors/__init__.py,sha256=3yyoNICHll3F4HS6Yu-cgNZpDhfuob
|
|
5
5
|
compressed_tensors/compressors/base.py,sha256=LWEgbpgTxzmoqQ7Xhq2OQszUgWoDtFuGCiV1Y8nlBGw,2134
|
6
6
|
compressed_tensors/compressors/dense.py,sha256=G_XHbvuENyupIKlXSITOQgvPkNkcMEOLcLWQr70V9EE,1257
|
7
7
|
compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
|
8
|
-
compressed_tensors/compressors/int_quantized.py,sha256=
|
9
|
-
compressed_tensors/compressors/model_compressor.py,sha256=
|
10
|
-
compressed_tensors/compressors/pack_quantized.py,sha256=
|
8
|
+
compressed_tensors/compressors/int_quantized.py,sha256=bPi62n1MjySOeBat_yWMyc_LvDNDeSihu1gxzo_YrNY,5203
|
9
|
+
compressed_tensors/compressors/model_compressor.py,sha256=gHD2VMbXkXaZiJu3ibOaWiYb4oJDz2hxX03wDuu1yhI,10481
|
10
|
+
compressed_tensors/compressors/pack_quantized.py,sha256=VFaHQU-f1QuXuTyOtn19p015KHveXe-NeNJ97ATuOR8,8344
|
11
11
|
compressed_tensors/compressors/sparse_bitmask.py,sha256=H9oZSTYI1oRCzAMbd4zThUnZd1h2rfs8DmA3tPcvuNE,8637
|
12
12
|
compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
|
13
13
|
compressed_tensors/config/base.py,sha256=grf5tDaLep8i2-W_p7H-fW9DOGXDi4Zz7su7zjs1Qqc,1454
|
@@ -16,14 +16,14 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
|
|
16
16
|
compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
|
17
17
|
compressed_tensors/quantization/quant_args.py,sha256=A6b2V8lhsM8Ho8RjlPBQdxRUDNWhqq-ie5E3RR2_GNg,4360
|
18
18
|
compressed_tensors/quantization/quant_config.py,sha256=3BcbQ8-Ah7LbTDSSkRu29Yiid33xo0C1ki6NVhxLiaY,8727
|
19
|
-
compressed_tensors/quantization/quant_scheme.py,sha256
|
19
|
+
compressed_tensors/quantization/quant_scheme.py,sha256=-hAK1-C67_wJl10eaVLUvbslPBTV04WyzL_J-u9f1ck,3571
|
20
20
|
compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcgTVX3axnS2xV6rc5YvdzK7fSg,798
|
21
|
-
compressed_tensors/quantization/lifecycle/apply.py,sha256=
|
21
|
+
compressed_tensors/quantization/lifecycle/apply.py,sha256=yLTDT1zkJp1Nti-aKZGOMW8-TELanF8dXiqDvAkVUQo,7984
|
22
22
|
compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
|
23
23
|
compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
|
24
24
|
compressed_tensors/quantization/lifecycle/forward.py,sha256=x9JaIX3TK7cb_-0aCOTTYtA4At9l6v5YOY_70GzIeFU,10520
|
25
25
|
compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
|
26
|
-
compressed_tensors/quantization/lifecycle/initialize.py,sha256=
|
26
|
+
compressed_tensors/quantization/lifecycle/initialize.py,sha256=pFfcu-pxdQKzlnn-18-RlkEktt2yDi6woNXJsiv1A2c,3732
|
27
27
|
compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
|
28
28
|
compressed_tensors/quantization/observers/base.py,sha256=kywLVwycFvGxuZMU2cy8-KYyNrZCHkinN6YzCL7boLE,5121
|
29
29
|
compressed_tensors/quantization/observers/helpers.py,sha256=JwALNfBYY9Eyl8Q180t0lGh8szumQj8TygfNl-isErs,2166
|
@@ -36,8 +36,8 @@ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85S
|
|
36
36
|
compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
|
37
37
|
compressed_tensors/utils/helpers.py,sha256=h0jfl9drs5FAx40tCHRcVtJqXixB5hT5yq_IG2aY_-w,1735
|
38
38
|
compressed_tensors/utils/safetensors_load.py,sha256=wo9UirGrGlenBqZeqotvpCT7D5MEdjCo2J3HeRaIFoU,8502
|
39
|
-
compressed_tensors_nightly-0.3.3.
|
40
|
-
compressed_tensors_nightly-0.3.3.
|
41
|
-
compressed_tensors_nightly-0.3.3.
|
42
|
-
compressed_tensors_nightly-0.3.3.
|
43
|
-
compressed_tensors_nightly-0.3.3.
|
39
|
+
compressed_tensors_nightly-0.3.3.20240524.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
40
|
+
compressed_tensors_nightly-0.3.3.20240524.dist-info/METADATA,sha256=UKjwrUdq2hJGGcA2_ZGO0us811fMjVun9scVSPVXxTI,5633
|
41
|
+
compressed_tensors_nightly-0.3.3.20240524.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
42
|
+
compressed_tensors_nightly-0.3.3.20240524.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
43
|
+
compressed_tensors_nightly-0.3.3.20240524.dist-info/RECORD,,
|
File without changes
|
File without changes
|