compressed-tensors-nightly 0.3.3.20240523__py3-none-any.whl → 0.3.3.20240525__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/compressed_tensors/compressors/int_quantized.py
+++ b/compressed_tensors/compressors/int_quantized.py
@@ -78,7 +78,11 @@ class IntQuantizationCompressor(Compressor):
                         args=quant_args,
                         dtype=torch.int8,
                     )
-
+            elif name.endswith("zero_point"):
+                if torch.all(value == 0):
+                    # all zero_points are 0, no need to include in
+                    # compressed state_dict
+                    continue
             compressed_dict[name] = value.to("cpu")
 
         return compressed_dict
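
The compress-side change above drops zero points that are entirely zero (the common case for symmetric quantization) from the compressed state dict. A minimal sketch of the filtering idea using only plain torch; the toy state dict and tensor names are illustrative, not the library's API:

import torch

state_dict = {
    "layer.weight": torch.randint(-8, 8, (4, 4), dtype=torch.int8),
    "layer.weight_scale": torch.tensor([0.1]),
    # symmetric quantization typically yields an all-zero zero point
    "layer.weight_zero_point": torch.zeros(1, dtype=torch.int8),
}

compressed = {}
for name, value in state_dict.items():
    if name.endswith("zero_point") and torch.all(value == 0):
        # all-zero zero points carry no information, so skip them
        continue
    compressed[name] = value.to("cpu")

assert "layer.weight_zero_point" not in compressed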
@@ -106,10 +110,16 @@ class IntQuantizationCompressor(Compressor):
                 with safe_open(safe_path, framework="pt", device=device) as f:
                     weight_data[param_name] = f.get_tensor(full_name)
 
-            if len(weight_data) == len(self.COMPRESSION_PARAM_NAMES):
+            if "weight_scale" in weight_data:
+                zero_point = weight_data.get("weight_zero_point", None)
+                scale = weight_data["weight_scale"]
+                if zero_point is None:
+                    # zero_point assumed to be 0 if not included in state_dict
+                    zero_point = torch.zeros_like(scale)
+
                 decompressed = dequantize(
                     x_q=weight_data["weight"],
-                    scale=weight_data["weight_scale"],
-                    zero_point=weight_data["weight_zero_point"],
+                    scale=scale,
+                    zero_point=zero_point,
                 )
                 yield merge_names(weight_name, "weight"), decompressed
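
On decompression the zero point is now optional: when the compressed state dict omits it, a zero tensor shaped like the scale is assumed. A hedged sketch of that fallback, with a simplified elementwise stand-in for the library's dequantize call:

import torch

weight_data = {
    "weight": torch.randint(-8, 8, (4, 4), dtype=torch.int8),
    "weight_scale": torch.tensor([0.05]),
    # no "weight_zero_point" entry, as produced by the new compressors
}

if "weight_scale" in weight_data:
    scale = weight_data["weight_scale"]
    zero_point = weight_data.get("weight_zero_point")
    if zero_point is None:
        # assumed to be zero when it was skipped at compression time
        zero_point = torch.zeros_like(scale)
    # simplified stand-in for dequantize(x_q=..., scale=..., zero_point=...)
    decompressed = (weight_data["weight"] - zero_point) * scale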
--- a/compressed_tensors/compressors/model_compressor.py
+++ b/compressed_tensors/compressors/model_compressor.py
@@ -249,8 +249,9 @@ class ModelCompressor:
         for name, data in tqdm(dense_weight_generator, desc="Decompressing model"):
             # loading the decompressed weights into the model
             model_device = operator.attrgetter(name)(model).device
-            data_new = Parameter(data.to(model_device))
             data_old = operator.attrgetter(name)(model)
+            data_dtype = data_old.dtype
+            data_new = Parameter(data.to(model_device).to(data_dtype))
             data_old.data = data_new.data
 
 
--- a/compressed_tensors/compressors/pack_quantized.py
+++ b/compressed_tensors/compressors/pack_quantized.py
@@ -87,7 +87,11 @@ class PackedQuantizationCompressor(Compressor):
                     )
                 value = pack_4bit_ints(value.cpu())
                 compressed_dict[merge_names(prefix, "weight_shape")] = shape
-
+            elif name.endswith("zero_point"):
+                if torch.all(value == 0):
+                    # all zero_points are 0, no need to include in
+                    # compressed state_dict
+                    continue
             compressed_dict[name] = value.to("cpu")
 
         return compressed_dict
@@ -115,14 +119,20 @@ class PackedQuantizationCompressor(Compressor):
                 with safe_open(safe_path, framework="pt", device=device) as f:
                     weight_data[param_name] = f.get_tensor(full_name)
 
-            if len(weight_data) == len(self.COMPRESSION_PARAM_NAMES):
+            if "weight_scale" in weight_data:
+                zero_point = weight_data.get("weight_zero_point", None)
+                scale = weight_data["weight_scale"]
+                if zero_point is None:
+                    # zero_point assumed to be 0 if not included in state_dict
+                    zero_point = torch.zeros_like(scale)
+
                 weight = weight_data["weight"]
                 original_shape = torch.Size(weight_data["weight_shape"])
                 unpacked = unpack_4bit_ints(weight, original_shape)
                 decompressed = dequantize(
                     x_q=unpacked,
-                    scale=weight_data["weight_scale"],
-                    zero_point=weight_data["weight_zero_point"],
+                    scale=scale,
+                    zero_point=zero_point,
                 )
                 yield merge_names(weight_name, "weight"), decompressed
 
--- a/compressed_tensors/quantization/lifecycle/apply.py
+++ b/compressed_tensors/quantization/lifecycle/apply.py
@@ -16,6 +16,7 @@ import re
 from collections import OrderedDict
 from typing import Dict, Iterable, Optional
 
+import torch
 from compressed_tensors.quantization.lifecycle.calibration import (
     set_module_for_calibration,
 )
@@ -193,7 +194,13 @@ def _load_quant_args_from_state_dict(
     zp_name = f"{base_name}_zero_point"
     device = next(module.parameters()).device
 
-    scale = getattr(module, scale_name)
-    zp = getattr(module, zp_name)
-    scale.data = state_dict[f"{module_name}.{scale_name}"].to(device)
-    zp.data = state_dict[f"{module_name}.{zp_name}"].to(device)
+    scale = getattr(module, scale_name, None)
+    zp = getattr(module, zp_name, None)
+    if scale is not None:
+        scale.data = state_dict[f"{module_name}.{scale_name}"].to(device)
+    if zp is not None:
+        zp_from_state = state_dict.get(f"{module_name}.{zp_name}", None)
+        if zp_from_state is not None:  # load the non-zero zero points
+            zp.data = state_dict[f"{module_name}.{zp_name}"].to(device)
+        else:  # fill with zeros matching scale shape
+            zp.data = torch.zeros_like(scale, dtype=torch.int8).to(device)
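
_load_quant_args_from_state_dict now tolerates checkpoints that omitted the all-zero zero point: the scale is loaded as before, and a missing zero point is filled with int8 zeros shaped like the scale. A hedged, self-contained sketch of that loading pattern; the module, module_name, and base_name below are invented for illustration:

import torch
from torch.nn import Linear, Parameter

module, module_name, base_name = Linear(4, 4), "model.layers.0.fc", "weight"
module.register_parameter(
    f"{base_name}_scale", Parameter(torch.empty(0), requires_grad=False)
)
module.register_parameter(
    f"{base_name}_zero_point",
    Parameter(torch.empty(0, dtype=torch.int8), requires_grad=False),
)

# a compressed checkpoint that dropped the all-zero zero point
state_dict = {f"{module_name}.{base_name}_scale": torch.tensor([0.02])}

scale = getattr(module, f"{base_name}_scale")
zp = getattr(module, f"{base_name}_zero_point")
scale.data = state_dict[f"{module_name}.{base_name}_scale"]
zp_from_state = state_dict.get(f"{module_name}.{base_name}_zero_point")
zp.data = (
    zp_from_state
    if zp_from_state is not None
    else torch.zeros_like(scale, dtype=torch.int8)  # fill missing zero points
)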
--- a/compressed_tensors/quantization/lifecycle/initialize.py
+++ b/compressed_tensors/quantization/lifecycle/initialize.py
@@ -90,7 +90,9 @@ def _initialize_scale_zero_point_observer(
     device = next(module.parameters()).device
 
     # initializes empty scale and zero point parameters for the module
-    init_scale = Parameter(torch.empty(0, device=device), requires_grad=False)
+    init_scale = Parameter(
+        torch.empty(0, dtype=torch.float16, device=device), requires_grad=False
+    )
     module.register_parameter(f"{base_name}_scale", init_scale)
 
     init_zero_point = Parameter(
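
The placeholder scale is now created in half precision instead of the default float32; calibration or a loaded state dict is expected to overwrite its data later. A two-line sketch of the new initialization:

import torch
from torch.nn import Parameter

# empty placeholder now carries float16 until real scales are written in
init_scale = Parameter(torch.empty(0, dtype=torch.float16), requires_grad=False)
assert init_scale.dtype == torch.float16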
--- a/compressed_tensors/quantization/quant_scheme.py
+++ b/compressed_tensors/quantization/quant_scheme.py
@@ -22,6 +22,7 @@ from pydantic import BaseModel
 __all__ = [
     "QuantizationScheme",
     "preset_name_to_scheme",
+    "is_preset_scheme",
 ]
 
 
@@ -98,6 +99,14 @@ def preset_name_to_scheme(name: str, targets: List[str]) -> QuantizationScheme:
     )
 
 
+def is_preset_scheme(name: str) -> bool:
+    """
+    :param name: preset quantization settings name
+    :return: True if the name is a preset scheme name
+    """
+    return name.upper() in PRESET_SCHEMES
+
+
 W8A8 = dict(
     weights=QuantizationArgs(), input_activations=QuantizationArgs(symmetric=False)
 )
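
The new is_preset_scheme helper checks a name against PRESET_SCHEMES case-insensitively (via name.upper()) before it is resolved. A hedged usage sketch, assuming the helper is re-exported from compressed_tensors.quantization alongside the other quant_scheme symbols and that the W8A8 preset shown above is registered:

from compressed_tensors.quantization import is_preset_scheme, preset_name_to_scheme

if is_preset_scheme("w8a8"):  # matched case-insensitively against PRESET_SCHEMES
    scheme = preset_name_to_scheme("w8a8", targets=["Linear"])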
--- a/compressed_tensors_nightly-0.3.3.20240523.dist-info/METADATA
+++ b/compressed_tensors_nightly-0.3.3.20240525.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.3.3.20240523
+Version: 0.3.3.20240525
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
--- a/compressed_tensors_nightly-0.3.3.20240523.dist-info/RECORD
+++ b/compressed_tensors_nightly-0.3.3.20240525.dist-info/RECORD
@@ -5,9 +5,9 @@ compressed_tensors/compressors/__init__.py,sha256=3yyoNICHll3F4HS6Yu-cgNZpDhfuob
 compressed_tensors/compressors/base.py,sha256=LWEgbpgTxzmoqQ7Xhq2OQszUgWoDtFuGCiV1Y8nlBGw,2134
 compressed_tensors/compressors/dense.py,sha256=G_XHbvuENyupIKlXSITOQgvPkNkcMEOLcLWQr70V9EE,1257
 compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
-compressed_tensors/compressors/int_quantized.py,sha256=I0FqnjtwCiJvQxi9YyfA8aBeaR5csqtq1bOrVvRqJ1I,4744
-compressed_tensors/compressors/model_compressor.py,sha256=teohd0xTbcIDIuEfZrH-bZyAzHn2UZH2KJXT-7Gk3sw,10426
-compressed_tensors/compressors/pack_quantized.py,sha256=K03l8kFqejpapgcMU5hMm1-JIX1cUVvU-VybGSN6RWA,7885
+compressed_tensors/compressors/int_quantized.py,sha256=bPi62n1MjySOeBat_yWMyc_LvDNDeSihu1gxzo_YrNY,5203
+compressed_tensors/compressors/model_compressor.py,sha256=gHD2VMbXkXaZiJu3ibOaWiYb4oJDz2hxX03wDuu1yhI,10481
+compressed_tensors/compressors/pack_quantized.py,sha256=VFaHQU-f1QuXuTyOtn19p015KHveXe-NeNJ97ATuOR8,8344
 compressed_tensors/compressors/sparse_bitmask.py,sha256=H9oZSTYI1oRCzAMbd4zThUnZd1h2rfs8DmA3tPcvuNE,8637
 compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
 compressed_tensors/config/base.py,sha256=grf5tDaLep8i2-W_p7H-fW9DOGXDi4Zz7su7zjs1Qqc,1454
@@ -16,14 +16,14 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=A6b2V8lhsM8Ho8RjlPBQdxRUDNWhqq-ie5E3RR2_GNg,4360
 compressed_tensors/quantization/quant_config.py,sha256=3BcbQ8-Ah7LbTDSSkRu29Yiid33xo0C1ki6NVhxLiaY,8727
-compressed_tensors/quantization/quant_scheme.py,sha256=QwZsCo8QR9ISB_d58WhIngk2gsMM8ooX-LcRPR-JDRw,3341
+compressed_tensors/quantization/quant_scheme.py,sha256=-hAK1-C67_wJl10eaVLUvbslPBTV04WyzL_J-u9f1ck,3571
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcgTVX3axnS2xV6rc5YvdzK7fSg,798
-compressed_tensors/quantization/lifecycle/apply.py,sha256=whKfNGC_EZm0BC23AP7qWfjRe5OJVWmcZOpX7lryZZc,7625
+compressed_tensors/quantization/lifecycle/apply.py,sha256=yLTDT1zkJp1Nti-aKZGOMW8-TELanF8dXiqDvAkVUQo,7984
 compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
 compressed_tensors/quantization/lifecycle/forward.py,sha256=x9JaIX3TK7cb_-0aCOTTYtA4At9l6v5YOY_70GzIeFU,10520
 compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=U6g9qifSF6pagQZQZEwd-rwWC6uQ_dZXn1wg6nr1Abg,3697
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=pFfcu-pxdQKzlnn-18-RlkEktt2yDi6woNXJsiv1A2c,3732
 compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
 compressed_tensors/quantization/observers/base.py,sha256=kywLVwycFvGxuZMU2cy8-KYyNrZCHkinN6YzCL7boLE,5121
 compressed_tensors/quantization/observers/helpers.py,sha256=JwALNfBYY9Eyl8Q180t0lGh8szumQj8TygfNl-isErs,2166
@@ -36,8 +36,8 @@ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85S
 compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
 compressed_tensors/utils/helpers.py,sha256=h0jfl9drs5FAx40tCHRcVtJqXixB5hT5yq_IG2aY_-w,1735
 compressed_tensors/utils/safetensors_load.py,sha256=wo9UirGrGlenBqZeqotvpCT7D5MEdjCo2J3HeRaIFoU,8502
-compressed_tensors_nightly-0.3.3.20240523.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors_nightly-0.3.3.20240523.dist-info/METADATA,sha256=_c67GXEm0cMZ_AGWhcLqsMZ3hSbFB4KdQ3lL9Dg7M8M,5633
-compressed_tensors_nightly-0.3.3.20240523.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-compressed_tensors_nightly-0.3.3.20240523.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors_nightly-0.3.3.20240523.dist-info/RECORD,,
+compressed_tensors_nightly-0.3.3.20240525.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.3.3.20240525.dist-info/METADATA,sha256=px9rAQu0vrnWEBFzu_I7Rfjq6AlXs8K6lSwPvT5SmrM,5633
+compressed_tensors_nightly-0.3.3.20240525.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+compressed_tensors_nightly-0.3.3.20240525.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.3.3.20240525.dist-info/RECORD,,