compressed-tensors-nightly 0.5.0.20240808__py3-none-any.whl → 0.5.0.20240810__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- compressed_tensors/quantization/lifecycle/apply.py +13 -5
- compressed_tensors/quantization/lifecycle/calibration.py +4 -2
- {compressed_tensors_nightly-0.5.0.20240808.dist-info → compressed_tensors_nightly-0.5.0.20240810.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.5.0.20240808.dist-info → compressed_tensors_nightly-0.5.0.20240810.dist-info}/RECORD +7 -7
- {compressed_tensors_nightly-0.5.0.20240808.dist-info → compressed_tensors_nightly-0.5.0.20240810.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.5.0.20240808.dist-info → compressed_tensors_nightly-0.5.0.20240810.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.5.0.20240808.dist-info → compressed_tensors_nightly-0.5.0.20240810.dist-info}/top_level.txt +0 -0
compressed_tensors/quantization/lifecycle/apply.py

@@ -14,7 +14,7 @@
 
 import logging
 import re
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 from copy import deepcopy
 from typing import Dict, Iterable, List, Optional
 from typing import OrderedDict as OrderedDictType
@@ -125,13 +125,14 @@ def apply_quantization_config(model: Module, config: QuantizationConfig) -> Dict
             target_to_scheme[target] = scheme
 
     # list of submodules to ignore
-    ignored_submodules = []
+    ignored_submodules = defaultdict(list)
     # mark appropriate layers for quantization by setting their quantization schemes
     for name, submodule in iter_named_leaf_modules(model):
         # potentially fix module name to remove FSDP wrapper prefix
         name = fix_fsdp_module_name(name)
-        if find_name_or_class_matches(name, submodule, config.ignore):
-            ignored_submodules.append(name)
+        if matches := find_name_or_class_matches(name, submodule, config.ignore):
+            for match in matches:
+                ignored_submodules[match].append(name)
             continue  # layer matches ignore list, continue
         targets = find_name_or_class_matches(name, submodule, target_to_scheme)
         if targets:
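The ignore bookkeeping changes shape here: a flat list of ignored module names becomes a defaultdict(list) keyed by the matching ignore entry, and the walrus operator captures the matches returned by find_name_or_class_matches so each entry records exactly which submodules it matched. A minimal, hedged sketch of this grouping pattern: find_matches below is a hypothetical stand-in for find_name_or_class_matches, and the "re:" prefix mirrors the config convention for regex targets.

    import re
    from collections import defaultdict

    def find_matches(name, patterns):
        # hypothetical stand-in: return every ignore entry that matches `name`
        return [
            p for p in patterns
            if (p.startswith("re:") and re.match(p[3:], name)) or p == name
        ]

    ignore = ["lm_head", "re:.*gate_proj"]
    names = ["lm_head", "model.layers.0.mlp.gate_proj", "model.layers.0.self_attn.q_proj"]

    # group matched module names under the ignore entry that matched them
    ignored_submodules = defaultdict(list)
    for name in names:
        if matches := find_matches(name, ignore):
            for match in matches:
                ignored_submodules[match].append(name)

    print(dict(ignored_submodules))
    # {'lm_head': ['lm_head'], 're:.*gate_proj': ['model.layers.0.mlp.gate_proj']}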
@@ -200,7 +201,14 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
         model.apply(initialize_module_for_quantization)
 
     if current_status < status >= QuantizationStatus.CALIBRATION > current_status:
-        model.apply(set_module_for_calibration)
+        # only quantize weights up front when our end goal state is calibration,
+        # weight quantization parameters are already loaded for frozen/compressed
+        quantize_weights_upfront = status == QuantizationStatus.CALIBRATION
+        model.apply(
+            lambda module: set_module_for_calibration(
+                module, quantize_weights_upfront=quantize_weights_upfront
+            )
+        )
     if current_status < status >= QuantizationStatus.FROZEN > current_status:
         model.apply(freeze_module_quantization)
 
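Module.apply passes exactly one argument (the module) to its callback, so the new quantize_weights_upfront flag has to be bound with a closure, as the lambda above does; functools.partial would work equally well. A small self-contained illustration of the same binding pattern, with set_flag as a hypothetical stand-in:

    import torch.nn as nn

    def set_flag(module: nn.Module, value: bool = True):
        # hypothetical helper standing in for set_module_for_calibration
        module.my_flag = value

    model = nn.Sequential(nn.Linear(4, 4), nn.ReLU())
    # bind the extra keyword argument in a closure; apply() visits every submodule
    model.apply(lambda m: set_flag(m, value=False))
    print([m.my_flag for m in model.modules()])  # [False, False, False]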
compressed_tensors/quantization/lifecycle/calibration.py

@@ -28,7 +28,7 @@ __all__ = [
 _LOGGER = logging.getLogger(__name__)
 
 
-def set_module_for_calibration(module: Module):
+def set_module_for_calibration(module: Module, quantize_weights_upfront: bool = True):
     """
     marks a layer as ready for calibration which activates observers
     to update scales and zero points on each forward pass
@@ -36,6 +36,8 @@ def set_module_for_calibration(module: Module):
     apply to full model with `model.apply(set_module_for_calibration)`
 
     :param module: module to set for calibration
+    :param quantize_weights_upfront: whether to automatically
+        run weight quantization at the start of calibration
     """
     if not getattr(module, "quantization_scheme", None):
         # no quantization scheme nothing to do
@@ -49,7 +51,7 @@ def set_module_for_calibration(module: Module):
             "to re-calibrate a frozen module"
         )
 
-    if module.quantization_scheme.weights is not None:
+    if quantize_weights_upfront and module.quantization_scheme.weights is not None:
         # set weight scale and zero_point up front, calibration data doesn't affect it
         observer = module.weight_observer
 
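Taken together with apply.py, the default quantize_weights_upfront=True preserves the old behavior, while apply_quantization_status passes False whenever the end-goal status is past calibration, since frozen/compressed checkpoints already carry weight scales and zero points. A hedged usage sketch; plain modules carry no quantization_scheme attribute, so the early-return guard shown above makes the call a no-op for them:

    import torch.nn as nn
    from compressed_tensors.quantization.lifecycle.calibration import (
        set_module_for_calibration,
    )

    # an unquantized toy model: every module is skipped by the guard, so this
    # runs as-is; on an initialized quantized model it would defer weight
    # quantization until calibration data flows through
    model = nn.Sequential(nn.Linear(8, 8))
    model.apply(
        lambda m: set_module_for_calibration(m, quantize_weights_upfront=False)
    )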
{compressed_tensors_nightly-0.5.0.20240808.dist-info → compressed_tensors_nightly-0.5.0.20240810.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.5.0.20240808
+Version: 0.5.0.20240810
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
{compressed_tensors_nightly-0.5.0.20240808.dist-info → compressed_tensors_nightly-0.5.0.20240810.dist-info}/RECORD

@@ -19,8 +19,8 @@ compressed_tensors/quantization/quant_args.py,sha256=Vc_tWSTcbZZsMJlACpLq4JEPvGx
 compressed_tensors/quantization/quant_config.py,sha256=NpVu8YJ4Xw2pIQW_PGaNaml8kx1bUnxkvb0jBYWbKdE,9971
 compressed_tensors/quantization/quant_scheme.py,sha256=_RKOFJI0T5xJVBLX63UeYkSY4EFAecsBnqzUIVBjeU0,6014
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=MXE2E7GfIfRRfhrdGy2Og3AZOz5N59B0ZGFcsD89y6c,821
-compressed_tensors/quantization/lifecycle/apply.py,sha256=
-compressed_tensors/quantization/lifecycle/calibration.py,sha256=
+compressed_tensors/quantization/lifecycle/apply.py,sha256=UGnccJ4QPXU14lRqJkre95A_Nn5jYMAE0mWnsMi26_s,13900
+compressed_tensors/quantization/lifecycle/calibration.py,sha256=zE5jtW-e5j8vrLO7FqhX3oUSNhjrg4FsRFiG6vDu7ME,2637
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
 compressed_tensors/quantization/lifecycle/forward.py,sha256=6PSXYcf-R1dOY8zsuIWnBaoyARNymYc3-qvV6-L7SlI,12397
 compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
@@ -41,8 +41,8 @@ compressed_tensors/utils/offload.py,sha256=qAMwoFT3WEQ9nB_SegE12ob8ghDugddQseE6z
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
 compressed_tensors/utils/semi_structured_conversions.py,sha256=g1EZHzdv-ko7ufPX430dp7wE33o6FWJXuSP4zZydCu0,13488
-compressed_tensors_nightly-0.5.0.20240808.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors_nightly-0.5.0.20240808.dist-info/METADATA,sha256=
-compressed_tensors_nightly-0.5.0.20240808.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-compressed_tensors_nightly-0.5.0.20240808.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors_nightly-0.5.0.20240808.dist-info/RECORD,,
+compressed_tensors_nightly-0.5.0.20240810.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.5.0.20240810.dist-info/METADATA,sha256=yHlRu1oQOe7H-xo9pOH-qA3Nc8yuPNKtOz5GoEg1spQ,5680
+compressed_tensors_nightly-0.5.0.20240810.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+compressed_tensors_nightly-0.5.0.20240810.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.5.0.20240810.dist-info/RECORD,,
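The RECORD digests above follow the wheel spec: each entry is path,sha256=<digest>,<size>, where the digest is the urlsafe-base64-encoded SHA-256 of the file with trailing "=" padding stripped. A short sketch for recomputing an entry from an unpacked wheel:

    import base64
    import hashlib

    def record_entry(path: str) -> str:
        # recompute a RECORD line ("path,sha256=<digest>,<size>") for a file
        data = open(path, "rb").read()
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
        return f"{path},sha256={digest.decode('ascii')},{len(data)}"

    # e.g. record_entry("compressed_tensors/quantization/lifecycle/apply.py")
    # should reproduce the apply.py line above (digest UGnccJ4Q..., size 13900)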