compressed-tensors-nightly 0.5.0.20240808__py3-none-any.whl → 0.5.0.20240810__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -14,7 +14,7 @@
14
14
 
15
15
  import logging
16
16
  import re
17
- from collections import OrderedDict
17
+ from collections import OrderedDict, defaultdict
18
18
  from copy import deepcopy
19
19
  from typing import Dict, Iterable, List, Optional
20
20
  from typing import OrderedDict as OrderedDictType
@@ -125,13 +125,14 @@ def apply_quantization_config(model: Module, config: QuantizationConfig) -> Dict
125
125
  target_to_scheme[target] = scheme
126
126
 
127
127
  # list of submodules to ignore
128
- ignored_submodules = []
128
+ ignored_submodules = defaultdict(list)
129
129
  # mark appropriate layers for quantization by setting their quantization schemes
130
130
  for name, submodule in iter_named_leaf_modules(model):
131
131
  # potentially fix module name to remove FSDP wrapper prefix
132
132
  name = fix_fsdp_module_name(name)
133
- if find_name_or_class_matches(name, submodule, config.ignore):
134
- ignored_submodules.append(name)
133
+ if matches := find_name_or_class_matches(name, submodule, config.ignore):
134
+ for match in matches:
135
+ ignored_submodules[match].append(name)
135
136
  continue # layer matches ignore list, continue
136
137
  targets = find_name_or_class_matches(name, submodule, target_to_scheme)
137
138
  if targets:
@@ -200,7 +201,14 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
200
201
  model.apply(initialize_module_for_quantization)
201
202
 
202
203
  if current_status < status >= QuantizationStatus.CALIBRATION > current_status:
203
- model.apply(set_module_for_calibration)
204
+ # only quantize weights up front when our end goal state is calibration,
205
+ # weight quantization parameters are already loaded for frozen/compressed
206
+ quantize_weights_upfront = status == QuantizationStatus.CALIBRATION
207
+ model.apply(
208
+ lambda module: set_module_for_calibration(
209
+ module, quantize_weights_upfront=quantize_weights_upfront
210
+ )
211
+ )
204
212
  if current_status < status >= QuantizationStatus.FROZEN > current_status:
205
213
  model.apply(freeze_module_quantization)
206
214
 
@@ -28,7 +28,7 @@ __all__ = [
28
28
  _LOGGER = logging.getLogger(__name__)
29
29
 
30
30
 
31
- def set_module_for_calibration(module: Module):
31
+ def set_module_for_calibration(module: Module, quantize_weights_upfront: bool = True):
32
32
  """
33
33
  marks a layer as ready for calibration which activates observers
34
34
  to update scales and zero points on each forward pass
@@ -36,6 +36,8 @@ def set_module_for_calibration(module: Module):
36
36
  apply to full model with `model.apply(set_module_for_calibration)`
37
37
 
38
38
  :param module: module to set for calibration
39
+ :param quantize_weights_upfront: whether to automatically
40
+ run weight quantization at the start of calibration
39
41
  """
40
42
  if not getattr(module, "quantization_scheme", None):
41
43
  # no quantization scheme nothing to do
@@ -49,7 +51,7 @@ def set_module_for_calibration(module: Module):
49
51
  "to re-calibrate a frozen module"
50
52
  )
51
53
 
52
- if module.quantization_scheme.weights is not None:
54
+ if quantize_weights_upfront and module.quantization_scheme.weights is not None:
53
55
  # set weight scale and zero_point up front, calibration data doesn't affect it
54
56
  observer = module.weight_observer
55
57
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: compressed-tensors-nightly
3
- Version: 0.5.0.20240808
3
+ Version: 0.5.0.20240810
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -19,8 +19,8 @@ compressed_tensors/quantization/quant_args.py,sha256=Vc_tWSTcbZZsMJlACpLq4JEPvGx
19
19
  compressed_tensors/quantization/quant_config.py,sha256=NpVu8YJ4Xw2pIQW_PGaNaml8kx1bUnxkvb0jBYWbKdE,9971
20
20
  compressed_tensors/quantization/quant_scheme.py,sha256=_RKOFJI0T5xJVBLX63UeYkSY4EFAecsBnqzUIVBjeU0,6014
21
21
  compressed_tensors/quantization/lifecycle/__init__.py,sha256=MXE2E7GfIfRRfhrdGy2Og3AZOz5N59B0ZGFcsD89y6c,821
22
- compressed_tensors/quantization/lifecycle/apply.py,sha256=FDKw6AaQjOwNtcX6oFXNx5b_bwm6BpuFtly1Ll6-WFE,13451
23
- compressed_tensors/quantization/lifecycle/calibration.py,sha256=n-m4xwa9Ds2xrltp6r0rhuVLJhE8bQ1LnifrcrbA-ig,2448
22
+ compressed_tensors/quantization/lifecycle/apply.py,sha256=UGnccJ4QPXU14lRqJkre95A_Nn5jYMAE0mWnsMi26_s,13900
23
+ compressed_tensors/quantization/lifecycle/calibration.py,sha256=zE5jtW-e5j8vrLO7FqhX3oUSNhjrg4FsRFiG6vDu7ME,2637
24
24
  compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
25
25
  compressed_tensors/quantization/lifecycle/forward.py,sha256=6PSXYcf-R1dOY8zsuIWnBaoyARNymYc3-qvV6-L7SlI,12397
26
26
  compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
@@ -41,8 +41,8 @@ compressed_tensors/utils/offload.py,sha256=qAMwoFT3WEQ9nB_SegE12ob8ghDugddQseE6z
41
41
  compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
42
42
  compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
43
43
  compressed_tensors/utils/semi_structured_conversions.py,sha256=g1EZHzdv-ko7ufPX430dp7wE33o6FWJXuSP4zZydCu0,13488
44
- compressed_tensors_nightly-0.5.0.20240808.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
45
- compressed_tensors_nightly-0.5.0.20240808.dist-info/METADATA,sha256=-jkh63msCndnafIYuIJOYXfc-rlfREqnYk5ILDk3pRg,5680
46
- compressed_tensors_nightly-0.5.0.20240808.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
47
- compressed_tensors_nightly-0.5.0.20240808.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
48
- compressed_tensors_nightly-0.5.0.20240808.dist-info/RECORD,,
44
+ compressed_tensors_nightly-0.5.0.20240810.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
45
+ compressed_tensors_nightly-0.5.0.20240810.dist-info/METADATA,sha256=yHlRu1oQOe7H-xo9pOH-qA3Nc8yuPNKtOz5GoEg1spQ,5680
46
+ compressed_tensors_nightly-0.5.0.20240810.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
47
+ compressed_tensors_nightly-0.5.0.20240810.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
48
+ compressed_tensors_nightly-0.5.0.20240810.dist-info/RECORD,,