compressed-tensors-nightly 0.5.0.20240808__py3-none-any.whl → 0.5.0.20240810__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- compressed_tensors/quantization/lifecycle/apply.py +13 -5
- compressed_tensors/quantization/lifecycle/calibration.py +4 -2
- {compressed_tensors_nightly-0.5.0.20240808.dist-info → compressed_tensors_nightly-0.5.0.20240810.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.5.0.20240808.dist-info → compressed_tensors_nightly-0.5.0.20240810.dist-info}/RECORD +7 -7
- {compressed_tensors_nightly-0.5.0.20240808.dist-info → compressed_tensors_nightly-0.5.0.20240810.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.5.0.20240808.dist-info → compressed_tensors_nightly-0.5.0.20240810.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.5.0.20240808.dist-info → compressed_tensors_nightly-0.5.0.20240810.dist-info}/top_level.txt +0 -0
compressed_tensors/quantization/lifecycle/apply.py

@@ -14,7 +14,7 @@
 
 import logging
 import re
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 from copy import deepcopy
 from typing import Dict, Iterable, List, Optional
 from typing import OrderedDict as OrderedDictType
@@ -125,13 +125,14 @@ def apply_quantization_config(model: Module, config: QuantizationConfig) -> Dict
             target_to_scheme[target] = scheme
 
     # list of submodules to ignore
-    ignored_submodules = []
+    ignored_submodules = defaultdict(list)
     # mark appropriate layers for quantization by setting their quantization schemes
     for name, submodule in iter_named_leaf_modules(model):
         # potentially fix module name to remove FSDP wrapper prefix
         name = fix_fsdp_module_name(name)
-        if find_name_or_class_matches(name, submodule, config.ignore):
-            ignored_submodules.append(name)
+        if matches := find_name_or_class_matches(name, submodule, config.ignore):
+            for match in matches:
+                ignored_submodules[match].append(name)
             continue  # layer matches ignore list, continue
         targets = find_name_or_class_matches(name, submodule, target_to_scheme)
         if targets:
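The ignore bookkeeping changes shape here: a flat list of ignored module names becomes a defaultdict(list) keyed by the matching ignore entry, and the walrus operator captures the matches returned by find_name_or_class_matches so each entry records exactly which submodules it matched. A minimal, hedged sketch of this grouping pattern: find_matches below is a hypothetical stand-in for find_name_or_class_matches, and the "re:" prefix mirrors the config convention for regex targets.

    import re
    from collections import defaultdict

    def find_matches(name, patterns):
        # hypothetical stand-in: return every ignore entry that matches `name`
        return [
            p for p in patterns
            if (p.startswith("re:") and re.match(p[3:], name)) or p == name
        ]

    ignore = ["lm_head", "re:.*gate_proj"]
    names = ["lm_head", "model.layers.0.mlp.gate_proj", "model.layers.0.self_attn.q_proj"]

    # group matched module names under the ignore entry that matched them
    ignored_submodules = defaultdict(list)
    for name in names:
        if matches := find_matches(name, ignore):
            for match in matches:
                ignored_submodules[match].append(name)

    print(dict(ignored_submodules))
    # {'lm_head': ['lm_head'], 're:.*gate_proj': ['model.layers.0.mlp.gate_proj']}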
@@ -200,7 +201,14 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
         model.apply(initialize_module_for_quantization)
 
     if current_status < status >= QuantizationStatus.CALIBRATION > current_status:
-        model.apply(set_module_for_calibration)
+        # only quantize weights up front when our end goal state is calibration,
+        # weight quantization parameters are already loaded for frozen/compressed
+        quantize_weights_upfront = status == QuantizationStatus.CALIBRATION
+        model.apply(
+            lambda module: set_module_for_calibration(
+                module, quantize_weights_upfront=quantize_weights_upfront
+            )
+        )
     if current_status < status >= QuantizationStatus.FROZEN > current_status:
         model.apply(freeze_module_quantization)
 
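Module.apply passes exactly one argument (the module) to its callback, so the new quantize_weights_upfront flag has to be bound with a closure, as the lambda above does; functools.partial would work equally well. A small self-contained illustration of the same binding pattern, with set_flag as a hypothetical stand-in:

    import torch.nn as nn

    def set_flag(module: nn.Module, value: bool = True):
        # hypothetical helper standing in for set_module_for_calibration
        module.my_flag = value

    model = nn.Sequential(nn.Linear(4, 4), nn.ReLU())
    # bind the extra keyword argument in a closure; apply() visits every submodule
    model.apply(lambda m: set_flag(m, value=False))
    print([m.my_flag for m in model.modules()])  # [False, False, False]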
compressed_tensors/quantization/lifecycle/calibration.py

@@ -28,7 +28,7 @@ __all__ = [
 _LOGGER = logging.getLogger(__name__)
 
 
-def set_module_for_calibration(module: Module):
+def set_module_for_calibration(module: Module, quantize_weights_upfront: bool = True):
     """
     marks a layer as ready for calibration which activates observers
     to update scales and zero points on each forward pass
@@ -36,6 +36,8 @@ def set_module_for_calibration(module: Module):
     apply to full model with `model.apply(set_module_for_calibration)`
 
     :param module: module to set for calibration
+    :param quantize_weights_upfront: whether to automatically
+        run weight quantization at the start of calibration
     """
     if not getattr(module, "quantization_scheme", None):
         # no quantization scheme nothing to do
@@ -49,7 +51,7 @@ def set_module_for_calibration(module: Module):
             "to re-calibrate a frozen module"
         )
 
-    if module.quantization_scheme.weights is not None:
+    if quantize_weights_upfront and module.quantization_scheme.weights is not None:
         # set weight scale and zero_point up front, calibration data doesn't affect it
         observer = module.weight_observer
 
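Taken together with apply.py, the default quantize_weights_upfront=True preserves the old behavior, while apply_quantization_status passes False whenever the end-goal status is past calibration, since frozen/compressed checkpoints already carry weight scales and zero points. A hedged usage sketch; plain modules carry no quantization_scheme attribute, so the early-return guard shown above makes the call a no-op for them:

    import torch.nn as nn
    from compressed_tensors.quantization.lifecycle.calibration import (
        set_module_for_calibration,
    )

    # an unquantized toy model: every module is skipped by the guard, so this
    # runs as-is; on an initialized quantized model it would defer weight
    # quantization until calibration data flows through
    model = nn.Sequential(nn.Linear(8, 8))
    model.apply(
        lambda m: set_module_for_calibration(m, quantize_weights_upfront=False)
    )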
{compressed_tensors_nightly-0.5.0.20240808.dist-info → compressed_tensors_nightly-0.5.0.20240810.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.5.0.20240808
+Version: 0.5.0.20240810
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
{compressed_tensors_nightly-0.5.0.20240808.dist-info → compressed_tensors_nightly-0.5.0.20240810.dist-info}/RECORD

@@ -19,8 +19,8 @@ compressed_tensors/quantization/quant_args.py,sha256=Vc_tWSTcbZZsMJlACpLq4JEPvGx
 compressed_tensors/quantization/quant_config.py,sha256=NpVu8YJ4Xw2pIQW_PGaNaml8kx1bUnxkvb0jBYWbKdE,9971
 compressed_tensors/quantization/quant_scheme.py,sha256=_RKOFJI0T5xJVBLX63UeYkSY4EFAecsBnqzUIVBjeU0,6014
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=MXE2E7GfIfRRfhrdGy2Og3AZOz5N59B0ZGFcsD89y6c,821
-compressed_tensors/quantization/lifecycle/apply.py,sha256=
-compressed_tensors/quantization/lifecycle/calibration.py,sha256=
+compressed_tensors/quantization/lifecycle/apply.py,sha256=UGnccJ4QPXU14lRqJkre95A_Nn5jYMAE0mWnsMi26_s,13900
+compressed_tensors/quantization/lifecycle/calibration.py,sha256=zE5jtW-e5j8vrLO7FqhX3oUSNhjrg4FsRFiG6vDu7ME,2637
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
 compressed_tensors/quantization/lifecycle/forward.py,sha256=6PSXYcf-R1dOY8zsuIWnBaoyARNymYc3-qvV6-L7SlI,12397
 compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
@@ -41,8 +41,8 @@ compressed_tensors/utils/offload.py,sha256=qAMwoFT3WEQ9nB_SegE12ob8ghDugddQseE6z
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
 compressed_tensors/utils/semi_structured_conversions.py,sha256=g1EZHzdv-ko7ufPX430dp7wE33o6FWJXuSP4zZydCu0,13488
-compressed_tensors_nightly-0.5.0.20240808.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors_nightly-0.5.0.20240808.dist-info/METADATA,sha256=
-compressed_tensors_nightly-0.5.0.20240808.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-compressed_tensors_nightly-0.5.0.20240808.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors_nightly-0.5.0.20240808.dist-info/RECORD,,
+compressed_tensors_nightly-0.5.0.20240810.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.5.0.20240810.dist-info/METADATA,sha256=yHlRu1oQOe7H-xo9pOH-qA3Nc8yuPNKtOz5GoEg1spQ,5680
+compressed_tensors_nightly-0.5.0.20240810.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+compressed_tensors_nightly-0.5.0.20240810.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.5.0.20240810.dist-info/RECORD,,
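The RECORD digests above follow the wheel spec: each entry is path,sha256=<digest>,<size>, where the digest is the urlsafe-base64-encoded SHA-256 of the file with trailing "=" padding stripped. A short sketch for recomputing an entry from an unpacked wheel:

    import base64
    import hashlib

    def record_entry(path: str) -> str:
        # recompute a RECORD line ("path,sha256=<digest>,<size>") for a file
        data = open(path, "rb").read()
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
        return f"{path},sha256={digest.decode('ascii')},{len(data)}"

    # e.g. record_entry("compressed_tensors/quantization/lifecycle/apply.py")
    # should reproduce the apply.py line above (digest UGnccJ4Q..., size 13900)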