PyPI - compressed-tensors-nightly - Versions diffs - 0.5.0.20240902__py3-none-any.whl → 0.5.0.20240903__py3-none-any.whl - Mend

compressed-tensors-nightly 0.5.0.20240902__py3-none-any.whl → 0.5.0.20240903__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

compressed_tensors/compressors/base.py CHANGED Viewed

@@ -108,6 +108,7 @@ class Compressor(RegistryMixin):
                 prefix = name[: -(len(weight_suffix))]
                 scale = model_state.get(merge_names(prefix, "weight_scale"), None)
                 zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
+                g_idx = model_state.get(merge_names(prefix, "weight_g_idx"), None)
                 if scale is not None:
                     # weight is quantized, compress it
                     quant_args = names_to_scheme[prefix]
@@ -115,6 +116,7 @@ class Compressor(RegistryMixin):
                         weight=value,
                         scale=scale,
                         zero_point=zp,
+                        g_idx=g_idx,
                         quantization_args=quant_args,
                         device="cpu",
                     )

compressed_tensors/compressors/model_compressor.py CHANGED Viewed

@@ -271,6 +271,9 @@ class ModelCompressor:
             v_proj_has_quant_output = 0
             for name, module in model.named_modules():
                 if not hasattr(module, "quantization_scheme"):
+                    # We still want to count non-quantized q_proj
+                    if name.endswith(".q_proj"):
+                        q_proj_has_no_quant_output += 1
                     continue
                 out_act = module.quantization_scheme.output_activations
                 if name.endswith(".q_proj") and out_act is None:

compressed_tensors/quantization/lifecycle/forward.py CHANGED Viewed

@@ -348,7 +348,16 @@ def maybe_calibrate_or_quantize(
             update_parameter_data(module, updated_scale, f"{base_name}_scale")
             update_parameter_data(module, updated_zero_point, f"{base_name}_zero_point")
-    return fake_quantize(value, scale, zero_point, args, g_idx=g_idx)
+            scale = updated_scale
+            zero_point = updated_zero_point
+    return fake_quantize(
+        x=value,
+        scale=scale,
+        zero_point=zero_point,
+        args=args,
+        g_idx=g_idx,
+    )
 @torch.no_grad()

compressed_tensors/quantization/lifecycle/initialize.py CHANGED Viewed

@@ -21,6 +21,7 @@ from compressed_tensors.quantization.lifecycle.forward import (
     wrap_module_forward_quantized,
 )
 from compressed_tensors.quantization.quant_args import (
+    ActivationOrdering,
     QuantizationArgs,
     QuantizationStrategy,
 )
@@ -179,8 +180,8 @@ def _initialize_scale_zero_point_observer(
         )
         module.register_parameter(f"{base_name}_zero_point", init_zero_point)
-    # initialize with empty for actorder, to be populated by GPTQ or state_dict
-    if quantization_args.actorder:
+    # only grouped activation ordering has g_idx
+    if quantization_args.actorder == ActivationOrdering.GROUP:
         g_idx_shape = (weight_shape[1],)
         g_idx_dtype = torch.int
         init_g_idx = Parameter(

compressed_tensors/quantization/quant_args.py CHANGED Viewed

@@ -13,7 +13,7 @@
 # limitations under the License.
 from enum import Enum
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 import torch
 from pydantic import BaseModel, Field, field_validator, model_validator
@@ -25,6 +25,7 @@ __all__ = [
     "QuantizationStrategy",
     "QuantizationArgs",
     "round_to_quantized_type",
+    "ActivationOrdering",
 ]
 FP8_DTYPE = torch.float8_e4m3fn
@@ -51,6 +52,19 @@ class QuantizationStrategy(str, Enum):
     TOKEN = "token"
+class ActivationOrdering(str, Enum):
+    """
+    Enum storing strategies for activation ordering
+    Group: reorder groups and weight\n
+    Weight: only reorder weight, not groups. Slightly lower latency and
+    accuracy compared to group actorder\n
+    """
+    GROUP = "group"
+    WEIGHT = "weight"
 class QuantizationArgs(BaseModel, use_enum_values=True):
     """
     User facing arguments used to define a quantization config for weights or
@@ -69,17 +83,17 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
         quantization. Note that enabling dynamic quantization will change the default
         observer to a memoryless one
     :param actorder: whether to apply group quantization in decreasing order of
-        activation. Defaults to False for arbitrary ordering
+        activation. Defaults to None for arbitrary ordering
     """
     num_bits: int = 8
-    type: QuantizationType = QuantizationType.INT.value
+    type: QuantizationType = QuantizationType.INT
     symmetric: bool = True
     group_size: Optional[int] = None
     strategy: Optional[QuantizationStrategy] = None
     block_structure: Optional[str] = None
     dynamic: bool = False
-    actorder: bool = False
+    actorder: Optional[ActivationOrdering] = None
     observer: str = Field(
         default="minmax",
         description=(
@@ -108,8 +122,15 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
         return Observer.load_from_registry(self.observer, quantization_args=self)
+    @field_validator("type", mode="before")
+    def validate_type(cls, value) -> QuantizationType:
+        if isinstance(value, str):
+            return QuantizationType(value.lower())
+        return value
     @field_validator("group_size", mode="before")
-    def validate_group(cls, value) -> int:
+    def validate_group(cls, value) -> Union[int, None]:
         if value is None:
             return value
@@ -121,18 +142,29 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
         return value
-    @model_validator(mode="before")
-    def validate_strategy(values) -> Dict[str, Any]:
-        model_fields = QuantizationArgs.model_fields
-        strategy = values.get("strategy", model_fields["strategy"].default)
-        group_size = values.get("group_size", model_fields["group_size"].default)
-        actorder = values.get("actorder", model_fields["actorder"].default)
+    @field_validator("strategy", mode="before")
+    def validate_strategy(cls, value) -> Union[QuantizationStrategy, None]:
+        if isinstance(value, str):
+            return QuantizationStrategy(value.lower())
-        if strategy is not None:
-            strategy = QuantizationStrategy(strategy.lower())
+        return value
-        else:
-            # use group_size to determinine strategy if not given explicity
+    @field_validator("actorder", mode="before")
+    def validate_actorder(cls, value) -> Optional[ActivationOrdering]:
+        if isinstance(value, str):
+            return ActivationOrdering(value.lower())
+        return value
+    @model_validator(mode="after")
+    def validate_model_after(model: "QuantizationArgs") -> Dict[str, Any]:
+        # extract user-passed values from dictionary
+        strategy = model.strategy
+        group_size = model.group_size
+        actorder = model.actorder
+        # infer strategy
+        if strategy is None:
             if group_size is None:
                 strategy = QuantizationStrategy.TENSOR
             elif group_size > 0:
@@ -145,6 +177,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
                     "strategy='group' and group_size = -1 for 'channel'"
                 )
+        # validate strategy and group
         if strategy == QuantizationStrategy.GROUP:
             if group_size is None or group_size <= 0:
                 raise ValueError(
@@ -152,14 +185,16 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
                     "set to a positive value"
                 )
-        if actorder and strategy != QuantizationStrategy.GROUP:
+        # validate activation ordering and strategy
+        if actorder is not None and strategy != QuantizationStrategy.GROUP:
             raise ValueError(
-                "Group quantization must be specified in order to apply "
+                "Must use group quantization strategy in order to apply "
                 "activation ordering"
             )
-        values["strategy"] = strategy
-        return values
+        # write back modified values
+        model.strategy = strategy
+        return model
     def pytorch_dtype(self) -> torch.dtype:
         if self.type == QuantizationType.FLOAT:

compressed_tensors/quantization/quant_scheme.py CHANGED Viewed

@@ -110,6 +110,7 @@ def is_preset_scheme(name: str) -> bool:
     """
     return name.upper() in PRESET_SCHEMES
+UNQUANTIZED = dict()
 # 8 bit integer weights and 8 bit activations quantization
 W8A8 = dict(
@@ -208,6 +209,8 @@ FP8_DYNAMIC = dict(
 )
 PRESET_SCHEMES = {
+    # Unquantized (no-op)
+    "UNQUANTIZED": UNQUANTIZED,
     # Integer weight only schemes
     "W8A16": W8A16,
     "W4A16": W4A16,

compressed_tensors/quantization/utils/helpers.py CHANGED Viewed

@@ -181,7 +181,7 @@ def calculate_compression_ratio(model: Module) -> float:
         for parameter in model.parameters():
             uncompressed_bits = get_torch_bit_depth(parameter)
             compressed_bits = uncompressed_bits
-            if is_module_quantized(submodule):
+            if is_module_quantized(submodule) and submodule.quantization_scheme.weights:
                 compressed_bits = submodule.quantization_scheme.weights.num_bits
             num_weights = parameter.numel()

{compressed_tensors_nightly-0.5.0.20240902.dist-info → compressed_tensors_nightly-0.5.0.20240903.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.5.0.20240902
+Version: 0.5.0.20240903
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

{compressed_tensors_nightly-0.5.0.20240902.dist-info → compressed_tensors_nightly-0.5.0.20240903.dist-info}/RECORD RENAMED Viewed

@@ -2,11 +2,11 @@ compressed_tensors/__init__.py,sha256=SV1csvHUVCd8kHXz6UDZim1HZ_fAVG3vfk-j_4Bb6h
 compressed_tensors/base.py,sha256=Mq4mfVQcJhNpha-BXzpOfpmFIdl01o09BJE7D2oQ_00,796
 compressed_tensors/version.py,sha256=DdMT4o5D6_t26gTuvhF1Q9HPeXY6vV5g7XMprWuHLdI,1586
 compressed_tensors/compressors/__init__.py,sha256=wmX4VnkUTS63xBwK5-6w8FP78bNZpcdcqvf2KOEC5E4,1133
-compressed_tensors/compressors/base.py,sha256=4BO07h28Epbl2ED43lORnPGmBZ3pMdaoLYym_LJTpPQ,9846
+compressed_tensors/compressors/base.py,sha256=sJB3QhvNHxwBmpoLy_obkJBuIZ2hY__Jd-Mf2-MAty8,9966
 compressed_tensors/compressors/dense.py,sha256=xcWECjcRY4INN6jC7vHx5wvUX3NmnKlxA9SVE1A6m2Q,1267
 compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
 compressed_tensors/compressors/marlin_24.py,sha256=e7fGUyZbjUpA5VUMCPxqcYPGNiwoDKupHJaXWCoVKRw,9410
-compressed_tensors/compressors/model_compressor.py,sha256=Yv2V8Ey6AFDg2Tmvwc7-E_AnMFkeIy_HVu62ct650AI,16507
+compressed_tensors/compressors/model_compressor.py,sha256=gI6KKtH3eeWi2540Ayx-4bg9o8qjrvxlF4Gd_sqltGA,16678
 compressed_tensors/compressors/naive_quantized.py,sha256=z3h3ca5xKCN69mahutxcbzdv-OysiaxaM8P-Qum6zUQ,4823
 compressed_tensors/compressors/pack_quantized.py,sha256=27RVmJ2wg2dvCoawj407HSmKT3VPGJ6ujAMHlT26WlI,7571
 compressed_tensors/compressors/sparse_bitmask.py,sha256=kiDwBlFV0sJGLcIdDYxIiuF64ccgwDfqq1hWRQThYDc,8647
@@ -17,17 +17,17 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
 compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
 compressed_tensors/linear/compressed_linear.py,sha256=G0gEFfxLAUsgRcnfSV-PKz1ZBNTVokOauOoup7SE1mw,3210
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
-compressed_tensors/quantization/quant_args.py,sha256=wSC2ve1P-XRwZUpqEaqvQpj1Xe0EGgmmPEjPk9YEnyg,6797
+compressed_tensors/quantization/quant_args.py,sha256=Td71ap7oYxcrjAvRVafQ3hZv3BbmCL50Elyyv7EG0Rw,7733
 compressed_tensors/quantization/quant_config.py,sha256=NpVu8YJ4Xw2pIQW_PGaNaml8kx1bUnxkvb0jBYWbKdE,9971
-compressed_tensors/quantization/quant_scheme.py,sha256=_RKOFJI0T5xJVBLX63UeYkSY4EFAecsBnqzUIVBjeU0,6014
+compressed_tensors/quantization/quant_scheme.py,sha256=VRvWweqwlhjYMrKf62fXKQTeoJGhjJa3tXnE-TuFdFA,6093
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=MXE2E7GfIfRRfhrdGy2Og3AZOz5N59B0ZGFcsD89y6c,821
 compressed_tensors/quantization/lifecycle/apply.py,sha256=uftWFunr_CpCZM_qWfo2O1USXKB2qSYD1pBJsO8BuCU,15285
 compressed_tensors/quantization/lifecycle/calibration.py,sha256=PlS_EqCOPqJD3QKuLPXO9AOtDzXtQWvEBTynFv-FFVw,2698
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=laNDwvhk4S925qWTPHCufo4uDdMo24NDV1qhsAkf5Iw,2225
-compressed_tensors/quantization/lifecycle/forward.py,sha256=fZMSrUXX2NnkQiappEpT5SO-6JxbX5wiw9hyjfKNIZo,13538
+compressed_tensors/quantization/lifecycle/forward.py,sha256=PljD9pzATILEOiC3ZdHUTsfSbZdAa6iSIxWmvAHLG9I,13688
 compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=TmLY_G5VP_Fg2Ywio_dxoHRTxOKZdT7_aG5S9WtD4zI,2424
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=r8GNYIUYVHJ-539mHKnhhGysCluaOG6VieH6CQD4eeo,7112
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=S5Kwy16Da8WUIIpa1xVKc72MijJ5C_rqM6JjanZ7MGk,7133
 compressed_tensors/quantization/observers/__init__.py,sha256=4Sa7rqi5RB_S5bPO8KmncETiqDsoMBhwP37arlQym8s,764
 compressed_tensors/quantization/observers/base.py,sha256=5ovQicWPYHjIxr6-EkQ4lgOX0PpI9g23iSzKpxjM1Zg,8420
 compressed_tensors/quantization/observers/helpers.py,sha256=s_A23Qa_BLfOdHJCN5bm-qPWkhjjj_RIVrhSp1Y9Dtk,4211
@@ -35,7 +35,7 @@ compressed_tensors/quantization/observers/memoryless.py,sha256=jH_c6K3gxf4W3VNXQ
 compressed_tensors/quantization/observers/min_max.py,sha256=sQXqU3z-voxIDfR_9mQzwQUflZj2sASm_G8CYaXntFw,3865
 compressed_tensors/quantization/observers/mse.py,sha256=Aeh-253Vbab1F8cYuBiGNn4OXWJ67wXQ_JVfl3mu2a8,6034
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
-compressed_tensors/quantization/utils/helpers.py,sha256=YjXABJQUnelof-z7qcwck6fnrFLh4uMSrOmPiqNp_RY,8591
+compressed_tensors/quantization/utils/helpers.py,sha256=pwvU613XRvMDtI5b39II5jukBl5OUCqoX0ofVRpOFRY,8633
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
 compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
@@ -45,8 +45,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=m08ANVuTBxQdoa6LufDgcNJ7wCLDJolyZljB8VEybAU,8578
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors_nightly-0.5.0.20240902.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors_nightly-0.5.0.20240902.dist-info/METADATA,sha256=C5qh78nBJycno_oq2ML1puURNBO0pKRLCNY2YrV5SMg,6799
-compressed_tensors_nightly-0.5.0.20240902.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-compressed_tensors_nightly-0.5.0.20240902.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors_nightly-0.5.0.20240902.dist-info/RECORD,,
+compressed_tensors_nightly-0.5.0.20240903.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.5.0.20240903.dist-info/METADATA,sha256=plHC3Fg0bs-UlLdWYSOLl7RoMbum05Vg-JLDaje0YrY,6799
+compressed_tensors_nightly-0.5.0.20240903.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+compressed_tensors_nightly-0.5.0.20240903.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.5.0.20240903.dist-info/RECORD,,

{compressed_tensors_nightly-0.5.0.20240902.dist-info → compressed_tensors_nightly-0.5.0.20240903.dist-info}/LICENSE RENAMED Viewed

File without changes

{compressed_tensors_nightly-0.5.0.20240902.dist-info → compressed_tensors_nightly-0.5.0.20240903.dist-info}/WHEEL RENAMED Viewed

File without changes

{compressed_tensors_nightly-0.5.0.20240902.dist-info → compressed_tensors_nightly-0.5.0.20240903.dist-info}/top_level.txt RENAMED Viewed

File without changes

compressed-tensors-nightly 0.5.0.20240902__py3-none-any.whl → 0.5.0.20240903__py3-none-any.whl

compressed-tensors-nightly 0.5.0.20240902__py3-none-any.whl → 0.5.0.20240903__py3-none-any.whl