compressed-tensors 0.9.5a20250520__tar.gz → 0.9.5a20250528__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed_tensors-0.9.5a20250520/src/compressed_tensors.egg-info → compressed_tensors-0.9.5a20250528}/PKG-INFO +1 -1
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/quantization/lifecycle/forward.py +16 -3
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/quantization/lifecycle/initialize.py +47 -36
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/quantization/quant_args.py +47 -10
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/quantization/quant_config.py +2 -2
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/quantization/quant_scheme.py +23 -1
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/quantization/utils/helpers.py +31 -6
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/registry/registry.py +12 -11
- compressed_tensors-0.9.5a20250528/src/compressed_tensors/transform/__init__.py +20 -0
- compressed_tensors-0.9.5a20250528/src/compressed_tensors/transform/transform_args.py +54 -0
- compressed_tensors-0.9.5a20250528/src/compressed_tensors/transform/transform_config.py +73 -0
- compressed_tensors-0.9.5a20250528/src/compressed_tensors/transform/transform_scheme.py +43 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/version.py +1 -1
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors.egg-info/SOURCES.txt +7 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_quantization/lifecycle/test_initialize.py +33 -9
- compressed_tensors-0.9.5a20250528/tests/test_transform/test_transform_args.py +55 -0
- compressed_tensors-0.9.5a20250528/tests/test_transform/test_transform_config.py +71 -0
- compressed_tensors-0.9.5a20250528/tests/test_transform/test_transform_scheme.py +74 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/.github/.gitkeep +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/.github/actions/test/action.yml +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/.github/scripts/step-status +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/.github/workflows/build-test.yml +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/.github/workflows/build.yml +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/.github/workflows/report.yml +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/.github/workflows/test-check.yaml +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/.github/workflows/test.yml +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/.github/workflows/trigger-all.yml +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/.github/workflows/upload.yml +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/.gitignore +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/LICENSE +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/Makefile +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/README.md +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/examples/bit_packing/int4_config.json +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/examples/bitmask_compression.ipynb +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/examples/llama_1.1b/ex_config_quantization.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/examples/llama_1.1b/example_quant_config.json +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/examples/quantize_and_pack_int4.ipynb +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/pyproject.toml +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/setup.cfg +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/setup.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/README.md +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/base.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/compressors/base.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/config/base.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/config/dense.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/linear/compressed_linear.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/utils/helpers.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/utils/offload.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/utils/permute.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/utils/safetensors_load.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors.egg-info/requires.txt +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/src/compressed_tensors.egg-info/top_level.txt +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/conftest.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_configs/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_configs/test_base.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_linear/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_linear/test_compressed_linear.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_quantization/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_quantization/lifecycle/conftest.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_quantization/lifecycle/test_apply.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_quantization/lifecycle/test_forward.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_quantization/test_configs/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_quantization/test_configs/test_strategies.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_quantization/test_quant_args.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_quantization/test_quant_config.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_quantization/test_quant_scheme.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_quantization/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_registry.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_utils/__init__.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_utils/test_offload.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/test_utils/test_safetensors_load.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/tests/testing_utils.py +0 -0
- {compressed_tensors-0.9.5a20250520 → compressed_tensors-0.9.5a20250528}/utils/copyright.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.5a20250520
+Version: 0.9.5a20250528
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
src/compressed_tensors/quantization/lifecycle/forward.py

@@ -18,6 +18,7 @@ from typing import Optional
 
 import torch
 from compressed_tensors.quantization.quant_args import (
+    DynamicType,
     QuantizationArgs,
     QuantizationStrategy,
     QuantizationType,
@@ -189,7 +190,11 @@ def _process_quantization(
     q_min, q_max = calculate_range(args, x.device)
     group_size = args.group_size
 
-    if args.strategy == QuantizationStrategy.GROUP:
+    if args.strategy in (QuantizationStrategy.GROUP, QuantizationStrategy.TENSOR_GROUP):
+        n_dims = x.shape
+        if len(n_dims) > 2:
+            x = x.squeeze(0)
+
         output_dtype = dtype if dtype is not None else x.dtype
         output = torch.zeros_like(x).to(output_dtype)
         columns = output.shape[1]
@@ -251,6 +256,9 @@ def _process_quantization(
         if not is_column_order:
             output = safe_permute(output, torch.argsort(perm), dim=1)
 
+        if len(n_dims) > 2:
+            output = output.unsqueeze(0)
+
     else:  # covers channel, token and tensor strategies
         if do_quantize:
             output = _quantize(
@@ -352,9 +360,11 @@ def forward_quantize(
     g_idx = getattr(module, "weight_g_idx", None)
     global_scale = getattr(module, f"{base_name}_global_scale", None)
 
-    if args.dynamic:
+    if args.dynamic in (True, DynamicType.LOCAL):
         # dynamic quantization - determine the scale/zp on the fly
-        scale, zero_point = compute_dynamic_scales_and_zp(value=value, args=args)
+        scale, zero_point = compute_dynamic_scales_and_zp(
+            value=value, args=args, module=module, global_scale=global_scale
+        )
     else:
         # static quantization - get scale and zero point from layer
         scale = getattr(module, f"{base_name}_scale")
@@ -388,6 +398,7 @@ def _quantize(
         scale = scale.to(global_scale.dtype) / global_scale
 
     scaled = x / scale
+
     if zero_point is not None:
         scaled += zero_point.to(x.dtype)
 
@@ -398,6 +409,7 @@ def _quantize(
         q_max,
     )
     quantized_value = round_to_quantized_type(clamped_value, args)
+
     if dtype is not None:
         quantized_value = quantized_value.to(dtype)
 
@@ -422,6 +434,7 @@ def _dequantize(
 
     if zero_point is not None:
         dequant_value = dequant_value - zero_point.to(scale.dtype)
+
     dequant_value = dequant_value * scale
 
     if dtype is not None:
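For context, the GROUP/TENSOR_GROUP branch above squeezes a rank-3 activation down to 2-D before the column-group loop and restores the leading dimension afterwards. The following is a minimal standalone sketch of that shape handling only, with made-up sizes; it is not the library code:

    import math
    import torch

    # Illustrative squeeze -> group-quantize -> unsqueeze flow (not compressed-tensors code).
    group_size = 16
    x = torch.randn(1, 8, 64)                      # (batch=1, seq, hidden)

    n_dims = x.shape
    if len(n_dims) > 2:
        x = x.squeeze(0)                           # (seq, hidden)

    num_groups = math.ceil(x.shape[1] / group_size)
    grouped = x.reshape(x.shape[0], num_groups, group_size)
    scales = grouped.abs().amax(dim=-1).clamp(min=1e-8) / 7.0   # e.g. symmetric int4 range
    q = torch.clamp(torch.round(grouped / scales.unsqueeze(-1)), -8, 7)
    output = (q * scales.unsqueeze(-1)).reshape(x.shape)

    if len(n_dims) > 2:
        output = output.unsqueeze(0)               # back to (1, seq, hidden)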
src/compressed_tensors/quantization/lifecycle/initialize.py

@@ -156,13 +156,33 @@ def _initialize_scale_zero_point(
     force_zero_point: bool = True,
     scale_dtype: Optional[torch.dtype] = None,
 ):
-    if quantization_args.dynamic:
+    if quantization_args.dynamic is True:
         return
 
     # initialize on execution device to avoid performing quantized ops on cpu
     device = get_execution_device(module)
 
-    # infer expected scale/zero point shape
+    # 1. Create global_scales for tensor_group
+    if quantization_args.strategy == QuantizationStrategy.TENSOR_GROUP:
+        # TODO: should move to llmcompressor
+        if base_name == "weight":
+            # When applying weight-only FP4 quantization, generate a global_scale
+            # This scale is applied during runtime to ensure that the generated
+            # local scale falls properly within the FP8 range (i.e max value is FP8_max)
+            # which is the expected dtype of NVFP4A16 scales
+            value = generate_global_scale(input_tensor=module.weight)
+            value = value.to(device)
+            init_global_scale = Parameter(value, requires_grad=False)
+        else:
+            init_global_scale = Parameter(
+                torch.empty(1, dtype=torch.float32, device=device),
+                requires_grad=False,
+            )
+        register_offload_parameter(
+            module, f"{base_name}_global_scale", init_global_scale
+        )
+
+    # 2. Infer expected scale/zero point shape
     if quantization_args.strategy == QuantizationStrategy.TOKEN:
         expected_shape = (1, 1)
     else:
@@ -172,47 +192,35 @@ def _initialize_scale_zero_point(
         if quantization_args.strategy == QuantizationStrategy.CHANNEL:
             # (output_channels, 1)
             expected_shape = (weight_shape[0], 1)
-        elif quantization_args.strategy == QuantizationStrategy.GROUP:
+        elif quantization_args.strategy in (
+            QuantizationStrategy.TENSOR_GROUP,
+            QuantizationStrategy.GROUP,
+        ):
             num_groups = math.ceil(weight_shape[1] / quantization_args.group_size)
             expected_shape = (weight_shape[0], max(num_groups, 1))
 
+    # 3. Identify quantization scale and zp dtype
     scale_dtype = scale_dtype if scale_dtype is not None else module.weight.dtype
-    # TODO: consider erroring out in the future as if the dtype if not one fo these,
-    # there is likely bug
-
-    if is_fp4(quantization_args=quantization_args) and base_name == "weight":
-        scale_dtype = FP8_E4M3_DATA.dtype
-        # When applying weight-only FP4 quantization, generate a global_scale
-        # This scale is applied during runtime to ensure that the generated
-        # local scale falls properly within the FP8 range (i.e max value is FP8_max)
-        # which is the expected dtype of NVFP4A16 scales
-        value = generate_global_scale(input_tensor=module.weight)
-        value = value.to(device)
-        init_global_scale = Parameter(value, requires_grad=False)
-        register_offload_parameter(
-            module, f"{base_name}_global_scale", init_global_scale
-        )
-
-    if scale_dtype not in [
-        torch.float16,
-        torch.bfloat16,
-        torch.float32,
-    ] and not is_fp4(quantization_args=quantization_args):
-        scale_dtype = torch.float16
 
-    # initializes empty scale, zero point, and g_idx parameters for the module
-    init_scale = Parameter(
-        torch.empty(expected_shape, dtype=scale_dtype, device=device),
-        requires_grad=False,
-    )
-    register_offload_parameter(module, f"{base_name}_scale", init_scale)
+    if is_fp4(quantization_args=quantization_args):
+        scale_dtype = zp_dtype = FP8_E4M3_DATA.dtype
+    else:
+        # TODO: consider erroring out in the future as if the dtype if not one of these,
+        # there is likely bug
+        if scale_dtype not in [torch.float16, torch.bfloat16, torch.float32]:
+            scale_dtype = torch.float16
+        zp_dtype = quantization_args.pytorch_dtype()
+
+    # 4. Initializes empty scale, zero point, and g_idx parameters for the module
+    # do not init scales for quantzation_args.dynamic == DynamicType.local
+    if not quantization_args.dynamic:
+        init_scale = Parameter(
+            torch.empty(expected_shape, dtype=scale_dtype, device=device),
+            requires_grad=False,
+        )
+        register_offload_parameter(module, f"{base_name}_scale", init_scale)
 
     if force_zero_point or not quantization_args.symmetric:
-        if is_fp4(quantization_args=quantization_args):
-            zp_dtype = FP8_E4M3_DATA.dtype
-        else:
-            zp_dtype = quantization_args.pytorch_dtype()
-
         init_zero_point = Parameter(
             torch.zeros(expected_shape, device=device, dtype=zp_dtype),
             requires_grad=False,
@@ -304,6 +312,9 @@ def update_fused_layer_weight_global_scales(model: torch.nn.Module):
     ):
 
         if _is_attention_module(submodule):
+            # already fused/treated as one layer
+            if hasattr(submodule, "qkv_proj"):
+                continue
 
             if not _valid_fp4_quant(
                 [submodule.q_proj, submodule.v_proj, submodule.k_proj]
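To make the shapes concrete: for the group and tensor_group strategies the scale/zero-point parameters are per output row and per column group, while the tensor_group global scale is a single FP32 value. A small illustrative calculation with hypothetical layer sizes (not taken from the diff):

    import math
    import torch

    # Hypothetical Linear layer sizes, used only to illustrate the shapes above.
    out_features, in_features, group_size = 4096, 11008, 16

    num_groups = math.ceil(in_features / group_size)             # 688
    scale_shape = (out_features, max(num_groups, 1))             # (4096, 688)

    weight_scale = torch.empty(scale_shape, dtype=torch.float16)
    weight_global_scale = torch.empty(1, dtype=torch.float32)    # tensor_group only
    print(weight_scale.shape, weight_global_scale.shape)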
src/compressed_tensors/quantization/quant_args.py

@@ -32,6 +32,7 @@ __all__ = [
     "QuantizationArgs",
     "round_to_quantized_type",
     "ActivationOrdering",
+    "DynamicType",
 ]
 
 
@@ -98,6 +99,22 @@ class QuantizationStrategy(str, Enum):
     GROUP = "group"
     BLOCK = "block"
     TOKEN = "token"
+    TENSOR_GROUP = "tensor_group"
+
+
+class DynamicType(str, Enum):
+    """
+    Enum storing potential dynamic types.
+
+    1. If dynamic is True, all quantization parameters are generated on the fly.
+    2. If dynamic is False, all quantization parameters generated are static.
+    3. If "local" is provided, only local quantization parameters are dynamic.
+
+    Note: "local" is only currently supported for NVFP4.
+
+    """
+
+    LOCAL = "local"
 
 
 class ActivationOrdering(Aliasable, str, Enum):
@@ -152,7 +169,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
     group_size: Optional[int] = None
     strategy: Optional[QuantizationStrategy] = None
     block_structure: Optional[str] = None
-    dynamic: bool = False
+    dynamic: Union[DynamicType, bool] = False
     actorder: Union[ActivationOrdering, bool, None] = None
     observer: Optional[str] = Field(
         default=None,
@@ -206,6 +223,12 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
 
         return value
 
+    @field_validator("dynamic", mode="before")
+    def validate_dynamic(cls, value) -> Union[DynamicType, bool]:
+        if isinstance(value, str):
+            return DynamicType(value.lower())
+        return value
+
     @model_validator(mode="after")
     def validate_model_after(model: "QuantizationArgs") -> "QuantizationArgs":
         # extract user-passed values from dictionary
@@ -239,7 +262,8 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
         if (
             group_size is not None
             and group_size > 0
-            and strategy != QuantizationStrategy.GROUP
+            and strategy
+            not in (QuantizationStrategy.GROUP, QuantizationStrategy.TENSOR_GROUP)
         ):
             raise ValueError("group_size requires strategy to be set to 'group'")
 
@@ -255,18 +279,31 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
             if strategy not in (
                 QuantizationStrategy.TOKEN,
                 QuantizationStrategy.TENSOR,
+                QuantizationStrategy.TENSOR_GROUP,
             ):
                 raise ValueError(
-                    f"One of {QuantizationStrategy.TOKEN} or "
-                    f"{QuantizationStrategy.TENSOR} must be used for dynamic "
-                    "quantization",
+                    f"One of {(QuantizationStrategy.TOKEN, QuantizationStrategy.TENSOR, QuantizationStrategy.TENSOR_GROUP)} "
+                    "must be used for dynamic quantization",
                 )
+
+            if (
+                dynamic == DynamicType.LOCAL
+                and strategy != QuantizationStrategy.TENSOR_GROUP
+            ):
+                raise ValueError("local is only supported for strategy tensor_group")
+
             if observer is not None:
-                if observer != "memoryless":  # avoid annoying users with old configs
-                    warnings.warn(
-                        "No observer is used for dynamic quantization, setting to None"
-                    )
-                observer = None
+                if dynamic is True:  # checking if dynamic is True, not "local"
+                    if (
+                        observer != "memoryless"
+                    ):  # avoid annoying users with old configs
+                        warnings.warn(
+                            "No observer is used for dynamic quantization, setting to None"
+                        )
+                    observer = None
+                else:
+                    if dynamic == DynamicType.LOCAL:
+                        observer = "minmax"
 
         elif observer is None:
             # default to minmax for non-dynamic cases
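Taken together, `dynamic` now accepts a bool or the string/enum "local"; the new `validate_dynamic` pre-validator lower-cases strings and converts them to `DynamicType`. A usage sketch that mirrors the NVFP4 activation settings introduced in this release (values copied from the preset shown below in quant_scheme.py):

    from compressed_tensors.quantization.quant_args import (
        DynamicType,
        QuantizationArgs,
        QuantizationStrategy,
        QuantizationType,
    )

    # "local"/"LOCAL" is coerced to DynamicType.LOCAL by validate_dynamic; the model
    # validator then requires the tensor_group strategy for local dynamic quantization.
    args = QuantizationArgs(
        num_bits=4,
        type=QuantizationType.FLOAT,
        strategy=QuantizationStrategy.TENSOR_GROUP,
        symmetric=True,
        dynamic="local",
        group_size=16,
    )
    assert args.dynamic == DynamicType.LOCAL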
src/compressed_tensors/quantization/quant_config.py

@@ -16,7 +16,7 @@ from enum import Enum
 from typing import Dict, List, Optional, Union
 
 from compressed_tensors.config import CompressionFormat
-from compressed_tensors.quantization.quant_args import QuantizationArgs
+from compressed_tensors.quantization.quant_args import DynamicType, QuantizationArgs
 from compressed_tensors.quantization.quant_scheme import (
     QuantizationScheme,
     preset_name_to_scheme,
@@ -251,7 +251,7 @@ class QuantizationConfig(BaseModel):
 
         for _, scheme in self.config_groups.items():
             if scheme.input_activations is not None:
-                if not scheme.input_activations.dynamic:
+                if scheme.input_activations.dynamic in (False, DynamicType.LOCAL):
                     return True
             if scheme.output_activations is not None:
                 if not scheme.output_activations.dynamic:
src/compressed_tensors/quantization/quant_scheme.py

@@ -16,6 +16,7 @@ from copy import deepcopy
 from typing import Any, Dict, List, Optional
 
 from compressed_tensors.quantization.quant_args import (
+    DynamicType,
     QuantizationArgs,
     QuantizationStrategy,
     QuantizationType,
@@ -104,13 +105,33 @@ NVFP4A16 = dict(
     weights=QuantizationArgs(
         num_bits=4,
         type=QuantizationType.FLOAT,
-        strategy=QuantizationStrategy.GROUP,
+        strategy=QuantizationStrategy.TENSOR_GROUP,
         symmetric=True,
         dynamic=False,
         group_size=16,
     )
 )
 
+
+NVFP4 = dict(
+    weights=QuantizationArgs(
+        num_bits=4,
+        type=QuantizationType.FLOAT,
+        strategy=QuantizationStrategy.TENSOR_GROUP,
+        symmetric=True,
+        dynamic=False,
+        group_size=16,
+    ),
+    input_activations=QuantizationArgs(
+        num_bits=4,
+        type=QuantizationType.FLOAT,
+        strategy=QuantizationStrategy.TENSOR_GROUP,
+        symmetric=True,
+        dynamic=DynamicType.LOCAL,
+        group_size=16,
+    ),
+)
+
 # 8 bit integer weights and 8 bit activations quantization
 INT8_W8A8 = dict(
     weights=QuantizationArgs(
@@ -237,4 +258,5 @@ PRESET_SCHEMES = {
     "FP8": FP8,
     "FP8_DYNAMIC": FP8_DYNAMIC,
     "NVFP4A16": NVFP4A16,
+    "NVFP4": NVFP4,
 }
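The new `NVFP4` preset extends `NVFP4A16` with FP4 input activations whose group scales are computed on the fly (`dynamic=DynamicType.LOCAL`) against a calibrated global scale. A quick way to inspect it, assuming the module path shown above:

    from compressed_tensors.quantization.quant_scheme import PRESET_SCHEMES

    nvfp4 = PRESET_SCHEMES["NVFP4"]
    # Weights are statically quantized per tensor_group; activations are "local" dynamic.
    print(nvfp4["weights"].strategy, nvfp4["weights"].dynamic)
    print(nvfp4["input_activations"].strategy, nvfp4["input_activations"].dynamic)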
src/compressed_tensors/quantization/utils/helpers.py

@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import logging
+import math
 from typing import Generator, List, Optional, Tuple
 
 import torch
@@ -103,7 +104,9 @@ def calculate_qparams(
     if is_fp4(quantization_args=quantization_args) and global_scale is not None:
         # Conditionally scale the generated local scale by a global_scale
         scales = global_scale * (max_val_pos / FP4_E2M1_DATA.max)
+        scales = torch.clamp(scales, max=FP8_E4M3_DATA.max, min=FP8_E4M3_DATA.min)
         scales = scales.to(FP8_E4M3_DATA.dtype)
+
     else:
         scales = max_val_pos / (float(bit_range) / 2)
 
@@ -143,7 +146,12 @@ def calculate_qparams(
     return scales, zero_points
 
 
-def compute_dynamic_scales_and_zp(value: Tensor, args: QuantizationArgs):
+def compute_dynamic_scales_and_zp(
+    value: Tensor,
+    args: QuantizationArgs,
+    module: torch.nn.Module,
+    global_scale: Optional[Tensor] = None,
+):
     """
     Returns the computed scales and zero points for dynamic activation
     quantization.
@@ -155,24 +163,41 @@ def compute_dynamic_scales_and_zp(value: Tensor, args: QuantizationArgs):
         reduced dimensions
     :return: tuple of scale and zero point derived from the observed tensor
     """
+
+    keep_dims = True
     if args.strategy == QuantizationStrategy.TOKEN:
         dim = {1, 2}
         reduce_dims = tuple(idx for idx in range(value.ndim) if idx not in dim)
     elif args.strategy == QuantizationStrategy.TENSOR:
         reduce_dims = None
+    elif args.strategy == QuantizationStrategy.TENSOR_GROUP:
+        if len(value.shape) > 2:
+            value = value.squeeze(0)
+
+        dim = {0, 1}
+        reduce_dims = tuple(idx for idx in range(3) if idx not in dim)
+        keep_dims = False
+        value = torch.reshape(
+            value,
+            (
+                value.shape[0],
+                math.ceil(value.shape[1] / args.group_size),
+                args.group_size,
+            ),
+        )
     else:
         raise ValueError(
-            f"One of {QuantizationStrategy.TOKEN} or {QuantizationStrategy.TENSOR} "
-            "must be used for dynamic quantization",
+            "Dynamic quantization is only supported for ",
+            f"{QuantizationStrategy.TOKEN, QuantizationStrategy.TENSOR, QuantizationStrategy.TENSOR_GROUP}",
         )
 
     if not reduce_dims:
         min_val, max_val = torch.aminmax(value)
     else:
-        min_val = torch.amin(value, dim=reduce_dims, keepdims=True)
-        max_val = torch.amax(value, dim=reduce_dims, keepdims=True)
+        min_val = torch.amin(value, dim=reduce_dims, keepdims=keep_dims)
+        max_val = torch.amax(value, dim=reduce_dims, keepdims=keep_dims)
 
-    return calculate_qparams(min_val, max_val, args)
+    return calculate_qparams(min_val, max_val, args, global_scale=global_scale)
 
 
 def calculate_range(quantization_args: QuantizationArgs, device: str) -> Tuple:
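In the TENSOR_GROUP branch above, the activation is viewed as (tokens, num_groups, group_size) and min/max are reduced over the last axis without keeping dims, so the resulting statistics have shape (tokens, num_groups). A standalone sketch of just that reduction, with made-up sizes (not the library code):

    import math
    import torch

    group_size = 16
    value = torch.randn(1, 8, 64).squeeze(0)              # (tokens=8, hidden=64)

    grouped = torch.reshape(
        value,
        (value.shape[0], math.ceil(value.shape[1] / group_size), group_size),
    )
    min_val = torch.amin(grouped, dim=-1, keepdim=False)   # (8, 4)
    max_val = torch.amax(grouped, dim=-1, keepdim=False)   # (8, 4)
    print(min_val.shape, max_val.shape)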
src/compressed_tensors/registry/registry.py

@@ -19,7 +19,7 @@ of neuralmagic utilities
 
 import importlib
 from collections import defaultdict
-from typing import Any, Dict, List, Optional, Type, Union
+from typing import Any, Dict, List, Optional, TypeVar, Union
 
 
 __all__ = [
@@ -32,8 +32,9 @@ __all__ = [
 ]
 
 
-_ALIAS_REGISTRY: Dict[Type, Dict[str, str]] = defaultdict(dict)
-_REGISTRY: Dict[Type, Dict[str, Any]] = defaultdict(dict)
+_ALIAS_REGISTRY: Dict[type, Dict[str, str]] = defaultdict(dict)
+_REGISTRY: Dict[type, Dict[str, Any]] = defaultdict(dict)
+T = TypeVar("T", bound="RegistryMixin")
 
 
 def standardize_lookup_name(name: str) -> str:
@@ -159,7 +160,7 @@ class RegistryMixin:
         )
 
     @classmethod
-    def load_from_registry(cls, name: str, **constructor_kwargs) -> object:
+    def load_from_registry(cls: type[T], name: str, **constructor_kwargs) -> T:
         """
         :param name: name of registered class to load
         :param constructor_kwargs: arguments to pass to the constructor retrieved
@@ -172,7 +173,7 @@ class RegistryMixin:
         return constructor(**constructor_kwargs)
 
     @classmethod
-    def get_value_from_registry(cls, name: str):
+    def get_value_from_registry(cls: type[T], name: str) -> T:
         """
         :param name: name to retrieve from the registry
         :return: value from retrieved the registry for the given name, raises
@@ -200,7 +201,7 @@ class RegistryMixin:
 
 
 def register(
-    parent_class: Type,
+    parent_class: type,
     value: Any,
     name: Optional[str] = None,
     alias: Union[List[str], str, None] = None,
@@ -240,7 +241,7 @@ def register(
 
 
 def get_from_registry(
-    parent_class: Type, name: str, require_subclass: bool = False
+    parent_class: type, name: str, require_subclass: bool = False
 ) -> Any:
     """
     :param parent_class: class that the name is registered under
@@ -276,7 +277,7 @@ def get_from_registry(
     return retrieved_value
 
 
-def registered_names(parent_class: Type) -> List[str]:
+def registered_names(parent_class: type) -> List[str]:
     """
     :param parent_class: class to look up the registry of
     :return: all names registered to the given class
@@ -284,7 +285,7 @@ def registered_names(parent_class: Type) -> List[str]:
     return list(_REGISTRY[parent_class].keys())
 
 
-def registered_aliases(parent_class: Type) -> List[str]:
+def registered_aliases(parent_class: type) -> List[str]:
     """
     :param parent_class: class to look up the registry of
     :return: all aliases registered to the given class
@@ -297,7 +298,7 @@ def registered_aliases(parent_class: Type) -> List[str]:
 
 
 def register_alias(
-    name: str, parent_class: Type, alias: Union[str, List[str], None] = None
+    name: str, parent_class: type, alias: Union[str, List[str], None] = None
 ):
     """
     Updates the mapping from the alias(es) to the given name.
@@ -352,7 +353,7 @@ def _import_and_get_value_from_module(module_path: str, value_name: str) -> Any:
     return value
 
 
-def _validate_subclass(parent_class: Type, child_class: Type):
+def _validate_subclass(parent_class: type, child_class: type):
     if not issubclass(child_class, parent_class):
         raise ValueError(
             f"class {child_class} is not a subclass of the class it is "
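With `T = TypeVar("T", bound="RegistryMixin")`, type checkers can now infer the concrete class returned by `load_from_registry` instead of a bare object. A hedged sketch of what that buys (the decorator-style `register` classmethod is assumed from how the library's compressors register themselves; the class names here are made up):

    from compressed_tensors.registry import RegistryMixin


    class Compressor(RegistryMixin):
        """Hypothetical registry base used only for this sketch."""


    @Compressor.register(name="noop")
    class NoopCompressor(Compressor):
        def __init__(self, level: int = 0):
            self.level = level


    # With load_from_registry(cls: type[T], ...) -> T, type checkers see `compressor`
    # as a Compressor rather than an untyped object.
    compressor = Compressor.load_from_registry("noop", level=3)
    assert isinstance(compressor, NoopCompressor)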
src/compressed_tensors/transform/__init__.py (new file)

@@ -0,0 +1,20 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# flake8: noqa
+# isort: skip_file
+
+from .transform_args import *
+from .transform_scheme import *
+from .transform_config import *
src/compressed_tensors/transform/transform_args.py (new file)

@@ -0,0 +1,54 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from enum import Enum
+from typing import Any, List
+
+from pydantic import BaseModel, Field, field_validator
+
+
+__all__ = ["TransformArgs"]
+
+
+class TransformLocation(str, Enum):
+    INPUT = "input"
+    WEIGHT_INPUT = "weight_input"
+    WEIGHT_OUTPUT = "weight_output"
+    OUTPUT = "output"
+    K_CACHE = "k_cache"
+    Q_ATTN = "q_attn"
+
+
+class TransformArgs(BaseModel):
+    """
+    Arguments which define *how* and where a transform should be applied to a model
+
+    :param targets: list of modules to apply transforms to
+    :param location: where to apply transform on module, one of (`input`, `weight`,
+        `output`, `k_cache`, `q_attn`)
+    :param inverse: whether or not to apply the inverse of a transform
+    :param ignore: any modules which should be ignored from the targets list
+    """
+
+    targets: List[str]
+    location: TransformLocation
+    inverse: bool = Field(default=False)
+    ignore: List[str] = Field(default_factory=list)
+
+    @field_validator("targets", "ignore", mode="before")
+    @classmethod
+    def wrap_singleton(cls, value):
+        if isinstance(value, str):
+            return [value]
+        return value
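TransformArgs is a small pydantic model: `wrap_singleton` lets a single string be passed for `targets` or `ignore`, and `location` is validated against `TransformLocation`. A minimal usage sketch:

    from compressed_tensors.transform import TransformArgs

    # A bare string target is wrapped into a list by wrap_singleton, and the
    # location string must match one of the TransformLocation values.
    args = TransformArgs(targets="Linear", location="weight_input", inverse=True)
    assert args.targets == ["Linear"]
    assert args.ignore == []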
src/compressed_tensors/transform/transform_config.py (new file)

@@ -0,0 +1,73 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict
+
+from compressed_tensors.transform import TransformArgs, TransformScheme
+from pydantic import BaseModel
+
+
+__all__ = ["TransformConfig"]
+
+
+class TransformConfig(BaseModel):
+    """
+    Configuration of transforms to be applied to a model. This config is to be
+    serialized within a model's `config.json` file
+
+    :param config_groups: A dictionary of `TransformSchemes` that should be applied
+        to a particular model. The keys can be any arbitrary string
+    """
+
+    config_groups: Dict[str, TransformScheme]
+
+
+# quip / quip sharp
+QUIP = TransformConfig(
+    config_groups={
+        "v": TransformScheme(
+            type="hadamard",
+            apply=[
+                TransformArgs(
+                    targets=["Linear"],
+                    location="input",  # non-mergable
+                ),
+                TransformArgs(
+                    targets=["Linear"],
+                    location="weight_input",
+                    inverse=True,
+                ),
+            ],
+            randomize_modules=True,
+        ),
+        "u": TransformScheme(
+            type="hadamard",
+            apply=[
+                TransformArgs(
+                    targets=["Linear"],
+                    location="weight_output",
+                ),
+                TransformArgs(
+                    targets=["Linear"], location="output", inverse=True  # non-mergable
+                ),
+            ],
+            randomize_modules=True,
+        ),
+    }
+)
+
+
+PRESET_CONFIGS = {
+    "QUIP": QUIP,
+}
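The bundled QUIP preset pairs each Hadamard transform with its inverse around Linear weights, and because TransformConfig is a pydantic model it can be dumped straight into a model's config.json. A small inspection sketch (PRESET_CONFIGS is imported from the module directly, since only TransformConfig is re-exported by the package __init__):

    from compressed_tensors.transform.transform_config import PRESET_CONFIGS

    quip = PRESET_CONFIGS["QUIP"]
    print(sorted(quip.config_groups))                    # ['u', 'v']
    print(quip.config_groups["v"].apply[0].location)     # input-side transform
    print(quip.model_dump_json(indent=2)[:200])          # serializable for config.json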