compressed-tensors 0.9.5a20250521__py3-none-any.whl → 0.9.5a20250530__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
- compressed_tensors/quantization/lifecycle/forward.py +16 -3
- compressed_tensors/quantization/lifecycle/initialize.py +44 -36
- compressed_tensors/quantization/quant_args.py +49 -12
- compressed_tensors/quantization/quant_config.py +2 -2
- compressed_tensors/quantization/quant_scheme.py +23 -1
- compressed_tensors/quantization/utils/helpers.py +31 -6
- compressed_tensors/transform/__init__.py +20 -0
- compressed_tensors/transform/transform_args.py +54 -0
- compressed_tensors/transform/transform_config.py +73 -0
- compressed_tensors/transform/transform_scheme.py +43 -0
- compressed_tensors/version.py +1 -1
- {compressed_tensors-0.9.5a20250521.dist-info → compressed_tensors-0.9.5a20250530.dist-info}/METADATA +1 -1
- {compressed_tensors-0.9.5a20250521.dist-info → compressed_tensors-0.9.5a20250530.dist-info}/RECORD +16 -12
- {compressed_tensors-0.9.5a20250521.dist-info → compressed_tensors-0.9.5a20250530.dist-info}/WHEEL +1 -1
- {compressed_tensors-0.9.5a20250521.dist-info → compressed_tensors-0.9.5a20250530.dist-info}/licenses/LICENSE +0 -0
- {compressed_tensors-0.9.5a20250521.dist-info → compressed_tensors-0.9.5a20250530.dist-info}/top_level.txt +0 -0
compressed_tensors/quantization/lifecycle/forward.py
CHANGED

@@ -18,6 +18,7 @@ from typing import Optional
 
 import torch
 from compressed_tensors.quantization.quant_args import (
+    DynamicType,
     QuantizationArgs,
     QuantizationStrategy,
     QuantizationType,
@@ -189,7 +190,11 @@ def _process_quantization(
     q_min, q_max = calculate_range(args, x.device)
     group_size = args.group_size
 
-    if args.strategy
+    if args.strategy in (QuantizationStrategy.GROUP, QuantizationStrategy.TENSOR_GROUP):
+        n_dims = x.shape
+        if len(n_dims) > 2:
+            x = x.squeeze(0)
+
         output_dtype = dtype if dtype is not None else x.dtype
         output = torch.zeros_like(x).to(output_dtype)
         columns = output.shape[1]
@@ -251,6 +256,9 @@ _process_quantization(
         if not is_column_order:
            output = safe_permute(output, torch.argsort(perm), dim=1)
 
+        if len(n_dims) > 2:
+            output = output.unsqueeze(0)
+
     else:  # covers channel, token and tensor strategies
         if do_quantize:
             output = _quantize(
@@ -352,9 +360,11 @@ forward_quantize(
     g_idx = getattr(module, "weight_g_idx", None)
     global_scale = getattr(module, f"{base_name}_global_scale", None)
 
-    if args.dynamic:
+    if args.dynamic in (True, DynamicType.LOCAL):
         # dynamic quantization - determine the scale/zp on the fly
-        scale, zero_point = compute_dynamic_scales_and_zp(
+        scale, zero_point = compute_dynamic_scales_and_zp(
+            value=value, args=args, module=module, global_scale=global_scale
+        )
     else:
         # static quantization - get scale and zero point from layer
         scale = getattr(module, f"{base_name}_scale")
@@ -388,6 +398,7 @@ _quantize(
         scale = scale.to(global_scale.dtype) / global_scale
 
     scaled = x / scale
+
     if zero_point is not None:
         scaled += zero_point.to(x.dtype)
 
@@ -398,6 +409,7 @@ _quantize(
         q_max,
     )
     quantized_value = round_to_quantized_type(clamped_value, args)
+
     if dtype is not None:
         quantized_value = quantized_value.to(dtype)
 
@@ -422,6 +434,7 @@ _dequantize(
 
     if zero_point is not None:
         dequant_value = dequant_value - zero_point.to(scale.dtype)
+
     dequant_value = dequant_value * scale
 
     if dtype is not None:

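The `GROUP`/`TENSOR_GROUP` branch above now flattens a leading batch dimension of 1 before per-group processing and restores it afterwards. A minimal standalone sketch of that shape handling in plain PyTorch (the tensor sizes and `group_size` here are illustrative, not taken from the library):

```python
import torch

# illustrative activation with a leading batch dim of 1: (1, tokens, hidden)
x = torch.randn(1, 4, 32)
group_size = 16

n_dims = x.shape
if len(n_dims) > 2:
    x = x.squeeze(0)  # operate on (tokens, hidden), mirroring the change above

# one scale per (token, group): reshape columns into groups and reduce
grouped = x.reshape(x.shape[0], x.shape[1] // group_size, group_size)
scales = grouped.abs().amax(dim=-1)  # shape: (tokens, n_groups)

output = x  # placeholder for the per-group quantize/dequantize result

if len(n_dims) > 2:
    output = output.unsqueeze(0)  # restore the original 3-D shape

print(scales.shape, output.shape)  # torch.Size([4, 2]) torch.Size([1, 4, 32])
```
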
compressed_tensors/quantization/lifecycle/initialize.py
CHANGED

@@ -156,13 +156,33 @@ def _initialize_scale_zero_point(
     force_zero_point: bool = True,
     scale_dtype: Optional[torch.dtype] = None,
 ):
-    if quantization_args.dynamic:
+    if quantization_args.dynamic is True:
         return
 
     # initialize on execution device to avoid performing quantized ops on cpu
     device = get_execution_device(module)
 
-    #
+    # 1. Create global_scales for tensor_group
+    if quantization_args.strategy == QuantizationStrategy.TENSOR_GROUP:
+        # TODO: should move to llmcompressor
+        if base_name == "weight":
+            # When applying weight-only FP4 quantization, generate a global_scale
+            # This scale is applied during runtime to ensure that the generated
+            # local scale falls properly within the FP8 range (i.e max value is FP8_max)
+            # which is the expected dtype of NVFP4A16 scales
+            value = generate_global_scale(input_tensor=module.weight)
+            value = value.to(device)
+            init_global_scale = Parameter(value, requires_grad=False)
+        else:
+            init_global_scale = Parameter(
+                torch.empty(1, dtype=torch.float32, device=device),
+                requires_grad=False,
+            )
+        register_offload_parameter(
+            module, f"{base_name}_global_scale", init_global_scale
+        )
+
+    # 2. Infer expected scale/zero point shape
     if quantization_args.strategy == QuantizationStrategy.TOKEN:
         expected_shape = (1, 1)
     else:
@@ -172,47 +192,35 @@ def _initialize_scale_zero_point(
         if quantization_args.strategy == QuantizationStrategy.CHANNEL:
             # (output_channels, 1)
             expected_shape = (weight_shape[0], 1)
-        elif quantization_args.strategy
+        elif quantization_args.strategy in (
+            QuantizationStrategy.TENSOR_GROUP,
+            QuantizationStrategy.GROUP,
+        ):
             num_groups = math.ceil(weight_shape[1] / quantization_args.group_size)
             expected_shape = (weight_shape[0], max(num_groups, 1))
 
+    # 3. Identify quantization scale and zp dtype
     scale_dtype = scale_dtype if scale_dtype is not None else module.weight.dtype
-    # TODO: consider erroring out in the future as if the dtype if not one fo these,
-    # there is likely bug
-
-    if is_fp4(quantization_args=quantization_args) and base_name == "weight":
-        scale_dtype = FP8_E4M3_DATA.dtype
-        # When applying weight-only FP4 quantization, generate a global_scale
-        # This scale is applied during runtime to ensure that the generated
-        # local scale falls properly within the FP8 range (i.e max value is FP8_max)
-        # which is the expected dtype of NVFP4A16 scales
-        value = generate_global_scale(input_tensor=module.weight)
-        value = value.to(device)
-        init_global_scale = Parameter(value, requires_grad=False)
-        register_offload_parameter(
-            module, f"{base_name}_global_scale", init_global_scale
-        )
-
-    if scale_dtype not in [
-        torch.float16,
-        torch.bfloat16,
-        torch.float32,
-    ] and not is_fp4(quantization_args=quantization_args):
-        scale_dtype = torch.float16
 
-
-
-
-
-
-
+    if is_fp4(quantization_args=quantization_args):
+        scale_dtype = zp_dtype = FP8_E4M3_DATA.dtype
+    else:
+        # TODO: consider erroring out in the future as if the dtype if not one of these,
+        # there is likely bug
+        if scale_dtype not in [torch.float16, torch.bfloat16, torch.float32]:
+            scale_dtype = torch.float16
+        zp_dtype = quantization_args.pytorch_dtype()
+
+    # 4. Initializes empty scale, zero point, and g_idx parameters for the module
+    # do not init scales for quantzation_args.dynamic == DynamicType.local
+    if not quantization_args.dynamic:
+        init_scale = Parameter(
+            torch.empty(expected_shape, dtype=scale_dtype, device=device),
+            requires_grad=False,
+        )
+        register_offload_parameter(module, f"{base_name}_scale", init_scale)
 
     if force_zero_point or not quantization_args.symmetric:
-        if is_fp4(quantization_args=quantization_args):
-            zp_dtype = FP8_E4M3_DATA.dtype
-        else:
-            zp_dtype = quantization_args.pytorch_dtype()
-
         init_zero_point = Parameter(
             torch.zeros(expected_shape, device=device, dtype=zp_dtype),
             requires_grad=False,

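For `TENSOR_GROUP` and `GROUP` weight schemes, step 2 above sizes the scale/zero-point tensors as one entry per output channel and per column group. A small arithmetic sketch of that shape inference (the weight dimensions and group size below are illustrative only):

```python
import math

# illustrative weight shape (output_channels, input_channels) and group size
output_channels, input_channels = 4096, 11008
group_size = 16

num_groups = math.ceil(input_channels / group_size)
expected_shape = (output_channels, max(num_groups, 1))
print(expected_shape)  # (4096, 688): one scale per (output channel, group of 16 columns)
```
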
compressed_tensors/quantization/quant_args.py
CHANGED

@@ -32,6 +32,7 @@ __all__ = [
     "QuantizationArgs",
     "round_to_quantized_type",
     "ActivationOrdering",
+    "DynamicType",
 ]
 
 
@@ -98,6 +99,22 @@ class QuantizationStrategy(str, Enum):
     GROUP = "group"
     BLOCK = "block"
     TOKEN = "token"
+    TENSOR_GROUP = "tensor_group"
+
+
+class DynamicType(str, Enum):
+    """
+    Enum storing potential dynamic types.
+
+    1. If dynamic is True, all quantization parameters are generated on the fly.
+    2. If dynamic is False, all quantization parameters generated are static.
+    3. If "local" is provided, only local quantization parameters are dynamic.
+
+    Note: "local" is only currently supported for NVFP4.
+
+    """
+
+    LOCAL = "local"
 
 
 class ActivationOrdering(Aliasable, str, Enum):
@@ -152,7 +169,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
     group_size: Optional[int] = None
     strategy: Optional[QuantizationStrategy] = None
     block_structure: Optional[str] = None
-    dynamic: bool = False
+    dynamic: Union[DynamicType, bool] = False
     actorder: Union[ActivationOrdering, bool, None] = None
     observer: Optional[str] = Field(
         default=None,
@@ -206,6 +223,12 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
 
         return value
 
+    @field_validator("dynamic", mode="before")
+    def validate_dynamic(cls, value) -> Union[DynamicType, bool]:
+        if isinstance(value, str):
+            return DynamicType(value.lower())
+        return value
+
     @model_validator(mode="after")
     def validate_model_after(model: "QuantizationArgs") -> "QuantizationArgs":
         # extract user-passed values from dictionary
@@ -239,7 +262,8 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
         if (
             group_size is not None
             and group_size > 0
-            and strategy
+            and strategy
+            not in (QuantizationStrategy.GROUP, QuantizationStrategy.TENSOR_GROUP)
         ):
             raise ValueError("group_size requires strategy to be set to 'group'")
 
@@ -255,22 +279,35 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
             if strategy not in (
                 QuantizationStrategy.TOKEN,
                 QuantizationStrategy.TENSOR,
+                QuantizationStrategy.TENSOR_GROUP,
             ):
                 raise ValueError(
-                    f"One of {QuantizationStrategy.TOKEN}
-
-                    "quantization",
+                    f"One of {(QuantizationStrategy.TOKEN, QuantizationStrategy.TENSOR, QuantizationStrategy.TENSOR_GROUP)} "
+                    "must be used for dynamic quantization",
                 )
+
+            if (
+                dynamic == DynamicType.LOCAL
+                and strategy != QuantizationStrategy.TENSOR_GROUP
+            ):
+                raise ValueError("local is only supported for strategy tensor_group")
+
             if observer is not None:
-                if
-
-
-                )
-
+                if dynamic is True:  # checking if dynamic is True, not "local"
+                    if (
+                        observer != "memoryless"
+                    ):  # avoid annoying users with old configs
+                        warnings.warn(
+                            "No observer is used for dynamic quantization, setting to None"
+                        )
+                    observer = None
+                else:
+                    if dynamic == DynamicType.LOCAL:
+                        observer = "minmax"
 
         elif observer is None:
-            # default to
-            observer = "
+            # default to minmax for non-dynamic cases
+            observer = "minmax"
 
         # write back modified values
         model.strategy = strategy

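Taken together, these changes let `dynamic` be passed as the string `"local"`: the before-validator coerces it to `DynamicType.LOCAL`, and the model validator then requires the `tensor_group` strategy. A usage sketch, assuming the package version from this diff is installed (the argument values mirror the NVFP4 preset added in quant_scheme.py below):

```python
from compressed_tensors.quantization.quant_args import (
    DynamicType,
    QuantizationArgs,
    QuantizationStrategy,
    QuantizationType,
)

# "local" is coerced to DynamicType.LOCAL by the before-validator
args = QuantizationArgs(
    num_bits=4,
    type=QuantizationType.FLOAT,
    strategy=QuantizationStrategy.TENSOR_GROUP,
    symmetric=True,
    dynamic="local",
    group_size=16,
)
assert args.dynamic == DynamicType.LOCAL

# a non-tensor_group strategy combined with dynamic="local" is rejected
try:
    QuantizationArgs(num_bits=8, strategy="group", group_size=128, dynamic="local")
except ValueError as err:
    print(err)
```
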
compressed_tensors/quantization/quant_config.py
CHANGED

@@ -16,7 +16,7 @@ from enum import Enum
 from typing import Dict, List, Optional, Union
 
 from compressed_tensors.config import CompressionFormat
-from compressed_tensors.quantization.quant_args import QuantizationArgs
+from compressed_tensors.quantization.quant_args import DynamicType, QuantizationArgs
 from compressed_tensors.quantization.quant_scheme import (
     QuantizationScheme,
     preset_name_to_scheme,
@@ -251,7 +251,7 @@ class QuantizationConfig(BaseModel):
 
         for _, scheme in self.config_groups.items():
             if scheme.input_activations is not None:
-                if
+                if scheme.input_activations.dynamic in (False, DynamicType.LOCAL):
                     return True
             if scheme.output_activations is not None:
                 if not scheme.output_activations.dynamic:

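The enclosing method's name and docstring fall outside this hunk, but the updated condition returns True for input activations whose `dynamic` is either `False` or `DynamicType.LOCAL`, presumably because locally-dynamic schemes still carry statically calibrated global scales. A hedged sketch of just that predicate, using a hypothetical helper name:

```python
from compressed_tensors.quantization.quant_args import DynamicType

def input_activations_need_static_params(dynamic) -> bool:
    # hypothetical helper mirroring the updated condition above:
    # fully dynamic (True) activations are excluded, while static (False)
    # and "local" activations are included
    return dynamic in (False, DynamicType.LOCAL)

print(input_activations_need_static_params(True))               # False
print(input_activations_need_static_params(False))              # True
print(input_activations_need_static_params(DynamicType.LOCAL))  # True
```
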
compressed_tensors/quantization/quant_scheme.py
CHANGED

@@ -16,6 +16,7 @@ from copy import deepcopy
 from typing import Any, Dict, List, Optional
 
 from compressed_tensors.quantization.quant_args import (
+    DynamicType,
     QuantizationArgs,
     QuantizationStrategy,
     QuantizationType,
@@ -104,13 +105,33 @@ NVFP4A16 = dict(
     weights=QuantizationArgs(
         num_bits=4,
         type=QuantizationType.FLOAT,
-        strategy=QuantizationStrategy.
+        strategy=QuantizationStrategy.TENSOR_GROUP,
         symmetric=True,
         dynamic=False,
         group_size=16,
     )
 )
 
+
+NVFP4 = dict(
+    weights=QuantizationArgs(
+        num_bits=4,
+        type=QuantizationType.FLOAT,
+        strategy=QuantizationStrategy.TENSOR_GROUP,
+        symmetric=True,
+        dynamic=False,
+        group_size=16,
+    ),
+    input_activations=QuantizationArgs(
+        num_bits=4,
+        type=QuantizationType.FLOAT,
+        strategy=QuantizationStrategy.TENSOR_GROUP,
+        symmetric=True,
+        dynamic=DynamicType.LOCAL,
+        group_size=16,
+    ),
+)
+
 # 8 bit integer weights and 8 bit activations quantization
 INT8_W8A8 = dict(
     weights=QuantizationArgs(
@@ -237,4 +258,5 @@ PRESET_SCHEMES = {
     "FP8": FP8,
     "FP8_DYNAMIC": FP8_DYNAMIC,
     "NVFP4A16": NVFP4A16,
+    "NVFP4": NVFP4,
 }

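The new `NVFP4` preset pairs statically scaled `tensor_group` FP4 weights with FP4 input activations whose per-group scales are computed on the fly (`DynamicType.LOCAL`). A quick look-up sketch, assuming the package version from this diff is installed:

```python
from compressed_tensors.quantization.quant_scheme import PRESET_SCHEMES

nvfp4 = PRESET_SCHEMES["NVFP4"]
print(nvfp4["weights"].strategy, nvfp4["weights"].dynamic)
# e.g. tensor_group False
print(nvfp4["input_activations"].strategy, nvfp4["input_activations"].dynamic)
# e.g. tensor_group local  (DynamicType.LOCAL)
```
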
compressed_tensors/quantization/utils/helpers.py
CHANGED

@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import logging
+import math
 from typing import Generator, List, Optional, Tuple
 
 import torch
@@ -103,7 +104,9 @@ def calculate_qparams(
     if is_fp4(quantization_args=quantization_args) and global_scale is not None:
         # Conditionally scale the generated local scale by a global_scale
         scales = global_scale * (max_val_pos / FP4_E2M1_DATA.max)
+        scales = torch.clamp(scales, max=FP8_E4M3_DATA.max, min=FP8_E4M3_DATA.min)
         scales = scales.to(FP8_E4M3_DATA.dtype)
+
     else:
         scales = max_val_pos / (float(bit_range) / 2)
 
@@ -143,7 +146,12 @@
     return scales, zero_points
 
 
-def compute_dynamic_scales_and_zp(value: Tensor, args: QuantizationArgs):
+def compute_dynamic_scales_and_zp(
+    value: Tensor,
+    args: QuantizationArgs,
+    module: torch.nn.Module,
+    global_scale: Optional[Tensor] = None,
+):
     """
     Returns the computed scales and zero points for dynamic activation
     quantization.
@@ -155,24 +163,41 @@ def compute_dynamic_scales_and_zp(value: Tensor, args: QuantizationArgs):
         reduced dimensions
     :return: tuple of scale and zero point derived from the observed tensor
     """
+
+    keep_dims = True
     if args.strategy == QuantizationStrategy.TOKEN:
         dim = {1, 2}
         reduce_dims = tuple(idx for idx in range(value.ndim) if idx not in dim)
     elif args.strategy == QuantizationStrategy.TENSOR:
         reduce_dims = None
+    elif args.strategy == QuantizationStrategy.TENSOR_GROUP:
+        if len(value.shape) > 2:
+            value = value.squeeze(0)
+
+        dim = {0, 1}
+        reduce_dims = tuple(idx for idx in range(3) if idx not in dim)
+        keep_dims = False
+        value = torch.reshape(
+            value,
+            (
+                value.shape[0],
+                math.ceil(value.shape[1] / args.group_size),
+                args.group_size,
+            ),
+        )
     else:
         raise ValueError(
-
-            "
+            "Dynamic quantization is only supported for ",
+            f"{QuantizationStrategy.TOKEN, QuantizationStrategy.TENSOR, QuantizationStrategy.TENSOR_GROUP}",
         )
 
     if not reduce_dims:
         min_val, max_val = torch.aminmax(value)
     else:
-        min_val = torch.amin(value, dim=reduce_dims, keepdims=
-        max_val = torch.amax(value, dim=reduce_dims, keepdims=
+        min_val = torch.amin(value, dim=reduce_dims, keepdims=keep_dims)
+        max_val = torch.amax(value, dim=reduce_dims, keepdims=keep_dims)
 
-    return calculate_qparams(min_val, max_val, args)
+    return calculate_qparams(min_val, max_val, args, global_scale=global_scale)
 
 
 def calculate_range(quantization_args: QuantizationArgs, device: str) -> Tuple:

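For `TENSOR_GROUP`, the function now reshapes the (possibly squeezed) 2-D activation to `(rows, n_groups, group_size)` and reduces over the last dimension, yielding one min/max pair per row and group. A standalone shape check of that pattern in plain PyTorch (sizes are illustrative):

```python
import math
import torch

value = torch.randn(4, 64)  # e.g. (tokens, hidden) after squeezing a batch dim of 1
group_size = 16

grouped = torch.reshape(
    value, (value.shape[0], math.ceil(value.shape[1] / group_size), group_size)
)
reduce_dims = tuple(idx for idx in range(3) if idx not in {0, 1})  # -> (2,)

min_val = torch.amin(grouped, dim=reduce_dims, keepdim=False)
max_val = torch.amax(grouped, dim=reduce_dims, keepdim=False)
print(min_val.shape, max_val.shape)  # torch.Size([4, 4]) twice: one pair per (row, group)
```
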
compressed_tensors/transform/__init__.py
ADDED

@@ -0,0 +1,20 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# flake8: noqa
+# isort: skip_file
+
+from .transform_args import *
+from .transform_scheme import *
+from .transform_config import *

compressed_tensors/transform/transform_args.py
ADDED

@@ -0,0 +1,54 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from enum import Enum
+from typing import Any, List
+
+from pydantic import BaseModel, Field, field_validator
+
+
+__all__ = ["TransformArgs"]
+
+
+class TransformLocation(str, Enum):
+    INPUT = "input"
+    WEIGHT_INPUT = "weight_input"
+    WEIGHT_OUTPUT = "weight_output"
+    OUTPUT = "output"
+    K_CACHE = "k_cache"
+    Q_ATTN = "q_attn"
+
+
+class TransformArgs(BaseModel):
+    """
+    Arguments which define *how* and where a transform should be applied to a model
+
+    :param targets: list of modules to apply transforms to
+    :param location: where to apply transform on module, one of (`input`, `weight`,
+        `output`, `k_cache`, `q_attn`)
+    :param inverse: whether or not to apply the inverse of a transform
+    :param ignore: any modules which should be ignored from the targets list
+    """
+
+    targets: List[str]
+    location: TransformLocation
+    inverse: bool = Field(default=False)
+    ignore: List[str] = Field(default_factory=list)
+
+    @field_validator("targets", "ignore", mode="before")
+    @classmethod
+    def wrap_singleton(cls, value):
+        if isinstance(value, str):
+            return [value]
+        return value

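`TransformArgs` accepts a bare string for `targets`/`ignore` (wrapped into a list by `wrap_singleton`) and coerces string locations through `TransformLocation`. A small construction sketch, assuming the package version from this diff is installed:

```python
from compressed_tensors.transform import TransformArgs

# a bare string target is wrapped into a list by the "before" validator
args = TransformArgs(targets="Linear", location="weight_input", inverse=True)
print(args.targets)         # ['Linear']
print(args.location.value)  # weight_input
print(args.ignore)          # []
```
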
compressed_tensors/transform/transform_config.py
ADDED

@@ -0,0 +1,73 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict
+
+from compressed_tensors.transform import TransformArgs, TransformScheme
+from pydantic import BaseModel
+
+
+__all__ = ["TransformConfig"]
+
+
+class TransformConfig(BaseModel):
+    """
+    Configuration of transforms to be applied to a model. This config is to be
+    serialized within a model's `config.json` file
+
+    :param config_groups: A dictionary of `TransformSchemes` that should be applied
+        to a particular model. The keys can be any arbitrary string
+    """
+
+    config_groups: Dict[str, TransformScheme]
+
+
+# quip / quip sharp
+QUIP = TransformConfig(
+    config_groups={
+        "v": TransformScheme(
+            type="hadamard",
+            apply=[
+                TransformArgs(
+                    targets=["Linear"],
+                    location="input",  # non-mergable
+                ),
+                TransformArgs(
+                    targets=["Linear"],
+                    location="weight_input",
+                    inverse=True,
+                ),
+            ],
+            randomize_modules=True,
+        ),
+        "u": TransformScheme(
+            type="hadamard",
+            apply=[
+                TransformArgs(
+                    targets=["Linear"],
+                    location="weight_output",
+                ),
+                TransformArgs(
+                    targets=["Linear"], location="output", inverse=True  # non-mergable
+                ),
+            ],
+            randomize_modules=True,
+        ),
+    }
+)
+
+
+PRESET_CONFIGS = {
+    "QUIP": QUIP,
+}

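`TransformConfig` is a plain pydantic model keyed by arbitrary group names, so custom configs can be assembled the same way as the bundled `QUIP` preset. A minimal sketch, assuming the package version from this diff is installed:

```python
from compressed_tensors.transform import TransformArgs, TransformConfig, TransformScheme

config = TransformConfig(
    config_groups={
        "v": TransformScheme(
            type="hadamard",
            apply=[TransformArgs(targets=["Linear"], location="input")],
        )
    }
)
print(config.model_dump())  # nested dict representation of the config
```
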
compressed_tensors/transform/transform_scheme.py
ADDED

@@ -0,0 +1,43 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List
+
+from compressed_tensors.transform import TransformArgs
+from pydantic import BaseModel, Field
+
+
+__all__ = ["TransformScheme"]
+
+
+class TransformScheme(BaseModel):
+    """
+    Scheme used to parameterize a particular transform type and specify how and where it
+    should be applied to the model
+
+    :param type: string indicating the particular transform type that should be created
+        and applied. This should be one of the registered transform types
+        (see `Transforms.registered_names()`)
+    :param apply: list of TransformationArgs containing the information about the
+        modules that should be targeted by the specified transform
+    :param randomize_modules: True if unique transforms should be applied to each
+        unique module targeted by `apply`, otherwise reuse transform weights where
+        applicable
+    :param requires_grad: True if weights include gradients for training
+    """
+
+    type: str
+    apply: List[TransformArgs] = Field(default_factory=list)
+    randomize_modules: bool = Field(default=False)
+    requires_grad: bool = Field(default=False)

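By default a `TransformScheme` targets nothing (`apply=[]`), reuses transform weights across modules (`randomize_modules=False`), and keeps them frozen (`requires_grad=False`). A quick check of those defaults, assuming the package version from this diff is installed:

```python
from compressed_tensors.transform import TransformScheme

scheme = TransformScheme(type="hadamard")
print(scheme.apply)              # []
print(scheme.randomize_modules)  # False
print(scheme.requires_grad)      # False
```
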
compressed_tensors/version.py
CHANGED
{compressed_tensors-0.9.5a20250521.dist-info → compressed_tensors-0.9.5a20250530.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.5a20250521
+Version: 0.9.5a20250530
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

{compressed_tensors-0.9.5a20250521.dist-info → compressed_tensors-0.9.5a20250530.dist-info}/RECORD
RENAMED
@@ -1,6 +1,6 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=
+compressed_tensors/version.py,sha256=BwDcUUpFaOn_-cMqdBWktPf89WCzFmESpx94d8qAUZM,521
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -26,19 +26,23 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
 compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
 compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn43Mn4CoV-q4JlTJ_o,4052
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
-compressed_tensors/quantization/quant_args.py,sha256=
-compressed_tensors/quantization/quant_config.py,sha256=
-compressed_tensors/quantization/quant_scheme.py,sha256=
+compressed_tensors/quantization/quant_args.py,sha256=huROC8fbY899EYa2MnEmujvcBeHYLpn-e8ZEViEFASo,11804
+compressed_tensors/quantization/quant_config.py,sha256=aFi6PKqmEX9iP9O8GVn3mEUjRDEwk_hOCbmmiq-j9oU,10198
+compressed_tensors/quantization/quant_scheme.py,sha256=IDWa1GWUbUdWCo8j78Jz6svYF5hLz89J2PVYWBBnXRc,7102
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
 compressed_tensors/quantization/lifecycle/apply.py,sha256=-OKZ-FFFfIIoeGTrho8lXx6HVWZQp3Xkn3Q-G0hU-CM,18294
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
-compressed_tensors/quantization/lifecycle/forward.py,sha256=
+compressed_tensors/quantization/lifecycle/forward.py,sha256=65USJEtsp_n8X36L5y4g4ftMnhrQyRWbwKJ8RZMMiBo,14797
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=976sZ45ywGVzH1n4pyVhG7hnUBP1wKEWoo9cHrmKHxU,12522
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
-compressed_tensors/quantization/utils/helpers.py,sha256=
+compressed_tensors/quantization/utils/helpers.py,sha256=I-bJcMdBFXjIUQEpnxMMN_FfQyXjojpe5w7ZIKSZ5UU,17588
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=0s15BxdGgzBv8RL4kUJCYcuDOFUh_KZYvNvLEeRqWTc,11956
+compressed_tensors/transform/__init__.py,sha256=oa5VdrE-GtDYYceXNSwj5X_ropoXLLukm6Aufcc9WhY,747
+compressed_tensors/transform/transform_args.py,sha256=Sazu_4kXL7IvIEgTaimgo8dV-qacXf_t1NLEfDvPJEU,1759
+compressed_tensors/transform/transform_config.py,sha256=6JA8VFcoz4EGHOev6thj51OuB7K2gKUUazWjrVPYDLc,2144
+compressed_tensors/transform/transform_scheme.py,sha256=c7NAuLDL0itFgUfBMNShegMI9bzKL7s4LR3QJTHsXLs,1733
 compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
 compressed_tensors/utils/helpers.py,sha256=RrNvzD08naEjEiXdU-FdZjQVda1nQywu1hA_GCDj0vg,10415
 compressed_tensors/utils/offload.py,sha256=JNQ66_6vhSsizhlUaMgyEdBuFolYxbgUuT1mAZrCfKY,15436
@@ -46,8 +50,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.9.
-compressed_tensors-0.9.
-compressed_tensors-0.9.
-compressed_tensors-0.9.
-compressed_tensors-0.9.
+compressed_tensors-0.9.5a20250530.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.9.5a20250530.dist-info/METADATA,sha256=avjHgMxk1vnX09YKjerSCov-X8mTckulmJV1xQyLk5I,7004
+compressed_tensors-0.9.5a20250530.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.9.5a20250530.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.9.5a20250530.dist-info/RECORD,,

{compressed_tensors-0.9.5a20250521.dist-info → compressed_tensors-0.9.5a20250530.dist-info}/licenses/LICENSE
RENAMED
File without changes

{compressed_tensors-0.9.5a20250521.dist-info → compressed_tensors-0.9.5a20250530.dist-info}/top_level.txt
RENAMED
File without changes