compressed-tensors 0.10.3a20250721__tar.gz → 0.10.3a20250728__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. {compressed_tensors-0.10.3a20250721/src/compressed_tensors.egg-info → compressed_tensors-0.10.3a20250728}/PKG-INFO +1 -1
  2. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +12 -6
  3. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/lifecycle/forward.py +8 -4
  4. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/lifecycle/initialize.py +37 -2
  5. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/quant_scheme.py +4 -3
  6. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/factory/base.py +3 -4
  7. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/factory/hadamard.py +6 -5
  8. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/utils/__init__.py +1 -0
  9. compressed_tensors-0.10.3a20250728/src/compressed_tensors/utils/match.py +196 -0
  10. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/version.py +1 -1
  11. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  12. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors.egg-info/SOURCES.txt +2 -0
  13. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/test_initialize.py +13 -3
  14. compressed_tensors-0.10.3a20250728/tests/test_utils/test_match.py +465 -0
  15. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/.gitkeep +0 -0
  16. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/actions/test/action.yml +0 -0
  17. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/scripts/step-status +0 -0
  18. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/workflows/build-test.yml +0 -0
  19. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/workflows/build.yml +0 -0
  20. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/workflows/report.yml +0 -0
  21. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/workflows/test-check.yaml +0 -0
  22. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/workflows/test.yml +0 -0
  23. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/workflows/trigger-all.yml +0 -0
  24. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/workflows/upload.yml +0 -0
  25. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.gitignore +0 -0
  26. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/LICENSE +0 -0
  27. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/Makefile +0 -0
  28. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/README.md +0 -0
  29. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  30. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/examples/bit_packing/int4_config.json +0 -0
  31. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/examples/bitmask_compression.ipynb +0 -0
  32. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  33. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  34. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/examples/llama_1.1b/example_quant_config.json +0 -0
  35. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  36. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/examples/quantize_and_pack_int4.ipynb +0 -0
  37. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/pyproject.toml +0 -0
  38. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/setup.cfg +0 -0
  39. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/setup.py +0 -0
  40. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/__init__.py +0 -0
  41. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/README.md +0 -0
  42. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/__init__.py +0 -0
  43. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/base.py +0 -0
  44. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/__init__.py +0 -0
  45. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/base.py +0 -0
  46. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/helpers.py +0 -0
  47. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  48. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  49. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  50. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  51. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
  52. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  53. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  54. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  55. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  56. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  57. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  58. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  59. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  60. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/config/__init__.py +0 -0
  61. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/config/base.py +0 -0
  62. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/config/dense.py +0 -0
  63. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  64. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  65. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/linear/__init__.py +0 -0
  66. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  67. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/__init__.py +0 -0
  68. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  69. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  70. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  71. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  72. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/quant_args.py +0 -0
  73. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/quant_config.py +0 -0
  74. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  75. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  76. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/registry/__init__.py +0 -0
  77. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/registry/registry.py +0 -0
  78. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/__init__.py +0 -0
  79. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/apply.py +0 -0
  80. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  81. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
  82. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  83. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/transform_args.py +0 -0
  84. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/transform_config.py +0 -0
  85. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  86. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  87. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
  88. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  89. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/utils/matrix.py +0 -0
  90. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/utils/helpers.py +0 -0
  91. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/utils/internal.py +0 -0
  92. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/utils/offload.py +0 -0
  93. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/utils/permutations_24.py +0 -0
  94. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/utils/permute.py +0 -0
  95. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  96. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  97. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  98. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors.egg-info/requires.txt +0 -0
  99. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  100. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/__init__.py +0 -0
  101. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/conftest.py +0 -0
  102. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/__init__.py +0 -0
  103. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/model_compressors/__init__.py +0 -0
  104. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  105. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  106. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  107. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  108. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
  109. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  110. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  111. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  112. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  113. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  114. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  115. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_configs/__init__.py +0 -0
  116. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_configs/test_base.py +0 -0
  117. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  118. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_linear/__init__.py +0 -0
  119. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_linear/test_compressed_linear.py +0 -0
  120. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/__init__.py +0 -0
  121. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/__init__.py +0 -0
  122. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/conftest.py +0 -0
  123. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  124. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  125. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  126. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  127. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
  128. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  129. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/test_configs/__init__.py +0 -0
  130. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  131. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  132. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/test_quant_args.py +0 -0
  133. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/test_quant_config.py +0 -0
  134. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/test_quant_scheme.py +0 -0
  135. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  136. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_registry.py +0 -0
  137. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_transform/conftest.py +0 -0
  138. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_transform/factory/test_correctness.py +0 -0
  139. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_transform/factory/test_memory.py +0 -0
  140. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_transform/test_transform_args.py +0 -0
  141. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_transform/test_transform_config.py +0 -0
  142. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_transform/test_transform_scheme.py +0 -0
  143. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_transform/utils/test_hadamard.py +0 -0
  144. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_utils/__init__.py +0 -0
  145. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_utils/test_helpers.py +0 -0
  146. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_utils/test_offload.py +0 -0
  147. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_utils/test_safetensors_load.py +0 -0
  148. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/testing_utils.py +0 -0
  149. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/utils/copyright.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.3a20250721
3
+ Version: 0.10.3a20250728
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -400,7 +400,10 @@ class ModelCompressor:
400
400
 
401
401
  # in the future, support compression on same device
402
402
  with align_module_device(module, execution_device=exec_device):
403
- state_dict = module.state_dict(prefix=f"{prefix}.")
403
+ state_dict = {
404
+ f"{prefix}.{name}": param
405
+ for name, param in module.named_parameters(recurse=False)
406
+ }
404
407
 
405
408
  # quantization first
406
409
  if prefix in module_to_scheme:
@@ -421,7 +424,7 @@ class ModelCompressor:
421
424
 
422
425
  # remove any existing parameters
423
426
  offload_device = get_offloaded_device(module)
424
- for name, _ in list(module.named_parameters()):
427
+ for name, _ in list(module.named_parameters(recurse=False)):
425
428
  delete_offload_parameter(module, name)
426
429
 
427
430
  # replace with compressed parameters
@@ -458,7 +461,10 @@ class ModelCompressor:
458
461
  if prefix in module_to_scheme or prefix in sparse_compression_targets:
459
462
  # in the future, support decompression on same device
460
463
  with align_module_device(module, execution_device="cpu"):
461
- state_dict = module.state_dict(prefix=f"{prefix}.")
464
+ state_dict = {
465
+ f"{prefix}.{name}": param
466
+ for name, param in module.named_parameters(recurse=False)
467
+ }
462
468
 
463
469
  # sparsity first
464
470
  if prefix in sparse_compression_targets:
@@ -483,7 +489,7 @@ class ModelCompressor:
483
489
  # remove any existing parameters
484
490
  exec_device = get_execution_device(module)
485
491
  offload_device = get_offloaded_device(module)
486
- for name, _ in list(module.named_parameters()):
492
+ for name, _ in list(module.named_parameters(recurse=False)):
487
493
  delete_offload_parameter(module, name)
488
494
 
489
495
  # replace with decompressed parameters
@@ -754,8 +760,8 @@ def map_module_to_scheme(model: Module) -> Dict[str, QuantizationScheme]:
754
760
  fix_fsdp_module_name(name): module.quantization_scheme
755
761
  for name, module in model.named_modules()
756
762
  if (
757
- hasattr(module, "quantization_scheme") and
758
- module.quantization_scheme.weights is not None
763
+ hasattr(module, "quantization_scheme")
764
+ and module.quantization_scheme.weights is not None
759
765
  )
760
766
  }
761
767
 
@@ -112,17 +112,21 @@ def dequantize(
112
112
  if scale.shape[1] == 1:
113
113
  args = QuantizationArgs(strategy=QuantizationStrategy.CHANNEL)
114
114
  # Scale height matches input or is 1 -> group quantization across columns
115
- #
115
+ #
116
116
  # Example 1: scale.shape[0] == 1
117
117
  # x_q: (4, 8), scale: (1, 4) -> 2 columns per group
118
118
  #
119
- # Example 2: scale.shape[0] == x_q.shape[0]
119
+ # Example 2: scale.shape[0] == x_q.shape[0]
120
120
  # x_q: (4, 8), scale: (4, 4) -> 2 elements per group (per row)
121
121
  elif (scale.shape[0] == 1) or (scale.shape[0] == x_q.shape[0]):
122
122
  group_size = int(x_q.shape[1] / scale.shape[1])
123
- args = QuantizationArgs(strategy=QuantizationStrategy.GROUP, group_size=group_size)
123
+ args = QuantizationArgs(
124
+ strategy=QuantizationStrategy.GROUP, group_size=group_size
125
+ )
124
126
  else:
125
- args = QuantizationArgs(strategy=QuantizationStrategy.BLOCK, block_structure=scale.shape)
127
+ args = QuantizationArgs(
128
+ strategy=QuantizationStrategy.BLOCK, block_structure=scale.shape
129
+ )
126
130
  else:
127
131
  raise ValueError(
128
132
  f"Could not infer a quantization strategy from scale with {scale.ndim} "
@@ -15,6 +15,7 @@
15
15
 
16
16
  import logging
17
17
  import math
18
+ import warnings
18
19
  from enum import Enum
19
20
  from typing import List, Optional
20
21
 
@@ -172,14 +173,43 @@ def _initialize_scale_zero_point(
172
173
 
173
174
  if base_name == "weight" and weight_shape is not None:
174
175
  if quantization_args.strategy == QuantizationStrategy.CHANNEL:
175
- # (output_channels, 1)
176
+ # (output_channels, 1) - only for weights
176
177
  expected_shape = (weight_shape[0], 1)
177
178
  elif quantization_args.strategy in (
178
179
  QuantizationStrategy.TENSOR_GROUP,
179
180
  QuantizationStrategy.GROUP,
180
181
  ):
182
+ # GROUP/TENSOR_GROUP for both weights and activations
181
183
  num_groups = math.ceil(weight_shape[1] / quantization_args.group_size)
182
184
  expected_shape = (weight_shape[0], max(num_groups, 1))
185
+ elif quantization_args.strategy == QuantizationStrategy.BLOCK:
186
+ # For block quantization, scale shape should match number of blocks - only for weights
187
+ if quantization_args.block_structure is None:
188
+ raise ValueError(
189
+ "Block quantization requires block_structure to be specified"
190
+ )
191
+ block_height, block_width = quantization_args.block_structure
192
+ rows, cols = weight_shape[-2], weight_shape[-1]
193
+ num_rows_blocks = math.ceil(rows / block_height)
194
+ num_cols_blocks = math.ceil(cols / block_width)
195
+
196
+ # Warn if dimensions don't divide evenly
197
+ if rows % block_height != 0 or cols % block_width != 0:
198
+ warnings.warn(
199
+ f"Block quantization: tensor shape {weight_shape} does not divide evenly "
200
+ f"by block structure {quantization_args.block_structure}. "
201
+ f"Some blocks will be incomplete which may affect quantization quality.",
202
+ UserWarning,
203
+ )
204
+
205
+ expected_shape = (num_rows_blocks, num_cols_blocks)
206
+ elif quantization_args.strategy == QuantizationStrategy.BLOCK:
207
+ warnings.warn(
208
+ f"BLOCK quantization not supported for {base_name} activations. "
209
+ f"Falling back to tensor-level quantization.",
210
+ UserWarning,
211
+ )
212
+ expected_shape = 1
183
213
 
184
214
  # 3. Identify quantization scale and zp dtype
185
215
  scale_dtype = scale_dtype if scale_dtype is not None else module.weight.dtype
@@ -189,7 +219,12 @@ def _initialize_scale_zero_point(
189
219
  else:
190
220
  # TODO: consider erroring out in the future as if the dtype if not one of these,
191
221
  # there is likely bug
192
- if scale_dtype not in [torch.float16, torch.bfloat16, torch.float32, torch.float64]:
222
+ if scale_dtype not in [
223
+ torch.float16,
224
+ torch.bfloat16,
225
+ torch.float32,
226
+ torch.float64,
227
+ ]:
193
228
  scale_dtype = torch.float16
194
229
  zp_dtype = quantization_args.pytorch_dtype()
195
230
 
@@ -64,8 +64,9 @@ class QuantizationScheme(BaseModel):
64
64
  raise ValueError("Cannot apply actorder to output activations")
65
65
 
66
66
  if (
67
- inputs and weights
68
- and weights.strategy == QuantizationStrategy.GROUP
67
+ inputs
68
+ and weights
69
+ and weights.strategy == QuantizationStrategy.GROUP
69
70
  and inputs.strategy == QuantizationStrategy.GROUP
70
71
  and weights.group_size != inputs.group_size
71
72
  ):
@@ -75,7 +76,7 @@ class QuantizationScheme(BaseModel):
75
76
  "may complicate fused kernel implementations. Consider using "
76
77
  "TENSOR_GROUP strategy for both or matching group sizes.",
77
78
  UserWarning,
78
- stacklevel=2
79
+ stacklevel=2,
79
80
  )
80
81
 
81
82
  return model
@@ -18,7 +18,6 @@ from typing import Optional
18
18
  import torch
19
19
  import torch.nn.utils.parametrize as P
20
20
  from compressed_tensors import InternalModule
21
- from compressed_tensors.quantization.lifecycle import is_target # TODO: move to utils
22
21
  from compressed_tensors.registry.registry import RegistryMixin, T
23
22
  from compressed_tensors.transform import (
24
23
  TransformArgs,
@@ -29,6 +28,7 @@ from compressed_tensors.utils import (
29
28
  align_module_device,
30
29
  delete_offload_module,
31
30
  has_offloaded_params,
31
+ match_named_modules,
32
32
  patch_attr,
33
33
  register_offload_module,
34
34
  update_offload_parameter,
@@ -87,9 +87,8 @@ class TransformFactory(RegistryMixin, ABC):
87
87
  :param model: module to apply transforms to
88
88
  """
89
89
  for arg in self.scheme.apply:
90
- for name, module in list(model.named_modules()):
91
- if is_target(name, module, arg.targets, arg.ignore):
92
- self._apply_to_module(module, arg)
90
+ for _, module in match_named_modules(model, arg.targets, arg.ignore):
91
+ self._apply_to_module(module, arg)
93
92
 
94
93
  def _apply_to_module(self, module: Module, args: TransformArgs):
95
94
  """
@@ -12,9 +12,9 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import math
15
16
  from typing import Optional, Union
16
17
 
17
- import math
18
18
  import torch
19
19
  from compressed_tensors.transform import TransformArgs, TransformScheme
20
20
  from compressed_tensors.transform.factory.base import TransformBase, TransformFactory
@@ -103,7 +103,8 @@ class HadamardTransform(TransformBase):
103
103
 
104
104
  if self.args.inverse:
105
105
  weight = weight.T
106
-
107
- return apply_transform_weight(
108
- weight, value, self.args.location, self.module_type
109
- ) / self._scale
106
+
107
+ return (
108
+ apply_transform_weight(weight, value, self.args.location, self.module_type)
109
+ / self._scale
110
+ )
@@ -15,6 +15,7 @@
15
15
 
16
16
  from .helpers import *
17
17
  from .internal import *
18
+ from .match import *
18
19
  from .offload import *
19
20
  from .permutations_24 import *
20
21
  from .permute import *
@@ -0,0 +1,196 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ import re
17
+ from collections.abc import Generator
18
+ from typing import Iterable, Tuple
19
+
20
+ import torch
21
+ from compressed_tensors.utils.internal import InternalModule
22
+
23
+
24
+ _LOGGER: logging.Logger = logging.getLogger(__name__)
25
+
26
+
27
+ __all__ = [
28
+ "match_named_modules",
29
+ "match_named_parameters",
30
+ "match_modules_set",
31
+ "is_match",
32
+ ]
33
+
34
+
35
def match_named_modules(
    model: torch.nn.Module,
    targets: Iterable[str],
    ignore: Iterable[str] = tuple(),
    warn_on_fail: bool = False,
) -> Generator[Tuple[str, torch.nn.Module]]:
    """
    Yields names and modules which match `targets` but do not match `ignore`.
    Each module is yielded at most once, in the order of `model.named_modules()`.

    :param model: model containing submodules to match against
    :param targets: target strings, potentially containing "re:" prefixes
    :param ignore: targets to ignore, potentially containing "re:" prefixes
    :param warn_on_fail: if True, warns if any targets do not match any modules in model
    :return: generator of module names and modules
    """
    unmatched_targets = set(targets)
    for name, module in model.named_modules():
        # Check *every* target (not just the first hit) so that
        # `unmatched_targets` bookkeeping stays accurate, but record a single
        # boolean so a module matching several targets is yielded only once.
        # (Previously the yield was nested in the target loop, producing
        # duplicate yields for multiply-matched modules.)
        matched = False
        for target in targets:
            if is_match(name, module, target):
                unmatched_targets.discard(target)
                matched = True

        if matched and not any(is_match(name, module, ign) for ign in ignore):
            yield name, module

    if warn_on_fail:
        for target in unmatched_targets:
            _LOGGER.warning(
                f"Could not match `{target}` in instance of {model.__class__.__name__}"
            )
65
+
66
+
67
def match_named_parameters(
    model: torch.nn.Module,
    targets: Iterable[str],
    ignore: Iterable[str] = tuple(),
    warn_on_fail: bool = False,
) -> Generator[Tuple[str, torch.nn.Module, torch.nn.Parameter]]:
    """
    Yields parameters which match `targets` but do not match `ignore`.
    Each parameter is yielded at most once, in the order of
    `model.named_modules()`.

    :param model: model containing params to match against
    :param targets: target strings, potentially containing "re:" prefixes
    :param ignore: targets to ignore, potentially containing "re:" prefixes
    :param warn_on_fail: if True, warns if any targets do not match any params in model
    :return: generator of fully-qualified param names, parent modules, and params
    """
    unmatched_targets = set(targets)
    for module_name, module in model.named_modules():
        # params owned by internal (bookkeeping) modules are never matched
        if isinstance(module, InternalModule):
            continue

        for param_name, param in module.named_parameters(recurse=False):
            param_fqn = f"{module_name}.{param_name}"
            # Check every target to keep `unmatched_targets` accurate, but
            # yield the param only once even if several targets match it.
            # (Previously the yield was nested in the target loop, producing
            # duplicate yields for multiply-matched params.)
            matched = False
            for target in targets:
                if _match_name(param_fqn, target):
                    unmatched_targets.discard(target)
                    matched = True

            if matched and not any(_match_name(param_fqn, ign) for ign in ignore):
                yield param_fqn, module, param

    if warn_on_fail:
        for target in unmatched_targets:
            _LOGGER.warning(
                f"Could not match `{target}` in instance of {model.__class__.__name__}"
            )
102
+
103
+
104
def match_modules_set(
    model: torch.nn.Module,
    targets: Iterable[str],
    ignore: Iterable[str] = tuple(),
) -> Generator[Iterable[torch.nn.Module]]:
    """
    Yields modules grouped with the same order and size as `targets`.
    Values are returned in order of `model.named_modules()`

    For example, the following targets would yield module belonging to the following layers:
    ```python3
    match_modules_set(model, ["q_proj", "k_proj", "v_proj"]) == (
        (
            `model.layers.0.self_attn.q_proj`,
            `model.layers.0.self_attn.k_proj`,
            `model.layers.0.self_attn.v_proj`,
        ),
        (
            `model.layers.1.self_attn.q_proj`,
            `model.layers.1.self_attn.k_proj`,
            `model.layers.1.self_attn.v_proj`,
        ),
        ...
        (
            `model.layers.32.self_attn.q_proj`,
            `model.layers.32.self_attn.k_proj`,
            `model.layers.32.self_attn.v_proj`,
        ),
    )
    ```

    This can be used to match layers to their corresponding downstream counterparts.
    For example, matching layer norms to their subsequent linear layers
    ```python3
    for norm, q, k, v in match_modules_set(model, (norm_tgt, q_tgt, k_tgt, v_tgt)):
        fuse_norm_linears(norm, [q, k, v])
    ```

    :param model: model containing modules to match against
    :param targets: target strings, potentially containing "re:" prefixes
    :param ignore: targets to ignore, potentially containing "re:" prefixes
    """
    pending = dict.fromkeys(targets, None)
    for name, module in model.named_modules():
        # fill each target slot; matching the same target twice before the
        # current set completes means the targets cannot be grouped cleanly
        for target in targets:
            is_ignored = any(is_match(name, module, ign) for ign in ignore)
            if is_match(name, module, target) and not is_ignored:
                if pending[target] is not None:
                    raise ValueError(f"Matched a {target} twice before completing set")
                pending[target] = module

        # emit a completed set (ordered like `targets`) and start collecting anew
        if targets and all(pending[target] is not None for target in targets):
            yield [pending[target] for target in targets]
            pending = dict.fromkeys(targets, None)

    # a partially-filled set at the end means some targets could not be grouped
    leftover = [target for target, module in pending.items() if module is not None]
    if len(leftover):
        raise ValueError(f"Unable to match targets into set: {leftover}")
165
+
166
+
167
def is_match(name: str, module: torch.nn.Module, target: str) -> bool:
    """
    Returns true if either module name or module parent classes match against target
    and the module is not an internal module
    """
    if isinstance(module, InternalModule):
        return False
    return _match_name(name, target) or _match_class(module, target)
175
+
176
+
177
+ def _match_name(name: str, target: str) -> bool:
178
+ """
179
+ Returns true if target string begins with "re:" and
180
+ regex matches or if target string exactly matches name
181
+ """
182
+ if target.startswith("re:"):
183
+ return re.match(target.removeprefix("re:"), name) is not None
184
+ else:
185
+ return target == name
186
+
187
+
188
+ def _match_class(module: torch.nn.Module, target: str) -> bool:
189
+ """
190
+ Returns true if any torch parent class names match the target string exactly
191
+ """
192
+ # will never match against a regex pattern since `:` is not allowed in class names
193
+ return any(
194
+ issubclass(cls, torch.nn.Module) and cls.__name__ == target
195
+ for cls in module.__class__.__mro__
196
+ )
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.10.3.a20250721'
20
+ __version__ = version = '0.10.3.a20250728'
21
21
  __version_tuple__ = version_tuple = (0, 10, 3)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.3a20250721
3
+ Version: 0.10.3a20250728
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -88,6 +88,7 @@ src/compressed_tensors/transform/utils/matrix.py
88
88
  src/compressed_tensors/utils/__init__.py
89
89
  src/compressed_tensors/utils/helpers.py
90
90
  src/compressed_tensors/utils/internal.py
91
+ src/compressed_tensors/utils/match.py
91
92
  src/compressed_tensors/utils/offload.py
92
93
  src/compressed_tensors/utils/permutations_24.py
93
94
  src/compressed_tensors/utils/permute.py
@@ -141,6 +142,7 @@ tests/test_transform/factory/test_memory.py
141
142
  tests/test_transform/utils/test_hadamard.py
142
143
  tests/test_utils/__init__.py
143
144
  tests/test_utils/test_helpers.py
145
+ tests/test_utils/test_match.py
144
146
  tests/test_utils/test_offload.py
145
147
  tests/test_utils/test_safetensors_load.py
146
148
  utils/copyright.py
@@ -174,8 +174,8 @@ def test_initialize_module_for_quantization_offloaded(
174
174
  ),
175
175
  ),
176
176
  (
177
- QuantizationArgs(strategy="block"),
178
- QuantizationArgs(strategy="block"),
177
+ QuantizationArgs(strategy="block", block_structure=[2, 4]),
178
+ None,
179
179
  ),
180
180
  (
181
181
  QuantizationArgs(strategy="token"),
@@ -227,7 +227,17 @@ def test_initialize_quantization_parameters(weights, input_activations):
227
227
  expected_shape = (layer.weight.shape[0], max(num_groups, 1))
228
228
 
229
229
  elif args.strategy == QuantizationStrategy.BLOCK:
230
- expected_shape = (1,)
230
+ # For block quantization, only weights get block-level scales
231
+ # Activations fall back to tensor-level since shape is unknown at init
232
+ if q_type == "weights" and args.block_structure is not None:
233
+ block_height, block_width = args.block_structure
234
+ rows, cols = layer.weight.shape[-2], layer.weight.shape[-1]
235
+ num_rows_blocks = math.ceil(rows / block_height)
236
+ num_cols_blocks = math.ceil(cols / block_width)
237
+ expected_shape = (num_rows_blocks, num_cols_blocks)
238
+ else:
239
+ # For activations or when block_structure is None
240
+ expected_shape = (1,)
231
241
 
232
242
  elif args.strategy == QuantizationStrategy.TOKEN:
233
243
  expected_shape = (1, 1)