compressed-tensors 0.11.1a20250820__py3-none-any.whl → 0.11.1a20250828__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/model_compressors/model_compressor.py +178 -156
- compressed_tensors/compressors/quantized_compressors/base.py +2 -2
- compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +9 -9
- compressed_tensors/compressors/quantized_compressors/pack_quantized.py +4 -3
- compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +1 -1
- compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +1 -1
- compressed_tensors/quantization/lifecycle/apply.py +48 -142
- compressed_tensors/quantization/lifecycle/forward.py +5 -4
- compressed_tensors/quantization/lifecycle/initialize.py +7 -6
- compressed_tensors/quantization/quant_args.py +7 -5
- compressed_tensors/quantization/quant_scheme.py +4 -3
- compressed_tensors/quantization/utils/helpers.py +0 -1
- compressed_tensors/registry/registry.py +1 -1
- compressed_tensors/transform/transform_config.py +1 -1
- compressed_tensors/transform/utils/matrix.py +1 -1
- compressed_tensors/utils/match.py +57 -8
- compressed_tensors/utils/offload.py +0 -1
- compressed_tensors/utils/safetensors_load.py +0 -1
- compressed_tensors/version.py +1 -1
- {compressed_tensors-0.11.1a20250820.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/METADATA +1 -1
- {compressed_tensors-0.11.1a20250820.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/RECORD +24 -24
- {compressed_tensors-0.11.1a20250820.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/WHEEL +0 -0
- {compressed_tensors-0.11.1a20250820.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/licenses/LICENSE +0 -0
- {compressed_tensors-0.11.1a20250820.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/top_level.txt +0 -0
compressed_tensors/quantization/lifecycle/apply.py
CHANGED

@@ -13,12 +13,11 @@
 # limitations under the License.
 
 import logging
-import re
-from collections import OrderedDict, defaultdict
+from collections import OrderedDict
 from copy import deepcopy
 from typing import Dict, Iterable, List, Optional
 from typing import OrderedDict as OrderedDictType
-from typing import Set, Union
+from typing import Union
 
 import torch
 from compressed_tensors.config import CompressionFormat
@@ -39,7 +38,8 @@ from compressed_tensors.quantization.utils import (
     infer_quantization_status,
     is_kv_cache_quant_scheme,
 )
-from compressed_tensors.utils.helpers import replace_module
+from compressed_tensors.utils.helpers import deprecated, replace_module
+from compressed_tensors.utils.match import match_named_modules, match_targets
 from compressed_tensors.utils.offload import update_parameter_data
 from compressed_tensors.utils.safetensors_load import get_safetensors_folder
 from safetensors import safe_open
@@ -51,8 +51,6 @@ __all__ = [
     "apply_quantization_config",
     "apply_quantization_status",
     "find_name_or_class_matches",
-    "expand_target_names",
-    "is_target",
 ]
 
 from compressed_tensors.quantization.utils.helpers import is_module_quantized
@@ -73,14 +71,14 @@ def load_pretrained_quantization_parameters(
     Loads the quantization parameters (scale and zero point) from model_name_or_path to
     a model that has already been initialized with a quantization config.
 
-    NOTE: Will always load inputs/output parameters.
-
+    NOTE: Will always load inputs/output parameters. Will conditioanlly load weight
+        parameters, if load_weight_quantization is set to True.
 
     :param model: model to load pretrained quantization parameters to
     :param model_name_or_path: Hugging Face stub or local folder containing a quantized
         model, which is used to load quantization parameters
-    :param load_weight_quantization: whether or not the weight quantization parameters
-        be
+    :param load_weight_quantization: whether or not the weight quantization parameters
+        should be loaded
     """
     model_path = get_safetensors_folder(model_name_or_path)
     mapping = get_quantization_parameter_to_path_mapping(model_path)
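A usage sketch of the signature documented above (hedged: `model` and the checkpoint path are placeholders for a model already initialized with a quantization config, and this assumes the wheel is installed):

```python
# Sketch only: `model` must already be initialized with a quantization config,
# and "path/to/quantized-model" is a placeholder HF stub or local folder.
from compressed_tensors.quantization.lifecycle.apply import (
    load_pretrained_quantization_parameters,
)

load_pretrained_quantization_parameters(
    model,
    "path/to/quantized-model",
    load_weight_quantization=True,  # also load weight scales/zero points
)
```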
@@ -117,7 +115,7 @@ def load_pretrained_quantization_parameters(
 
 def apply_quantization_config(
     model: Module, config: Union[QuantizationConfig, None], run_compressed: bool = False
-) -> Dict[str, QuantizationScheme]:
+):
     """
     Initializes the model for quantization in-place based on the given config.
     Optionally coverts quantizable modules to compressed_linear modules
@@ -127,71 +125,49 @@ def apply_quantization_config(
     :param run_compressed: Whether the model will be run in compressed mode or
         decompressed fully on load
     """
-
-    if config is None:
-        return dict()
+    from compressed_tensors.linear.compressed_linear import CompressedLinear
 
-    # remove reference to the original `config`
-    # argument. This function can mutate it, and we'd
-    # like to keep the original `config` as it is.
     config = deepcopy(config)
+    if config is None:  # see PR #180
+        return dict()
+
+    # preprocess to support kv cache scheme
+    config = process_quantization_config(config)
+
     # build mapping of targets to schemes for easier matching
     # use ordered dict to preserve target ordering in config
     target_to_scheme = OrderedDict()
-    config = process_quantization_config(config)
-    names_to_scheme = dict()
     for scheme in config.config_groups.values():
         for target in scheme.targets:
             target_to_scheme[target] = scheme
 
-    if run_compressed:
-        from compressed_tensors.linear.compressed_linear import CompressedLinear
-
-    # list of submodules to ignore
-    ignored_submodules = defaultdict(list)
     # mark appropriate layers for quantization by setting their quantization schemes
-    for name, submodule in
-
-
-
-
-
-
-
-
-
-        if
-
-
-
-        if
-
-
-
-
-
-
-
-
-            )
-                    replace_module(model, name, compressed_linear)
-
-            # target matched - add layer and scheme to target list
-            submodule.quantization_scheme = scheme
-
-            names_to_scheme[name] = submodule.quantization_scheme
-
-    if config.ignore is not None and ignored_submodules is not None:
-        if set(config.ignore) - set(ignored_submodules):
-            _LOGGER.warning(
-                "Some layers that were to be ignored were "
-                "not found in the model: "
-                f"{set(config.ignore) - set(ignored_submodules)}"
-            )
+    for name, submodule in match_named_modules(
+        model, target_to_scheme, config.ignore, warn_on_fail=True
+    ):
+        # mark modules to be quantized by adding
+        # quant scheme to the matching layers
+        matched_targets = match_targets(name, submodule, target_to_scheme)
+        scheme = _scheme_from_targets(target_to_scheme, matched_targets, name)
+        # target matched - add layer and scheme to target list
+        submodule.quantization_scheme = scheme
+
+        # replace with run compressed if applicable
+        # FUTURE: move this to model compressor
+        if isinstance(submodule, torch.nn.Linear) and run_compressed:
+            format = config.format
+            if format != CompressionFormat.dense.value:
+                if isinstance(submodule, torch.nn.Linear):
+                    # TODO: expand to more module types
+                    compressed_linear = CompressedLinear.from_linear(
+                        submodule,
+                        quantization_scheme=scheme,
+                        quantization_format=format,
+                    )
+                    replace_module(model, name, compressed_linear)
 
     # apply current quantization status across all targeted layers
     apply_quantization_status(model, config.quantization_status)
-    return names_to_scheme
 
 
 def process_quantization_config(config: QuantizationConfig) -> QuantizationConfig:
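To make the behavioral change concrete, here is a minimal sketch of the new call pattern, assuming this wheel's public `compressed_tensors.quantization` API; note the function now mutates the model in place instead of returning a names-to-scheme mapping:

```python
import torch
from compressed_tensors.quantization import (
    QuantizationArgs,
    QuantizationConfig,
    QuantizationScheme,
    apply_quantization_config,
)

# a tiny illustrative model; any torch.nn.Module works
model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.Linear(64, 8))
config = QuantizationConfig(
    config_groups={
        "group_0": QuantizationScheme(
            targets=["Linear"],
            weights=QuantizationArgs(num_bits=8, symmetric=True),
        )
    },
)

# mutates `model` in place; callers that relied on the old return value must
# now collect schemes from the modules themselves
apply_quantization_config(model, config)
names_to_scheme = {
    name: module.quantization_scheme
    for name, module in model.named_modules()
    if hasattr(module, "quantization_scheme")
}
print(sorted(names_to_scheme))  # ['0', '1']
```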
@@ -262,54 +238,10 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
         model.apply(compress_quantized_weights)
 
 
-def expand_target_names(
-    model: Module,
-    targets: Optional[Iterable[str]] = None,
-    ignore: Optional[Iterable[str]] = None,
-) -> Set[str]:
-    """
-    Finds all unique module names in the model that match the given
-    targets and ignore lists.
-
-    Note: Targets must be regexes, layer types, or full layer names.
-
-    :param model: model to search for targets in
-    :param targets: Iterable of targets to search for
-    :param ignore: Iterable of targets to ignore
-    :return: set of all targets that match the given targets and should
-        not be ignored
-    """
-    return {
-        name
-        for name, module in model.named_modules()
-        if is_target(name, module, targets, ignore)
-    }
-
-
-def is_target(
-    name: str,
-    module: Module,
-    targets: Optional[Iterable[str]] = None,
-    ignore: Optional[Iterable[str]] = None,
-) -> bool:
-    """
-    Determines if a module should be included in the targets based on the
-    targets and ignore lists.
-
-    Note: Targets must be regexes, layer types, or full layer names.
-
-    :param name: name of the module
-    :param module: the module itself
-    :param targets: Iterable of targets to search for
-    :param ignore: Iterable of targets to ignore
-    :return: True if the module is a target and not ignored, False otherwise
-    """
-    return bool(
-        find_name_or_class_matches(name, module, targets or [])
-        and not find_name_or_class_matches(name, module, ignore or [])
-    )
-
-
+@deprecated(
+    message="This function is deprecated and will be removed in a future release."
+    "Please use `match_targets` from `compressed_tensors.utils.match` instead."
+)
 def find_name_or_class_matches(
     name: str, module: Module, targets: Iterable[str], check_contains: bool = False
 ) -> List[str]:
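For callers of the removed helpers, a hedged migration sketch using the `compressed_tensors.utils.match` utilities that the deprecation message points to (assumes this wheel is installed):

```python
import torch
from compressed_tensors.utils.match import match_named_modules, match_targets

model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU())
targets, ignore = ["Linear"], []

# roughly replaces the removed expand_target_names(model, targets, ignore)
matched_names = {name for name, _ in match_named_modules(model, targets, ignore)}

# roughly replaces the removed is_target(name, module, targets, ignore)
name, module = "0", model[0]
is_targeted = bool(match_targets(name, module, targets)) and not bool(
    match_targets(name, module, ignore)
)
print(matched_names, is_targeted)  # {'0'} True
```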
@@ -322,38 +254,13 @@ def find_name_or_class_matches(
     2. matches on regex patterns
     3. matches on module names
     """
-
-
-
-
-
-    targets = sorted(targets, key=lambda x: ("re:" in x, x))
-    if isinstance(targets, Iterable):
-        matches = _find_matches(name, targets) + _find_matches(
-            module.__class__.__name__, targets, check_contains
+    if check_contains:
+        raise NotImplementedError(
+            "This function is deprecated, and the check_contains=True option has been"
+            " removed."
         )
-        matches = [match for match in matches if match is not None]
-        return matches
 
-
-def _find_matches(
-    value: str, targets: Iterable[str], check_contains: bool = False
-) -> List[str]:
-    # returns all the targets that match value either
-    # exactly or as a regex after 're:'. if check_contains is set to True,
-    # additionally checks if the target string is contained with value.
-    matches = []
-    for target in targets:
-        if target.startswith("re:"):
-            pattern = target[3:]
-            if re.match(pattern, value):
-                matches.append(target)
-        elif check_contains:
-            if target.lower() in value.lower():
-                matches.append(target)
-        elif target == value:
-            matches.append(target)
-    return matches
+    return match_targets(name, module, targets)
 
 
 def _infer_status(model: Module) -> Optional[QuantizationStatus]:
@@ -429,7 +336,6 @@ def _scheme_from_targets(
 def _merge_schemes(
     schemes_to_merge: List[QuantizationScheme], name: str
 ) -> QuantizationScheme:
-
     kv_cache_quantization_scheme = [
         scheme for scheme in schemes_to_merge if is_kv_cache_quant_scheme(scheme)
     ]
compressed_tensors/quantization/lifecycle/forward.py
CHANGED

@@ -205,7 +205,8 @@ def _process_quantization(
     q_min, q_max = calculate_range(args, x.device)
     group_size = args.group_size
 
-    # blockwise FP8: quantize per 2D block, supports block_structure for static block quantization
+    # blockwise FP8: quantize per 2D block, supports block_structure for static block
+    # quantization
     if args.strategy == QuantizationStrategy.BLOCK:
         original_shape = x.shape
         rows, cols = x.shape[-2], x.shape[-1]
@@ -214,8 +215,8 @@ def _process_quantization(
         # Ensure exact division (tensor dimensions must be divisible by block size)
         if rows % block_height != 0:
             raise ValueError(
-                f"Tensor height {rows} is not divisible by block_height {block_height}.
-                f"Block quantization requires exact division."
+                f"Tensor height {rows} is not divisible by block_height {block_height}."
+                f" Block quantization requires exact division."
             )
         if cols % block_width != 0:
             raise ValueError(
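A standalone arithmetic illustration of the exact-division rule enforced above (not library code):

```python
# Block quantization requires tensor dims to divide evenly by the block size.
rows, cols = 4096, 4096
block_height, block_width = 128, 128
assert rows % block_height == 0 and cols % block_width == 0  # 32 x 32 full blocks

rows = 4000  # 4000 = 31 * 128 + 32, so the last row-block would be partial
if rows % block_height != 0:
    # this is the condition under which the library raises ValueError
    print(f"{rows} % {block_height} = {rows % block_height}: not divisible")
```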
@@ -295,7 +296,7 @@ def _process_quantization(
             perm = torch.argsort(g_idx)
             x = safe_permute(x, perm, dim=1)
 
-        # Maintain all dimensions
+        # Maintain all dimensions except the last dim, which is divided by group_size
         reshaped_dims = (
             ceil(x.shape[-1] / group_size),
             group_size,
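A standalone sketch of the reshape that the clarified comment describes (not library code):

```python
# The last dim is split into (num_groups, group_size) so that per-group
# scales can broadcast across each group.
from math import ceil

import torch

x = torch.randn(4, 256)
group_size = 128
reshaped_dims = (ceil(x.shape[-1] / group_size), group_size)
x_grouped = x.reshape(x.shape[:-1] + reshaped_dims)
print(x_grouped.shape)  # torch.Size([4, 2, 128])
```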
compressed_tensors/quantization/lifecycle/initialize.py
CHANGED

@@ -17,7 +17,7 @@ import logging
 import math
 import warnings
 from enum import Enum
-from typing import
+from typing import Optional
 
 import torch
 from compressed_tensors.quantization.lifecycle.forward import (
@@ -87,7 +87,6 @@ def initialize_module_for_quantization(
         _initialize_attn_scales(module)
 
     else:
-
         if scheme.input_activations is not None:
             _initialize_scale_zero_point(
                 module,
@@ -183,7 +182,8 @@ def _initialize_scale_zero_point(
         num_groups = math.ceil(weight_shape[1] / quantization_args.group_size)
         expected_shape = (weight_shape[0], max(num_groups, 1))
     elif quantization_args.strategy == QuantizationStrategy.BLOCK:
-        # For block quantization, scale shape should match number of blocks - only for weights
+        # For block quantization, scale shape should match number of blocks - only
+        # for weights
         if quantization_args.block_structure is None:
             raise ValueError(
                 "Block quantization requires block_structure to be specified"
@@ -196,9 +196,10 @@ def _initialize_scale_zero_point(
         # Warn if dimensions don't divide evenly
         if rows % block_height != 0 or cols % block_width != 0:
             warnings.warn(
-                f"Block quantization: tensor shape {weight_shape} does not divide "
-                f"by block structure {quantization_args.block_structure}. "
-                f"Some blocks will be incomplete which may affect quantization quality.",
+                f"Block quantization: tensor shape {weight_shape} does not divide"
+                f"evenly by block structure {quantization_args.block_structure}. "
+                f"Some blocks will be incomplete which may affect quantization"
+                "quality.",
                 UserWarning,
             )
 
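A standalone sketch of the block-wise scale shape this section initializes, assuming ceiling division when blocks are incomplete (not library code):

```python
# One scale per (block_height x block_width) tile of the weight.
from math import ceil

weight_shape = (4096, 4096)
block_height, block_width = 128, 128
expected_shape = (
    ceil(weight_shape[0] / block_height),
    ceil(weight_shape[1] / block_width),
)
print(expected_shape)  # (32, 32)
```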
compressed_tensors/quantization/quant_args.py
CHANGED

@@ -217,16 +217,18 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
                 return [int(x) for x in value.split("x")]
             except Exception:
                 raise ValueError(
-                    f"Invalid block_structure '{value}'. Must be a list of ints [rows, cols]."
+                    f"Invalid block_structure '{value}'. Must be a list of ints "
+                    "[rows, cols]."
                 )
         if isinstance(value, (list, tuple)):
             if len(value) != 2 or not all(isinstance(v, int) for v in value):
                 raise ValueError(
-                    f"Invalid block_structure '{value}'. Must be a list of ints [rows, cols]."
+                    f"Invalid block_structure '{value}'. Must be a list of ints "
+                    "[rows, cols]."
                 )
             return list(value)
         raise ValueError(
-            f"Invalid block_structure '{value}'. Must be a list of
+            f"Invalid block_structure '{value}'. Must be a list of ints [rows, cols]."
         )
 
     @field_validator("strategy", mode="before")
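A hedged sketch of the two `block_structure` forms the validator accepts, assuming this wheel is installed:

```python
from compressed_tensors.quantization import QuantizationArgs

# "RxC" strings are split on "x" and parsed to ints
args = QuantizationArgs(strategy="block", block_structure="128x128")
print(args.block_structure)  # [128, 128]

# two-int lists/tuples pass through as a list
args = QuantizationArgs(strategy="block", block_structure=[64, 64])
print(args.block_structure)  # [64, 64]

# anything else raises ValueError("Invalid block_structure ...")
```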
@@ -307,7 +309,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
         )
         if strategy not in supported_strategies:
             raise ValueError(
-                f"One of {supported_strategies} must be used for dynamic
+                f"One of {supported_strategies} must be used for dynamic quant."
             )
 
         if (
@@ -322,7 +324,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
             observer != "memoryless"
         ):  # avoid annoying users with old configs
             warnings.warn(
-                "No observer is used for dynamic
+                "No observer is used for dynamic quant., setting to None"
             )
             observer = None
         else:
compressed_tensors/quantization/quant_scheme.py
CHANGED

@@ -81,9 +81,10 @@ class QuantizationScheme(BaseModel):
         ):
             warnings.warn(
                 "Using GROUP strategy for both weights and input_activations "
-                f"with different group sizes ({weights.group_size} vs {inputs.group_size}) "
-                "may complicate fused kernel implementations. Consider using "
-                "TENSOR_GROUP strategy for both or matching group sizes.",
+                f"with different group sizes ({weights.group_size} vs "
+                f"{inputs.group_size}) may complicate fused kernel implementations. "
+                "Consider using TENSOR_GROUP strategy for both or matching group"
+                " sizes.",
                 UserWarning,
                 stacklevel=2,
             )
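A hedged sketch of a scheme that should trigger the reworded warning, assuming this wheel is installed:

```python
from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme

# GROUP on both weights and input activations with mismatched group sizes
scheme = QuantizationScheme(
    targets=["Linear"],
    weights=QuantizationArgs(strategy="group", group_size=128),
    input_activations=QuantizationArgs(strategy="group", group_size=64),
)
# UserWarning: Using GROUP strategy for both weights and input_activations
# with different group sizes (128 vs 64) ...
```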
compressed_tensors/transform/utils/matrix.py
CHANGED

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import
+from typing import Optional
 
 import torch
 from compressed_tensors.transform import TransformLocation
compressed_tensors/utils/match.py
CHANGED

@@ -27,6 +27,7 @@ _LOGGER: logging.Logger = logging.getLogger(__name__)
 __all__ = [
     "match_named_modules",
     "match_named_parameters",
+    "match_targets",
     "match_modules_set",
     "is_match",
 ]
@@ -37,8 +38,8 @@ FusedMappping = Mapping[str, Iterable[str]]
 
 def match_named_modules(
     model: torch.nn.Module,
-    targets: Iterable[str],
-    ignore: Iterable[str] =
+    targets: Optional[Iterable[str]],
+    ignore: Optional[Iterable[str]] = None,
     fused: Optional[FusedMappping] = None,
     warn_on_fail: bool = False,
 ) -> Generator[Tuple[str, torch.nn.Module]]:
@@ -54,14 +55,18 @@ def match_named_modules(
     :param warn_on_fail: if True, warns if any targets do not match any modules in model
     :return: generator of module names and modules
     """
+    targets = targets or []
+    ignore = ignore or []
+
     unmatched_targets = set(targets)
+
     for name, module in model.named_modules():
         for target in targets:
             if is_match(name, module, target, fused=fused):
                 unmatched_targets -= {target}
-
                 if not is_match(name, module, ignore, fused=fused):
                     yield name, module
+                break
 
     if warn_on_fail:
         for target in unmatched_targets:
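A usage sketch of the relaxed signature and the new `break`, assuming this wheel is installed: `ignore` may now be `None`, and each module is yielded at most once even when several targets match it.

```python
import torch
from compressed_tensors.utils.match import match_named_modules

model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Linear(8, 4))
# "Linear" (class match) and "re:.*" (regex name match) both hit each layer;
# the added break ensures each module is still yielded only once
for name, module in match_named_modules(model, ["Linear", "re:.*"], ignore=None):
    print(repr(name), type(module).__name__)
# '' Sequential (the regex matches the root name), '0' Linear, '1' Linear
```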
@@ -72,8 +77,8 @@ def match_named_modules(
 
 def match_named_parameters(
     model: torch.nn.Module,
-    targets: Iterable[str],
-    ignore: Iterable[str] =
+    targets: Optional[Iterable[str]],
+    ignore: Optional[Iterable[str]] = None,
     fused: Optional[FusedMappping] = None,
     warn_on_fail: bool = False,
 ) -> Generator[Tuple[str, torch.nn.Module, torch.nn.Parameter]]:
@@ -89,6 +94,9 @@ def match_named_parameters(
     :param warn_on_fail: if True, warns if any targets do not match any params in model
     :return: generator of fully-qualified param names, parent modules, and params
     """
+    targets = targets or []
+    ignore = ignore or []
+
     unmatched_targets = set(targets)
     for module_name, module in model.named_modules():
         if isinstance(module, InternalModule):
@@ -110,16 +118,54 @@ def match_named_parameters(
             )
 
 
+def match_targets(
+    name: str, module: torch.nn.Module, targets: Optional[Iterable[str]]
+) -> List[str]:
+    """
+    Returns the targets that match the given name and module.
+
+    :param name: the name of the module
+    :param module: the module to match
+    :param targets: the target strings, potentially containing "re:" prefixes
+    :return: the targets that match the given name and module
+
+    Outputs are ordered by type: exact name match, regex name match, class name match
+    """
+    targets = targets or []
+
+    if isinstance(module, InternalModule):
+        return []
+
+    # The order of the output `matches` list matters, the are arranged from most
+    # specific to least specific, and this order will be used when merging configs.
+    # The entries are sorted in the following order:
+    # 1. matches on exact strings
+    # 2. matches on regex patterns
+    # 3. matches on module names
+
+    targets = sorted(targets, key=lambda x: ("re:" in x, x))
+    matched_targets = []
+    for target in targets:
+        if _match_name(name, target):
+            matched_targets.append(target)
+
+    for target in targets:
+        if _match_class(module, target) and target not in matched_targets:
+            matched_targets.append(target)
+
+    return matched_targets
+
+
 def match_modules_set(
     model: torch.nn.Module,
-    targets: Iterable[str],
-    ignore: Iterable[str] =
+    targets: Optional[Iterable[str]],
+    ignore: Optional[Iterable[str]] = None,
 ) -> Generator[Iterable[torch.nn.Module]]:
     """
     Yields modules grouped with the same order and size as `targets`.
     Values are returned in order of `model.named_modules()`
 
-
+    E.g. the following targets would yield module belonging to the following layers:
     ```python3
     match_modules_set(model, ["q_proj", "k_proj", "v_proj"]) == (
         (
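A hedged sketch of the ordering contract stated in the new docstring, assuming this wheel is installed: exact name matches come first, then regex name matches, then class-name matches.

```python
import torch
from compressed_tensors.utils.match import match_targets

module = torch.nn.Linear(8, 8)
targets = ["Linear", "re:.*proj.*", "model.q_proj"]
print(match_targets("model.q_proj", module, targets))
# ['model.q_proj', 're:.*proj.*', 'Linear']
```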
@@ -151,6 +197,9 @@ def match_modules_set(
     :param targets: target strings, potentially containing "re:" prefixes
     :param ignore: targets to ignore, potentially containing "re:" prefixes
     """
+    targets = targets or []
+    ignore = ignore or []
+
     matches = dict.fromkeys(targets, None)
     for name, module in model.named_modules():
         # match until we get a full set
compressed_tensors/version.py
CHANGED

{compressed_tensors-0.11.1a20250820.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.11.1a20250820
+Version: 0.11.1a20250828
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.