compressed-tensors 0.11.1a20250819__py3-none-any.whl → 0.11.1a20250821__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. compressed_tensors/compressors/model_compressors/model_compressor.py +172 -153
  2. compressed_tensors/compressors/quantized_compressors/base.py +2 -2
  3. compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +4 -5
  4. compressed_tensors/compressors/quantized_compressors/pack_quantized.py +4 -3
  5. compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +1 -1
  6. compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +1 -1
  7. compressed_tensors/quantization/lifecycle/apply.py +40 -129
  8. compressed_tensors/quantization/lifecycle/forward.py +5 -4
  9. compressed_tensors/quantization/lifecycle/initialize.py +7 -6
  10. compressed_tensors/quantization/quant_args.py +7 -5
  11. compressed_tensors/quantization/quant_scheme.py +4 -3
  12. compressed_tensors/quantization/utils/helpers.py +0 -1
  13. compressed_tensors/registry/registry.py +1 -1
  14. compressed_tensors/transform/transform_config.py +1 -1
  15. compressed_tensors/transform/utils/matrix.py +1 -1
  16. compressed_tensors/utils/match.py +57 -8
  17. compressed_tensors/utils/offload.py +0 -1
  18. compressed_tensors/utils/safetensors_load.py +0 -1
  19. compressed_tensors/version.py +1 -1
  20. {compressed_tensors-0.11.1a20250819.dist-info → compressed_tensors-0.11.1a20250821.dist-info}/METADATA +1 -1
  21. {compressed_tensors-0.11.1a20250819.dist-info → compressed_tensors-0.11.1a20250821.dist-info}/RECORD +24 -24
  22. {compressed_tensors-0.11.1a20250819.dist-info → compressed_tensors-0.11.1a20250821.dist-info}/WHEEL +0 -0
  23. {compressed_tensors-0.11.1a20250819.dist-info → compressed_tensors-0.11.1a20250821.dist-info}/licenses/LICENSE +0 -0
  24. {compressed_tensors-0.11.1a20250819.dist-info → compressed_tensors-0.11.1a20250821.dist-info}/top_level.txt +0 -0
compressed_tensors/compressors/model_compressors/model_compressor.py

@@ -42,8 +42,6 @@ from compressed_tensors.quantization import (
     apply_quantization_config,
     load_pretrained_quantization_parameters,
 )
-from compressed_tensors.quantization.lifecycle import expand_target_names
-from compressed_tensors.quantization.utils import is_module_quantized
 from compressed_tensors.transform import TransformConfig
 from compressed_tensors.utils import (
     align_module_device,

@@ -60,6 +58,7 @@ from compressed_tensors.utils.helpers import (
     fix_fsdp_module_name,
     is_compressed_tensors_config,
 )
+from compressed_tensors.utils.match import match_named_modules
 from torch import Tensor
 from torch.nn import Module
 from tqdm import tqdm

@@ -309,7 +308,7 @@ class ModelCompressor:
         if quantization_config is not None:
             # If a list of compression_format is not provided, we resolve the
             # relevant quantization formats using the config groups from the config
-            # and if those are not defined, we fall-back to the global quantization format
+            # and if those are not defined, we fall-back to the global quantization fmt
             if not self.compression_formats:
                 self.compression_formats = self._fetch_unique_quantization_formats()

@@ -342,13 +341,15 @@ class ModelCompressor:
             self.sparsity_compressor
             and self.sparsity_config.format != CompressionFormat.dense.value
         ):
-            sparse_targets = expand_target_names(
+            sparse_targets = match_named_modules(
                 model=model,
                 targets=self.sparsity_config.targets,
                 ignore=self.sparsity_config.ignore,
             )
+
             missing_keys.update(
-                merge_names(target, "weight") for target in sparse_targets
+                merge_names(target_name, "weight")
+                for target_name, _module in sparse_targets
             )
 
         # Determine missing keys due to pack quantization

@@ -358,13 +359,14 @@ class ModelCompressor:
             == CompressionFormat.pack_quantized.value
         ):
             for scheme in self.quantization_config.config_groups.values():
-                quant_targets = expand_target_names(
+                quant_targets = match_named_modules(
                     model=model,
                     targets=scheme.targets,
                     ignore=self.quantization_config.ignore,
                 )
                 missing_keys.update(
-                    merge_names(target, "weight") for target in quant_targets
+                    merge_names(target_name, "weight")
+                    for target_name, _module in quant_targets
                 )
 
         return list(missing_keys)

@@ -395,29 +397,29 @@ class ModelCompressor:
             self.sparsity_compressor
             and self.sparsity_config.format != CompressionFormat.dense.value
         ):
-            sparse_targets: Set[str] = expand_target_names(
+            sparse_targets = match_named_modules(
                 model=model,
                 targets=self.sparsity_config.targets,
                 ignore=self.sparsity_config.ignore,
             )
             unexpected_keys.update(
-                merge_names(target, param)
-                for target in sparse_targets
+                merge_names(target_name, param)
+                for target_name, _module in sparse_targets
                 for param in self.sparsity_compressor.compression_param_names
             )
 
         # Identify unexpected keys from quantization compression
         if self.quantization_compressor:
             for scheme in self.quantization_config.config_groups.values():
-                quant_targets: Set[str] = expand_target_names(
+                quant_targets = match_named_modules(
                     model=model,
                     targets=scheme.targets,
                     ignore=self.quantization_config.ignore,
                 )
                 for quant_compressor in self.quantization_compressor.values():
                     unexpected_keys.update(
-                        merge_names(target, param)
-                        for target in quant_targets
+                        merge_names(target_name, param)
+                        for target_name, _module in quant_targets
                         for param in quant_compressor.compression_param_names
                         if param != "weight"
                     )

@@ -434,73 +436,79 @@ class ModelCompressor:
         :param model: model containing parameters to compress
         """
         module_to_scheme = map_module_to_scheme(model)
-        sparse_compression_targets: Set[str] = expand_target_names(
-            model=model,
-            targets=self.sparsity_config.targets if self.sparsity_config else [],
-            ignore=self.sparsity_config.ignore if self.sparsity_config else [],
-        )
-
-        for prefix, module in tqdm(model.named_modules(), desc="Compressing model"):
-
-            if prefix in module_to_scheme or prefix in sparse_compression_targets:
-                module_device = get_execution_device(module)
-                is_meta = module_device.type == "meta"
-
-                exec_device = "meta" if is_meta else "cpu"
-                onloading_device = "meta" if is_meta else module_device
-
-                # in the future, support compression on same device
-                with align_module_device(module, execution_device=exec_device):
-                    state_dict = {
-                        f"{prefix}.{name}": param
-                        for name, param in module.named_parameters(recurse=False)
-                    }
-
-                    # quantization first
-                    if prefix in module_to_scheme:
-                        if (
-                            not hasattr(module.quantization_scheme, "format")
-                            or module.quantization_scheme.format is None
-                        ):
-                            if len(self.compression_formats) > 1:
-                                raise ValueError(
-                                    "Applying multiple compressors without defining "
-                                    "per module formats is not supported "
-                                )
-                            format = self.compression_formats[0]
-                        else:
-                            format = module.quantization_scheme.format
-
-                        quant_compressor = self.quantization_compressor.get(format)
-                        state_dict = quant_compressor.compress(
-                            state_dict,
-                            names_to_scheme=module_to_scheme,
-                            show_progress=False,
-                            compression_device=exec_device,
-                        )
-
-                    # sparsity second
-                    if prefix in sparse_compression_targets:
-                        state_dict = self.sparsity_compressor.compress(
-                            state_dict,
-                            compression_targets=sparse_compression_targets,
-                            show_progress=False,
-                        )
+        sparse_compression_targets = [
+            module_name
+            for module_name, _module in match_named_modules(
+                model=model,
+                targets=self.sparsity_config.targets if self.sparsity_config else [],
+                ignore=self.sparsity_config.ignore if self.sparsity_config else [],
+            )
+        ]
+        for prefix, module in tqdm(
+            match_named_modules(
+                model,
+                [*sparse_compression_targets, *module_to_scheme.keys()],
+                warn_on_fail=True,
+            ),
+            desc="Compressing model",
+        ):
+            module_device = get_execution_device(module)
+            is_meta = module_device.type == "meta"
+
+            exec_device = "meta" if is_meta else "cpu"
+            onloading_device = "meta" if is_meta else module_device
+
+            # in the future, support compression on same device
+            with align_module_device(module, execution_device=exec_device):
+                state_dict = {
+                    f"{prefix}.{name}": param
+                    for name, param in module.named_parameters(recurse=False)
+                }
+
+                # quantization first
+                if prefix in module_to_scheme:
+                    if (
+                        not hasattr(module.quantization_scheme, "format")
+                        or module.quantization_scheme.format is None
+                    ):
+                        if len(self.compression_formats) > 1:
+                            raise ValueError(
+                                "Applying multiple compressors without defining "
+                                "per module formats is not supported "
+                            )
+                        format = self.compression_formats[0]
+                    else:
+                        format = module.quantization_scheme.format
+
+                    quant_compressor = self.quantization_compressor.get(format)
+                    state_dict = quant_compressor.compress(
+                        state_dict,
+                        names_to_scheme=module_to_scheme,
+                        show_progress=False,
+                        compression_device=exec_device,
+                    )
 
-                # remove any existing parameters
-                offload_device = get_offloaded_device(module)
-                for name, _ in list(module.named_parameters(recurse=False)):
-                    delete_offload_parameter(module, name)
+                # sparsity second
+                if prefix in sparse_compression_targets:
+                    state_dict = self.sparsity_compressor.compress(
+                        state_dict,
+                        compression_targets=sparse_compression_targets,
+                        show_progress=False,
+                    )
 
-                # replace with compressed parameters
-                for name, value in state_dict.items():
-                    name = name.removeprefix(f"{prefix}.")
-                    value = value.to(onloading_device)
-                    param = torch.nn.Parameter(value, requires_grad=False)
-                    register_offload_parameter(module, name, param, offload_device)
+            # remove any existing parameters
+            offload_device = get_offloaded_device(module)
+            for name, _ in list(module.named_parameters(recurse=False)):
+                delete_offload_parameter(module, name)
 
-                module.quantization_status = QuantizationStatus.COMPRESSED
+            # replace with compressed parameters
+            for name, value in state_dict.items():
+                name = name.removeprefix(f"{prefix}.")
+                value = value.to(onloading_device)
+                param = torch.nn.Parameter(value, requires_grad=False)
+                register_offload_parameter(module, name, param, offload_device)
 
+            module.quantization_status = QuantizationStatus.COMPRESSED
         # TODO: consider sparse compression to also be compression
         if (
             self.quantization_config is not None

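The pattern above is the heart of this release: the removed expand_target_names helper returned a set of module names, while its replacement, match_named_modules, yields (name, module) pairs, which is why call sites now unpack target_name, _module or collect names explicitly. A minimal sketch of the new call shape, using a hypothetical toy module (not part of the package) purely for illustration:

    import torch
    from compressed_tensors.utils.match import match_named_modules

    # hypothetical two-layer model, used only to illustrate the call shape
    class Toy(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.q_proj = torch.nn.Linear(8, 8)
            self.k_proj = torch.nn.Linear(8, 8)

    model = Toy()

    # old (removed): expand_target_names(model, targets, ignore) -> Set[str]
    # new: match_named_modules yields (name, module) pairs lazily
    names = [
        name
        for name, _module in match_named_modules(
            model, targets=["re:.*_proj"], ignore=["k_proj"]
        )
    ]
    assert names == ["q_proj"]
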
@@ -516,67 +524,75 @@ class ModelCompressor:
         :param model: model containing parameters to compress
         """
         module_to_scheme = map_module_to_scheme(model)
-        sparse_compression_targets: Set[str] = expand_target_names(
-            model=model,
-            targets=self.sparsity_config.targets if self.sparsity_config else [],
-            ignore=self.sparsity_config.ignore if self.sparsity_config else [],
-        )
-
-        for prefix, module in tqdm(model.named_modules(), desc="Decompressing model"):
-            if prefix in module_to_scheme or prefix in sparse_compression_targets:
-                # in the future, support decompression on same device
-                with align_module_device(module, execution_device="cpu"):
-                    state_dict = {
-                        f"{prefix}.{name}": param
-                        for name, param in module.named_parameters(recurse=False)
-                    }
-
-                    # sparsity first
-                    if prefix in sparse_compression_targets:
-                        # sparse_compression_targets are automatically inferred by this fn
-                        generator = self.sparsity_compressor.decompress_from_state_dict(
-                            state_dict,
-                        )
-                        # generates (param_path, param_val)
-                        # of compressed and unused params
-                        state_dict = {key: value for key, value in generator}
-
-                    # quantization second
-                    if prefix in module_to_scheme:
-
-                        if (
-                            not hasattr(module.quantization_scheme, "format")
-                            or module.quantization_scheme.format is None
-                        ):
-                            if len(self.compression_formats) > 1:
-                                raise ValueError(
-                                    "Applying multiple compressors without defining "
-                                    "per module formats is not supported "
-                                )
-                            format = self.compression_formats[0]
-                        else:
-                            format = module.quantization_scheme.format
-                        quant_compressor = self.quantization_compressor.get(format)
-                        state_dict = quant_compressor.decompress_module_from_state_dict(
-                            prefix,
-                            state_dict,
-                            scheme=module_to_scheme[prefix],
-                        )
+        sparse_compression_targets = [
+            module_name
+            for module_name, _module in match_named_modules(
+                model=model,
+                targets=self.sparsity_config.targets if self.sparsity_config else [],
+                ignore=self.sparsity_config.ignore if self.sparsity_config else [],
+            )
+        ]
+
+        for prefix, module in tqdm(
+            match_named_modules(
+                model,
+                [*sparse_compression_targets, *module_to_scheme.keys()],
+                warn_on_fail=True,
+            ),
+            desc="Decompressing model",
+        ):
+            # in the future, support decompression on same device
+            with align_module_device(module, execution_device="cpu"):
+                state_dict = {
+                    f"{prefix}.{name}": param
+                    for name, param in module.named_parameters(recurse=False)
+                }
+
+                # sparsity first
+                if prefix in sparse_compression_targets:
+                    # sparse_compression_targets are automatically inferred by this fn
+                    generator = self.sparsity_compressor.decompress_from_state_dict(
+                        state_dict,
+                    )
+                    # generates (param_path, param_val)
+                    # of compressed and unused params
+                    state_dict = {key: value for key, value in generator}
+
+                # quantization second
+                if prefix in module_to_scheme:
+                    if (
+                        not hasattr(module.quantization_scheme, "format")
+                        or module.quantization_scheme.format is None
+                    ):
+                        if len(self.compression_formats) > 1:
+                            raise ValueError(
+                                "Applying multiple compressors without defining "
+                                "per module formats is not supported "
+                            )
+                        format = self.compression_formats[0]
+                    else:
+                        format = module.quantization_scheme.format
+                    quant_compressor = self.quantization_compressor.get(format)
+                    state_dict = quant_compressor.decompress_module_from_state_dict(
+                        prefix,
+                        state_dict,
+                        scheme=module_to_scheme[prefix],
+                    )
 
-                # remove any existing parameters
-                exec_device = get_execution_device(module)
-                offload_device = get_offloaded_device(module)
-                for name, _ in list(module.named_parameters(recurse=False)):
-                    delete_offload_parameter(module, name)
+            # remove any existing parameters
+            exec_device = get_execution_device(module)
+            offload_device = get_offloaded_device(module)
+            for name, _ in list(module.named_parameters(recurse=False)):
+                delete_offload_parameter(module, name)
 
-                # replace with decompressed parameters
-                for name, value in state_dict.items():
-                    name = name.removeprefix(f"{prefix}.")
-                    value = value.to(exec_device)
-                    param = torch.nn.Parameter(value, requires_grad=False)
-                    register_offload_parameter(module, name, param, offload_device)
+            # replace with decompressed parameters
+            for name, value in state_dict.items():
+                name = name.removeprefix(f"{prefix}.")
+                value = value.to(exec_device)
+                param = torch.nn.Parameter(value, requires_grad=False)
+                register_offload_parameter(module, name, param, offload_device)
 
-                module.quantization_status = QuantizationStatus.FROZEN
+            module.quantization_status = QuantizationStatus.FROZEN
 
     # ----- state dict compression pathways ----- #
 

@@ -614,11 +630,14 @@ class ModelCompressor:
         )
 
         if self.sparsity_compressor is not None:
-            sparse_compression_targets: Set[str] = expand_target_names(
-                model=model,
-                targets=self.sparsity_config.targets,
-                ignore=self.sparsity_config.ignore,
-            )
+            sparse_compression_targets: Set[str] = {
+                module_name
+                for module_name, _module in match_named_modules(
+                    model=model,
+                    targets=self.sparsity_config.targets,
+                    ignore=self.sparsity_config.ignore,
+                )
+            }
             state_dict = self.sparsity_compressor.compress(
                 state_dict,
                 compression_targets=sparse_compression_targets,

@@ -641,11 +660,12 @@ class ModelCompressor:
         :param model_path: path to compressed weights
         :param model: pytorch model to load decompressed weights into
 
-        Note: decompress makes use of both _replace_sparsity_weights and _replace_weights
-        The variations in these methods are a result of the subtle variations between the sparsity
-        and quantization compressors. Specifically, quantization compressors return not just the
-        decompressed weight, but the quantization parameters (e.g scales, zero_point) whereas sparsity
-        compressors only return the decompressed weight.
+        Note: decompress makes use of both _replace_sparsity_weights and
+        _replace_weights. The variations in these methods are a result of the subtle
+        variations between the sparsity and quantization compressors. Specifically,
+        quantization compressors return not just the decompressed weight, but the
+        quantization parameters (e.g scales, zero_point) whereas sparsity compressors
+        only return the decompressed weight.
 
         """
         model_path = get_safetensors_folder(model_path)

@@ -683,18 +703,17 @@ class ModelCompressor:
             with override_quantization_status(
                 self.quantization_config, QuantizationStatus.FROZEN
             ):
-
                 names_to_scheme = apply_quantization_config(
                     model, self.quantization_config
                 )
                 # Load activation scales/zp or any other quantization parameters
-                # Conditionally load the weight quantization parameters if we have a dense compressor
-                # Or if a sparsity compressor has already been applied
+                # Conditionally load the weight quantization parameters if we have a
+                # dense compressor or if a sparsity compressor has already been applied
                 load_pretrained_quantization_parameters(
                     model,
                     model_path,
-                    # TODO: all weight quantization params will be moved to the compressor in a follow-up
-                    # including initialization
+                    # TODO: all weight quantization params will be moved to the
+                    # compressor in a follow-up including initialization
                     load_weight_quantization=(
                         sparse_decompressed
                         or isinstance(quant_compressor, DenseCompressor)

@@ -786,7 +805,6 @@ class ModelCompressor:
         :param model: The model whose weights are to be updated.
         """
         for name, data in tqdm(dense_weight_generator, desc="Decompressing model"):
-
             split_name = name.split(".")
             prefix, param_name = ".".join(split_name[:-1]), split_name[-1]
             module = operator.attrgetter(prefix)(model)

@@ -822,9 +840,10 @@ class ModelCompressor:
             for param_name, param_data in data.items():
                 if hasattr(module, param_name):
                     # If compressed, will have an incorrect dtype for transformers >4.49
-                    # TODO: we can also just skip initialization of scales/zp if in decompression in init
-                    # to be consistent with loading which happens later as well
-                    # however, update_data does a good shape check - should be moved to the compressor
+                    # TODO: we can also just skip initialization of scales/zp if in
+                    # decompression in init to be consistent with loading which happens
+                    # later as well however, update_data does a good shape check -
+                    # should be moved to the compressor
                     if param_name == "weight":
                         delattr(module, param_name)
                         requires_grad = param_data.dtype in (

compressed_tensors/compressors/quantized_compressors/base.py

@@ -24,7 +24,6 @@ from compressed_tensors.utils import (
     get_nested_weight_mappings,
     merge_names,
 )
-from compressed_tensors.utils.safetensors_load import match_param_name
 from safetensors import safe_open
 from torch import Tensor
 from tqdm import tqdm

@@ -107,7 +106,8 @@ class BaseQuantizationCompressor(BaseCompressor):
                 compressed_dict[name] = value.to(compression_device)
                 continue
 
-            # compress values on meta if loading from meta otherwise on cpu (memory movement too expensive)
+            # compress values on meta if loading from meta otherwise on cpu (memory
+            # movement too expensive)
             module_path = prefix[:-1] if prefix.endswith(".") else prefix
             quant_args = names_to_scheme[module_path].weights
             compressed_values = self.compress_weight(

compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py

@@ -15,7 +15,6 @@
 
 from typing import Dict, Optional, Tuple
 
-import numpy
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
 from compressed_tensors.compressors.quantized_compressors.base import (

@@ -92,7 +91,6 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
         zero_point: Optional[torch.Tensor] = None,
         g_idx: Optional[torch.Tensor] = None,
     ) -> Dict[str, torch.Tensor]:
-
         quantized_weight = quantize(
             x=weight,
             scale=scale,

@@ -112,7 +110,6 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
         compressed_data: Dict[str, Tensor],
         quantization_args: Optional[QuantizationArgs] = None,
     ) -> torch.Tensor:
-
         weight = compressed_data["weight_packed"]
         scale = compressed_data["weight_scale"]
         global_scale = compressed_data["weight_global_scale"]

@@ -175,14 +172,16 @@ kE2M1ToFloat = torch.tensor(
     [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0], dtype=torch.float32
 )
 
+
 # reference: : https://github.com/vllm-project/vllm/pull/16362
 def unpack_fp4_from_uint8(
     a: torch.Tensor, m: int, n: int, dtype: Optional[torch.dtype] = torch.bfloat16
 ) -> torch.Tensor:
     """
     Unpacks uint8 values into fp4. Each uint8 consists of two fp4 values
-    (i.e. first four bits correspond to one fp4 value, last four corresond to a consecutive
-    fp4 value). The bits represent an index, which are mapped to an fp4 value.
+    (i.e. first four bits correspond to one fp4 value, last four correspond to a
+    consecutive fp4 value). The bits represent an index, which are mapped to an fp4
+    value.
 
     :param a: tensor to unpack
     :param m: original dim 0 size of the unpacked tensor

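For context on the corrected docstring: each packed uint8 stores two 4-bit E2M1 values, and the low three bits of each nibble index the kE2M1ToFloat table shown above, with the remaining bit carrying the sign. A standalone sketch of the nibble layout, independent of the package's actual unpacking code (which nibble counts as "first" follows the vLLM reference linked in the diff):

    import torch

    # same lookup table as in the diff above: the 8 non-negative E2M1 magnitudes
    kE2M1ToFloat = torch.tensor(
        [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0], dtype=torch.float32
    )

    packed = torch.tensor([0x21], dtype=torch.uint8)  # holds indices 0x1 and 0x2
    low = packed & 0x0F           # one fp4 value lives in the low nibble
    high = (packed >> 4) & 0x0F   # its neighbor lives in the high nibble
    print(kE2M1ToFloat[(low & 0x07).long()])   # tensor([0.5000])
    print(kE2M1ToFloat[(high & 0x07).long()])  # tensor([1.0000])
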
compressed_tensors/compressors/quantized_compressors/pack_quantized.py

@@ -14,7 +14,6 @@
 import math
 from typing import Dict, Literal, Optional, Tuple, Union
 
-import numpy as np
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
 from compressed_tensors.compressors.quantized_compressors.base import (

@@ -135,7 +134,8 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
         compressed_dict["weight_shape"] = weight_shape
         compressed_dict["weight_packed"] = packed_weight
 
-        # We typically don't compress zp; apart from when using the packed_compressor and when storing group/channel zp
+        # We typically don't compress zp; apart from when using the packed_compressor
+        # and when storing group/channel zp
         if not quantization_args.symmetric and quantization_args.strategy in [
             QuantizationStrategy.GROUP.value,
             QuantizationStrategy.CHANNEL.value,

@@ -166,7 +166,8 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
         num_bits = quantization_args.num_bits
         unpacked = unpack_from_int32(weight, num_bits, original_shape)
 
-        # NOTE: this will fail decompression as we don't currently handle packed zp on decompression
+        # NOTE: this will fail decompression as we don't currently handle packed zp on
+        # decompression
         if not quantization_args.symmetric and quantization_args.strategy in [
             QuantizationStrategy.GROUP.value,
             QuantizationStrategy.CHANNEL.value,

compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from dataclasses import dataclass
-from typing import Dict, Generator, List, Tuple, Union
+from typing import Dict, List, Tuple, Union
 
 import torch
 from compressed_tensors.compressors.base import BaseCompressor

compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py

@@ -48,7 +48,7 @@ class Marlin24Compressor(BaseCompressor):
 
     @staticmethod
     def validate_quant_compatability(
-        names_to_scheme: Dict[str, QuantizationScheme]
+        names_to_scheme: Dict[str, QuantizationScheme],
     ) -> bool:
         """
         Checks if every quantized module in the model is compatible with Marlin24

compressed_tensors/quantization/lifecycle/apply.py

@@ -13,12 +13,11 @@
 # limitations under the License.
 
 import logging
-import re
-from collections import OrderedDict, defaultdict
+from collections import OrderedDict
 from copy import deepcopy
 from typing import Dict, Iterable, List, Optional
 from typing import OrderedDict as OrderedDictType
-from typing import Set, Union
+from typing import Union
 
 import torch
 from compressed_tensors.config import CompressionFormat

@@ -39,7 +38,8 @@ from compressed_tensors.quantization.utils import (
     infer_quantization_status,
     is_kv_cache_quant_scheme,
 )
-from compressed_tensors.utils.helpers import fix_fsdp_module_name, replace_module
+from compressed_tensors.utils.helpers import deprecated, replace_module
+from compressed_tensors.utils.match import match_named_modules, match_targets
 from compressed_tensors.utils.offload import update_parameter_data
 from compressed_tensors.utils.safetensors_load import get_safetensors_folder
 from safetensors import safe_open

@@ -51,8 +51,6 @@ __all__ = [
     "apply_quantization_config",
     "apply_quantization_status",
     "find_name_or_class_matches",
-    "expand_target_names",
-    "is_target",
 ]
 
 from compressed_tensors.quantization.utils.helpers import is_module_quantized

@@ -73,14 +71,14 @@ def load_pretrained_quantization_parameters(
     Loads the quantization parameters (scale and zero point) from model_name_or_path to
     a model that has already been initialized with a quantization config.
 
-    NOTE: Will always load inputs/output parameters.
-    Will conditioanlly load weight parameters, if load_weight_quantization is set to True.
+    NOTE: Will always load inputs/output parameters. Will conditioanlly load weight
+    parameters, if load_weight_quantization is set to True.
 
     :param model: model to load pretrained quantization parameters to
     :param model_name_or_path: Hugging Face stub or local folder containing a quantized
         model, which is used to load quantization parameters
-    :param load_weight_quantization: whether or not the weight quantization parameters shoud
-        be laoded
+    :param load_weight_quantization: whether or not the weight quantization parameters
+        should be loaded
     """
     model_path = get_safetensors_folder(model_name_or_path)
     mapping = get_quantization_parameter_to_path_mapping(model_path)

@@ -147,47 +145,30 @@ def apply_quantization_config(
     if run_compressed:
         from compressed_tensors.linear.compressed_linear import CompressedLinear
 
-    # list of submodules to ignore
-    ignored_submodules = defaultdict(list)
     # mark appropriate layers for quantization by setting their quantization schemes
-    for name, submodule in model.named_modules():
-        # potentially fix module name to remove FSDP wrapper prefix
-        name = fix_fsdp_module_name(name)
-        if matches := find_name_or_class_matches(name, submodule, config.ignore):
-            for match in matches:
-                ignored_submodules[match].append(name)
-            continue  # layer matches ignore list, continue
-
-        targets = find_name_or_class_matches(name, submodule, target_to_scheme)
-
-        if targets:
-            # mark modules to be quantized by adding
-            # quant scheme to the matching layers
-            scheme = _scheme_from_targets(target_to_scheme, targets, name)
-            if run_compressed:
-                format = config.format
-                if format != CompressionFormat.dense.value:
-                    if isinstance(submodule, torch.nn.Linear):
-                        # TODO: expand to more module types
-                        compressed_linear = CompressedLinear.from_linear(
-                            submodule,
-                            quantization_scheme=scheme,
-                            quantization_format=format,
-                        )
-                        replace_module(model, name, compressed_linear)
-
-            # target matched - add layer and scheme to target list
-            submodule.quantization_scheme = scheme
-
-            names_to_scheme[name] = submodule.quantization_scheme
-
-    if config.ignore is not None and ignored_submodules is not None:
-        if set(config.ignore) - set(ignored_submodules):
-            _LOGGER.warning(
-                "Some layers that were to be ignored were "
-                "not found in the model: "
-                f"{set(config.ignore) - set(ignored_submodules)}"
-            )
+    for name, submodule in match_named_modules(
+        model, target_to_scheme, config.ignore, warn_on_fail=True
+    ):
+        # mark modules to be quantized by adding
+        # quant scheme to the matching layers
+        matched_targets = match_targets(name, submodule, target_to_scheme)
+        scheme = _scheme_from_targets(target_to_scheme, matched_targets, name)
+        if run_compressed:
+            format = config.format
+            if format != CompressionFormat.dense.value:
+                if isinstance(submodule, torch.nn.Linear):
+                    # TODO: expand to more module types
+                    compressed_linear = CompressedLinear.from_linear(
+                        submodule,
+                        quantization_scheme=scheme,
+                        quantization_format=format,
+                    )
+                    replace_module(model, name, compressed_linear)
+
+        # target matched - add layer and scheme to target list
+        submodule.quantization_scheme = scheme
+
+        names_to_scheme[name] = submodule.quantization_scheme
 
     # apply current quantization status across all targeted layers
     apply_quantization_status(model, config.quantization_status)

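The rewritten loop above delegates ignore handling and unmatched-target warnings to match_named_modules and resolves per-module schemes with match_targets, replacing the manual named_modules() walk and the ignored_submodules bookkeeping. The public entry point is unchanged; a hedged sketch of typical usage (the config values below are illustrative, not taken from this diff):

    import torch
    from compressed_tensors.quantization import (
        QuantizationConfig,
        apply_quantization_config,
    )

    model = torch.nn.Sequential(torch.nn.Linear(16, 16))

    # illustrative config: int8 channel-wise weight quantization for Linear layers
    config = QuantizationConfig(
        config_groups={
            "group_0": {
                "targets": ["Linear"],
                "weights": {"num_bits": 8, "type": "int", "strategy": "channel"},
            }
        },
        ignore=[],
    )
    apply_quantization_config(model, config)
    assert hasattr(model[0], "quantization_scheme")
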
@@ -262,54 +243,10 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
         model.apply(compress_quantized_weights)
 
 
-def expand_target_names(
-    model: Module,
-    targets: Optional[Iterable[str]] = None,
-    ignore: Optional[Iterable[str]] = None,
-) -> Set[str]:
-    """
-    Finds all unique module names in the model that match the given
-    targets and ignore lists.
-
-    Note: Targets must be regexes, layer types, or full layer names.
-
-    :param model: model to search for targets in
-    :param targets: Iterable of targets to search for
-    :param ignore: Iterable of targets to ignore
-    :return: set of all targets that match the given targets and should
-        not be ignored
-    """
-    return {
-        name
-        for name, module in model.named_modules()
-        if is_target(name, module, targets, ignore)
-    }
-
-
-def is_target(
-    name: str,
-    module: Module,
-    targets: Optional[Iterable[str]] = None,
-    ignore: Optional[Iterable[str]] = None,
-) -> bool:
-    """
-    Determines if a module should be included in the targets based on the
-    targets and ignore lists.
-
-    Note: Targets must be regexes, layer types, or full layer names.
-
-    :param name: name of the module
-    :param module: the module itself
-    :param targets: Iterable of targets to search for
-    :param ignore: Iterable of targets to ignore
-    :return: True if the module is a target and not ignored, False otherwise
-    """
-    return bool(
-        find_name_or_class_matches(name, module, targets or [])
-        and not find_name_or_class_matches(name, module, ignore or [])
-    )
-
-
+@deprecated(
+    message="This function is deprecated and will be removed in a future release."
+    "Please use `match_targets` from `compressed_tensors.utils.match` instead."
+)
 def find_name_or_class_matches(
     name: str, module: Module, targets: Iterable[str], check_contains: bool = False
 ) -> List[str]:

@@ -322,38 +259,13 @@ def find_name_or_class_matches(
     2. matches on regex patterns
     3. matches on module names
     """
-    from compressed_tensors import InternalModule
-
-    if isinstance(module, InternalModule):
-        return []
-
-    targets = sorted(targets, key=lambda x: ("re:" in x, x))
-    if isinstance(targets, Iterable):
-        matches = _find_matches(name, targets) + _find_matches(
-            module.__class__.__name__, targets, check_contains
+    if check_contains:
+        raise NotImplementedError(
+            "This function is deprecated, and the check_contains=True option has been"
+            " removed."
         )
-        matches = [match for match in matches if match is not None]
-        return matches
 
-
-def _find_matches(
-    value: str, targets: Iterable[str], check_contains: bool = False
-) -> List[str]:
-    # returns all the targets that match value either
-    # exactly or as a regex after 're:'. if check_contains is set to True,
-    # additionally checks if the target string is contained with value.
-    matches = []
-    for target in targets:
-        if target.startswith("re:"):
-            pattern = target[3:]
-            if re.match(pattern, value):
-                matches.append(target)
-        elif check_contains:
-            if target.lower() in value.lower():
-                matches.append(target)
-        elif target == value:
-            matches.append(target)
-    return matches
+    return match_targets(name, module, targets)
 
 
 def _infer_status(model: Module) -> Optional[QuantizationStatus]:

@@ -429,7 +341,6 @@ def _scheme_from_targets(
 def _merge_schemes(
     schemes_to_merge: List[QuantizationScheme], name: str
 ) -> QuantizationScheme:
-
     kv_cache_quantization_scheme = [
         scheme for scheme in schemes_to_merge if is_kv_cache_quant_scheme(scheme)
     ]

compressed_tensors/quantization/lifecycle/forward.py

@@ -205,7 +205,8 @@ def _process_quantization(
     q_min, q_max = calculate_range(args, x.device)
     group_size = args.group_size
 
-    # blockwise FP8: quantize per 2D block, supports block_structure for static block quant
+    # blockwise FP8: quantize per 2D block, supports block_structure for static block
+    # quantization
     if args.strategy == QuantizationStrategy.BLOCK:
         original_shape = x.shape
         rows, cols = x.shape[-2], x.shape[-1]

@@ -214,8 +215,8 @@ def _process_quantization(
         # Ensure exact division (tensor dimensions must be divisible by block size)
         if rows % block_height != 0:
             raise ValueError(
-                f"Tensor height {rows} is not divisible by block_height {block_height}. "
-                f"Block quantization requires exact division."
+                f"Tensor height {rows} is not divisible by block_height {block_height}."
+                f" Block quantization requires exact division."
             )
         if cols % block_width != 0:
             raise ValueError(

@@ -295,7 +296,7 @@ def _process_quantization(
             perm = torch.argsort(g_idx)
             x = safe_permute(x, perm, dim=1)
 
-        # Maintain all dimensions apart from the last dim, which is divided by the group_size
+        # Maintain all dimensions except the last dim, which is divided by group_size
         reshaped_dims = (
             ceil(x.shape[-1] / group_size),
             group_size,

compressed_tensors/quantization/lifecycle/initialize.py

@@ -17,7 +17,7 @@ import logging
 import math
 import warnings
 from enum import Enum
-from typing import List, Optional
+from typing import Optional
 
 import torch
 from compressed_tensors.quantization.lifecycle.forward import (

@@ -87,7 +87,6 @@ def initialize_module_for_quantization(
         _initialize_attn_scales(module)
 
     else:
-
         if scheme.input_activations is not None:
             _initialize_scale_zero_point(
                 module,

@@ -183,7 +182,8 @@ def _initialize_scale_zero_point(
         num_groups = math.ceil(weight_shape[1] / quantization_args.group_size)
         expected_shape = (weight_shape[0], max(num_groups, 1))
     elif quantization_args.strategy == QuantizationStrategy.BLOCK:
-        # For block quantization, scale shape should match number of blocks - only for weights
+        # For block quantization, scale shape should match number of blocks - only
+        # for weights
         if quantization_args.block_structure is None:
             raise ValueError(
                 "Block quantization requires block_structure to be specified"

@@ -196,9 +196,10 @@ def _initialize_scale_zero_point(
         # Warn if dimensions don't divide evenly
         if rows % block_height != 0 or cols % block_width != 0:
             warnings.warn(
-                f"Block quantization: tensor shape {weight_shape} does not divide evenly "
-                f"by block structure {quantization_args.block_structure}. "
-                f"Some blocks will be incomplete which may affect quantization quality.",
+                f"Block quantization: tensor shape {weight_shape} does not divide"
+                f"evenly by block structure {quantization_args.block_structure}. "
+                f"Some blocks will be incomplete which may affect quantization"
+                "quality.",
                 UserWarning,
             )
 

compressed_tensors/quantization/quant_args.py

@@ -217,16 +217,18 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
                 return [int(x) for x in value.split("x")]
             except Exception:
                 raise ValueError(
-                    f"Invalid block_structure '{value}'. Must be a list of two ints [rows, cols]."
+                    f"Invalid block_structure '{value}'. Must be a list of ints "
+                    "[rows, cols]."
                 )
         if isinstance(value, (list, tuple)):
             if len(value) != 2 or not all(isinstance(v, int) for v in value):
                 raise ValueError(
-                    f"Invalid block_structure '{value}'. Must be a list of two ints [rows, cols]."
+                    f"Invalid block_structure '{value}'. Must be a list of ints "
+                    "[rows, cols]."
                 )
             return list(value)
         raise ValueError(
-            f"Invalid block_structure '{value}'. Must be a list of two ints [rows, cols]."
+            f"Invalid block_structure '{value}'. Must be a list of ints [rows, cols]."
         )
 
     @field_validator("strategy", mode="before")

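For reference on what the reworded message above validates: block_structure may be given either as an "RxC" string or as a two-int list, and both normalize to [rows, cols]. A small sketch under those assumptions (default values are used for the remaining fields):

    from compressed_tensors.quantization import QuantizationArgs

    # string form "RxC" is parsed into a two-int list
    args = QuantizationArgs(num_bits=8, strategy="block", block_structure="128x128")
    assert args.block_structure == [128, 128]

    # list form passes through; anything else raises the ValueError shown above
    args = QuantizationArgs(num_bits=8, strategy="block", block_structure=[64, 128])
    assert args.block_structure == [64, 128]
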
@@ -307,7 +309,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
             )
             if strategy not in supported_strategies:
                 raise ValueError(
-                    f"One of {supported_strategies} must be used for dynamic quantization"
+                    f"One of {supported_strategies} must be used for dynamic quant."
                 )
 
             if (

@@ -322,7 +324,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
                 observer != "memoryless"
             ):  # avoid annoying users with old configs
                 warnings.warn(
-                    "No observer is used for dynamic quantization, setting to None"
+                    "No observer is used for dynamic quant., setting to None"
                 )
                 observer = None
             else:

compressed_tensors/quantization/quant_scheme.py

@@ -81,9 +81,10 @@ class QuantizationScheme(BaseModel):
         ):
             warnings.warn(
                 "Using GROUP strategy for both weights and input_activations "
-                f"with different group sizes ({weights.group_size} vs {inputs.group_size}) "
-                "may complicate fused kernel implementations. Consider using "
-                "TENSOR_GROUP strategy for both or matching group sizes.",
+                f"with different group sizes ({weights.group_size} vs "
+                f"{inputs.group_size}) may complicate fused kernel implementations. "
+                "Consider using TENSOR_GROUP strategy for both or matching group"
+                " sizes.",
                 UserWarning,
                 stacklevel=2,
             )

compressed_tensors/quantization/utils/helpers.py

@@ -29,7 +29,6 @@ from compressed_tensors.quantization.quant_scheme import QuantizationScheme
 from compressed_tensors.utils import deprecated
 from torch import FloatTensor, IntTensor, Tensor
 from torch.nn import Module
-from tqdm import tqdm
 
 
 __all__ = [

compressed_tensors/registry/registry.py

@@ -55,7 +55,7 @@ def standardize_lookup_name(name: str) -> str:
 
 
 def standardize_alias_name(
-    name: Union[None, str, List[str]]
+    name: Union[None, str, List[str]],
 ) -> Union[None, str, List[str]]:
     if name is None:
         return None

compressed_tensors/transform/transform_config.py

@@ -14,7 +14,7 @@
 
 from typing import Dict
 
-from compressed_tensors.transform import TransformArgs, TransformScheme
+from compressed_tensors.transform import TransformScheme
 from pydantic import BaseModel, ConfigDict
 
 

compressed_tensors/transform/utils/matrix.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Callable, Optional, Tuple
+from typing import Optional
 
 import torch
 from compressed_tensors.transform import TransformLocation

compressed_tensors/utils/match.py

@@ -27,6 +27,7 @@ _LOGGER: logging.Logger = logging.getLogger(__name__)
 __all__ = [
     "match_named_modules",
     "match_named_parameters",
+    "match_targets",
     "match_modules_set",
     "is_match",
 ]

@@ -37,8 +38,8 @@ FusedMappping = Mapping[str, Iterable[str]]
 
 def match_named_modules(
     model: torch.nn.Module,
-    targets: Iterable[str],
-    ignore: Iterable[str] = tuple(),
+    targets: Optional[Iterable[str]],
+    ignore: Optional[Iterable[str]] = None,
     fused: Optional[FusedMappping] = None,
     warn_on_fail: bool = False,
 ) -> Generator[Tuple[str, torch.nn.Module]]:

@@ -54,14 +55,18 @@ def match_named_modules(
     :param warn_on_fail: if True, warns if any targets do not match any modules in model
     :return: generator of module names and modules
     """
+    targets = targets or []
+    ignore = ignore or []
+
     unmatched_targets = set(targets)
+
     for name, module in model.named_modules():
         for target in targets:
             if is_match(name, module, target, fused=fused):
                 unmatched_targets -= {target}
-
                 if not is_match(name, module, ignore, fused=fused):
                     yield name, module
+                break
 
     if warn_on_fail:
         for target in unmatched_targets:

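Two behavioral notes on the hunk above: targets and ignore may now be None (both normalize to empty lists), and the added break ensures a module is yielded at most once even when it matches several targets. A small sketch under those assumptions:

    import torch
    from compressed_tensors.utils.match import match_named_modules

    model = torch.nn.Sequential(torch.nn.Linear(4, 4))

    # module "0" matches both the exact-name and the class target, but the
    # added `break` means it is yielded only once
    matched = list(match_named_modules(model, targets=["0", "Linear"]))
    assert [name for name, _ in matched] == ["0"]

    # ignore=None is now accepted and treated the same as an empty iterable
    matched = list(match_named_modules(model, targets=["Linear"], ignore=None))
    assert len(matched) == 1
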
@@ -72,8 +77,8 @@ def match_named_modules(
 
 def match_named_parameters(
     model: torch.nn.Module,
-    targets: Iterable[str],
-    ignore: Iterable[str] = tuple(),
+    targets: Optional[Iterable[str]],
+    ignore: Optional[Iterable[str]] = None,
     fused: Optional[FusedMappping] = None,
     warn_on_fail: bool = False,
 ) -> Generator[Tuple[str, torch.nn.Module, torch.nn.Parameter]]:

@@ -89,6 +94,9 @@ def match_named_parameters(
     :param warn_on_fail: if True, warns if any targets do not match any params in model
     :return: generator of fully-qualified param names, parent modules, and params
     """
+    targets = targets or []
+    ignore = ignore or []
+
     unmatched_targets = set(targets)
     for module_name, module in model.named_modules():
         if isinstance(module, InternalModule):

@@ -110,16 +118,54 @@ def match_named_parameters(
                     )
 
 
+def match_targets(
+    name: str, module: torch.nn.Module, targets: Optional[Iterable[str]]
+) -> List[str]:
+    """
+    Returns the targets that match the given name and module.
+
+    :param name: the name of the module
+    :param module: the module to match
+    :param targets: the target strings, potentially containing "re:" prefixes
+    :return: the targets that match the given name and module
+
+    Outputs are ordered by type: exact name match, regex name match, class name match
+    """
+    targets = targets or []
+
+    if isinstance(module, InternalModule):
+        return []
+
+    # The order of the output `matches` list matters, the are arranged from most
+    # specific to least specific, and this order will be used when merging configs.
+    # The entries are sorted in the following order:
+    # 1. matches on exact strings
+    # 2. matches on regex patterns
+    # 3. matches on module names
+
+    targets = sorted(targets, key=lambda x: ("re:" in x, x))
+    matched_targets = []
+    for target in targets:
+        if _match_name(name, target):
+            matched_targets.append(target)
+
+    for target in targets:
+        if _match_class(module, target) and target not in matched_targets:
+            matched_targets.append(target)
+
+    return matched_targets
+
+
 def match_modules_set(
     model: torch.nn.Module,
-    targets: Iterable[str],
-    ignore: Iterable[str] = tuple(),
+    targets: Optional[Iterable[str]],
+    ignore: Optional[Iterable[str]] = None,
 ) -> Generator[Iterable[torch.nn.Module]]:
     """
     Yields modules grouped with the same order and size as `targets`.
     Values are returned in order of `model.named_modules()`
 
-    For example, the following targets would yield module belonging to the following layers:
+    E.g. the following targets would yield module belonging to the following layers:
     ```python3
     match_modules_set(model, ["q_proj", "k_proj", "v_proj"]) == (
         (

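The ordering promised by match_targets in the hunk above is observable from the caller's side: exact name matches come first, then regex matches, then class-name matches, which is the order apply.py relies on when merging schemes. An illustrative sketch (the layer name is hypothetical):

    import torch
    from compressed_tensors.utils.match import match_targets

    module = torch.nn.Linear(4, 4)
    name = "model.layers.0.self_attn.q_proj"
    targets = ["Linear", "re:.*q_proj$", name]

    # exact name first, then the regex, then the class-name match
    assert match_targets(name, module, targets) == [
        name,
        "re:.*q_proj$",
        "Linear",
    ]
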
@@ -151,6 +197,9 @@ def match_modules_set(
     :param targets: target strings, potentially containing "re:" prefixes
     :param ignore: targets to ignore, potentially containing "re:" prefixes
     """
+    targets = targets or []
+    ignore = ignore or []
+
     matches = dict.fromkeys(targets, None)
     for name, module in model.named_modules():
         # match until we get a full set

compressed_tensors/utils/offload.py

@@ -296,7 +296,6 @@ def disable_hf_hook(module: torch.nn.Module):
     hooks = {}
 
     def collect_hooks(module):
-        nonlocal hooks
         if hasattr(module, "_hf_hook"):
             hooks[module] = module._hf_hook
             remove_hook_from_module(module)

compressed_tensors/utils/safetensors_load.py

@@ -18,7 +18,6 @@ import re
 import struct
 from typing import Dict, Iterable, Optional, Tuple, Union
 
-from safetensors import safe_open
 from torch import Tensor
 from transformers.utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, cached_file
 

compressed_tensors/version.py

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.11.1.a20250819'
+__version__ = version = '0.11.1.a20250821'
 __version_tuple__ = version_tuple = (0, 11, 1)

{compressed_tensors-0.11.1a20250819.dist-info → compressed_tensors-0.11.1a20250821.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.11.1a20250819
+Version: 0.11.1a20250821
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

{compressed_tensors-0.11.1a20250819.dist-info → compressed_tensors-0.11.1a20250821.dist-info}/RECORD

@@ -1,23 +1,23 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=-gxWvDF4LCkyeDP8YlGzvBBKxo4Dk9h4NINPD61drFU,921
-compressed_tensors/version.py,sha256=pElXcDGgu5vhuycX_H8dgb9Fpdv6ZSZh84xJqJpYGEg,523
+compressed_tensors/version.py,sha256=QiPWK4b5m-LXWHE8_W5EK7VPtKZvorPc5Opz7BYczvA,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
 compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
-compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=FuPS3LYSJk0ATu6caW_GQsFi31EqFTnQtR6mIe6fDAU,37278
+compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=x2AS1NAPQx51O8uxyLf3wItnp2-_0qU2fI6eQVFBBfY,37388
 compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=KvaFBL_Q84LxRGJOV035M8OBoCkAx8kOkfphswgkKWk,745
-compressed_tensors/compressors/quantized_compressors/base.py,sha256=YGUMzbxekj_36ChgQnVZN6T8uDjXtGG1zfMIBGBLWco,10354
+compressed_tensors/compressors/quantized_compressors/base.py,sha256=_mqTG_HjAIbHqDGucA3ZR_01OXU3CMFxtrDjfM-kY0g,10301
 compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=0ANDcuD8aXPqTYNPY6GnX9iS6eXJw6P0TzNV_rYS2l8,5369
-compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=tKEaYom4SdMwZWg4MDMMMLNGTLgcVT20lPzewboVpMM,7145
-compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=47W1hFTi5YHVNKEWptzztsSutwI1kxy2Troh-NW1y14,11244
+compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=Z8k2gi5a1F_36DiI0GJsXGc03Gh0qwBRMwMxuKIWkj8,7136
+compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=D8h9ltxSIYi1XEKYgbYu1ebbXzCibhPi-eZsBUi0NOg,11245
 compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=Atuz-OdEgn8OCUhx7Ovd6gXdyImAI186uCR-uR0t_Nk,737
 compressed_tensors/compressors/sparse_compressors/base.py,sha256=YNZWcHjDleAlqbgRZQ6oJf44MQb_UDNvJGOqhl26uFA,8098
 compressed_tensors/compressors/sparse_compressors/dense.py,sha256=-OujJ1e0iXBvxYVULrIGvAZ9l-IC0mXczZRnimQdgo4,2314
-compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py,sha256=p8cNV-W4TZKaWDlCwjptQyaIrwqlHizZ1Pn4Vx3-ANk,9262
+compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py,sha256=U6oJz_BYbHi3qtB8RUo5YKxF7hHL1NJQzGBQKjTVJnQ,9251
 compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py,sha256=S8vW0FI9ep_XtUQOxj0P5utJt3vKEYOHjWEPp-Xd9aY,5820
 compressed_tensors/compressors/sparse_quantized_compressors/__init__.py,sha256=4f_cwcKXB1nVVMoiKgTFAc8jAPjPLElo-Df_EDm1_xw,675
-compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py,sha256=7F9J6wgkecitK5hHuqjetZ18HExHIF4QIw1wgm2Y6U8,10099
+compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py,sha256=U-zfkUYvQb1owXit8irlRINhlGcjevYwwjtPjb2S2I8,10100
 compressed_tensors/config/__init__.py,sha256=8sOoZ6xvYSC79mBvEtO8l6xk4PC80d29AnnJiGMrY2M,737
 compressed_tensors/config/base.py,sha256=FaImUwb5G93en2BHUKDs76L_tO8NFpdxlfwAgQL7mNM,3569
 compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317
@@ -26,23 +26,23 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
 compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
 compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn43Mn4CoV-q4JlTJ_o,4052
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
-compressed_tensors/quantization/quant_args.py,sha256=PMoaa6hpyJLGGSeCWefGmzGVxbOtxAdDunHJi_L5gNs,12894
+compressed_tensors/quantization/quant_args.py,sha256=5AxYKqCSlg7CDgz2N8G4ZRVIiSUKvIm-SCQa-Bq_SF0,12916
 compressed_tensors/quantization/quant_config.py,sha256=2NgDwKuQn0f-ojiHC8c6tXtYX_zQlk26Rj-bU71QKvA,10598
-compressed_tensors/quantization/quant_scheme.py,sha256=aTdGWz_JenLlTqCeNFcujCJjgWQYkXEq2FXoN0YSNqI,8883
+compressed_tensors/quantization/quant_scheme.py,sha256=X5Z7oXMLPXnX8g-UvWXlRjn4YnD_qTk5mXfGzu20k9o,8903
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
-compressed_tensors/quantization/lifecycle/apply.py,sha256=wM8mVcbKvZjBo18pSXMp28i30YWwUXJPSS7_HCakH9U,17892
+compressed_tensors/quantization/lifecycle/apply.py,sha256=yc9xCuQIcdhy-MGFh8OmBrB45dzJ8TzZju4mBa3AONg,14909
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
-compressed_tensors/quantization/lifecycle/forward.py,sha256=HzfoRkK3CkEHuCqRWatq0kyu5sFx8ULZHNmmjRNIpWI,17571
+compressed_tensors/quantization/lifecycle/forward.py,sha256=xcLTgaff1wYUWzvQqYKmhWYkshWVI-PhLPtBOyyZro0,17576
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=BM7bR_uNa-Ex4T-roHonWiRaxCi5sFysXyl0cFh1ZVs,10257
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=f05UF6NaUGvR9qyxes_AgRcvg3KWgk5JeM_-NL1EQG0,10285
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
-compressed_tensors/quantization/utils/helpers.py,sha256=7a89X0kg6xDGplw6trOrkRQzMRPu-txY_qvEt07Vcgc,17036
+compressed_tensors/quantization/utils/helpers.py,sha256=-pfSmxqHkrB-RnjF0VYz8lMe9CVnB7IJrONf9Y9fjCo,17014
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
-compressed_tensors/registry/registry.py,sha256=0s15BxdGgzBv8RL4kUJCYcuDOFUh_KZYvNvLEeRqWTc,11956
+compressed_tensors/registry/registry.py,sha256=cWnlwZ66lgG0w9OAUEAgq5XVxqsgFm1o8ZYdNhkNvJY,11957
 compressed_tensors/transform/__init__.py,sha256=v2wfl4CMfA6KbD7Hxx_MbRev63y_6QLDlccZq-WTtdw,907
 compressed_tensors/transform/apply.py,sha256=nCJvhHleIyWPNYPr-SZvXhmTKpqHVpJrG8VfIW-K6d8,1422
 compressed_tensors/transform/transform_args.py,sha256=rVgReFp7wMXcYugkfd325e2tTFh8pGV3FnYTGCEv5jY,3429
-compressed_tensors/transform/transform_config.py,sha256=h2EYyMrUwAzyak84JY1lsAgZ7Eupotw_cYLq8Ov5SH4,1219
+compressed_tensors/transform/transform_config.py,sha256=3YdtGcau3qkcapX9GMUiLuhQHFQZKFYT3eLgJGj1L6s,1204
 compressed_tensors/transform/transform_scheme.py,sha256=S7vYLnuv7xZ_bwphkpCiGqZLjnnTnb4lj1T8a6WwnE0,2094
 compressed_tensors/transform/factory/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
 compressed_tensors/transform/factory/base.py,sha256=Txkr1nWKtlMU1MmBcQ85-JqJzD356Z9nYbaF24tJ5rw,7755
@@ -52,19 +52,19 @@ compressed_tensors/transform/factory/random_hadamard.py,sha256=nUhTlFa4ikSpcl4Um
 compressed_tensors/transform/utils/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
 compressed_tensors/transform/utils/hadamard.py,sha256=hDJZC0Gw2fKdxqa3f8TmFc5J0eJqxHtFRxswLU_yVJc,5548
 compressed_tensors/transform/utils/hadamards.safetensors,sha256=mFd1GzNodGG-ifA1IoH-0nHYzfraCOvrq_dX2zFI1B4,1436901
-compressed_tensors/transform/utils/matrix.py,sha256=FIHCUlpWVIIhdr3c6EbQec41JeiPAAjCM9Ejz77wb-w,6181
+compressed_tensors/transform/utils/matrix.py,sha256=3sPatOCzcLRE8ROLCGTKHr2c51DubJOFgmuNCgYdJP4,6164
 compressed_tensors/utils/__init__.py,sha256=spzbjUO4-hZ2jXGST27r3MIt2yzIXsjdbEaYyaMcizo,873
 compressed_tensors/utils/helpers.py,sha256=Q3iRAa2XSdmmn4vSpUplnvKOmWwn4Clao9ZkPBHXtpI,12604
 compressed_tensors/utils/internal.py,sha256=7SSWgDoNFRnlfadwkoFhLW-T2jOc7Po_WzWv5h32Sa8,982
-compressed_tensors/utils/match.py,sha256=wXzcfUWlVE5Wc_OAvN7oUFqLhVHJ83oSt1Nhzw01igg,9701
-compressed_tensors/utils/offload.py,sha256=7EDCtRbnw5gZQlZxTAICrc7ONeNW0pKdQFqOeqGlF6g,24467
+compressed_tensors/utils/match.py,sha256=y03xJyWTXV8bjIPN5Z4S0_w797qMnh-Z4aiPEGQ4zNE,11239
+compressed_tensors/utils/offload.py,sha256=jE9xj3VewMc85iOLWSikqdyjNL9JB3oZpO1uDKKCLUE,24444
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
-compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
+compressed_tensors/utils/safetensors_load.py,sha256=Vql34aCTDHwmTZXJHzCyBISJo7iA7EQ78LdTlMjdpZo,12023
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
 compressed_tensors/utils/type.py,sha256=bNwoo_FWlvLuDpYAGGzZJITRg0JA_Ngk9LGPo-kvjeU,2554
-compressed_tensors-0.11.1a20250819.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.11.1a20250819.dist-info/METADATA,sha256=Ye4XBtt53KgdkwRSg96PYTfIQ6c7rM3A5M8rZJXGNcc,7031
-compressed_tensors-0.11.1a20250819.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.11.1a20250819.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.11.1a20250819.dist-info/RECORD,,
+compressed_tensors-0.11.1a20250821.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.11.1a20250821.dist-info/METADATA,sha256=jpkjjAiWJwPLa19Ej2tIJm5MEHJ9gwYsPPfvkhF6YYg,7031
+compressed_tensors-0.11.1a20250821.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.11.1a20250821.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.11.1a20250821.dist-info/RECORD,,