compressed-tensors 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff shows the changes between these two publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
- compressed_tensors/base.py +2 -1
- compressed_tensors/compressors/__init__.py +5 -1
- compressed_tensors/compressors/base.py +11 -54
- compressed_tensors/compressors/dense.py +4 -4
- compressed_tensors/compressors/helpers.py +12 -12
- compressed_tensors/compressors/int_quantized.py +126 -0
- compressed_tensors/compressors/marlin_24.py +250 -0
- compressed_tensors/compressors/model_compressor.py +315 -0
- compressed_tensors/compressors/pack_quantized.py +212 -0
- compressed_tensors/compressors/sparse_bitmask.py +4 -4
- compressed_tensors/compressors/utils/__init__.py +19 -0
- compressed_tensors/compressors/utils/helpers.py +43 -0
- compressed_tensors/compressors/utils/permutations_24.py +65 -0
- compressed_tensors/compressors/utils/semi_structured_conversions.py +341 -0
- compressed_tensors/config/base.py +7 -4
- compressed_tensors/config/dense.py +4 -4
- compressed_tensors/config/sparse_bitmask.py +3 -3
- compressed_tensors/quantization/lifecycle/__init__.py +1 -0
- compressed_tensors/quantization/lifecycle/apply.py +75 -19
- compressed_tensors/quantization/lifecycle/compressed.py +69 -0
- compressed_tensors/quantization/lifecycle/forward.py +208 -22
- compressed_tensors/quantization/lifecycle/frozen.py +4 -0
- compressed_tensors/quantization/lifecycle/initialize.py +33 -5
- compressed_tensors/quantization/observers/base.py +70 -5
- compressed_tensors/quantization/observers/helpers.py +6 -1
- compressed_tensors/quantization/observers/memoryless.py +17 -9
- compressed_tensors/quantization/observers/min_max.py +44 -13
- compressed_tensors/quantization/quant_args.py +33 -4
- compressed_tensors/quantization/quant_config.py +69 -21
- compressed_tensors/quantization/quant_scheme.py +81 -1
- compressed_tensors/quantization/utils/helpers.py +77 -8
- compressed_tensors/utils/helpers.py +26 -122
- compressed_tensors/utils/safetensors_load.py +3 -2
- compressed_tensors/version.py +53 -0
- {compressed_tensors-0.3.2.dist-info → compressed_tensors-0.4.0.dist-info}/METADATA +46 -9
- compressed_tensors-0.4.0.dist-info/RECORD +48 -0
- compressed_tensors-0.3.2.dist-info/RECORD +0 -38
- {compressed_tensors-0.3.2.dist-info → compressed_tensors-0.4.0.dist-info}/LICENSE +0 -0
- {compressed_tensors-0.3.2.dist-info → compressed_tensors-0.4.0.dist-info}/WHEEL +0 -0
- {compressed_tensors-0.3.2.dist-info → compressed_tensors-0.4.0.dist-info}/top_level.txt +0 -0
compressed_tensors/quantization/quant_args.py
@@ -15,7 +15,7 @@
 from enum import Enum
 from typing import Any, Dict, Optional
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, validator
 
 
 __all__ = ["QuantizationType", "QuantizationStrategy", "QuantizationArgs"]
@@ -39,9 +39,10 @@ class QuantizationStrategy(str, Enum):
     CHANNEL = "channel"
     GROUP = "group"
     BLOCK = "block"
+    TOKEN = "token"
 
 
-class QuantizationArgs(BaseModel):
+class QuantizationArgs(BaseModel, use_enum_values=True):
     """
     User facing arguments used to define a quantization config for weights or
     activations
@@ -61,10 +62,10 @@ class QuantizationArgs(BaseModel):
     """
 
     num_bits: int = 8
-    type: QuantizationType = QuantizationType.INT
+    type: QuantizationType = QuantizationType.INT.value
     symmetric: bool = True
-    strategy: QuantizationStrategy = QuantizationStrategy.TENSOR
     group_size: Optional[int] = None
+    strategy: Optional[QuantizationStrategy] = None
     block_structure: Optional[str] = None
     dynamic: bool = False
     observer: str = Field(
@@ -94,3 +95,31 @@
             self.observer = "memoryless"
 
         return Observer.load_from_registry(self.observer, quantization_args=self)
+
+    @validator("strategy", pre=True, always=True)
+    def validate_strategy(cls, value, values):
+        group_size = values.get("group_size")
+
+        # use group_size to determine strategy if not given explicitly
+        if group_size is not None and value is None:
+            if group_size > 0:
+                return QuantizationStrategy.GROUP
+
+            elif group_size == -1:
+                return QuantizationStrategy.CHANNEL
+
+            else:
+                raise ValueError(
+                    f"group_size={group_size} with strategy {value} is invalid. "
+                    "group_size > 0 for strategy='group' and "
+                    "group_size = -1 for 'channel'"
+                )
+
+        if value == QuantizationStrategy.GROUP:
+            if group_size is None:
+                raise ValueError(f"strategy {value} requires group_size to be set.")
+
+        if value is None:
+            return QuantizationStrategy.TENSOR
+
+        return value
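With the new `validator`, `strategy` no longer needs to be passed explicitly when `group_size` is set. A minimal sketch of the expected behaviour, following the validator logic above (imports use `compressed_tensors.quantization.quant_args`, the module shown in this file; a 0.4.0 install is assumed):

```python
# Sketch of the strategy inference added in 0.4.0.
from compressed_tensors.quantization.quant_args import (
    QuantizationArgs,
    QuantizationStrategy,
)

# group_size > 0 with no explicit strategy resolves to GROUP
args = QuantizationArgs(num_bits=4, group_size=128)
assert args.strategy == QuantizationStrategy.GROUP  # stored as "group" (use_enum_values)

# group_size == -1 resolves to CHANNEL
args = QuantizationArgs(num_bits=8, group_size=-1)
assert args.strategy == QuantizationStrategy.CHANNEL

# neither strategy nor group_size given falls back to the old TENSOR default
args = QuantizationArgs()
assert args.strategy == QuantizationStrategy.TENSOR
```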
compressed_tensors/quantization/quant_config.py
@@ -13,10 +13,13 @@
 # limitations under the License.
 
 from enum import Enum
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
 
-from compressed_tensors.
-from compressed_tensors.quantization.quant_scheme import
+from compressed_tensors.config import CompressionFormat
+from compressed_tensors.quantization.quant_scheme import (
+    QuantizationScheme,
+    preset_name_to_scheme,
+)
 from compressed_tensors.quantization.utils import (
     calculate_compression_ratio,
     is_module_quantized,
@@ -25,13 +28,14 @@ from compressed_tensors.quantization.utils import (
 )
 from pydantic import BaseModel, Field
 from torch.nn import Module
-from transformers import AutoConfig
 
 
 __all__ = [
     "QuantizationStatus",
     "QuantizationConfig",
     "LIFECYCLE_ORDER",
+    "DEFAULT_QUANTIZATION_METHOD",
+    "DEFAULT_QUANTIZATION_FORMAT",
 ]
 
 
@@ -62,10 +66,33 @@ class QuantizationStatus(str, Enum):
         return
 
     def __ge__(self, other):
+        if other is None:
+            return True
         if not isinstance(other, self.__class__):
             raise NotImplementedError
         return LIFECYCLE_ORDER.index(self) >= LIFECYCLE_ORDER.index(other)
 
+    def __gt__(self, other):
+        if other is None:
+            return True
+        if not isinstance(other, self.__class__):
+            raise NotImplementedError
+        return LIFECYCLE_ORDER.index(self) > LIFECYCLE_ORDER.index(other)
+
+    def __lt__(self, other):
+        if other is None:
+            return False
+        if not isinstance(other, self.__class__):
+            raise NotImplementedError
+        return LIFECYCLE_ORDER.index(self) < LIFECYCLE_ORDER.index(other)
+
+    def __le__(self, other):
+        if other is None:
+            return False
+        if not isinstance(other, self.__class__):
+            raise NotImplementedError
+        return LIFECYCLE_ORDER.index(self) <= LIFECYCLE_ORDER.index(other)
+
 
 LIFECYCLE_ORDER = [
     QuantizationStatus.INITIALIZED,
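The added comparison operators make lifecycle checks read naturally, and they order `None` before any status. A small sketch of the intended usage (only `INITIALIZED` and `COMPRESSED` appear in this hunk, so only those members are used):

```python
# Sketch: statuses compare by their position in LIFECYCLE_ORDER; None sorts first.
from compressed_tensors.quantization.quant_config import QuantizationStatus

assert QuantizationStatus.COMPRESSED > QuantizationStatus.INITIALIZED
assert QuantizationStatus.INITIALIZED >= QuantizationStatus.INITIALIZED
assert QuantizationStatus.INITIALIZED > None       # __gt__ treats None as "no status yet"
assert not (QuantizationStatus.COMPRESSED < None)  # __lt__ returns False for None
```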
@@ -74,6 +101,9 @@ LIFECYCLE_ORDER = [
     QuantizationStatus.COMPRESSED,
 ]
 
+DEFAULT_QUANTIZATION_METHOD = "compressed-tensors"
+DEFAULT_QUANTIZATION_FORMAT = "fakequant"
+
 
 class QuantizationConfig(BaseModel):
     """
@@ -81,7 +111,8 @@ class QuantizationConfig(BaseModel):
     mapped to a QuantizationScheme in config_groups.
 
     :param config_groups: dict of QuantizationSchemes specifying the quantization
-        settings for each quantized layer
+        settings for each quantized layer. A group could also be a reference to
+        a predefined scheme name, mapped to a list of its target layers/classes
     :param quant_method: a constant used to differentiate sparseML quantization from
         other quantization configs
     :param format: specifies how the quantized model is stored on disk
@@ -93,30 +124,34 @@
        are not quantized even if they match up with a target in config_groups
     """
 
-    config_groups: Dict[str, QuantizationScheme]
-    quant_method: str =
-    format: str =
+    config_groups: Dict[str, Union[QuantizationScheme, List[str]]]
+    quant_method: str = DEFAULT_QUANTIZATION_METHOD
+    format: str = DEFAULT_QUANTIZATION_FORMAT
     quantization_status: QuantizationStatus = QuantizationStatus.INITIALIZED
     global_compression_ratio: Optional[float] = None
     ignore: Optional[List[str]] = Field(default_factory=list)
 
-
-    def from_model_config(model_name_or_path) -> "QuantizationConfig":
+    def model_post_init(self, __context):
         """
-
-
-        :param pretrained_model_name_or_path: path to model config on disk or HF hub
-        :return: instantiated QuantizationConfig if config contains a quant config
+        updates any quantization schemes defined as presets to be fully loaded
+        schemes
         """
-
-
-
-
-
-
+        for group_name, targets_or_scheme in self.config_groups.items():
+            if isinstance(targets_or_scheme, QuantizationScheme):
+                continue  # scheme already defined
+            self.config_groups[group_name] = preset_name_to_scheme(
+                name=group_name,
+                targets=targets_or_scheme,
+            )
+
+    def to_dict(self):
+        # for compatibility with HFQuantizer
+        return self.dict()
 
     @staticmethod
-    def from_pretrained(
+    def from_pretrained(
+        model: Module, format: Optional[str] = None
+    ) -> Optional["QuantizationConfig"]:
        """
        Converts a model into its associated QuantizationConfig based on the
        QuantizationScheme attached to each quantized module
@@ -147,6 +182,9 @@
             if not match_found:
                 quant_scheme_to_layers.append(scheme)
 
+        if len(quant_scheme_to_layers) == 0:  # No quantized layers
+            return None
+
         # clean up ignore list, we can leave out layers types if none of the
         # instances are quantized
         consolidated_ignore = []
@@ -162,10 +200,20 @@
             group_name = "group_" + str(idx)
             config_groups[group_name] = scheme
 
+        # TODO: this is incorrect in compressed mode, since we are overwriting the
+        # original weight we lose the uncompressed bit_depth info
         compression_ratio = calculate_compression_ratio(model)
+
+        if format is None:
+            if quantization_status == QuantizationStatus.COMPRESSED:
+                format = CompressionFormat.int_quantized.value
+            else:
+                format = CompressionFormat.dense.value
+
         return QuantizationConfig(
             config_groups=config_groups,
             quantization_status=quantization_status,
             global_compression_ratio=compression_ratio,
+            format=format,
             ignore=consolidated_ignore,
         )
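Taken together, `config_groups` can now be written with just a preset name and its targets, `model_post_init` expands it into a full scheme, and `from_pretrained` fills in `format` when it is not given. A minimal sketch of the new surface, importing from the modules shown in this diff:

```python
# Sketch: preset-style config_groups are expanded into full schemes on init.
from compressed_tensors.quantization.quant_config import QuantizationConfig
from compressed_tensors.quantization.quant_scheme import QuantizationScheme

config = QuantizationConfig(config_groups={"W4A16": ["Linear"]})

# model_post_init replaced the preset name with a concrete scheme
assert isinstance(config.config_groups["W4A16"], QuantizationScheme)
assert config.quant_method == "compressed-tensors"  # DEFAULT_QUANTIZATION_METHOD
assert config.format == "fakequant"                 # DEFAULT_QUANTIZATION_FORMAT

# to_dict() exists for HFQuantizer compatibility and returns a plain dict
assert isinstance(config.to_dict(), dict)
```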
compressed_tensors/quantization/quant_scheme.py
@@ -12,13 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from copy import deepcopy
 from typing import List, Optional
 
 from compressed_tensors.quantization.quant_args import QuantizationArgs
 from pydantic import BaseModel
 
 
-__all__ = [
+__all__ = [
+    "QuantizationScheme",
+    "preset_name_to_scheme",
+    "is_preset_scheme",
+]
 
 
 class QuantizationScheme(BaseModel):
@@ -37,3 +42,78 @@
     weights: Optional[QuantizationArgs] = None
     input_activations: Optional[QuantizationArgs] = None
     output_activations: Optional[QuantizationArgs] = None
+
+    @classmethod
+    def default_scheme(
+        cls,
+        targets: Optional[List[str]] = None,
+    ):
+
+        if targets is None:
+            # default to quantizing all Linear layers
+            targets = ["Linear"]
+
+        # default to 8 bit integer symmetric quantization
+        # for weights
+        weights = QuantizationArgs(num_bits=8, symmetric=True)
+
+        # default to 8 bit integer asymmetric quantization
+        input_activations = QuantizationArgs(num_bits=8, symmetric=True)
+
+        # Do not quantize the output activations
+        # by default
+        output_activations = None
+
+        return cls(
+            targets=targets,
+            weights=weights,
+            input_activations=input_activations,
+            output_activations=output_activations,
+        )
+
+
+"""
+Pre-Set Quantization Scheme Args
+"""
+
+
+def preset_name_to_scheme(name: str, targets: List[str]) -> QuantizationScheme:
+    """
+    :param name: preset quantization settings name. must exist in upper case in
+        PRESET_SCHEMES
+    :param targets: list of quantization targets to be passed to the Scheme
+    :return: new QuantizationScheme for a given name with the given targets
+    """
+    name = name.upper()
+
+    if name not in PRESET_SCHEMES:
+        raise KeyError(
+            f"Unknown preset scheme name {name}, "
+            f"available names: {list(PRESET_SCHEMES.keys())}"
+        )
+
+    scheme_args = deepcopy(PRESET_SCHEMES[name])  # deepcopy to avoid args references
+    return QuantizationScheme(
+        targets=targets,
+        **scheme_args,
+    )
+
+
+def is_preset_scheme(name: str) -> bool:
+    """
+    :param name: preset quantization settings name
+    :return: True if the name is a preset scheme name
+    """
+    return name.upper() in PRESET_SCHEMES
+
+
+W8A8 = dict(
+    weights=QuantizationArgs(), input_activations=QuantizationArgs(symmetric=True)
+)
+
+W4A16 = dict(weights=QuantizationArgs(num_bits=4, group_size=128))
+
+PRESET_SCHEMES = {
+    "W8A8": W8A8,
+    "W4A16": W4A16,
+}
compressed_tensors/quantization/utils/helpers.py
@@ -12,21 +12,43 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
+import logging
+from typing import Optional, Tuple
 
 import torch
+from compressed_tensors.quantization.observers.base import Observer
 from torch.nn import Module
 from tqdm import tqdm
 
 
 __all__ = [
+    "infer_quantization_status",
     "is_module_quantized",
     "is_model_quantized",
     "iter_named_leaf_modules",
     "module_type",
     "calculate_compression_ratio",
+    "get_torch_bit_depth",
+    "can_quantize",
 ]
 
+_LOGGER: logging.Logger = logging.getLogger(__name__)
+
+
+def infer_quantization_status(model: Module) -> Optional["QuantizationStatus"]:  # noqa
+    """
+    Checks the quantization status of a model. Assumes all modules in the model have
+    the same status, so only the first quantized module is checked.
+
+    :param model: model to check quantization status for
+    :return: quantization status if the model is quantized, otherwise None
+    """
+    for module in model.modules():
+        status = getattr(module, "quantization_status", None)
+        if status is not None:
+            return status
+    return None
+
 
 def is_module_quantized(module: Module) -> bool:
     """
@@ -78,11 +100,60 @@ def module_type(module: Module) -> str:
 
 
 def iter_named_leaf_modules(model: Module) -> Tuple[str, Module]:
-
-
+    """
+    Yields modules that do not have any submodules except observers. The observers
+    themselves are not yielded
+
+    :param model: model to get leaf modules of
+    :returns: generator tuple of (name, leaf_submodule)
+    """
     for name, submodule in model.named_modules():
-
+        children = list(submodule.children())
+        if len(children) == 0 and not isinstance(submodule, Observer):
             yield name, submodule
+        else:
+            has_non_observer_children = False
+            for child in children:
+                if not isinstance(child, Observer):
+                    has_non_observer_children = True
+
+            if not has_non_observer_children:
+                yield name, submodule
+
+
+def get_torch_bit_depth(value: torch.Tensor) -> int:
+    """
+    Determine the number of bits used to represent the dtype of a tensor
+
+    :param value: tensor to check bit depth of
+    :return: bit depth of each element in the value tensor
+    """
+    try:
+        bit_depth = torch.finfo(value.dtype).bits
+    except TypeError:
+        bit_depth = torch.iinfo(value.dtype).bits
+
+    return bit_depth
+
+
+def can_quantize(value: torch.Tensor, quant_args: "QuantizationArgs") -> bool:  # noqa
+    """
+    Checks if value can be quantized by quant_args.
+
+    :param value: tensor to check for quantization
+    :param quant_args: QuantizationArgs to use for quantization
+    :return: False if value is already quantized to quant_args or value is incompatible
+        with quant_args, True if value can be quantized with quant_args
+    """
+    bit_depth = get_torch_bit_depth(value)
+    requested_depth = quant_args.num_bits
+    if bit_depth < quant_args.num_bits:
+        _LOGGER.warn(
+            f"Can't quantize tensor with bit depth {bit_depth} to {requested_depth}."
+            "The QuantizationArgs provided are not compatible with the input tensor."
+        )
+
+    return bit_depth > quant_args.num_bits
 
 
 def calculate_compression_ratio(model: Module) -> float:
@@ -101,13 +172,11 @@ def calculate_compression_ratio(model: Module) -> float:
         desc="Calculating quantization compression ratio",
     ):
         for parameter in model.parameters():
-
-            uncompressed_bits = torch.finfo(parameter.dtype).bits
-            except TypeError:
-                uncompressed_bits = torch.iinfo(parameter.dtype).bits
+            uncompressed_bits = get_torch_bit_depth(parameter)
             compressed_bits = uncompressed_bits
             if is_module_quantized(submodule):
                 compressed_bits = submodule.quantization_scheme.weights.num_bits
+
             num_weights = parameter.numel()
             total_compressed += compressed_bits * num_weights
             total_uncompressed += uncompressed_bits * num_weights
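The two new helpers are small but useful on their own. A sketch of the expected behaviour, assuming `compressed_tensors.quantization.utils` re-exports this helpers module as the imports earlier in this diff suggest:

```python
# Sketch: bit-depth inspection and the can_quantize guard.
import torch

from compressed_tensors.quantization.quant_args import QuantizationArgs
from compressed_tensors.quantization.utils import can_quantize, get_torch_bit_depth

weight = torch.randn(16, 16, dtype=torch.float16)
assert get_torch_bit_depth(weight) == 16                           # finfo path
assert get_torch_bit_depth(torch.zeros(4, dtype=torch.int8)) == 8  # iinfo path

assert can_quantize(weight, QuantizationArgs(num_bits=8))       # 16 > 8 bits
assert not can_quantize(weight, QuantizationArgs(num_bits=16))  # already at 16 bits
```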
compressed_tensors/utils/helpers.py
@@ -12,47 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from pathlib import Path
-from typing import Dict, Optional, Union
 
-import
-
-from compressed_tensors.compressors import ModelCompressor
-from compressed_tensors.config import (
-    CompressionConfig,
-    CompressionFormat,
-    DenseSparsityConfig,
-)
-from safetensors.torch import save_file
-from torch import Tensor
+from typing import Optional
+
 from transformers import AutoConfig
 
 
-__all__ = [
-    "infer_compressor_from_model_config",
-    "infer_compression_config_from_model_config",
-    "load_compressed",
-    "save_compressed",
-    "save_compressed_model",
-]
+__all__ = ["infer_compressor_from_model_config", "fix_fsdp_module_name"]
 
-
-    pretrained_model_name_or_path: str,
-) -> Optional[CompressionConfig]:
-    """
-    Given a path to a model config, extract a sparsity config if it exists and return
-    the associated CompressionConfig
-
-    :param pretrained_model_name_or_path: path to model config on disk or HF hub
-    :return: matching compression config if config contains a sparsity config
-    """
-    config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
-    return getattr(config, SPARSITY_CONFIG_NAME, None)
+FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
 
 
 def infer_compressor_from_model_config(
     pretrained_model_name_or_path: str,
-) -> Optional[ModelCompressor]:
+) -> Optional["ModelCompressor"]:  # noqa: F821
     """
     Given a path to a model config, extract a sparsity config if it exists and return
     the associated ModelCompressor
@@ -60,100 +33,31 @@ def infer_compressor_from_model_config(
     :param pretrained_model_name_or_path: path to model config on disk or HF hub
     :return: matching compressor if config contains a sparsity config
     """
-
-
-    return compressor
-
-
-def save_compressed(
-    tensors: Dict[str, Tensor],
-    save_path: Union[str, Path],
-    compression_format: Optional[CompressionFormat] = None,
-):
-    """
-    Save compressed tensors to disk. If tensors are not compressed,
-    save them as is.
-
-    :param tensors: dictionary of tensors to compress
-    :param save_path: path to save compressed tensors
-    :param compression_format: compression format used for the tensors
-    :return: compression config, if tensors were compressed - None otherwise
-    """
-    if tensors is None or len(tensors) == 0:
-        raise ValueError("No tensors or empty tensors provided to compress")
+    from compressed_tensors.compressors import ModelCompressor
+    from compressed_tensors.config import CompressionConfig
 
-
-
-
-
-        compression_format in ModelCompressor.registered_names()
-        or compression_format in ModelCompressor.registered_aliases()
-    ):
-        raise ValueError(
-            f"Unknown compression format: {compression_format}. "
-            f"Must be one of {set(ModelCompressor.registered_names() + ModelCompressor.registered_aliases())}"  # noqa E501
-        )
+    config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
+    sparsity_config = ModelCompressor.parse_sparsity_config(config)
+    if sparsity_config is None:
+        return None
 
-
-
-
-
-    save_file(compressed_tensors, save_path)
+    format = sparsity_config.get("format")
+    sparsity_config = CompressionConfig.load_from_registry(format, **sparsity_config)
+    compressor = ModelCompressor.load_from_registry(format, config=sparsity_config)
+    return compressor
 
 
-
-
-
-
-) -> Dict[str, Tensor]:
+# TODO: There is already the same function in
+# SparseML, should be moved to a shared location
+# in the future
+def fix_fsdp_module_name(name: str) -> str:
     """
-
-
-
-    :param
-    :
-    :param device: device to move tensors to. If None, tensors are loaded on CPU.
-    :return decompressed tensors
+    Remove FSDP wrapper prefixes from a module name
+    Accounts for scenario where FSDP_WRAPPER_NAME is
+    at the end of the name, as well as in the middle.
+    :param name: name to strip
+    :return: stripped name
     """
-
-
-    raise ValueError("No compressed tensors provided to load")
-
-    # if no compression_config specified, default to `dense_sparsity`
-    compression_config = compression_config or DenseSparsityConfig()
-
-    # decompress
-    compression_format = compression_config.format
-    compressor = ModelCompressor.load_from_registry(
-        compression_format, config=compression_config
+    return name.replace(FSDP_WRAPPER_NAME + ".", "").replace(
+        "." + FSDP_WRAPPER_NAME, ""
     )
-    return dict(compressor.decompress(compressed_tensors, device=device))
-
-
-def save_compressed_model(
-    model: torch.nn.Module,
-    filename: str,
-    compression_format: Optional[CompressionFormat] = None,
-    force_contiguous: bool = True,
-):
-    """
-    Wrapper around safetensors `save_model` helper function, which allows for
-    saving compressed model to disk.
-
-    Note: The model is assumed to have a
-    state_dict with unique entries
-
-    :param model: model to save on disk
-    :param filename: filename location to save the file
-    :param compression_format: compression format used for the model
-    :param force_contiguous: forcing the state_dict to be saved as contiguous tensors
-    """
-    state_dict = model.state_dict()
-    if force_contiguous:
-        state_dict = {k: v.contiguous() for k, v in state_dict.items()}
-    try:
-        save_compressed(state_dict, filename, compression_format=compression_format)
-    except ValueError as e:
-        msg = str(e)
-        msg += " Or use save_compressed_model(..., force_contiguous=True), read the docs for potential caveats."  # noqa E501
-        raise ValueError(msg)
compressed_tensors/utils/safetensors_load.py
@@ -31,6 +31,7 @@ __all__ = [
     "get_weight_mappings",
     "get_nested_weight_mappings",
     "get_quantization_state_dict",
+    "is_quantization_param",
 ]
 
 
@@ -214,7 +215,7 @@ def get_quantization_state_dict(model_path: str) -> Dict[str, Tensor]:
     weight_mappings = get_weight_mappings(model_path)
     state_dict = {}
     for weight_name, safe_path in weight_mappings.items():
-        if not
+        if not is_quantization_param(weight_name):
             continue
         with safe_open(safe_path, framework="pt", device="cpu") as f:
             state_dict[weight_name] = f.get_tensor(weight_name)
@@ -222,7 +223,7 @@ def get_quantization_state_dict(model_path: str) -> Dict[str, Tensor]:
     return state_dict
 
 
-def
+def is_quantization_param(name: str) -> bool:
     """
     Checks if a parameter name is associated with a quantization parameter
 
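The hunk above is cut off before the body of `is_quantization_param`, so the exact name patterns it matches are not shown here; what is visible is that `get_quantization_state_dict` keeps only the entries the predicate accepts. A small sketch under that assumption; `path/to/model` is a placeholder for a local safetensors checkpoint:

```python
# Sketch: filtering a checkpoint down to its quantization parameters.
from compressed_tensors.utils.safetensors_load import (
    get_quantization_state_dict,
    is_quantization_param,
)

quant_state_dict = get_quantization_state_dict("path/to/model")

# by construction of the loop shown above, every kept key satisfies the predicate
assert all(is_quantization_param(name) for name in quant_state_dict)
print(sorted(quant_state_dict))
```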