compressed-tensors 0.8.1__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/model_compressors/model_compressor.py +76 -14
- compressed_tensors/compressors/quantized_compressors/base.py +35 -5
- compressed_tensors/compressors/quantized_compressors/naive_quantized.py +2 -2
- compressed_tensors/compressors/quantized_compressors/pack_quantized.py +2 -2
- compressed_tensors/compressors/sparse_compressors/__init__.py +1 -0
- compressed_tensors/compressors/sparse_compressors/base.py +45 -7
- compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +240 -0
- compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +9 -40
- compressed_tensors/config/__init__.py +1 -0
- compressed_tensors/config/base.py +1 -0
- compressed_tensors/config/sparse_24_bitmask.py +40 -0
- compressed_tensors/quantization/lifecycle/apply.py +46 -1
- compressed_tensors/quantization/lifecycle/forward.py +2 -2
- compressed_tensors/quantization/lifecycle/initialize.py +21 -45
- compressed_tensors/quantization/quant_config.py +1 -1
- compressed_tensors/utils/helpers.py +174 -1
- compressed_tensors/utils/offload.py +332 -44
- compressed_tensors/utils/safetensors_load.py +83 -17
- compressed_tensors/version.py +1 -1
- {compressed_tensors-0.8.1.dist-info → compressed_tensors-0.9.1.dist-info}/METADATA +1 -1
- {compressed_tensors-0.8.1.dist-info → compressed_tensors-0.9.1.dist-info}/RECORD +24 -22
- {compressed_tensors-0.8.1.dist-info → compressed_tensors-0.9.1.dist-info}/LICENSE +0 -0
- {compressed_tensors-0.8.1.dist-info → compressed_tensors-0.9.1.dist-info}/WHEEL +0 -0
- {compressed_tensors-0.8.1.dist-info → compressed_tensors-0.9.1.dist-info}/top_level.txt +0 -0
--- a/compressed_tensors/compressors/model_compressors/model_compressor.py
+++ b/compressed_tensors/compressors/model_compressors/model_compressor.py
@@ -17,8 +17,9 @@ import logging
 import operator
 import os
 import re
+from contextlib import contextmanager
 from copy import deepcopy
-from typing import TYPE_CHECKING, Any, Dict, Optional, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, Set, TypeVar, Union
 
 import compressed_tensors
 import torch
@@ -38,6 +39,7 @@ from compressed_tensors.quantization import (
     apply_quantization_config,
     load_pretrained_quantization,
 )
+from compressed_tensors.quantization.lifecycle import expand_sparse_target_names
 from compressed_tensors.quantization.quant_args import QuantizationArgs
 from compressed_tensors.quantization.utils import (
     is_module_quantized,
@@ -104,7 +106,6 @@ class ModelCompressor:
         """
         config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
         compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)
-
         return cls.from_compression_config(compression_config)
 
     @classmethod
@@ -137,7 +138,7 @@ class ModelCompressor:
                 format, **sparsity_config
             )
         if quantization_config is not None:
-            quantization_config = QuantizationConfig.parse_obj(quantization_config)
+            quantization_config = QuantizationConfig.model_validate(quantization_config)
 
         return cls(
             sparsity_config=sparsity_config, quantization_config=quantization_config
@@ -193,7 +194,7 @@ class ModelCompressor:
 
         if is_compressed_tensors_config(compression_config):
             s_config = compression_config.sparsity_config
-            return s_config.dict() if s_config is not None else None
+            return s_config.model_dump() if s_config is not None else None
 
         return compression_config.get(SPARSITY_CONFIG_NAME, None)
 
@@ -214,7 +215,7 @@ class ModelCompressor:
 
         if is_compressed_tensors_config(compression_config):
             q_config = compression_config.quantization_config
-            return q_config.dict() if q_config is not None else None
+            return q_config.model_dump() if q_config is not None else None
 
         quantization_config = deepcopy(compression_config)
         quantization_config.pop(SPARSITY_CONFIG_NAME, None)
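Note: this hunk and the two before it track the pydantic v2 API renames (`parse_obj` → `model_validate`, `.dict()` → `.model_dump()`). A minimal sketch of the renamed API using a hypothetical stand-in model, not the library's actual config classes:

```python
from pydantic import BaseModel


class ExampleConfig(BaseModel):  # hypothetical stand-in for QuantizationConfig
    format: str = "dense"


cfg = ExampleConfig.model_validate({"format": "int-quantized"})  # v1: parse_obj(...)
assert cfg.model_dump() == {"format": "int-quantized"}           # v1: dict()
```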
@@ -282,8 +283,14 @@ class ModelCompressor:
             )
 
         if self.sparsity_compressor is not None:
+            sparse_compression_targets: Set[str] = expand_sparse_target_names(
+                model=model,
+                targets=self.sparsity_config.targets,
+                ignore=self.sparsity_config.ignore,
+            )
             compressed_state_dict = self.sparsity_compressor.compress(
-                compressed_state_dict
+                compressed_state_dict,
+                compression_targets=sparse_compression_targets,
             )
 
         # HACK: Override the dtype_byte_size function in transformers to
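`expand_sparse_target_names` resolves the sparsity config's `targets`/`ignore` entries into a concrete set of module names before compression. A rough standalone sketch of what that expansion amounts to; the matching rules here (class name, exact name, or `re:`-prefixed regex) are simplified assumptions, not the library's exact implementation:

```python
import re
from typing import Iterable, Set

import torch.nn as nn


def expand_targets_sketch(
    model: nn.Module, targets: Iterable[str], ignore: Iterable[str]
) -> Set[str]:
    # Simplified matching: class name, exact module name, or "re:"-prefixed regex.
    def matches(name: str, module: nn.Module, patterns: Iterable[str]) -> bool:
        return any(
            pattern == module.__class__.__name__
            or pattern == name
            or (pattern.startswith("re:") and re.match(pattern[3:], name) is not None)
            for pattern in patterns
        )

    return {
        name
        for name, module in model.named_modules()
        if matches(name, module, targets) and not matches(name, module, ignore)
    }


model = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 2))
print(expand_targets_sketch(model, targets=["Linear"], ignore=["2"]))  # {'0'}
```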
@@ -301,23 +308,44 @@ class ModelCompressor:
         :param model: pytorch model to load decompressed weights into
         """
         model_path = get_safetensors_folder(model_path)
-        if self.sparsity_compressor is not None:
+        sparse_decompressed = False
+
+        if (
+            self.sparsity_compressor is not None
+            and self.sparsity_config.format != CompressionFormat.dense.value
+        ):
+            # Sparse decompression is applied on the model_path
             dense_gen = self.sparsity_compressor.decompress(model_path)
             self._replace_weights(dense_gen, model)
             setattr(model, SPARSITY_CONFIG_NAME, self.sparsity_compressor.config)
+            sparse_decompressed = True
 
         if self.quantization_compressor is not None:
-            names_to_scheme = apply_quantization_config(model, self.quantization_config)
-            load_pretrained_quantization(model, model_path)
+            # Temporarily set quantization status to FROZEN to prevent
+            # quantization during apply_quantization_config. This ensures
+            # that the dtypes of the weights are not unintentionally updated.
+            # The status is restored after quantization params are loaded.
+            with override_quantization_status(
+                self.quantization_config, QuantizationStatus.FROZEN
+            ):
+                names_to_scheme = apply_quantization_config(
+                    model, self.quantization_config
+                )
+                load_pretrained_quantization(model, model_path)
+
+            model_path_or_state_dict = (
+                model.state_dict() if sparse_decompressed else model_path
+            )
+
             dense_gen = self.quantization_compressor.decompress(
-                model_path, names_to_scheme=names_to_scheme
+                model_path_or_state_dict, names_to_scheme=names_to_scheme
             )
             self._replace_weights(dense_gen, model)
 
-            def update_status(module):
+            def freeze_quantization_status(module):
                 module.quantization_status = QuantizationStatus.FROZEN
 
-            model.apply(update_status)
+            model.apply(freeze_quantization_status)
             setattr(model, QUANTIZATION_CONFIG_NAME, self.quantization_config)
 
     def update_config(self, save_directory: str):
@@ -367,12 +395,26 @@ class ModelCompressor:
         with open(config_file_path, "w") as config_file:
             json.dump(config_data, config_file, indent=2, sort_keys=True)
 
-    def _replace_weights(self, dense_weight_generator, model):
+    def _replace_weights(self, dense_weight_generator, model: Module):
+        """
+        Replace the weights of the model with the
+        provided dense weights.
+
+        This method iterates over the dense_weight_generator and
+        updates the corresponding weights in the model. If a parameter
+        name does not exist in the model, it will be skipped.
+
+        :param dense_weight_generator (generator): A generator that yields
+            tuples of (name, data), where 'name' is the parameter name and
+            'data' is the updated param data
+        :param model: The model whose weights are to be updated.
+        """
         for name, data in tqdm(dense_weight_generator, desc="Decompressing model"):
             split_name = name.split(".")
             prefix, param_name = ".".join(split_name[:-1]), split_name[-1]
             module = operator.attrgetter(prefix)(model)
-            update_parameter_data(module, data, param_name)
+            if hasattr(module, param_name):
+                update_parameter_data(module, data, param_name)
 
 
 def map_modules_to_quant_args(model: Module) -> Dict[str, QuantizationArgs]:
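`_replace_weights` splits each flat parameter name into a module prefix and a leaf name, then resolves the owning module with `operator.attrgetter`, which follows dotted paths; the new `hasattr` guard skips names absent from the model. A small standalone illustration with hypothetical classes:

```python
import operator


class Linear:  # hypothetical stand-ins, not library classes
    weight = "dense weight"


class Decoder:
    proj = Linear()


class Model:
    decoder = Decoder()


name = "decoder.proj.weight"
split_name = name.split(".")
prefix, param_name = ".".join(split_name[:-1]), split_name[-1]
module = operator.attrgetter(prefix)(Model())  # attrgetter follows dotted paths
assert hasattr(module, param_name) and module.weight == "dense weight"
```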
@@ -402,3 +444,23 @@ def new_dtype_byte_size(dtype):
         raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
     bit_size = int(bit_search.groups()[0])
     return bit_size // 8
+
+
+@contextmanager
+def override_quantization_status(
+    config: QuantizationConfig, status: QuantizationStatus
+):
+    """
+    Within this context, the quantization status will be set to the
+    supplied status. After the context exits, the original status
+    will be restored.
+
+    :param config: the quantization config to override
+    :param status: the status to temporarily set
+    """
+    original_status = config.quantization_status
+    config.quantization_status = status
+    try:
+        yield
+    finally:
+        config.quantization_status = original_status
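The try/finally guarantees the original status comes back even if loading raises. A minimal sketch of that property, reusing the context manager body from the hunk above with a hypothetical stand-in config (constructing a real `QuantizationConfig` is out of scope here):

```python
from contextlib import contextmanager


@contextmanager
def override_quantization_status(config, status):
    # Body copied from the hunk above
    original_status = config.quantization_status
    config.quantization_status = status
    try:
        yield
    finally:
        config.quantization_status = original_status


class SimpleConfig:  # hypothetical stand-in for QuantizationConfig
    quantization_status = "initialized"


cfg = SimpleConfig()
try:
    with override_quantization_status(cfg, "frozen"):
        assert cfg.quantization_status == "frozen"
        raise RuntimeError("load failed")
except RuntimeError:
    pass
assert cfg.quantization_status == "initialized"  # restored even on error
```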
--- a/compressed_tensors/compressors/quantized_compressors/base.py
+++ b/compressed_tensors/compressors/quantized_compressors/base.py
@@ -13,12 +13,17 @@
 # limitations under the License.
 
 import logging
-from typing import Dict, Generator, Tuple
+from pathlib import Path
+from typing import Any, Dict, Generator, Tuple, Union
 
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
 from compressed_tensors.quantization import QuantizationArgs
-from compressed_tensors.utils import get_nested_weight_mappings, merge_names
+from compressed_tensors.utils import (
+    get_nested_mappings_from_state_dict,
+    get_nested_weight_mappings,
+    merge_names,
+)
 from safetensors import safe_open
 from torch import Tensor
 from tqdm import tqdm
@@ -113,7 +118,7 @@ class BaseQuantizationCompressor(BaseCompressor):
 
     def decompress(
         self,
-        path_to_model_or_tensors: str,
+        path_to_model_or_tensors: Union[str, Path, Dict[str, Any]],
         names_to_scheme: Dict[str, QuantizationArgs],
         device: str = "cpu",
     ) -> Generator[Tuple[str, Tensor], None, None]:
@@ -121,15 +126,25 @@ class BaseQuantizationCompressor(BaseCompressor):
         Reads a compressed state dict located at path_to_model_or_tensors
         and returns a generator for sequentially decompressing back to a
         dense state dict
-
         :param path_to_model_or_tensors: path to compressed safetensors model (directory
             with one or more safetensors files) or compressed tensors file
         :param names_to_scheme: quantization args for each quantized weight
         :param device: optional device to load intermediate weights into
         :return: compressed state dict
         """
+        if isinstance(path_to_model_or_tensors, (str, Path)):
+            yield from self._decompress_from_path(
+                path_to_model_or_tensors, names_to_scheme, device
+            )
+
+        else:
+            yield from self._decompress_from_state_dict(
+                path_to_model_or_tensors, names_to_scheme
+            )
+
+    def _decompress_from_path(self, path_to_model, names_to_scheme, device):
         weight_mappings = get_nested_weight_mappings(
-            path_to_model_or_tensors, self.COMPRESSION_PARAM_NAMES
+            path_to_model, self.COMPRESSION_PARAM_NAMES
         )
         for weight_name in weight_mappings.keys():
             weight_data = {}
@@ -137,6 +152,21 @@ class BaseQuantizationCompressor(BaseCompressor):
                 full_name = merge_names(weight_name, param_name)
                 with safe_open(safe_path, framework="pt", device=device) as f:
                     weight_data[param_name] = f.get_tensor(full_name)
+            if "weight_scale" in weight_data:
+                quant_args = names_to_scheme[weight_name]
+                decompressed = self.decompress_weight(
+                    compressed_data=weight_data, quantization_args=quant_args
+                )
+                yield merge_names(weight_name, "weight"), decompressed
+
+    def _decompress_from_state_dict(self, state_dict, names_to_scheme):
+        weight_mappings = get_nested_mappings_from_state_dict(
+            state_dict, self.COMPRESSION_PARAM_NAMES
+        )
+        for weight_name in weight_mappings.keys():
+            weight_data = {}
+            for param_name, param_value in weight_mappings[weight_name].items():
+                weight_data[param_name] = param_value
 
             if "weight_scale" in weight_data:
                 quant_args = names_to_scheme[weight_name]
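The new `decompress` entry point keeps the old on-disk route but also accepts an in-memory state dict, which `ModelCompressor.decompress` now passes in after sparse decompression. The routing reduces to a single isinstance check; a standalone sketch of the same pattern with hypothetical names:

```python
from pathlib import Path
from typing import Any, Dict, Generator, Tuple, Union


def decompress_sketch(
    source: Union[str, Path, Dict[str, Any]]
) -> Generator[Tuple[str, str], None, None]:
    # Strings and paths take the on-disk safetensors route; anything else is
    # treated as an in-memory state dict (e.g. after sparse decompression).
    if isinstance(source, (str, Path)):
        yield "route", "from_path"
    else:
        yield "route", "from_state_dict"


assert dict(decompress_sketch("model_dir")) == {"route": "from_path"}
assert dict(decompress_sketch({"w": 1})) == {"route": "from_state_dict"}
```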
--- a/compressed_tensors/compressors/quantized_compressors/naive_quantized.py
+++ b/compressed_tensors/compressors/quantized_compressors/naive_quantized.py
@@ -68,9 +68,9 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
         self,
         weight: Tensor,
         scale: Tensor,
+        quantization_args: QuantizationArgs,
         zero_point: Optional[Tensor] = None,
         g_idx: Optional[torch.Tensor] = None,
-        quantization_args: Optional[QuantizationArgs] = None,
         device: Optional[torch.device] = None,
     ) -> Dict[str, torch.Tensor]:
         """
@@ -78,9 +78,9 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
 
         :param weight: uncompressed weight tensor
         :param scale: quantization scale for weight
+        :param quantization_args: quantization parameters for weight
         :param zero_point: quantization zero point for weight
         :param g_idx: optional mapping from column index to group index
-        :param quantization_args: quantization parameters for weight
         :param device: optional device to move compressed output to
         :return: dictionary of compressed weight data
         """
--- a/compressed_tensors/compressors/quantized_compressors/pack_quantized.py
+++ b/compressed_tensors/compressors/quantized_compressors/pack_quantized.py
@@ -68,9 +68,9 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
         self,
         weight: Tensor,
         scale: Tensor,
+        quantization_args: QuantizationArgs,
         zero_point: Optional[Tensor] = None,
         g_idx: Optional[torch.Tensor] = None,
-        quantization_args: Optional[QuantizationArgs] = None,
         device: Optional[torch.device] = None,
     ) -> Dict[str, torch.Tensor]:
         """
@@ -78,9 +78,9 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
 
         :param weight: uncompressed weight tensor
         :param scale: quantization scale for weight
+        :param quantization_args: quantization parameters for weight
         :param zero_point: quantization zero point for weight
         :param g_idx: optional mapping from column index to group index
-        :param quantization_args: quantization parameters for weight
         :param device: optional device to move compressed output to
         :return: dictionary of compressed weight data
         """
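In both compressors, `quantization_args` moves from a trailing optional keyword to a required third parameter. Keyword call sites keep working, but positional call sites written against 0.8.1 would now bind their third argument to `quantization_args`. A minimal standalone sketch of that hazard (function names hypothetical):

```python
def compress_weight_v081(weight, scale, zero_point=None, g_idx=None,
                         quantization_args=None, device=None):
    return zero_point, quantization_args


def compress_weight_v091(weight, scale, quantization_args, zero_point=None,
                         g_idx=None, device=None):
    return zero_point, quantization_args


# Keyword call sites behave identically across both versions:
assert compress_weight_v081("w", "s", quantization_args="args") == (None, "args")
assert compress_weight_v091("w", "s", quantization_args="args") == (None, "args")
# A positional third argument meant zero_point in 0.8.1 but means
# quantization_args in 0.9.1:
assert compress_weight_v081("w", "s", "third") == ("third", None)
assert compress_weight_v091("w", "s", "third") == (None, "third")
```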
--- a/compressed_tensors/compressors/sparse_compressors/base.py
+++ b/compressed_tensors/compressors/sparse_compressors/base.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import logging
-from typing import Dict, Generator, Tuple
+from typing import Dict, Generator, Optional, Set, Tuple
 
 from compressed_tensors.compressors.base import BaseCompressor
 from compressed_tensors.utils import get_nested_weight_mappings, merge_names
@@ -30,7 +30,8 @@ _LOGGER: logging.Logger = logging.getLogger(__name__)
 class BaseSparseCompressor(BaseCompressor):
     """
     Base class representing a sparse compression algorithm. Each child class should
-    implement compression_param_info, compress_weight and decompress_weight
+    implement compression_param_info, compress_weight and decompress_weight; child
+    classes should also define COMPRESSION_PARAM_NAMES.
 
     Compressors support compressing/decompressing a full module state dict or a single
     quantized PyTorch leaf module.
@@ -59,11 +60,17 @@ class BaseSparseCompressor(BaseCompressor):
     :param config: config specifying compression parameters
     """
 
-    def compress(self, model_state: Dict[str, Tensor]) -> Dict[str, Tensor]:
+    def compress(
+        self,
+        model_state: Dict[str, Tensor],
+        compression_targets: Optional[Set[str]] = None,
+    ) -> Dict[str, Tensor]:
         """
         Compresses a dense state dict using bitmask compression
 
         :param model_state: state dict of uncompressed model
+        :param compression_targets: optional set of layer prefixes to compress,
+            otherwise compress all layers (for backwards compatibility)
         :return: compressed state dict
         """
         compressed_dict = {}
@@ -71,7 +78,14 @@ class BaseSparseCompressor(BaseCompressor):
             f"Compressing model with {len(model_state)} parameterized layers..."
         )
         for name, value in tqdm(model_state.items(), desc="Compressing model"):
-            compression_data = self.compress_weight(name, value)
+            if not self.should_compress(name, compression_targets):
+                compressed_dict[name] = value
+                continue
+            prefix = name
+            if prefix.endswith(".weight"):
+                prefix = prefix[: -(len(".weight"))]
+
+            compression_data = self.compress_weight(prefix, value)
             for key in compression_data.keys():
                 if key in compressed_dict:
                     _LOGGER.warn(
@@ -97,8 +111,10 @@ class BaseSparseCompressor(BaseCompressor):
         :param device: device to load decompressed weights onto
         :return: iterator for generating decompressed weights
         """
-        weight_mappings = get_nested_weight_mappings(
-            path_to_model_or_tensors, self.COMPRESSION_PARAM_NAMES
+        weight_mappings, ignored_params = get_nested_weight_mappings(
+            path_to_model_or_tensors,
+            self.COMPRESSION_PARAM_NAMES,
+            return_unmatched_params=True,
         )
         for weight_name in weight_mappings.keys():
             weight_data = {}
@@ -107,4 +123,26 @@ class BaseSparseCompressor(BaseCompressor):
                 with safe_open(safe_path, framework="pt", device=device) as f:
                     weight_data[param_name] = f.get_tensor(full_name)
             decompressed = self.decompress_weight(weight_data)
-            yield weight_name, decompressed
+            yield merge_names(weight_name, "weight"), decompressed
+
+        for ignored_param_name, safe_path in ignored_params.items():
+            with safe_open(safe_path, framework="pt", device=device) as f:
+                value = f.get_tensor(ignored_param_name)
+            yield ignored_param_name, value
+
+    @staticmethod
+    def should_compress(name: str, expanded_targets: Optional[Set[str]] = None) -> bool:
+        """
+        Check if a parameter should be compressed.
+        Currently, this only returns True for weight parameters.
+
+        :param name: name of the parameter
+        :param expanded_targets: set of layer prefixes to compress
+        :return: whether or not the parameter should be compressed
+        """
+        if expanded_targets is None:
+            return name.endswith(".weight")
+
+        return (
+            name.endswith(".weight") and name[: -(len(".weight"))] in expanded_targets
+        )
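Since `should_compress` is a pure static method, its behavior is easy to pin down. The body below is copied from the hunk above; the example parameter names are illustrative assumptions:

```python
from typing import Optional, Set


def should_compress(name: str, expanded_targets: Optional[Set[str]] = None) -> bool:
    # Copied from BaseSparseCompressor.should_compress above
    if expanded_targets is None:
        return name.endswith(".weight")
    return (
        name.endswith(".weight") and name[: -(len(".weight"))] in expanded_targets
    )


# With no targets, every weight is compressed; with targets, only listed prefixes.
assert should_compress("model.layers.0.mlp.down_proj.weight")
assert not should_compress("model.layers.0.mlp.down_proj.bias")
assert not should_compress("lm_head.weight", {"model.layers.0.mlp.down_proj"})
assert should_compress(
    "model.layers.0.mlp.down_proj.weight", {"model.layers.0.mlp.down_proj"}
)
```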
--- /dev/null
+++ b/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py
@@ -0,0 +1,240 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataclasses import dataclass
+from typing import Dict, List, Tuple, Union
+
+import torch
+from compressed_tensors.compressors.base import BaseCompressor
+from compressed_tensors.compressors.sparse_compressors.base import BaseSparseCompressor
+from compressed_tensors.config import CompressionFormat, SparsityStructure
+from compressed_tensors.quantization import FP8_DTYPE
+from compressed_tensors.utils import merge_names, pack_bitmasks, unpack_bitmasks
+from torch import Tensor
+
+
+__all__ = [
+    "Sparse24BitMaskCompressor",
+    "Sparse24BitMaskTensor",
+    "sparse24_bitmask_compress",
+    "sparse24_bitmask_decompress",
+    "get_24_bytemasks",
+]
+
+
+@BaseCompressor.register(name=CompressionFormat.sparse_24_bitmask.value)
+class Sparse24BitMaskCompressor(BaseSparseCompressor):
+    """
+    Compression for sparse models using bitmasks. Non-zero weights are stored in a 2d
+    values tensor, with their locations stored in a 2d bitmask
+    """
+
+    COMPRESSION_PARAM_NAMES = [
+        "shape",
+        "compressed",
+        "bitmask",
+    ]
+
+    def compress_weight(self, name, value):
+        bitmask_tensor = Sparse24BitMaskTensor.from_dense(
+            value, self.config.sparsity_structure
+        )
+        bitmask_dict = bitmask_tensor.dict(name_prefix=name, device="cpu")
+        return bitmask_dict
+
+    def decompress_weight(self, weight_data):
+        data = Sparse24BitMaskTensor.from_compressed_data(**weight_data)
+        decompressed = data.decompress()
+        return decompressed
+
+
+@dataclass
+class Sparse24BitMaskTensor:
+    """
+    Owns compression and decompression for a single 2:4 sparse
+    bitmask compressed tensor.
+
+    :param shape: shape of dense tensor
+    :param compressed: 2d tensor of non-zero values
+    :param bitmask: 2d bitmask of non-zero values
+    """
+
+    shape: List[int]
+    compressed: Tensor
+    bitmask: Tensor
+
+    @staticmethod
+    def from_dense(
+        tensor: Tensor,
+        sparsity_structure: Union[SparsityStructure, str] = SparsityStructure.TWO_FOUR,
+    ) -> "Sparse24BitMaskTensor":
+        """
+        :param tensor: dense tensor to compress
+        :return: instantiated compressed tensor
+        """
+        shape = list(tensor.shape)
+        compressed, bitmask = sparse24_bitmask_compress(
+            tensor.cpu(), sparsity_structure=sparsity_structure
+        )
+        return Sparse24BitMaskTensor(
+            shape=shape,
+            compressed=compressed,
+            bitmask=bitmask,
+        )
+
+    @staticmethod
+    def from_compressed_data(
+        shape: Union[List[int], Tensor], compressed: Tensor, bitmask: Tensor
+    ) -> "Sparse24BitMaskTensor":
+        """
+        :param shape: shape of the dense tensor (can be a list or a tensor)
+        :param compressed: 2d tensor of non-zero values
+        :param bitmask: 2d bitmask of non-zero values
+        :return: instantiated Sparse24BitMaskTensor
+        """
+        if isinstance(shape, list):
+            shape = torch.tensor(shape)
+        if isinstance(shape, torch.Tensor):
+            shape = shape.flatten().tolist()
+        return Sparse24BitMaskTensor(
+            shape=shape, compressed=compressed, bitmask=bitmask
+        )
+
+    def decompress(self) -> Tensor:
+        """
+        :return: reconstructed dense tensor
+        """
+        return sparse24_bitmask_decompress(self.compressed, self.bitmask, self.shape)
+
+    def curr_memory_size_bytes(self) -> int:
+        """
+        :return: size in bytes required to store compressed tensor on disk
+        """
+
+        def sizeof_tensor(a: Tensor) -> int:
+            return a.element_size() * a.nelement()
+
+        return sizeof_tensor(self.compressed) + sizeof_tensor(self.bitmask)
+
+    def dict(self, name_prefix: str, device: str = "cpu") -> Dict[str, Tensor]:
+        """
+        :param name_prefix: name of original tensor to store compressed weight as
+        :return: dict of compressed data for the stored weight
+        """
+        if name_prefix.endswith(".weight"):
+            name_prefix = name_prefix[: -len(".weight")]
+        return {
+            merge_names(name_prefix, "shape"): torch.tensor(
+                self.shape, device=device
+            ).reshape(-1, 1),
+            merge_names(name_prefix, "compressed"): self.compressed.to(device),
+            merge_names(name_prefix, "bitmask"): self.bitmask.to(device),
+        }
+
+    def __repr__(self) -> str:
+        return f"BitMaskTensor(shape={self.shape}, compressed=True)"
+
+
+def sparse24_bitmask_compress(
+    tensor: Tensor,
+    sparsity_structure: Union[SparsityStructure, str] = SparsityStructure.TWO_FOUR,
+) -> Tuple[Tensor, Tensor]:
+    """
+    Compresses a dense tensor using bitmask compression
+
+    :param tensor: dense 2D tensor to compress
+    :param sparsity_structure: structure of sparsity in the tensor; defaults
+        to 2:4, which is currently the only supported structure
+    :return: tuple of compressed data representing tensor
+    """
+    assert len(tensor.shape) == 2, "Only 2D tensors are supported"
+    assert (
+        SparsityStructure(sparsity_structure) == SparsityStructure.TWO_FOUR
+    ), "Only 2:4 sparsity is supported"
+
+    bytemasks = get_24_bytemasks(tensor=tensor)
+
+    if tensor.dtype == FP8_DTYPE:
+        # access raw bytes of the tensor
+        tensor_view = tensor.view(torch.int8)
+        values = tensor_view[bytemasks]
+        values = values.view(FP8_DTYPE)
+    else:
+        values = tensor[bytemasks]
+
+    num_rows, num_cols = tensor.shape
+    compressed_values = values.reshape(num_rows, num_cols // 2)
+    bitmasks_packed = pack_bitmasks(bytemasks)
+    return compressed_values, bitmasks_packed
+
+
+def sparse24_bitmask_decompress(
+    values: Tensor, bitmasks: Tensor, original_shape: torch.Size
+) -> Tensor:
+    """
+    Reconstructs a dense tensor from a compressed one
+
+    :param values: 1d tensor of non-zero values
+    :param bitmasks: 2d int8 tensor flagging locations of non-zero values in the
+        tensor's original shape
+    :param original_shape: shape of the dense tensor
+    :return: decompressed dense tensor
+    """
+    bytemasks_unpacked = unpack_bitmasks(bitmasks, original_shape)
+
+    decompressed_tensor = torch.zeros(original_shape, dtype=values.dtype)
+    decompressed_tensor = decompressed_tensor.to(values.device)
+    values = values.flatten()
+    if decompressed_tensor.dtype == FP8_DTYPE:
+        decompressed_tensor[bytemasks_unpacked] = values
+        decompressed_tensor = decompressed_tensor.cuda()
+    else:
+        decompressed_tensor[bytemasks_unpacked] = values
+    return decompressed_tensor
+
+
+def get_24_bytemasks(tensor):
+    """
+    Generate a 2:4 sparsity mask for the given tensor.
+
+    This function creates a mask where exactly 2 out of every 4 elements are
+    preserved based on their magnitudes. The preserved elements are the ones
+    with the highest absolute values in each group of 4 elements.
+
+    :param tensor: The input tensor for which the 2:4 sparsity mask is to be created.
+        The tensor can be of any shape but its total number of elements
+        must be a multiple of 4.
+    :return: A boolean tensor of the same shape as the input tensor, where `True`
+        indicates the preserved elements and `False` indicates the pruned elements.
+    :raises ValueError: If the total number of elements in the tensor is not a
+        multiple of 4.
+    """
+    original_dtype = tensor.dtype
+    if tensor.dtype == FP8_DTYPE:
+        tensor = tensor.view(torch.int8)
+    original_shape = tensor.shape
+    num_elements = tensor.numel()
+
+    if num_elements % 4 != 0:
+        raise ValueError("Tensor size must be a multiple of 4 for TWO_FOUR sparsity")
+
+    reshaped_tensor = tensor.view(-1, 4)
+    abs_tensor = reshaped_tensor.abs()
+    topk_indices = abs_tensor.topk(2, dim=1).indices
+    mask = torch.zeros_like(reshaped_tensor, dtype=torch.bool)
+    mask.scatter_(1, topk_indices, True)
+    mask = mask.view(original_shape)
+    tensor = tensor.view(original_dtype)
+
+    return mask