compressed-tensors 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,10 +16,5 @@
 
 from .base import ModelCompressor
 from .dense import DenseCompressor
-from .helpers import (
-    infer_compressor_from_model_config,
-    load_compressed,
-    save_compressed,
-    save_compressed_model,
-)
+from .helpers import load_compressed, save_compressed, save_compressed_model
 from .sparse_bitmask import BitmaskCompressor, BitmaskTensor
@@ -22,6 +22,7 @@ from compressed_tensors.utils import get_safetensors_folder
 from torch import Tensor
 from torch.nn import Module, Parameter
 from tqdm import tqdm
+from transformers import AutoConfig
 
 
 __all__ = ["ModelCompressor"]
@@ -34,6 +35,29 @@ class ModelCompressor(RegistryMixin):
     :param config: config specifying compression parameters
     """
 
+    @classmethod
+    def from_pretrained(
+        cls, pretrained_model_name_or_path: str
+    ) -> Optional["ModelCompressor"]:
+        """
+        Given a path to a model config, extract a sparsity config if it exists and
+        return the associated ModelCompressor
+
+        :param pretrained_model_name_or_path: path to model config on disk or HF hub
+        :return: matching compressor if config contains a sparsity config
+        """
+        config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
+        sparsity_config = getattr(config, SPARSITY_CONFIG_NAME, None)
+        if sparsity_config is None:
+            return None
+
+        format = sparsity_config.get("format")
+        sparsity_config = CompressionConfig.load_from_registry(
+            format, **sparsity_config
+        )
+        compressor = cls.load_from_registry(format, config=sparsity_config)
+        return compressor
+
     def __init__(self, config: Optional[CompressionConfig] = None):
         self.config = config
 
@@ -47,7 +71,7 @@ class ModelCompressor(RegistryMixin):
         raise NotImplementedError()
 
     def decompress(
-        self, path_to_model_or_tensors: str
+        self, path_to_model_or_tensors: str, device: str = "cpu"
     ) -> Generator[Tuple[str, Tensor], None, None]:
         """
         Reads a compressed state dict located at path_to_model_or_tensors
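The `from_pretrained` classmethod added above takes over the compressor-side role of the removed `infer_compressor_from_model_config` helper, and `decompress` now accepts a `device` argument. A minimal usage sketch, assuming a local model directory (the path `./sparse-model` is hypothetical) whose config carries a sparsity config:

```python
from compressed_tensors.compressors import ModelCompressor

# returns None when the model config has no sparsity config attached
compressor = ModelCompressor.from_pretrained("./sparse-model")
if compressor is not None:
    # decompress() lazily yields (weight name, dense tensor) pairs;
    # the new device argument defaults to "cpu"
    for name, tensor in compressor.decompress("./sparse-model", device="cpu"):
        print(name, tuple(tensor.shape))
```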
@@ -29,6 +29,6 @@ class DenseCompressor(ModelCompressor):
         return model_state
 
     def decompress(
-        self, path_to_model_or_tensors: str, device: str
+        self, path_to_model_or_tensors: str, device: str = "cpu"
     ) -> Generator[Tuple[str, Tensor], None, None]:
         return iter([])
@@ -16,45 +16,21 @@ from pathlib import Path
 from typing import Dict, Generator, Optional, Tuple, Union
 
 import torch
-from compressed_tensors.base import SPARSITY_CONFIG_NAME
 from compressed_tensors.compressors import ModelCompressor
 from compressed_tensors.config import CompressionConfig, CompressionFormat
 from compressed_tensors.utils.safetensors_load import get_weight_mappings
 from safetensors import safe_open
 from safetensors.torch import save_file
 from torch import Tensor
-from transformers import AutoConfig
 
 
 __all__ = [
-    "infer_compressor_from_model_config",
     "load_compressed",
     "save_compressed",
     "save_compressed_model",
 ]
 
 
-def infer_compressor_from_model_config(
-    pretrained_model_name_or_path: str,
-) -> Optional[ModelCompressor]:
-    """
-    Given a path to a model config, extract a sparsity config if it exists and return
-    the associated ModelCompressor
-
-    :param pretrained_model_name_or_path: path to model config on disk or HF hub
-    :return: matching compressor if config contains a sparsity config
-    """
-    config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
-    sparsity_config = getattr(config, SPARSITY_CONFIG_NAME, None)
-    if sparsity_config is None:
-        return None
-
-    format = sparsity_config.get("format")
-    sparsity_config = CompressionConfig.load_from_registry(format, **sparsity_config)
-    compressor = ModelCompressor.load_from_registry(format, config=sparsity_config)
-    return compressor
-
-
 def save_compressed(
     tensors: Dict[str, Tensor],
     save_path: Union[str, Path],
@@ -67,7 +67,7 @@ class BitmaskCompressor(ModelCompressor):
                         f"found an existing entry for {key}. The existing entry will "
                         "be replaced."
                     )
-            compressed_dict |= bitmask_dict
+            compressed_dict.update(bitmask_dict)
 
         return compressed_dict
 
@@ -75,8 +75,9 @@ class BitmaskCompressor(ModelCompressor):
         self, path_to_model_or_tensors: str, device: str = "cpu"
     ) -> Generator[Tuple[str, Tensor], None, None]:
         """
-        Reads a bitmask compressed state dict located at path_to_model_or_tensors
-        and returns a generator for sequentially decompressing back to a dense state dict
+        Reads a bitmask compressed state dict located
+        at path_to_model_or_tensors and returns a generator
+        for sequentially decompressing back to a dense state dict
 
         :param model_path: path to compressed safetensors model (directory with
             one or more safetensors files) or compressed tensors file
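The switch from `compressed_dict |= bitmask_dict` to `compressed_dict.update(bitmask_dict)` keeps the same in-place merge; the `|=` operator on dicts only exists on Python 3.9+ (PEP 584), so `.update()` is the more portable spelling. A trivial illustration (placeholder keys, not real state dict entries):

```python
compressed_dict = {"layer.weight.compressed": "..."}
bitmask_dict = {"layer.weight.bitmask": "...", "layer.weight.shape": "..."}

# runs on Python 3.8 as well; `compressed_dict |= bitmask_dict` would not
compressed_dict.update(bitmask_dict)
```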
@@ -36,6 +36,7 @@ __all__ = [
     "load_pretrained_quantization",
     "apply_quantization_config",
     "apply_quantization_status",
+    "find_first_name_or_class_match",
 ]
 
 from compressed_tensors.quantization.utils.helpers import is_module_quantized
@@ -99,9 +100,9 @@ def apply_quantization_config(model: Module, config: QuantizationConfig):
 
     # mark appropriate layers for quantization by setting their quantization schemes
     for name, submodule in iter_named_leaf_modules(model):
-        if _find_first_name_or_class_match(name, submodule, config.ignore):
+        if find_first_name_or_class_match(name, submodule, config.ignore):
             continue  # layer matches ignore list, continue
-        target = _find_first_name_or_class_match(name, submodule, target_to_scheme)
+        target = find_first_name_or_class_match(name, submodule, target_to_scheme)
         if target is not None:
             # target matched - add layer and scheme to target list
             submodule.quantization_scheme = target_to_scheme[target]
@@ -125,27 +126,31 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
         model.apply(freeze_module_quantization)
 
 
-def _find_first_name_or_class_match(
-    name: str,
-    module: Module,
-    targets: Iterable[str],
+def find_first_name_or_class_match(
+    name: str, module: Module, targets: Iterable[str], check_contains: bool = False
 ) -> Optional[str]:
     # first element of targets that matches the given name
     # if no name matches returns first target that matches the class name
     # returns None otherwise
     return _find_first_match(name, targets) or _find_first_match(
-        module.__class__.__name__, targets
+        module.__class__.__name__, targets, check_contains
     )
 
 
-def _find_first_match(value: str, targets: Iterable[str]) -> Optional[str]:
+def _find_first_match(
+    value: str, targets: Iterable[str], check_contains: bool = False
+) -> Optional[str]:
     # returns first element of target that matches value either
-    # exactly or as a regex after 're:'
+    # exactly or as a regex after 're:'. if check_contains is set to True,
+    # additionally checks if the target string is contained within value.
     for target in targets:
         if target.startswith("re:"):
             pattern = target[3:]
             if re.match(pattern, value):
                 return target
+        elif check_contains:
+            if target.lower() in value.lower():
+                return target
         elif target == value:
             return target
     return None
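To make the now-public matcher concrete, a small sketch of how names, `re:` patterns, and class names resolve (the layer names here are made up for illustration):

```python
import torch.nn as nn
from compressed_tensors.quantization.lifecycle.apply import (
    find_first_name_or_class_match,
)

layer = nn.Linear(8, 8)

# the "re:" prefix turns the target into a regex matched against the name
print(find_first_name_or_class_match(
    "model.layers.0.self_attn.q_proj", layer, ["re:.*q_proj$"]
))  # -> "re:.*q_proj$"

# when no name matches, the class name ("Linear") is tried next
print(find_first_name_or_class_match("model.layers.0.mlp.gate", layer, ["Linear"]))
# -> "Linear"
```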
@@ -13,15 +13,19 @@
 # limitations under the License.
 
 from functools import wraps
+from math import ceil
 
 import torch
-from compressed_tensors.quantization.quant_args import QuantizationArgs
+from compressed_tensors.quantization.quant_args import (
+    QuantizationArgs,
+    QuantizationStrategy,
+)
 from compressed_tensors.quantization.quant_config import QuantizationStatus
 from compressed_tensors.quantization.quant_scheme import QuantizationScheme
 from torch.nn import Module
 
 
-__all__ = ["wrap_module_forward_quantized"]
+__all__ = ["wrap_module_forward_quantized", "maybe_calibrate_or_quantize"]
 
 
 @torch.no_grad()
@@ -32,10 +36,9 @@ def quantize(
     q_min: torch.Tensor,
     q_max: torch.Tensor,
 ) -> torch.Tensor:
+
     return torch.clamp(
-        torch.round(
-            x / scale + zero_point,
-        ),
+        torch.round(x / scale + zero_point),
         q_min,
         q_max,
     )
@@ -57,12 +60,88 @@ def fake_quantize(
     zero_point: torch.Tensor,
     args: QuantizationArgs,
 ) -> torch.Tensor:
+    """
+    Fake quantize the input tensor x depending on the group_size.
+    if group_size is greater than 0, then q/dq by groups. The column
+    size must be divisible by the group_size.
+    if group_size is -1, then channel wise q/dq. The input scale and
+    zero_points are reshaped to support vectorization (assumes 1 is
+    the channel dimension)
+
+    :param x: Input tensor
+    :param scale: scale tensor
+    :param zero_point: zero point tensor
+    :param args: quantization args that contain group_size info
+    :return: fake quantized tensor
+
+    """
     bit_range = 2**args.num_bits
     max_q = torch.tensor(bit_range / 2 - 1, device=x.device)
     min_q = torch.tensor(-bit_range / 2, device=x.device)
-    Q = torch.zeros_like(x)
-    Q = quantize(x, scale, zero_point, min_q, max_q)
-    return dequantize(Q, scale, zero_point)
+
+    group_size = args.group_size
+
+    # group
+    if args.strategy == QuantizationStrategy.GROUP:
+
+        DQ = torch.zeros_like(x)
+
+        # TODO: vectorize the for loop
+        # TODO: fix generic assumption about the tensor size for computing group
+
+        # TODO: make validation step for inputs
+
+        while scale.ndim < 2:
+            # pad scale and zero point dims for slicing
+            scale = scale.unsqueeze(1)
+            zero_point = zero_point.unsqueeze(1)
+
+        columns = x.shape[1]
+        if columns >= group_size:
+            if columns % group_size != 0:
+                raise ValueError(
+                    "tensor column shape must be divisible "
+                    f"by the given group_size {group_size}"
+                )
+        for i in range(ceil(columns / group_size)):
+            # scale.shape should be [nchan, ndim]
+            # sc.shape should be [nchan, 1] after unsqueeze
+
+            sc = scale[:, i].unsqueeze(1)
+            zp = zero_point[:, i].unsqueeze(1)
+
+            idx = i * group_size
+            Q = quantize(x[:, idx : (idx + group_size)], sc, zp, min_q, max_q)
+            DQ[:, idx : (idx + group_size)] = dequantize(Q, sc, zp)
+
+    # channel-wise
+    elif args.strategy == QuantizationStrategy.CHANNEL:  # group_size == -1
+        # before: scale shape = [channel_size]
+        # after: scale shape = [1, channel_size]
+        scale = scale.unsqueeze(0)
+        zero_point = zero_point.unsqueeze(0)
+
+        Q = quantize(x, scale, zero_point, min_q, max_q)
+        DQ = dequantize(Q, scale, zero_point)
+
+    # per-token
+    elif args.strategy == QuantizationStrategy.TOKEN:
+        # before: scale shape = [num_tokens]
+        # after: scale shape = [num_tokens, 1]
+        # x.shape = [1, num_tokens, 1]
+        # scale gets broadcast as expected without having [1, num_tokens, 1] shape
+
+        scale = scale.unsqueeze(1)
+        zero_point = zero_point.unsqueeze(1)
+
+        Q = quantize(x, scale, zero_point, min_q, max_q)
+        DQ = dequantize(Q, scale, zero_point)
+
+    else:
+        Q = quantize(x, scale, zero_point, min_q, max_q)
+        DQ = dequantize(Q, scale, zero_point)
+
+    return DQ
 
 
 def wrap_module_forward_quantized(module: Module, scheme: QuantizationScheme):
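The group branch above assumes a 2-D tensor whose column count divides evenly by `group_size`, with one scale and zero point per (row, group). A standalone sketch of that shape contract with toy sizes (it mirrors the loop above rather than calling the library; the clamp/round math stands in for `quantize()`/`dequantize()`):

```python
import torch

rows, columns, group_size = 4, 16, 8                      # columns % group_size == 0
x = torch.randn(rows, columns)
scale = torch.rand(rows, columns // group_size) + 0.01    # [rows, num_groups]
zero_point = torch.zeros_like(scale)

DQ = torch.zeros_like(x)
for i in range(columns // group_size):
    sc = scale[:, i].unsqueeze(1)        # [rows, 1], broadcasts over the group
    zp = zero_point[:, i].unsqueeze(1)
    idx = i * group_size
    # 8-bit quantize then dequantize, per group slice
    q = torch.clamp(torch.round(x[:, idx:idx + group_size] / sc + zp), -128, 127)
    DQ[:, idx:idx + group_size] = (q - zp) * sc

assert DQ.shape == x.shape
```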
@@ -76,14 +155,14 @@ def wrap_module_forward_quantized(module: Module, scheme: QuantizationScheme):
 
         if scheme.input_activations is not None:
             # calibrate and (fake) quantize input activations when applicable
-            input_ = _maybe_calibrate_or_quantize(
+            input_ = maybe_calibrate_or_quantize(
                 module, input_, "input", scheme.input_activations
             )
 
         if scheme.weights is not None:
             # calibrate and (fake) quantize weights when applicable
             unquantized_weight = self.weight.data.clone()
-            self.weight.data = _maybe_calibrate_or_quantize(
+            self.weight.data = maybe_calibrate_or_quantize(
                 module, self.weight, "weight", scheme.weights
             )
 
@@ -94,7 +173,7 @@ def wrap_module_forward_quantized(module: Module, scheme: QuantizationScheme):
 
         if scheme.output_activations is not None:
             # calibrate and (fake) quantize output activations when applicable
-            output = _maybe_calibrate_or_quantize(
+            output = maybe_calibrate_or_quantize(
                 module, output, "output", scheme.output_activations
             )
 
@@ -110,8 +189,8 @@ def wrap_module_forward_quantized(module: Module, scheme: QuantizationScheme):
     setattr(module, "forward", bound_wrapped_forward)
 
 
-def _maybe_calibrate_or_quantize(
-    module: Module, value: Module, base_name: str, args: "QuantizationArgs"
+def maybe_calibrate_or_quantize(
+    module: Module, value: torch.Tensor, base_name: str, args: "QuantizationArgs"
 ) -> torch.Tensor:
     # only run quantized for the included stages
     if module.quantization_status not in {
@@ -120,17 +199,23 @@ def _maybe_calibrate_or_quantize(
     }:
         return value
 
-    device = next(module.parameters()).device
-    scale = getattr(module, f"{base_name}_scale")
-    zero_point = getattr(module, f"{base_name}_zero_point")
-
-    if module.quantization_status == QuantizationStatus.CALIBRATION:
-        # get observer and get new quant params from observation
+    if args.dynamic:
+        # dynamic quantization - get scale and zero point directly from observer
         observer = getattr(module, f"{base_name}_observer")
-        updated_scale, updated_zero_point = observer(value)
-
-        # update scale and zero point
-        scale.data = updated_scale.to(device)
-        zero_point.data = updated_zero_point.to(device)
-
+        scale, zero_point = observer(value)
+    else:
+        # static quantization - get previous scale and zero point from layer
+        scale = getattr(module, f"{base_name}_scale")
+        zero_point = getattr(module, f"{base_name}_zero_point")
+
+        if module.quantization_status == QuantizationStatus.CALIBRATION:
+            # calibration mode - get new quant params from observer
+            observer = getattr(module, f"{base_name}_observer")
+
+            updated_scale, updated_zero_point = observer(value)
+
+            # update scale and zero point
+            device = next(module.parameters()).device
+            scale.data = updated_scale.to(device)
+            zero_point.data = updated_zero_point.to(device)
     return fake_quantize(value, scale, zero_point, args)
@@ -30,17 +30,17 @@ def freeze_module_quantization(module: Module):
 
     :param module: module to freeze quantization for
     """
-    if not getattr(module, "quantization_scheme", None):
+    scheme = getattr(module, "quantization_scheme", None)
+    if not scheme:
         # no quantization scheme nothing to do
         return
 
-    # delete observers from module
-    observer_names = []
-    for submodule_name, _ in module.named_modules():
-        if "." not in submodule_name and submodule_name.endswith("_observer"):
-            # delete any observers that belong directly to this module
-            observer_names.append(submodule_name)
-    for observer_name in observer_names:
-        delattr(module, observer_name)
+    # delete observers from module if not dynamic
+    if scheme.input_activations and not scheme.input_activations.dynamic:
+        delattr(module, "input_observer")
+    if scheme.weights and not scheme.weights.dynamic:
+        delattr(module, "weight_observer")
+    if scheme.output_activations and not scheme.output_activations.dynamic:
+        delattr(module, "output_observer")
 
     module.quantization_status = QuantizationStatus.FROZEN
@@ -80,6 +80,13 @@ def initialize_module_for_quantization(
 def _initialize_scale_zero_point_observer(
     module: Module, base_name: str, quantization_args: QuantizationArgs
 ):
+    # initialize observer module and attach as submodule
+    observer = quantization_args.get_observer()
+    module.register_module(f"{base_name}_observer", observer)
+
+    if quantization_args.dynamic:
+        return  # no need to register a scale and zero point for a dynamic observer
+
     device = next(module.parameters()).device
 
     # initializes empty scale and zero point parameters for the module
@@ -90,7 +97,3 @@ def _initialize_scale_zero_point_observer(
         torch.empty(0, device=device, dtype=int), requires_grad=False
     )
     module.register_parameter(f"{base_name}_zero_point", init_zero_point)
-
-    # initialize observer module and attach as submodule
-    observer = quantization_args.get_observer()
-    module.register_module(f"{base_name}_observer", observer)
@@ -14,7 +14,11 @@
 
 from typing import Optional, Tuple
 
-from compressed_tensors.quantization.quant_args import QuantizationArgs
+import torch
+from compressed_tensors.quantization.quant_args import (
+    QuantizationArgs,
+    QuantizationStrategy,
+)
 from compressed_tensors.registry.registry import RegistryMixin
 from torch import FloatTensor, IntTensor, Tensor
 from torch.nn import Module
@@ -52,6 +56,12 @@ class Observer(Module, RegistryMixin):
         """
         raise NotImplementedError(f"{self.__class__} must implement calculate_qparams")
 
+    def post_calculate_qparams(self) -> None:
+        """
+        Run any logic specific to this observer after running calculate_qparams
+        """
+        ...
+
     def get_qparams(
         self, observed: Optional[Tensor] = None
     ) -> Tuple[FloatTensor, IntTensor]:
@@ -64,6 +74,57 @@ class Observer(Module, RegistryMixin):
         :return: tuple of scale and zero point based on last observed value
         """
         if observed is not None:
-            # re-calcualte scale and zero point, update the stored value
-            self._scale, self._zero_point = self.calculate_qparams(observed)
+            group_size = self.quantization_args.group_size
+
+            if self.quantization_args.strategy == QuantizationStrategy.TENSOR:
+
+                # re-calculate scale and zero point, update the stored value
+                self._scale, self._zero_point = self.calculate_qparams(observed)
+
+            elif self.quantization_args.strategy == QuantizationStrategy.GROUP:
+                columns = observed.shape[1]
+                scales, zero_points = [], []
+                for i in range(0, columns, self.quantization_args.group_size):
+                    scale, zero_point = self.get_qparams_along_dim(
+                        observed[:, i : (i + group_size)],
+                        0,
+                    )
+                    scales.append(scale)
+                    zero_points.append(zero_point)
+
+                self._scale = torch.stack(scales, dim=1)
+                self._zero_point = torch.stack(zero_points, dim=1)
+
+            elif self.quantization_args.strategy == QuantizationStrategy.CHANNEL:
+                # assume observed is transposed, because it's the output, hence use dim 0
+                self._scale, self._zero_point = self.get_qparams_along_dim(observed, 0)
+
+            elif self.quantization_args.strategy == QuantizationStrategy.TOKEN:
+
+                # use dim 1, assume the observed.shape = [batch, token, hidden]
+                # should be batch, token
+
+                self._scale, self._zero_point = self.get_qparams_along_dim(
+                    observed, dim=1
+                )
+
         return self._scale, self._zero_point
+
+    def get_qparams_along_dim(self, observed, dim: int):
+        # TODO: add documentation that specifies the shape must
+        # be padded with 1-dims so the scales are along the right channel
+        # TODO: generalize the logic for reduce_dims
+        scales, zero_points = [], []
+
+        # TODO: make a more generic way to get the channel
+        num_dims = observed.shape[dim]
+
+        for dim_idx in range(num_dims):
+            scale, zero_point = self.calculate_qparams(
+                observed.select(dim=dim, index=dim_idx)
+            )
+
+            scales.append(scale)
+            zero_points.append(zero_point)
+        # breakpoint()
+        return torch.stack(scales), torch.stack(zero_points)
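For the new TOKEN strategy, `get_qparams_along_dim(observed, dim=1)` produces one scale/zero-point pair per index along the token dimension and stacks them. A toy sketch of that select-and-stack pattern (the per-slice computation below is a stand-in, not the observer's actual `calculate_qparams`):

```python
import torch

observed = torch.randn(1, 6, 32)        # [batch, tokens, hidden]
dim = 1

scales = []
for idx in range(observed.shape[dim]):
    token_slice = observed.select(dim=dim, index=idx)   # [batch, hidden]
    scales.append(token_slice.abs().max() / 127.0)      # stand-in qparam math

scale = torch.stack(scales)             # one entry per token -> torch.Size([6])
```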
@@ -23,10 +23,10 @@ from torch import FloatTensor, IntTensor, Tensor
 __all__ = ["MemorylessObserver"]
 
 
-@Observer.register("memoryless")
+@Observer.register("memoryless", alias=["dynamic"])
 class MemorylessObserver(Observer):
     """
-    Implements a dynamic quantization observer that sets the scale and
+    Implements a quantization observer that sets the scale and
     zero point based on the latest observed value without tracking state
     """
 
@@ -15,7 +15,7 @@
 from enum import Enum
 from typing import Any, Dict, Optional
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, validator
 
 
 __all__ = ["QuantizationType", "QuantizationStrategy", "QuantizationArgs"]
@@ -39,6 +39,7 @@ class QuantizationStrategy(str, Enum):
     CHANNEL = "channel"
     GROUP = "group"
     BLOCK = "block"
+    TOKEN = "token"
 
 
 class QuantizationArgs(BaseModel):
@@ -53,14 +54,20 @@ class QuantizationArgs(BaseModel):
     :param group_size: group length to use for the group strategy
     :param block_structure: 2d block structure to use for the block strategy, must be
         of the format "2x4", "8x16", etc.
+    :param dynamic: set True to perform dynamic quantization - values will not be
+        calibrated during the calibration phase; instead, new quantization ranges
+        will be observed with every sample during inference. Defaults to False for
+        static quantization. Note that enabling dynamic quantization will change the
+        default observer to a memoryless one
     """
 
     num_bits: int = 8
     type: QuantizationType = QuantizationType.INT
     symmetric: bool = True
-    strategy: QuantizationStrategy = QuantizationStrategy.TENSOR
     group_size: Optional[int] = None
+    strategy: Optional[QuantizationStrategy] = None
     block_structure: Optional[str] = None
+    dynamic: bool = False
     observer: str = Field(
         default="minmax",
         description=(
@@ -82,4 +89,37 @@
         """
         from compressed_tensors.quantization.observers.base import Observer
 
+        if self.observer == "minmax" and self.dynamic:
+            # override default observer for dynamic, you never want minmax which
+            # keeps state across samples for dynamic
+            self.observer = "memoryless"
+
         return Observer.load_from_registry(self.observer, quantization_args=self)
+
+    @validator("strategy", pre=True, always=True)
+    def validate_strategy(cls, value, values):
+        group_size = values.get("group_size")
+
+        # use group_size to determine strategy if not given explicitly
+        if group_size is not None and value is None:
+            if group_size > 0:
+                return QuantizationStrategy.GROUP
+
+            elif group_size == -1:
+                return QuantizationStrategy.CHANNEL
+
+            else:
+                raise ValueError(
+                    f"group_size={group_size} with strategy {value} is invalid. "
+                    "group_size > 0 for strategy='group' and "
+                    "group_size = -1 for 'channel'"
+                )
+
+        if value == QuantizationStrategy.GROUP:
+            if group_size is None:
+                raise ValueError(f"strategy {value} requires group_size to be set.")
+
+        if value is None:
+            return QuantizationStrategy.TENSOR
+
+        return value
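Taken together, the new `strategy` validator and `dynamic` flag behave roughly as follows (field values are illustrative):

```python
from compressed_tensors.quantization.quant_args import (
    QuantizationArgs,
    QuantizationStrategy,
)

# group_size > 0 with no explicit strategy resolves to GROUP
w4 = QuantizationArgs(num_bits=4, group_size=128)
assert w4.strategy == QuantizationStrategy.GROUP

# group_size == -1 resolves to CHANNEL
w8 = QuantizationArgs(num_bits=8, group_size=-1)
assert w8.strategy == QuantizationStrategy.CHANNEL

# dynamic=True swaps the default "minmax" observer for the stateless
# "memoryless" one when the observer is instantiated
a8 = QuantizationArgs(num_bits=8, dynamic=True)
observer = a8.get_observer()
```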
@@ -108,6 +108,7 @@ def calculate_compression_ratio(model: Module) -> float:
             compressed_bits = uncompressed_bits
             if is_module_quantized(submodule):
                 compressed_bits = submodule.quantization_scheme.weights.num_bits
+
             num_weights = parameter.numel()
             total_compressed += compressed_bits * num_weights
             total_uncompressed += uncompressed_bits * num_weights
@@ -12,28 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from pathlib import Path
-from typing import Dict, Optional, Union
 
-import torch
+from typing import Optional
+
 from compressed_tensors.base import SPARSITY_CONFIG_NAME
 from compressed_tensors.compressors import ModelCompressor
-from compressed_tensors.config import (
-    CompressionConfig,
-    CompressionFormat,
-    DenseSparsityConfig,
-)
-from safetensors.torch import save_file
-from torch import Tensor
+from compressed_tensors.config import CompressionConfig
 from transformers import AutoConfig
 
 
-__all__ = [
-    "infer_compressor_from_model_config",
-    "load_compressed",
-    "save_compressed",
-    "save_compressed_model",
-]
+__all__ = ["infer_compressor_from_model_config"]
 
 
 def infer_compressor_from_model_config(
@@ -55,97 +43,3 @@ def infer_compressor_from_model_config(
     sparsity_config = CompressionConfig.load_from_registry(format, **sparsity_config)
     compressor = ModelCompressor.load_from_registry(format, config=sparsity_config)
     return compressor
-
-
-def save_compressed(
-    tensors: Dict[str, Tensor],
-    save_path: Union[str, Path],
-    compression_format: Optional[CompressionFormat] = None,
-):
-    """
-    Save compressed tensors to disk. If tensors are not compressed,
-    save them as is.
-
-    :param tensors: dictionary of tensors to compress
-    :param save_path: path to save compressed tensors
-    :param compression_format: compression format used for the tensors
-    :return: compression config, if tensors were compressed - None otherwise
-    """
-    if tensors is None or len(tensors) == 0:
-        raise ValueError("No tensors or empty tensors provided to compress")
-
-    # if no compression_format specified, default to `dense_sparsity`
-    compression_format = compression_format or CompressionFormat.dense_sparsity.value
-
-    if not (
-        compression_format in ModelCompressor.registered_names()
-        or compression_format in ModelCompressor.registered_aliases()
-    ):
-        raise ValueError(
-            f"Unknown compression format: {compression_format}. "
-            f"Must be one of {set(ModelCompressor.registered_names() + ModelCompressor.registered_aliases())}"  # noqa E501
-        )
-
-    # compress
-    compressor = ModelCompressor.load_from_registry(compression_format)
-    # save compressed tensors
-    compressed_tensors = compressor.compress(tensors)
-    save_file(compressed_tensors, save_path)
-
-
-def load_compressed(
-    compressed_tensors: Union[str, Path],
-    compression_config: CompressionConfig = None,
-    device: Optional[str] = "cpu",
-) -> Dict[str, Tensor]:
-    """
-    Load compressed tensors from disk. If tensors are not compressed,
-    load them as is.
-
-    :param compressed_tensors: path to compressed tensors
-    :param compression_config: compression config to use for decompressing tensors.
-    :param device: device to move tensors to. If None, tensors are loaded on CPU.
-    :return decompressed tensors
-    """
-
-    if compressed_tensors is None or not Path(compressed_tensors).exists():
-        raise ValueError("No compressed tensors provided to load")
-
-    # if no compression_config specified, default to `dense_sparsity`
-    compression_config = compression_config or DenseSparsityConfig()
-
-    # decompress
-    compression_format = compression_config.format
-    compressor = ModelCompressor.load_from_registry(
-        compression_format, config=compression_config
-    )
-    return dict(compressor.decompress(compressed_tensors, device=device))
-
-
-def save_compressed_model(
-    model: torch.nn.Module,
-    filename: str,
-    compression_format: Optional[CompressionFormat] = None,
-    force_contiguous: bool = True,
-):
-    """
-    Wrapper around safetensors `save_model` helper function, which allows for
-    saving compressed model to disk.
-
-    Note: The model is assumed to have a
-    state_dict with unique entries
-
-    :param model: model to save on disk
-    :param filename: filename location to save the file
-    :param compression_format: compression format used for the model
-    :param force_contiguous: forcing the state_dict to be saved as contiguous tensors
-    """
-    state_dict = model.state_dict()
-    if force_contiguous:
-        state_dict = {k: v.contiguous() for k, v in state_dict.items()}
-    try:
-        save_compressed(state_dict, filename, compression_format=compression_format)
-    except ValueError as e:
-        msg = str(e)
-        msg += " Or use save_compressed_model(..., force_contiguous=True), read the docs for potential caveats."  # noqa E501
-        raise ValueError(msg)
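Note that the helpers removed here were relocated rather than dropped: per the earlier hunks, `save_compressed`, `load_compressed`, and `save_compressed_model` now live in `compressed_tensors/compressors/helpers.py` and are re-exported from the compressors subpackage, while `infer_compressor_from_model_config` stays in this module. The 0.3.3 import paths, as far as these hunks show:

```python
from compressed_tensors.compressors import (
    load_compressed,
    save_compressed,
    save_compressed_model,
)
from compressed_tensors.utils.helpers import infer_compressor_from_model_config
```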
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors
-Version: 0.3.1
+Version: 0.3.3
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -20,7 +20,7 @@ Requires-Dist: nbconvert >=7.16.3 ; extra == 'dev'
 Requires-Dist: pytest >=6.0.0 ; extra == 'dev'
 Requires-Dist: wheel >=0.36.2 ; extra == 'dev'
 
-# compressed-tensors
+# compressed_tensors
 
 This repository extends a [safetensors](https://github.com/huggingface/safetensors) format to efficiently store sparse and/or quantized tensors on disk. `compressed-tensors` format supports multiple compression types to minimize the disk space and facilitate the tensor manipulation.
@@ -103,4 +103,6 @@ save_compressed_model(model, "compressed_model.safetensors", compression_format=
 state_dict = dict(load_compressed("compressed_model.safetensors", compression_config))
 ```
 
+For a more in-depth tutorial on bitmask compression, refer to the [notebook](https://github.com/neuralmagic/compressed-tensors/blob/d707c5b84bc3fef164aebdcd97cb6eaa571982f8/examples/bitmask_compression.ipynb).
+
 
@@ -1,38 +1,38 @@
 compressed_tensors/__init__.py,sha256=SV1csvHUVCd8kHXz6UDZim1HZ_fAVG3vfk-j_4Bb6hY,789
 compressed_tensors/base.py,sha256=8zbgK87LpHkKoSknM55svXCT4E4dLLjPijwF9HfzmsQ,717
-compressed_tensors/compressors/__init__.py,sha256=3ZHKWSIWTjMx8XXgLtoP9JaVaCTvRecguLZTxLAAkKk,898
-compressed_tensors/compressors/base.py,sha256=F1smyJ6x2Sfq43tuP0QE9wZuhVqnewq-XUFPMtdU9yQ,2936
-compressed_tensors/compressors/dense.py,sha256=_VTusI3XjaY-zOdB_d7z4zOgPTJi9TJZZHF13g9ulS4,1263
-compressed_tensors/compressors/helpers.py,sha256=kSseqbwnu3JHZUKH8u4kQo5bmd87FvCcmWe0u2ikysA,6421
-compressed_tensors/compressors/sparse_bitmask.py,sha256=PYAK_Hcy2T57zlbpwl1FYkslluIr2x-d0Rh048YAtpI,8639
+compressed_tensors/compressors/__init__.py,sha256=UcHp0CwUBJoS2MBN6mLUT7B3uRf1TEoRGbME7gLPD38,841
+compressed_tensors/compressors/base.py,sha256=CqQo00ZIkAWpy0yVux5TXhK7WK_6Ws6qb5mCAvIoxB4,3902
+compressed_tensors/compressors/dense.py,sha256=ig9lItmyCX5-VzgMuUqea-s8fHsTjPj5-0VIsPLl0g0,1271
+compressed_tensors/compressors/helpers.py,sha256=wstgUEUYUCTMMu6G1YLF9G7vXqIJPj3MsWhqwU4J6Vw,5458
+compressed_tensors/compressors/sparse_bitmask.py,sha256=qXXFSf1UuQEzodB_xkQgYEJMwPgFsBgTQb8-LqesCsY,8652
 compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
 compressed_tensors/config/base.py,sha256=IP-3Y416w-811WozDzKHycIBXjdlG4Ddy7vpbwhOPD8,1373
 compressed_tensors/config/dense.py,sha256=xtkri7DkP7USu44FnSoTgTSqdGegCBtjRf3DfblSEL0,1311
 compressed_tensors/config/sparse_bitmask.py,sha256=y8fmQaOoGjIiI4FR6BJjfIqisAcqNQ_zjKyjT75bXwY,1284
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
-compressed_tensors/quantization/quant_args.py,sha256=dxSrq0_88ORQXcyIMYqoMZJvYEjnqdYl37f7lgZQqhw,2742
+compressed_tensors/quantization/quant_args.py,sha256=A6b2V8lhsM8Ho8RjlPBQdxRUDNWhqq-ie5E3RR2_GNg,4360
 compressed_tensors/quantization/quant_config.py,sha256=DWx8ae3gDlw99zAn3MUN9I4qeksbbmITmOXHRynqPB8,6650
 compressed_tensors/quantization/quant_scheme.py,sha256=X3oqmZPiIKtX5tEKKUj-0N6hB68NeiU2b1GcQEQPadQ,1480
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=fM9XBtPgJX6z54PTm3Sd0SpK5od95ibwaSf2FFR8DqE,772
-compressed_tensors/quantization/lifecycle/apply.py,sha256=WXUL3q1g0s244k0wuqGYZPXTXiscdyrp7RScN2j_KGA,6651
+compressed_tensors/quantization/lifecycle/apply.py,sha256=LQUESSqS5a2_7ij9rHvBdLjjdTOAf9v7chsgfWwh-Jg,6973
 compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
-compressed_tensors/quantization/lifecycle/forward.py,sha256=hnjk7pocZLDhLdMx237FKayYdvsdKbYSjTmSN5xbQO8,4599
-compressed_tensors/quantization/lifecycle/frozen.py,sha256=NHNmlDIaxurifqeI_qZC8xa4BstQsBNdOCXJjRzAfNU,1596
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=8pifqZQSgVqWYI_Qtv6QfBICPbCTFHy48OWPeQsxEHQ,3578
+compressed_tensors/quantization/lifecycle/forward.py,sha256=JcxGBUsthl6_ao5vi6t7poU3YOJsBEzGpE0MEH4Kxus,7600
+compressed_tensors/quantization/lifecycle/frozen.py,sha256=FF7BleuOUX46Egk7F1ZE5r4fjWt9jG5-tO8BjXU1r78,1606
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=U6g9qifSF6pagQZQZEwd-rwWC6uQ_dZXn1wg6nr1Abg,3697
 compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
-compressed_tensors/quantization/observers/base.py,sha256=O76dAxkin7bB602e9kjmxc84p71-PxBtjIq5L69xplI,2786
+compressed_tensors/quantization/observers/base.py,sha256=UqXaR4gOUmMRLKqq4N7IrVuGL11VDWwdmYYFmhk8a3o,5097
 compressed_tensors/quantization/observers/helpers.py,sha256=SxvOf9zwZ9NDRC3E4Xm7z3RqHcbcPtCABLKX9GnGGHM,2109
-compressed_tensors/quantization/observers/memoryless.py,sha256=3f6bUlcf5mzOHPkTRhoQ7Zd8xu_pUmj8e3Y85fGysSU,1848
+compressed_tensors/quantization/observers/memoryless.py,sha256=ZHTPh4aURE8LvHBFaP--HIC2JanMX5-VRdIkE2JHthw,1859
 compressed_tensors/quantization/observers/min_max.py,sha256=uAcZd5aY6WKM-KumTb2ybX28s8iKGVy6Nrje5Sddqew,2439
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
-compressed_tensors/quantization/utils/helpers.py,sha256=N_wYfrPcFr__Q1mn6mHoNUTclwpTW8P5PDHkR7GvXWo,3694
+compressed_tensors/quantization/utils/helpers.py,sha256=U7tgFUntFbebT43HDSE80rsjlUky_ON_Y8zm__24fd4,3695
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
 compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
-compressed_tensors/utils/helpers.py,sha256=wLgiPrk7Vn29AijOGQGk3UnXItRd1jpROS6FxHoC4VQ,5530
+compressed_tensors/utils/helpers.py,sha256=h0jfl9drs5FAx40tCHRcVtJqXixB5hT5yq_IG2aY_-w,1735
 compressed_tensors/utils/safetensors_load.py,sha256=wo9UirGrGlenBqZeqotvpCT7D5MEdjCo2J3HeRaIFoU,8502
-compressed_tensors-0.3.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.3.1.dist-info/METADATA,sha256=vpGRbjHWdPUTl9HFoDxIkwAKQJNpff75P4pKC3nJE4A,3850
-compressed_tensors-0.3.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-compressed_tensors-0.3.1.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.3.1.dist-info/RECORD,,
+compressed_tensors-0.3.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.3.3.dist-info/METADATA,sha256=ff5Bt4LgmRvE9HGubzPqXfpidTLn7vyTpAMt-k8hvu8,4059
+compressed_tensors-0.3.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+compressed_tensors-0.3.3.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.3.3.dist-info/RECORD,,