compressed-tensors-nightly 0.4.0.20240619__py3-none-any.whl → 0.4.0.20240621__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/__init__.py +5 -1
- compressed_tensors/compressors/model_compressor.py +20 -0
- compressed_tensors/compressors/{int_quantized.py → naive_quantized.py} +29 -11
- compressed_tensors/compressors/pack_quantized.py +0 -4
- compressed_tensors/config/base.py +2 -0
- compressed_tensors/quantization/lifecycle/apply.py +4 -8
- compressed_tensors/quantization/lifecycle/forward.py +52 -29
- compressed_tensors/quantization/lifecycle/initialize.py +2 -1
- compressed_tensors/quantization/observers/helpers.py +44 -9
- compressed_tensors/quantization/quant_args.py +45 -1
- compressed_tensors/quantization/quant_scheme.py +14 -8
- {compressed_tensors_nightly-0.4.0.20240619.dist-info → compressed_tensors_nightly-0.4.0.20240621.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.4.0.20240619.dist-info → compressed_tensors_nightly-0.4.0.20240621.dist-info}/RECORD +16 -16
- {compressed_tensors_nightly-0.4.0.20240619.dist-info → compressed_tensors_nightly-0.4.0.20240621.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.4.0.20240619.dist-info → compressed_tensors_nightly-0.4.0.20240621.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.4.0.20240619.dist-info → compressed_tensors_nightly-0.4.0.20240621.dist-info}/top_level.txt +0 -0
compressed_tensors/compressors/__init__.py
@@ -17,8 +17,12 @@
 from .base import Compressor
 from .dense import DenseCompressor
 from .helpers import load_compressed, save_compressed, save_compressed_model
-from .int_quantized import IntQuantizationCompressor
 from .marlin_24 import Marlin24Compressor
 from .model_compressor import ModelCompressor, map_modules_to_quant_args
+from .naive_quantized import (
+    FloatQuantizationCompressor,
+    IntQuantizationCompressor,
+    QuantizationCompressor,
+)
 from .pack_quantized import PackedQuantizationCompressor
 from .sparse_bitmask import BitmaskCompressor, BitmaskTensor
compressed_tensors/compressors/model_compressor.py
@@ -16,9 +16,12 @@ import json
 import logging
 import operator
 import os
+import re
 from copy import deepcopy
 from typing import Any, Dict, Optional, Union

+import torch
+import transformers
 from compressed_tensors.base import (
     COMPRESSION_CONFIG_NAME,
     QUANTIZATION_CONFIG_NAME,
@@ -236,6 +239,11 @@ class ModelCompressor:
             compressed_state_dict
         )

+        # HACK: Override the dtype_byte_size function in transformers to
+        # support float8 types. Fix is posted upstream
+        # https://github.com/huggingface/transformers/pull/30488
+        transformers.modeling_utils.dtype_byte_size = new_dtype_byte_size
+
         return compressed_state_dict

     def decompress(self, model_path: str, model: Module):
@@ -313,3 +321,15 @@ def map_modules_to_quant_args(model: Module) -> Dict:
                 quantized_modules_to_args[name] = submodule.quantization_scheme.weights

     return quantized_modules_to_args
+
+
+# HACK: Override the dtype_byte_size function in transformers to support float8 types
+# Fix is posted upstream https://github.com/huggingface/transformers/pull/30488
+def new_dtype_byte_size(dtype):
+    if dtype == torch.bool:
+        return 1 / 8
+    bit_search = re.search(r"[^\d](\d+)_?", str(dtype))
+    if bit_search is None:
+        raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
+    bit_size = int(bit_search.groups()[0])
+    return bit_size // 8
compressed_tensors/compressors/int_quantized.py → compressed_tensors/compressors/naive_quantized.py
@@ -27,17 +27,21 @@ from torch import Tensor
 from tqdm import tqdm


-__all__ = [
+__all__ = [
+    "QuantizationCompressor",
+    "IntQuantizationCompressor",
+    "FloatQuantizationCompressor",
+]

 _LOGGER: logging.Logger = logging.getLogger(__name__)


-@Compressor.register(name=CompressionFormat.
-class
+@Compressor.register(name=CompressionFormat.naive_quantized.value)
+class QuantizationCompressor(Compressor):
     """
-
-    converted from its original float type to the
-
+    Implements naive compression for quantized models. Weight of each
+    quantized layer is converted from its original float type to the closest Pytorch
+    type to the type specified by the layer's QuantizationArgs.
     """

     COMPRESSION_PARAM_NAMES = ["weight", "weight_scale", "weight_zero_point"]
@@ -77,7 +81,7 @@ class IntQuantizationCompressor(Compressor):
                     scale=scale,
                     zero_point=zp,
                     args=quant_args,
-                    dtype=
+                    dtype=quant_args.pytorch_dtype(),
                 )
             elif name.endswith("zero_point"):
                 if torch.all(value == 0):
@@ -114,13 +118,27 @@ class IntQuantizationCompressor(Compressor):
             if "weight_scale" in weight_data:
                 zero_point = weight_data.get("weight_zero_point", None)
                 scale = weight_data["weight_scale"]
-                if zero_point is None:
-                    # zero_point assumed to be 0 if not included in state_dict
-                    zero_point = torch.zeros_like(scale)
-
                 decompressed = dequantize(
                     x_q=weight_data["weight"],
                     scale=scale,
                     zero_point=zero_point,
                 )
                 yield merge_names(weight_name, "weight"), decompressed
+
+
+@Compressor.register(name=CompressionFormat.int_quantized.value)
+class IntQuantizationCompressor(QuantizationCompressor):
+    """
+    Alias for integer quantized models
+    """
+
+    pass
+
+
+@Compressor.register(name=CompressionFormat.float_quantized.value)
+class FloatQuantizationCompressor(QuantizationCompressor):
+    """
+    Alias for fp quantized models
+    """
+
+    pass
compressed_tensors/compressors/pack_quantized.py
@@ -126,10 +126,6 @@ class PackedQuantizationCompressor(Compressor):
             if "weight_scale" in weight_data:
                 zero_point = weight_data.get("weight_zero_point", None)
                 scale = weight_data["weight_scale"]
-                if zero_point is None:
-                    # zero_point assumed to be 0 if not included in state_dict
-                    zero_point = torch.zeros_like(scale)
-
                 weight = weight_data["weight_packed"]
                 original_shape = torch.Size(weight_data["weight_shape"])
                 unpacked = unpack_4bit_ints(weight, original_shape)
compressed_tensors/config/base.py
@@ -26,6 +26,8 @@ class CompressionFormat(Enum):
     dense = "dense"
     sparse_bitmask = "sparse-bitmask"
     int_quantized = "int-quantized"
+    float_quantized = "float-quantized"
+    naive_quantized = "naive-quantized"
     pack_quantized = "pack-quantized"
     marlin_24 = "marlin-24"

compressed_tensors/quantization/lifecycle/apply.py
@@ -215,15 +215,11 @@ def _load_quant_args_from_state_dict(
     scale = getattr(module, scale_name, None)
     zp = getattr(module, zp_name, None)
     if scale is not None:
-        state_dict_scale = state_dict
-
-        scale.data = state_dict_scale.to(device).to(scale.dtype)
-    else:
-        scale.data = scale.data.to(device)
-
+        state_dict_scale = state_dict[f"{module_name}.{scale_name}"]
+        scale.data = state_dict_scale.to(device).to(scale.dtype)
     if zp is not None:
         zp_from_state = state_dict.get(f"{module_name}.{zp_name}", None)
         if zp_from_state is not None:  # load the non-zero zero points
-            zp.data =
+            zp.data = zp_from_state.to(device).to(zp.dtype)
         else:  # fill with zeros matching scale shape
-            zp.data = torch.zeros_like(scale, dtype=
+            zp.data = torch.zeros_like(scale, dtype=zp.dtype).to(device)
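The fix above pulls the scale out of the state dict by its fully qualified parameter name before copying it onto the module, instead of assigning the whole dict. A toy illustration with hypothetical names:

import torch

# Hypothetical module/key names, purely to show the corrected lookup
state_dict = {"decoder.layers.0.fc1.weight_scale": torch.tensor(0.02)}
module_name, scale_name = "decoder.layers.0.fc1", "weight_scale"

state_dict_scale = state_dict[f"{module_name}.{scale_name}"]  # keyed lookup, not the whole dict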
compressed_tensors/quantization/lifecycle/forward.py
@@ -17,9 +17,11 @@ from math import ceil
 from typing import Optional

 import torch
+from compressed_tensors.quantization.observers.helpers import calculate_range
 from compressed_tensors.quantization.quant_args import (
     QuantizationArgs,
     QuantizationStrategy,
+    round_to_quantized_type,
 )
 from compressed_tensors.quantization.quant_config import QuantizationStatus
 from compressed_tensors.quantization.quant_scheme import QuantizationScheme
@@ -80,8 +82,9 @@ def quantize(
 def dequantize(
     x_q: torch.Tensor,
     scale: torch.Tensor,
-    zero_point: torch.Tensor,
+    zero_point: torch.Tensor = None,
     args: QuantizationArgs = None,
+    dtype: Optional[torch.dtype] = None,
 ) -> torch.Tensor:
     """
     Dequantize a quantized input tensor x_q based on the strategy specified in args. If
@@ -91,16 +94,9 @@ def dequantize(
     :param scale: scale tensor
     :param zero_point: zero point tensor
     :param args: quantization args used to quantize x_q
+    :param dtype: optional dtype to cast the dequantized output to
     :return: dequantized float tensor
     """
-    # ensure all tensors are on the same device
-    # assumes that the target device is the input
-    # tensor's device
-    if x_q.device != scale.device:
-        scale = scale.to(x_q.device)
-    if x_q.device != zero_point.device:
-        zero_point = zero_point.to(x_q.device)
-
     if args is None:
         if scale.ndim == 0 or scale.ndim == 1:
             args = QuantizationArgs(strategy=QuantizationStrategy.TENSOR)
@@ -115,8 +111,12 @@ def dequantize(
         else:
             raise ValueError(
                 f"Could not infer a quantization strategy from scale with {scale.ndim} "
-                "dimmensions. Expected 0
+                "dimmensions. Expected 0 or 2 dimmensions."
             )
+
+    if dtype is None:
+        dtype = scale.dtype
+
     return _process_quantization(
         x=x_q,
         scale=scale,
@@ -124,6 +124,7 @@ def dequantize(
         args=args,
         do_quantize=False,
         do_dequantize=True,
+        dtype=dtype,
     )


@@ -167,19 +168,13 @@ def _process_quantization(
     do_quantize: bool = True,
     do_dequantize: bool = True,
 ) -> torch.Tensor:
-
-    q_max =
-    q_min = torch.tensor(-bit_range / 2, device=x.device)
+
+    q_min, q_max = calculate_range(args, x.device)
     group_size = args.group_size

     if args.strategy == QuantizationStrategy.GROUP:
-
-
-        # if dequantizing a quantized type infer the output type from the scale
-        output = torch.zeros_like(x, dtype=scale.dtype)
-    else:
-        output_dtype = dtype if dtype is not None else x.dtype
-        output = torch.zeros_like(x, dtype=output_dtype)
+        output_dtype = dtype if dtype is not None else x.dtype
+        output = torch.zeros_like(x).to(output_dtype)

         # TODO: vectorize the for loop
         # TODO: fix genetric assumption about the tensor size for computing group
@@ -189,7 +184,7 @@ def _process_quantization(
         while scale.ndim < 2:
             # pad scale and zero point dims for slicing
             scale = scale.unsqueeze(1)
-            zero_point = zero_point.unsqueeze(1)
+            zero_point = zero_point.unsqueeze(1) if zero_point is not None else None

         columns = x.shape[1]
         if columns >= group_size:
@@ -202,12 +197,18 @@ def _process_quantization(
             # scale.shape should be [nchan, ndim]
             # sc.shape should be [nchan, 1] after unsqueeze
             sc = scale[:, i].view(-1, 1)
-            zp = zero_point[:, i].view(-1, 1)
+            zp = zero_point[:, i].view(-1, 1) if zero_point is not None else None

             idx = i * group_size
             if do_quantize:
                 output[:, idx : (idx + group_size)] = _quantize(
-                    x[:, idx : (idx + group_size)],
+                    x[:, idx : (idx + group_size)],
+                    sc,
+                    zp,
+                    q_min,
+                    q_max,
+                    args,
+                    dtype=dtype,
                 )
             if do_dequantize:
                 input = (
@@ -219,7 +220,15 @@ def _process_quantization(

     else:  # covers channel, token and tensor strategies
         if do_quantize:
-            output = _quantize(
+            output = _quantize(
+                x,
+                scale,
+                zero_point,
+                q_min,
+                q_max,
+                args,
+                dtype=dtype,
+            )
         if do_dequantize:
             output = _dequantize(output if do_quantize else x, scale, zero_point)

@@ -313,14 +322,18 @@ def _quantize(
     zero_point: torch.Tensor,
     q_min: torch.Tensor,
     q_max: torch.Tensor,
+    args: QuantizationArgs,
     dtype: Optional[torch.dtype] = None,
 ) -> torch.Tensor:
-
-
+
+    scaled = x / scale + zero_point.to(x.dtype)
+    # clamp first because cast isn't guaranteed to be saturated (ie for fp8)
+    clamped_value = torch.clamp(
+        scaled,
         q_min,
         q_max,
     )
-
+    quantized_value = round_to_quantized_type(clamped_value, args)
     if dtype is not None:
         quantized_value = quantized_value.to(dtype)

@@ -331,6 +344,16 @@ def _quantize(
 def _dequantize(
     x_q: torch.Tensor,
     scale: torch.Tensor,
-    zero_point: torch.Tensor,
+    zero_point: torch.Tensor = None,
+    dtype: Optional[torch.dtype] = None,
 ) -> torch.Tensor:
-
+
+    dequant_value = x_q
+    if zero_point is not None:
+        dequant_value = dequant_value - zero_point.to(scale.dtype)
+    dequant_value = dequant_value.to(scale.dtype) * scale
+
+    if dtype is not None:
+        dequant_value = dequant_value.to(dtype)
+
+    return dequant_value
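Taken together, _quantize now clamps before casting (and rounds via round_to_quantized_type), while _dequantize tolerates a missing zero point. A minimal pure-torch sketch of the fp8 round trip these changes enable, mirroring the math in the hunks above rather than calling the library's public API (requires a PyTorch build with float8 support):

import torch

FP8 = torch.float8_e4m3fn
x = torch.randn(4, 8)
scale = x.abs().max() / torch.finfo(FP8).max        # simple symmetric per-tensor scale
q_min, q_max = torch.finfo(FP8).min, torch.finfo(FP8).max

# clamp first because the fp8 cast is not guaranteed to saturate
x_q = torch.clamp(x / scale, q_min, q_max).to(FP8)

# zero point omitted, as _dequantize now allows; upcast before multiplying
x_hat = x_q.to(scale.dtype) * scale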
compressed_tensors/quantization/lifecycle/initialize.py
@@ -120,8 +120,9 @@ def _initialize_scale_zero_point_observer(
     )
     module.register_parameter(f"{base_name}_scale", init_scale)

+    zp_dtype = quantization_args.pytorch_dtype()
     init_zero_point = Parameter(
-        torch.empty(expected_shape, device=device, dtype=
+        torch.empty(expected_shape, device=device, dtype=zp_dtype),
         requires_grad=False,
     )
     module.register_parameter(f"{base_name}_zero_point", init_zero_point)
compressed_tensors/quantization/observers/helpers.py
@@ -15,11 +15,15 @@
 from typing import Tuple

 import torch
-from compressed_tensors.quantization.quant_args import
+from compressed_tensors.quantization.quant_args import (
+    FP8_DTYPE,
+    QuantizationArgs,
+    QuantizationType,
+)
 from torch import FloatTensor, IntTensor, Tensor


-__all__ = ["calculate_qparams"]
+__all__ = ["calculate_qparams", "calculate_range"]


 def calculate_qparams(
@@ -37,22 +41,53 @@ def calculate_qparams(
     max_vals = torch.max(max_vals, torch.zeros_like(max_vals))
     device = min_vals.device

-
-
-
+    bit_min, bit_max = calculate_range(quantization_args, device)
+    bit_range = bit_max - bit_min
+    zp_dtype = quantization_args.pytorch_dtype()
+
     if quantization_args.symmetric:
-        max_val_pos = torch.max(
+        max_val_pos = torch.max(torch.abs(min_vals), torch.abs(max_vals))
         scales = max_val_pos / (float(bit_range) / 2)
         scales = torch.clamp(scales, min=torch.finfo(torch.float32).eps)
-        zero_points = torch.zeros(scales.shape, device=device, dtype=
+        zero_points = torch.zeros(scales.shape, device=device, dtype=min_vals.dtype)
     else:
         scales = (max_vals - min_vals) / float(bit_range)
         scales = torch.clamp(scales, min=torch.finfo(torch.float32).eps)
-        zero_points = bit_min -
-        zero_points = torch.clamp(zero_points, bit_min, bit_max)
+        zero_points = bit_min - (min_vals / scales)
+        zero_points = torch.clamp(zero_points, bit_min, bit_max)
+
+    # match zero-points to quantized type
+    zero_points = zero_points.to(zp_dtype)

     if scales.ndim == 0:
         scales = scales.reshape(1)
         zero_points = zero_points.reshape(1)

     return scales, zero_points
+
+
+def calculate_range(quantization_args: QuantizationArgs, device: str) -> Tuple:
+    """
+    Calculated the effective quantization range for the given Quantization Args
+
+    :param quantization_args: quantization args to get range of
+    :param device: device to store the range to
+    :return: tuple endpoints for the given quantization range
+    """
+    if quantization_args.type == QuantizationType.INT:
+        bit_range = 2**quantization_args.num_bits
+        q_max = torch.tensor(bit_range / 2 - 1, device=device)
+        q_min = torch.tensor(-bit_range / 2, device=device)
+    elif quantization_args.type == QuantizationType.FLOAT:
+        if quantization_args.num_bits != 8:
+            raise ValueError(
+                "Floating point quantization is only supported for 8 bits,"
+                f"got {quantization_args.num_bits}"
+            )
+        fp_range_info = torch.finfo(FP8_DTYPE)
+        q_max = torch.tensor(fp_range_info.max, device=device)
+        q_min = torch.tensor(fp_range_info.min, device=device)
+    else:
+        raise ValueError(f"Invalid quantization type {quantization_args.type}")
+
+    return q_min, q_max
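As a quick sanity check of the new helper, the ranges it returns for the two supported types work out as below (a sketch; the constructor defaults for QuantizationArgs are inferred from the preset hunk later in this diff):

import torch
from compressed_tensors.quantization.observers.helpers import calculate_range
from compressed_tensors.quantization.quant_args import QuantizationArgs, QuantizationType

# default 8-bit int args: q_min == -128.0, q_max == 127.0
q_min, q_max = calculate_range(QuantizationArgs(num_bits=8), device="cpu")

# 8-bit float args: the torch.float8_e4m3fn range, q_min == -448.0, q_max == 448.0
q_min, q_max = calculate_range(
    QuantizationArgs(type=QuantizationType.FLOAT, num_bits=8), device="cpu"
)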
compressed_tensors/quantization/quant_args.py
@@ -15,10 +15,19 @@
 from enum import Enum
 from typing import Any, Dict, Optional

+import torch
 from pydantic import BaseModel, Field, validator


-__all__ = [
+__all__ = [
+    "FP8_DTYPE",
+    "QuantizationType",
+    "QuantizationStrategy",
+    "QuantizationArgs",
+    "round_to_quantized_type",
+]
+
+FP8_DTYPE = torch.float8_e4m3fn


 class QuantizationType(str, Enum):
@@ -123,3 +132,38 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
             return QuantizationStrategy.TENSOR

         return value
+
+    def pytorch_dtype(self) -> torch.dtype:
+        if self.type == QuantizationType.FLOAT:
+            return FP8_DTYPE
+        elif self.type == QuantizationType.INT:
+            if self.num_bits <= 8:
+                return torch.int8
+            elif self.num_bits <= 16:
+                return torch.int16
+            else:
+                return torch.int32
+        else:
+            raise ValueError(f"Invalid quantization type {self.type}")
+
+
+def round_to_quantized_type(
+    tensor: torch.Tensor, args: QuantizationArgs
+) -> torch.Tensor:
+    """
+    Rounds each element of the input tensor to the nearest quantized representation,
+    keeping to original dtype
+
+    :param tensor: tensor to round
+    :param args: QuantizationArgs to pull appropriate dtype from
+    :return: rounded tensor
+    """
+    original_dtype = tensor.dtype
+    if args.type == QuantizationType.FLOAT:
+        rounded = tensor.to(FP8_DTYPE)
+    elif args.type == QuantizationType.INT:
+        rounded = torch.round(tensor)
+    else:
+        raise ValueError(f"Invalid quantization type {args.type}")
+
+    return rounded.to(original_dtype)
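A small illustration of the two additions: pytorch_dtype() picks the storage dtype implied by the scheme, and round_to_quantized_type() rounds in that type's precision while returning the original dtype (a hedged sketch; QuantizationArgs() defaulting to 8-bit int is inferred from the W8A8 preset below):

import torch
from compressed_tensors.quantization.quant_args import (
    QuantizationArgs,
    QuantizationType,
    round_to_quantized_type,
)

int8_args = QuantizationArgs()                           # assumed default: 8-bit int
fp8_args = QuantizationArgs(type=QuantizationType.FLOAT)

int8_args.pytorch_dtype()   # torch.int8
fp8_args.pytorch_dtype()    # torch.float8_e4m3fn

t = torch.tensor([1.26, -0.7])
round_to_quantized_type(t, int8_args)   # tensor([ 1., -1.]), still float32
round_to_quantized_type(t, fp8_args)    # nearest fp8-representable values, still float32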
compressed_tensors/quantization/quant_scheme.py
@@ -15,7 +15,11 @@
 from copy import deepcopy
 from typing import List, Optional

-from compressed_tensors.quantization.quant_args import
+from compressed_tensors.quantization.quant_args import (
+    QuantizationArgs,
+    QuantizationStrategy,
+    QuantizationType,
+)
 from pydantic import BaseModel


@@ -107,13 +111,15 @@ def is_preset_scheme(name: str) -> bool:
     return name.upper() in PRESET_SCHEMES


-W8A8 = dict(
-
+W8A8 = dict(weights=QuantizationArgs(), input_activations=QuantizationArgs())
+
+W4A16 = dict(weights=QuantizationArgs(num_bits=4, group_size=128))
+
+FP8 = dict(
+    weights=QuantizationArgs(type=QuantizationType.FLOAT),
+    input_activations=QuantizationArgs(type=QuantizationType.FLOAT),
 )

-
+PRESET_SCHEMES = {"W8A8": W8A8, "W4A16": W4A16, "FP8": FP8}

-PRESET_SCHEMES = {
-    "W8A8": W8A8,
-    "W4A16": W4A16,
-}
+PRESET_SCHEMES = {"W8A8": W8A8, "W4A16": W4A16, "FP8": FP8}
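With the preset registered, the name lookup that is_preset_scheme performs now accepts the fp8 scheme as well; a brief hedged check:

from compressed_tensors.quantization.quant_scheme import PRESET_SCHEMES, is_preset_scheme

assert is_preset_scheme("fp8")                         # case-insensitive: "fp8".upper() in PRESET_SCHEMES
assert set(PRESET_SCHEMES) == {"W8A8", "W4A16", "FP8"}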
compressed_tensors_nightly-0.4.0.20240619.dist-info/METADATA → compressed_tensors_nightly-0.4.0.20240621.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.4.0.20240619
+Version: 0.4.0.20240621
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
compressed_tensors_nightly-0.4.0.20240619.dist-info/RECORD → compressed_tensors_nightly-0.4.0.20240621.dist-info/RECORD
@@ -1,37 +1,37 @@
 compressed_tensors/__init__.py,sha256=SV1csvHUVCd8kHXz6UDZim1HZ_fAVG3vfk-j_4Bb6hY,789
 compressed_tensors/base.py,sha256=OA2TOLP1gP3LSH7gp508eqr2ZtDQ-pqRHElCp-aB0vs,755
 compressed_tensors/version.py,sha256=cJJf0y0NnXErTtQtVQjOvrq9hMIkhXIfBwuu4Tuxl24,1586
-compressed_tensors/compressors/__init__.py,sha256=
+compressed_tensors/compressors/__init__.py,sha256=wmX4VnkUTS63xBwK5-6w8FP78bNZpcdcqvf2KOEC5E4,1133
 compressed_tensors/compressors/base.py,sha256=LWEgbpgTxzmoqQ7Xhq2OQszUgWoDtFuGCiV1Y8nlBGw,2134
 compressed_tensors/compressors/dense.py,sha256=G_XHbvuENyupIKlXSITOQgvPkNkcMEOLcLWQr70V9EE,1257
 compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
-compressed_tensors/compressors/int_quantized.py,sha256=Ct2vCK0yoPm6vkIFlzDMGQ7m14xT1GyURsSwH9DP770,5242
 compressed_tensors/compressors/marlin_24.py,sha256=X_BjtFB3Mn0hqiLz56UM3jGX2eNmGLnvEIPfbg7di6U,9444
-compressed_tensors/compressors/model_compressor.py,sha256=
-compressed_tensors/compressors/
+compressed_tensors/compressors/model_compressor.py,sha256=83AWAhlrR3QTNelfMGCh_10G-VfMIRXRTvV0ZZinCU8,13338
+compressed_tensors/compressors/naive_quantized.py,sha256=N3y5LxsCaTUJHT30sqEhnviZsyoz1v2eUaayE7-f8Xs,5562
+compressed_tensors/compressors/pack_quantized.py,sha256=ODb03_WaBQ1l99Gmp49olAUZ2TB_67z9qNZbc56X7NU,8275
 compressed_tensors/compressors/sparse_bitmask.py,sha256=H9oZSTYI1oRCzAMbd4zThUnZd1h2rfs8DmA3tPcvuNE,8637
 compressed_tensors/compressors/utils/__init__.py,sha256=-mbGDZh1hd9T6u62Ht_iBIK255UmMg0f5bLkSs1f9Cc,731
 compressed_tensors/compressors/utils/helpers.py,sha256=4fq7KclSIK__jemCG9pwYlgWLrQjsaAMxhIrhjdw0BQ,1506
 compressed_tensors/compressors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/compressors/utils/semi_structured_conversions.py,sha256=g1EZHzdv-ko7ufPX430dp7wE33o6FWJXuSP4zZydCu0,13488
 compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
-compressed_tensors/config/base.py,sha256=
+compressed_tensors/config/base.py,sha256=caSZ7xZ_kgcHRMXZ5hM1i6TKbgY__CkiSjZ93imHZQ0,1562
 compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317
 compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
-compressed_tensors/quantization/quant_args.py,sha256=
+compressed_tensors/quantization/quant_args.py,sha256=Vc_tWSTcbZZsMJlACpLq4JEPvGx87izc8VEx-mcXjoM,5621
 compressed_tensors/quantization/quant_config.py,sha256=hL42sXp1wAZxyrkHarw7tAMRcwSVEr0MT3wmrmL3NhE,8285
-compressed_tensors/quantization/quant_scheme.py,sha256=
+compressed_tensors/quantization/quant_scheme.py,sha256=Yhaj3QJn4lifGMoQ8mlXXOdLDZA6iGMthb_0hlAzvVk,3811
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcgTVX3axnS2xV6rc5YvdzK7fSg,798
-compressed_tensors/quantization/lifecycle/apply.py,sha256=
+compressed_tensors/quantization/lifecycle/apply.py,sha256=eQfuIGcX6KBKeMta1svviXXRpKO3og2CRrxhKlGcE_k,8756
 compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
-compressed_tensors/quantization/lifecycle/forward.py,sha256=
+compressed_tensors/quantization/lifecycle/forward.py,sha256=tcjL_qyE3ODourNprt2bndF7_ALlUEGY2_Yag4exLoE,11908
 compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=kIEx6a7UyqAIG_ZPNBhijrDiAHnp2wR7K_GC3envz4M,4631
 compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
 compressed_tensors/quantization/observers/base.py,sha256=z_JC-CRz-PY7WlpSoyOoSQQWz5ekTEd5LbXt0iHQRes,5239
-compressed_tensors/quantization/observers/helpers.py,sha256=
+compressed_tensors/quantization/observers/helpers.py,sha256=DSNGNJpZyT2Lyu0c82dHEGf9q5vm4N3zgI3DpkBbp0Q,3597
 compressed_tensors/quantization/observers/memoryless.py,sha256=jH_c6K3gxf4W3VNXQ7tbnP-J_86QTrEfjBn6Kh1C-H8,2165
 compressed_tensors/quantization/observers/min_max.py,sha256=UK7zCMzxv9GGn6BflBxdajV20RiWaCY2RHcvZodCP1w,3669
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
@@ -41,8 +41,8 @@ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85S
 compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
 compressed_tensors/utils/helpers.py,sha256=5ull5yFT31M2zVxKeFvpvvlvX5f1Sk1LGuj_wrfZWCY,2267
 compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
-compressed_tensors_nightly-0.4.0.
-compressed_tensors_nightly-0.4.0.
-compressed_tensors_nightly-0.4.0.
-compressed_tensors_nightly-0.4.0.
-compressed_tensors_nightly-0.4.0.
+compressed_tensors_nightly-0.4.0.20240621.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.4.0.20240621.dist-info/METADATA,sha256=zC3A9MK7GzcOAboNXZHhw_exVI35srmQ3ocSgzAy6j0,5668
+compressed_tensors_nightly-0.4.0.20240621.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+compressed_tensors_nightly-0.4.0.20240621.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.4.0.20240621.dist-info/RECORD,,