compressed-tensors-nightly 0.7.1.20241031__py3-none-any.whl → 0.7.1.20241102__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/quantization/__init__.py +0 -1
- compressed_tensors/quantization/lifecycle/__init__.py +0 -2
- compressed_tensors/quantization/lifecycle/apply.py +1 -16
- compressed_tensors/quantization/lifecycle/forward.py +15 -109
- compressed_tensors/quantization/lifecycle/initialize.py +18 -21
- compressed_tensors/quantization/quant_args.py +11 -22
- compressed_tensors/quantization/utils/helpers.py +125 -8
- compressed_tensors/registry/registry.py +1 -1
- {compressed_tensors_nightly-0.7.1.20241031.dist-info → compressed_tensors_nightly-0.7.1.20241102.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.7.1.20241031.dist-info → compressed_tensors_nightly-0.7.1.20241102.dist-info}/RECORD +13 -21
- compressed_tensors/quantization/cache.py +0 -200
- compressed_tensors/quantization/lifecycle/calibration.py +0 -80
- compressed_tensors/quantization/lifecycle/frozen.py +0 -50
- compressed_tensors/quantization/observers/__init__.py +0 -21
- compressed_tensors/quantization/observers/base.py +0 -213
- compressed_tensors/quantization/observers/helpers.py +0 -149
- compressed_tensors/quantization/observers/min_max.py +0 -104
- compressed_tensors/quantization/observers/mse.py +0 -164
- {compressed_tensors_nightly-0.7.1.20241031.dist-info → compressed_tensors_nightly-0.7.1.20241102.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.7.1.20241031.dist-info → compressed_tensors_nightly-0.7.1.20241102.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.7.1.20241031.dist-info → compressed_tensors_nightly-0.7.1.20241102.dist-info}/top_level.txt +0 -0
--- a/compressed_tensors/quantization/lifecycle/apply.py
+++ b/compressed_tensors/quantization/lifecycle/apply.py
@@ -22,13 +22,9 @@ from typing import Union
 
 import torch
 from compressed_tensors.config import CompressionFormat
-from compressed_tensors.quantization.lifecycle.calibration import (
-    set_module_for_calibration,
-)
 from compressed_tensors.quantization.lifecycle.compressed import (
     compress_quantized_weights,
 )
-from compressed_tensors.quantization.lifecycle.frozen import freeze_module_quantization
 from compressed_tensors.quantization.lifecycle.initialize import (
     initialize_module_for_quantization,
 )
@@ -233,6 +229,7 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
     :param model: model to apply quantization to
     :param status: status to update the module to
     """
+
     current_status = infer_quantization_status(model)
 
     if status >= QuantizationStatus.INITIALIZED > current_status:
@@ -243,18 +240,6 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
             )
         )
 
-    if current_status < status >= QuantizationStatus.CALIBRATION > current_status:
-        # only quantize weights up front when our end goal state is calibration,
-        # weight quantization parameters are already loaded for frozen/compressed
-        quantize_weights_upfront = status == QuantizationStatus.CALIBRATION
-        model.apply(
-            lambda module: set_module_for_calibration(
-                module, quantize_weights_upfront=quantize_weights_upfront
-            )
-        )
-    if current_status < status >= QuantizationStatus.FROZEN > current_status:
-        model.apply(freeze_module_quantization)
-
     if current_status < status >= QuantizationStatus.COMPRESSED > current_status:
         model.apply(compress_quantized_weights)
 
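The surviving guard keeps the chained-comparison idiom used throughout `apply_quantization_status`, which reads oddly at first glance. A minimal sketch with a toy `IntEnum` standing in for `QuantizationStatus` (the real enum, defined in `compressed_tensors.quantization.quant_config`, is ordered and comparable in the same way):

```python
from enum import IntEnum


# toy stand-in for QuantizationStatus, only to illustrate the chained comparison
class Status(IntEnum):
    INITIALIZED = 1
    CALIBRATION = 2
    FROZEN = 3
    COMPRESSED = 4


current, target = Status.INITIALIZED, Status.COMPRESSED

# `current < target >= Status.COMPRESSED > current` expands to:
# (current < target) and (target >= Status.COMPRESSED) and (Status.COMPRESSED > current)
# i.e. "we are moving up, at least as far as COMPRESSED, from below COMPRESSED"
print(current < target >= Status.COMPRESSED > current)  # True
```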
--- a/compressed_tensors/quantization/lifecycle/forward.py
+++ b/compressed_tensors/quantization/lifecycle/forward.py
@@ -14,14 +14,9 @@
 
 from functools import wraps
 from math import ceil
-from typing import Callable, Optional
+from typing import Optional
 
 import torch
-from compressed_tensors.quantization.cache import QuantizedKVParameterCache
-from compressed_tensors.quantization.observers.helpers import (
-    calculate_range,
-    compute_dynamic_scales_and_zp,
-)
 from compressed_tensors.quantization.quant_args import (
     QuantizationArgs,
     QuantizationStrategy,
@@ -29,7 +24,11 @@ from compressed_tensors.quantization.quant_args import (
 )
 from compressed_tensors.quantization.quant_config import QuantizationStatus
 from compressed_tensors.quantization.quant_scheme import QuantizationScheme
-from compressed_tensors.utils import safe_permute, update_parameter_data
+from compressed_tensors.quantization.utils import (
+    calculate_range,
+    compute_dynamic_scales_and_zp,
+)
+from compressed_tensors.utils import safe_permute
 from torch.nn import Module
 
 
@@ -39,7 +38,6 @@ __all__ = [
     "fake_quantize",
     "wrap_module_forward_quantized",
     "forward_quantize",
-    "calibrate_activations",
 ]
 
 
@@ -276,19 +274,7 @@ def wrap_module_forward_quantized(module: Module, scheme: QuantizationScheme):
         compressed = module.quantization_status == QuantizationStatus.COMPRESSED
 
         if scheme.input_activations is not None:
-            #
-            # NOTE: will be moved out of compressed-tensors
-            if (
-                module.quantization_status == QuantizationStatus.CALIBRATION
-                and not scheme.input_activations.dynamic
-            ):
-                calibrate_activations(
-                    module=module,
-                    value=input_,
-                    base_name="input",
-                    quantization_args=scheme.input_activations,
-                )
-
+            # prehook should calibrate activations before forward call
             input_ = forward_quantize(module, input_, "input", scheme.input_activations)
 
         if scheme.weights is not None and not compressed:
@@ -302,31 +288,22 @@ def wrap_module_forward_quantized(module: Module, scheme: QuantizationScheme):
         output = forward_func_orig.__get__(module, module.__class__)(
             input_, *args[1:], **kwargs
         )
-        if scheme.output_activations is not None:
 
-
-
-
+        # restore back to unquantized_value
+        if scheme.weights is not None and not compressed:
+            self.weight.data = unquantized_weight
 
+        if scheme.output_activations is not None:
+            # forward-hook should calibrate/forward_quantize
             if (
                 module.quantization_status == QuantizationStatus.CALIBRATION
                 and not scheme.output_activations.dynamic
             ):
-                calibrate_activations(
-                    module=module,
-                    value=output,
-                    base_name="output",
-                    quantization_args=scheme.ouput_activations,
-                )
+                return output
 
             output = forward_quantize(
                 module, output, "output", scheme.output_activations
             )
-
-        # restore back to unquantized_value
-        if scheme.weights is not None and not compressed:
-            self.weight.data = unquantized_weight
-
         return output
 
     # bind wrapped forward to module class so reference to `self` is correct
@@ -335,77 +312,6 @@ def wrap_module_forward_quantized(module: Module, scheme: QuantizationScheme):
     setattr(module, "forward", bound_wrapped_forward)
 
 
-def wrap_module_forward_quantized_attn(module: Module, scheme: QuantizationScheme):
-    # expects a module already initialized and injected with the parameters in
-    # initialize_module_for_quantization
-    if hasattr(module.forward, "__func__"):
-        forward_func_orig = module.forward.__func__
-    else:
-        forward_func_orig = module.forward.func
-
-    @wraps(forward_func_orig)  # ensures docstring, names, etc are propagated
-    def wrapped_forward(self, *args, **kwargs):
-
-        # kv cache stored under weights
-        if module.quantization_status == QuantizationStatus.CALIBRATION:
-            quantization_args: QuantizationArgs = scheme.output_activations
-            past_key_value: QuantizedKVParameterCache = quantization_args.get_kv_cache()
-            kwargs["past_key_value"] = past_key_value
-
-            # QuantizedKVParameterCache used for obtaining k_scale, v_scale only,
-            # does not store quantized_key_states and quantized_value_state
-            kwargs["use_cache"] = False
-
-            attn_forward: Callable = forward_func_orig.__get__(module, module.__class__)
-
-            past_key_value.reset_states()
-
-            rtn = attn_forward(*args, **kwargs)
-
-            update_parameter_data(
-                module, past_key_value.k_scales[module.layer_idx], "k_scale"
-            )
-            update_parameter_data(
-                module, past_key_value.v_scales[module.layer_idx], "v_scale"
-            )
-
-            return rtn
-
-        return forward_func_orig.__get__(module, module.__class__)(*args, **kwargs)
-
-    # bind wrapped forward to module class so reference to `self` is correct
-    bound_wrapped_forward = wrapped_forward.__get__(module, module.__class__)
-    # set forward to wrapped forward
-    setattr(module, "forward", bound_wrapped_forward)
-
-
-def calibrate_activations(
-    module: Module,
-    value: torch.Tensor,
-    base_name: str,
-    quantization_args: QuantizationArgs,
-):
-    # If empty tensor, can't update zp/scale
-    # Case for MoEs
-    if value.numel() == 0:
-        return
-    # calibration mode - get new quant params from observer
-    if not hasattr(module, f"{base_name}_observer"):
-        from compressed_tensors.quantization.lifecycle import initialize_observers
-
-        initialize_observers(
-            module=module, base_name=base_name, quantization_args=quantization_args
-        )
-
-    observer = getattr(module, f"{base_name}_observer")
-
-    updated_scale, updated_zero_point = observer(value)
-
-    # update scale and zero point
-    update_parameter_data(module, updated_scale, f"{base_name}_scale")
-    update_parameter_data(module, updated_zero_point, f"{base_name}_zero_point")
-
-
 def forward_quantize(
     module: Module, value: torch.Tensor, base_name: str, args: "QuantizationArgs"
 ) -> torch.Tensor:
@@ -426,10 +332,10 @@ def forward_quantize(
     g_idx = getattr(module, "weight_g_idx", None)
 
     if args.dynamic:
-        # dynamic quantization -
+        # dynamic quantization - determine the scale/zp on the fly
        scale, zero_point = compute_dynamic_scales_and_zp(value=value, args=args)
     else:
-        # static quantization - get
+        # static quantization - get scale and zero point from layer
         scale = getattr(module, f"{base_name}_scale")
         zero_point = getattr(module, f"{base_name}_zero_point", None)
 
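The deleted in-forward calibration paths are replaced by comments deferring to hooks ("prehook should calibrate activations before forward call"). A minimal sketch of that hook pattern in plain PyTorch; `calibrate` here is a hypothetical stand-in for whatever observer update the calling framework registers, not an API of this package:

```python
import torch
from torch import nn


def calibrate(module: nn.Module, value: torch.Tensor, base_name: str) -> None:
    # hypothetical stand-in: would update f"{base_name}_scale" / f"{base_name}_zero_point"
    print(f"calibrating {base_name}, shape={tuple(value.shape)}")


def input_calibration_pre_hook(module, args):
    calibrate(module, args[0], "input")  # fires before module.forward


def output_calibration_hook(module, args, output):
    calibrate(module, output, "output")  # fires after module.forward


layer = nn.Linear(8, 8)
layer.register_forward_pre_hook(input_calibration_pre_hook)
layer.register_forward_hook(output_calibration_hook)
layer(torch.randn(2, 8))  # both hooks run around the forward call
```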
--- a/compressed_tensors/quantization/lifecycle/initialize.py
+++ b/compressed_tensors/quantization/lifecycle/initialize.py
@@ -14,13 +14,12 @@
 
 
 import logging
+from enum import Enum
 from typing import Optional
 
 import torch
-from compressed_tensors.quantization.cache import KVCacheScaleType
 from compressed_tensors.quantization.lifecycle.forward import (
     wrap_module_forward_quantized,
-    wrap_module_forward_quantized_attn,
 )
 from compressed_tensors.quantization.quant_args import (
     ActivationOrdering,
@@ -34,12 +33,21 @@ from compressed_tensors.utils import get_execution_device, is_module_offloaded
 from torch.nn import Module, Parameter
 
 
-__all__ = ["initialize_module_for_quantization"]
+__all__ = [
+    "initialize_module_for_quantization",
+    "is_attention_module",
+    "KVCacheScaleType",
+]
 
 
 _LOGGER = logging.getLogger(__name__)
 
 
+class KVCacheScaleType(Enum):
+    KEY = "k_scale"
+    VALUE = "v_scale"
+
+
 def initialize_module_for_quantization(
     module: Module,
     scheme: Optional[QuantizationScheme] = None,
@@ -64,9 +72,7 @@ def initialize_module_for_quantization(
         return
 
     if is_attention_module(module):
-        # wrap forward call of module to perform
         # quantized actions based on calltime status
-        wrap_module_forward_quantized_attn(module, scheme)
         _initialize_attn_scales(module)
 
     else:
@@ -107,6 +113,7 @@ def initialize_module_for_quantization(
     module.quantization_status = QuantizationStatus.INITIALIZED
 
     offloaded = False
+    # What is this doing/why isn't this in the attn case?
    if is_module_offloaded(module):
         try:
             from accelerate.hooks import add_hook_to_module, remove_hook_from_module
@@ -144,14 +151,12 @@ def initialize_module_for_quantization(
         module._hf_hook.weights_map = new_prefix_dict
 
 
-def initialize_observers(
-    module: Module,
-    base_name: str,
-    quantization_args: QuantizationArgs,
-):
-
-    observer = quantization_args.get_observer()
-    module.register_module(f"{base_name}_observer", observer)
+def is_attention_module(module: Module):
+    return "attention" in module.__class__.__name__.lower() and (
+        hasattr(module, "k_proj")
+        or hasattr(module, "v_proj")
+        or hasattr(module, "qkv_proj")
+    )
 
 
 def _initialize_scale_zero_point(
@@ -209,14 +214,6 @@ def _initialize_scale_zero_point(
         module.register_parameter(f"{base_name}_g_idx", init_g_idx)
 
 
-def is_attention_module(module: Module):
-    return "attention" in module.__class__.__name__.lower() and (
-        hasattr(module, "k_proj")
-        or hasattr(module, "v_proj")
-        or hasattr(module, "qkv_proj")
-    )
-
-
 def _initialize_attn_scales(module: Module) -> None:
     """Initlaize k_scale, v_scale for self_attn"""
 
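`is_attention_module` is now public (exported in `__all__`) and is a pure name-plus-attribute heuristic, so it can be sanity-checked with toy modules. A sketch assuming the 0.7.1.20241102 layout shown above:

```python
from torch import nn

from compressed_tensors.quantization.lifecycle.initialize import is_attention_module


class ToyAttention(nn.Module):
    """Class name contains "attention" and exposes k_proj/v_proj."""

    def __init__(self):
        super().__init__()
        self.k_proj = nn.Linear(8, 8)
        self.v_proj = nn.Linear(8, 8)


print(is_attention_module(ToyAttention()))   # True -> gets k_scale/v_scale params
print(is_attention_module(nn.Linear(8, 8)))  # False: no "attention" in class name
```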
--- a/compressed_tensors/quantization/quant_args.py
+++ b/compressed_tensors/quantization/quant_args.py
@@ -114,20 +114,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
         """
         :return: torch quantization FakeQuantize built based on these QuantizationArgs
         """
-        from compressed_tensors.quantization.observers.base import Observer
-
-        # No observer required for the dynamic case
-        if self.dynamic:
-            self.observer = None
-            return self.observer
-
-        return Observer.load_from_registry(self.observer, quantization_args=self)
-
-    def get_kv_cache(self):
-        """Get the singleton KV Cache"""
-        from compressed_tensors.quantization.cache import QuantizedKVParameterCache
-
-        return QuantizedKVParameterCache(self)
+        return self.observer
 
     @field_validator("type", mode="before")
     def validate_type(cls, value) -> QuantizationType:
@@ -210,6 +197,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
                 "activation ordering"
             )
 
+        # infer observer w.r.t. dynamic
         if dynamic:
             if strategy not in (
                 QuantizationStrategy.TOKEN,
@@ -221,18 +209,19 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
                 "quantization",
             )
             if observer is not None:
-
-
-
-
+                if observer != "memoryless":  # avoid annoying users with old configs
+                    warnings.warn(
+                        "No observer is used for dynamic quantization, setting to None"
+                    )
+                observer = None
 
-
-
-
-            model.observer = "minmax"
+        elif observer is None:
+            # default to minmax for non-dynamic cases
+            observer = "minmax"
 
         # write back modified values
         model.strategy = strategy
+        model.observer = observer
         return model
 
     def pytorch_dtype(self) -> torch.dtype:
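The net effect of the validator change is that `observer` is now inferred from `dynamic` and written back onto the model. A short sketch of the inferred values, assuming the field names shown in the diff and the package's public `QuantizationArgs` export:

```python
from compressed_tensors.quantization import QuantizationArgs

# static quantization with no observer specified: defaults to "minmax"
static_args = QuantizationArgs(num_bits=8, dynamic=False)
print(static_args.observer)  # "minmax"

# dynamic quantization computes scales/zero points on the fly, so no observer
dynamic_args = QuantizationArgs(num_bits=8, dynamic=True, strategy="token")
print(dynamic_args.observer)  # None
```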
--- a/compressed_tensors/quantization/utils/helpers.py
+++ b/compressed_tensors/quantization/utils/helpers.py
@@ -16,9 +16,14 @@ import logging
 from typing import Generator, List, Optional, Tuple
 
 import torch
-from compressed_tensors.quantization.observers.base import Observer
-from compressed_tensors.quantization.quant_args import QuantizationArgs
+from compressed_tensors.quantization.quant_args import (
+    FP8_DTYPE,
+    QuantizationArgs,
+    QuantizationStrategy,
+    QuantizationType,
+)
 from compressed_tensors.quantization.quant_scheme import QuantizationScheme
+from torch import FloatTensor, IntTensor, Tensor
 from torch.nn import Module
 from tqdm import tqdm
 
@@ -36,6 +41,9 @@ __all__ = [
     "is_kv_cache_quant_scheme",
     "iter_named_leaf_modules",
     "iter_named_quantizable_modules",
+    "compute_dynamic_scales_and_zp",
+    "calculate_range",
+    "calculate_qparams",
 ]
 
 # target the self_attn layer
@@ -45,6 +53,105 @@ KV_CACHE_TARGETS = ["re:.*self_attn$"]
 _LOGGER: logging.Logger = logging.getLogger(__name__)
 
 
+def calculate_qparams(
+    min_vals: Tensor, max_vals: Tensor, quantization_args: QuantizationArgs
+) -> Tuple[FloatTensor, IntTensor]:
+    """
+    :param min_vals: tensor of min value(s) to calculate scale(s) and zero point(s)
+        from
+    :param max_vals: tensor of max value(s) to calculate scale(s) and zero point(s)
+        from
+    :param quantization_args: settings to quantization
+    :return: tuple of the calculated scale(s) and zero point(s)
+    """
+    min_vals = torch.min(min_vals, torch.zeros_like(min_vals))
+    max_vals = torch.max(max_vals, torch.zeros_like(max_vals))
+    device = min_vals.device
+
+    bit_min, bit_max = calculate_range(quantization_args, device)
+    bit_range = bit_max - bit_min
+    zp_dtype = quantization_args.pytorch_dtype()
+
+    if quantization_args.symmetric:
+        max_val_pos = torch.max(torch.abs(min_vals), torch.abs(max_vals))
+        scales = max_val_pos / (float(bit_range) / 2)
+        scales = torch.clamp(scales, min=torch.finfo(torch.float32).eps)
+        zero_points = torch.zeros(scales.shape, device=device, dtype=min_vals.dtype)
+    else:
+        scales = (max_vals - min_vals) / float(bit_range)
+        scales = torch.clamp(scales, min=torch.finfo(torch.float32).eps)
+        zero_points = bit_min - (min_vals / scales)
+        zero_points = torch.clamp(zero_points, bit_min, bit_max)
+
+    # match zero-points to quantized type
+    zero_points = zero_points.to(zp_dtype)
+
+    if scales.ndim == 0:
+        scales = scales.reshape(1)
+        zero_points = zero_points.reshape(1)
+
+    return scales, zero_points
+
+
+def compute_dynamic_scales_and_zp(value: Tensor, args: QuantizationArgs):
+    """
+    Returns the computed scales and zero points for dynamic activation
+    qunatization.
+
+    :param value: tensor to calculate quantization parameters for
+    :param args: quantization args
+    :param reduce_dims: optional tuple of dimensions to reduce along,
+        returned scale and zero point will be shaped (1,) along the
+        reduced dimensions
+    :return: tuple of scale and zero point derived from the observed tensor
+    """
+    if args.strategy == QuantizationStrategy.TOKEN:
+        dim = {1, 2}
+        reduce_dims = tuple(idx for idx in range(value.ndim) if idx not in dim)
+    elif args.strategy == QuantizationStrategy.TENSOR:
+        reduce_dims = None
+    else:
+        raise ValueError(
+            f"One of {QuantizationStrategy.TOKEN} or {QuantizationStrategy.TENSOR} ",
+            "must be used for dynamic quantization",
+        )
+
+    if not reduce_dims:
+        min_val, max_val = torch.aminmax(value)
+    else:
+        min_val = torch.amin(value, dim=reduce_dims, keepdims=True)
+        max_val = torch.amax(value, dim=reduce_dims, keepdims=True)
+
+    return calculate_qparams(min_val, max_val, args)
+
+
+def calculate_range(quantization_args: QuantizationArgs, device: str) -> Tuple:
+    """
+    Calculated the effective quantization range for the given Quantization Args
+
+    :param quantization_args: quantization args to get range of
+    :param device: device to store the range to
+    :return: tuple endpoints for the given quantization range
+    """
+    if quantization_args.type == QuantizationType.INT:
+        bit_range = 2**quantization_args.num_bits
+        q_max = torch.tensor(bit_range / 2 - 1, device=device)
+        q_min = torch.tensor(-bit_range / 2, device=device)
+    elif quantization_args.type == QuantizationType.FLOAT:
+        if quantization_args.num_bits != 8:
+            raise ValueError(
+                "Floating point quantization is only supported for 8 bits,"
+                f"got {quantization_args.num_bits}"
+            )
+        fp_range_info = torch.finfo(FP8_DTYPE)
+        q_max = torch.tensor(fp_range_info.max, device=device)
+        q_min = torch.tensor(fp_range_info.min, device=device)
+    else:
+        raise ValueError(f"Invalid quantization type {quantization_args.type}")
+
+    return q_min, q_max
+
+
 def infer_quantization_status(model: Module) -> Optional["QuantizationStatus"]:  # noqa
     """
     Checks the quantization status of a model. Assumes all modules in the model have
@@ -118,12 +225,17 @@ def iter_named_leaf_modules(model: Module) -> Generator[Tuple[str, Module], None
     """
     for name, submodule in model.named_modules():
         children = list(submodule.children())
-        if len(children) == 0 and not isinstance(submodule, Observer):
+        # TODO: verify if an observer would ever be attached in this case/remove check
+        if len(children) == 0 and "observer" in name:
             yield name, submodule
         else:
+            if len(children) > 0:
+                named_children, children = zip(*list(submodule.named_children()))
             has_non_observer_children = False
-            for child in children:
-                if not isinstance(child, Observer):
+            for i in range(len(children)):
+                child_name = named_children[i]
+
+                if "observer" not in child_name:
                     has_non_observer_children = True
 
             if not has_non_observer_children:
@@ -144,14 +256,19 @@ def iter_named_quantizable_modules(
     :returns: generator tuple of (name, submodule)
     """
     for name, submodule in model.named_modules():
+        # TODO: verify if an observer would ever be attached in this case/remove check
         if include_children:
             children = list(submodule.children())
-            if len(children) == 0 and not isinstance(submodule, Observer):
+            if len(children) == 0 and "observer" not in name:
                 yield name, submodule
             else:
+                if len(children) > 0:
+                    named_children, children = zip(*list(submodule.named_children()))
                 has_non_observer_children = False
-                for child in children:
-                    if not isinstance(child, Observer):
+                for i in range(len(children)):
+                    child_name = named_children[i]
+
+                    if "observer" not in child_name:
                         has_non_observer_children = True
 
                 if not has_non_observer_children:
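With this move, callers that previously imported these helpers from `compressed_tensors.quantization.observers.helpers` now find them under `compressed_tensors.quantization.utils`. A short usage sketch against the new import path, assuming `QuantizationArgs` defaults as shown in the diff:

```python
import torch

from compressed_tensors.quantization import QuantizationArgs
from compressed_tensors.quantization.utils import (
    calculate_qparams,
    calculate_range,
    compute_dynamic_scales_and_zp,
)

args = QuantizationArgs(num_bits=8, symmetric=True)

# effective int8 range: (-128, 127)
q_min, q_max = calculate_range(args, device="cpu")

# scale/zero point from observed min/max statistics
scale, zero_point = calculate_qparams(torch.tensor(-1.5), torch.tensor(2.0), args)

# per-tensor dynamic scale/zero point straight from an activation tensor
dyn_args = QuantizationArgs(num_bits=8, dynamic=True, strategy="tensor")
dyn_scale, dyn_zp = compute_dynamic_scales_and_zp(torch.randn(2, 16), dyn_args)
```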
--- a/compressed_tensors/registry/registry.py
+++ b/compressed_tensors/registry/registry.py
@@ -258,7 +258,7 @@ def get_from_registry(
         retrieved_value = _import_and_get_value_from_module(module_path, value_name)
     else:
         # look up name in alias registry
-        name = _ALIAS_REGISTRY[parent_class].get(name)
+        name = _ALIAS_REGISTRY[parent_class].get(name, name)
         # look up name in registry
         retrieved_value = _REGISTRY[parent_class].get(name)
         if retrieved_value is None:
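The registry fix is one character-level but behavior-changing: `dict.get(name)` returns `None` when a name has no alias, which then poisoned the follow-up `_REGISTRY` lookup; `dict.get(name, name)` lets unaliased names fall through unchanged. In isolation, with a hypothetical alias table:

```python
aliases = {"min-max": "minmax"}  # hypothetical alias table

name = "mse"  # registered directly, no alias entry
resolved_old = aliases.get(name)        # None -> later registry lookup fails
resolved_new = aliases.get(name, name)  # "mse" -> falls through to the registry
print(resolved_old, resolved_new)
```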
--- a/compressed_tensors_nightly-0.7.1.20241031.dist-info/METADATA
+++ b/compressed_tensors_nightly-0.7.1.20241102.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.7.1.20241031
+Version: 0.7.1.20241102
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
--- a/compressed_tensors_nightly-0.7.1.20241031.dist-info/RECORD
+++ b/compressed_tensors_nightly-0.7.1.20241102.dist-info/RECORD
@@ -22,28 +22,20 @@ compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74j
 compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
 compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
 compressed_tensors/linear/compressed_linear.py,sha256=0jTTf6XxOAjAYs3tvFtgiNMAO4W10sSeR-pdH2M413g,3218
-compressed_tensors/quantization/__init__.py,sha256=
-compressed_tensors/quantization/
-compressed_tensors/quantization/quant_args.py,sha256=k7NuZn8OqjgzmAVaN2-jHPQ1bgDkMuUoLJtLnhkvIOI,9085
+compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
+compressed_tensors/quantization/quant_args.py,sha256=osjNwCSB6tcyH9Qeg5sHEiB-bHyi3XJ8TzkGVJuGTc4,8711
 compressed_tensors/quantization/quant_config.py,sha256=NCiMvUMnnz5kTyAkDylxjtEGQnjgsIYIeNR2zyHEdTQ,10371
 compressed_tensors/quantization/quant_scheme.py,sha256=5ggPz5sqEfTUgvJJeiPIINA74QtO-08hb3szsm7UHGE,6000
-compressed_tensors/quantization/lifecycle/__init__.py,sha256=
-compressed_tensors/quantization/lifecycle/apply.py,sha256=
-compressed_tensors/quantization/lifecycle/calibration.py,sha256=fJ2RDL3E4hmWR8v8nYhq_tv31K8WV00o_4Y3xr7c37Y,3041
+compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
+compressed_tensors/quantization/lifecycle/apply.py,sha256=pdCqxXnVw7HoDDanaOtek13g8x_nb54CBUlfuMdhFG4,14993
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
-compressed_tensors/quantization/lifecycle/forward.py,sha256=
-compressed_tensors/quantization/lifecycle/frozen.py,sha256=71TsgS0Uxku0NomdWOBJsVfXCGTne-Gx9zUEMsCmw5Q,1764
+compressed_tensors/quantization/lifecycle/forward.py,sha256=QPL6-vKOFuKdKIEsVqMhsw4x552Jpm2sqO0oeChbnrM,12941
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=
-compressed_tensors/quantization/observers/__init__.py,sha256=DYrttzq-8MHLZUzpX-xzzm4hrw6HcXkMkux82KBKb1M,738
-compressed_tensors/quantization/observers/base.py,sha256=5ovQicWPYHjIxr6-EkQ4lgOX0PpI9g23iSzKpxjM1Zg,8420
-compressed_tensors/quantization/observers/helpers.py,sha256=nUFdNEIACiPBfFwNYDGCXOvw6tf7j6jfTvDwImHKMPg,5506
-compressed_tensors/quantization/observers/min_max.py,sha256=sQXqU3z-voxIDfR_9mQzwQUflZj2sASm_G8CYaXntFw,3865
-compressed_tensors/quantization/observers/mse.py,sha256=G5Y9v4MqXUVcKxBSmCFFW3p_7rlu-6scqLIN88ng-sE,6080
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=C41hKA5VANyEwkB5FxzEn3Z0Da5tfxF1I07P8rUcyS0,8537
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
-compressed_tensors/quantization/utils/helpers.py,sha256=
+compressed_tensors/quantization/utils/helpers.py,sha256=DBP-sGRpGAY01K0LFE7qqonNj4hkTYL_mXrMs2LtAD8,14100
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
-compressed_tensors/registry/registry.py,sha256=
+compressed_tensors/registry/registry.py,sha256=vRcjVB1ITfSbfYUaGndBBmqhip_5vsS62weorVg0iXo,11896
 compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
 compressed_tensors/utils/helpers.py,sha256=hWGIR0W7ENHwdC7wW2SQJJiCF9-xOu_u3fY2RzLyYg4,4101
 compressed_tensors/utils/offload.py,sha256=d9q8LNe8HyF8tOjgjA7QGLD3HRysmNp0d8eBbdqBgIM,4089
@@ -51,8 +43,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=m08ANVuTBxQdoa6LufDgcNJ7wCLDJolyZljB8VEybAU,8578
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors_nightly-0.7.1.
-compressed_tensors_nightly-0.7.1.
-compressed_tensors_nightly-0.7.1.
-compressed_tensors_nightly-0.7.1.
-compressed_tensors_nightly-0.7.1.
+compressed_tensors_nightly-0.7.1.20241102.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.7.1.20241102.dist-info/METADATA,sha256=pQ8FXKctjUHKkisrXYyeDUuunknVPkjHnHvS-uJ89oI,6799
+compressed_tensors_nightly-0.7.1.20241102.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+compressed_tensors_nightly-0.7.1.20241102.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.7.1.20241102.dist-info/RECORD,,