compressed-tensors-nightly 0.9.1.20250129__tar.gz → 0.9.1.20250202__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. {compressed-tensors-nightly-0.9.1.20250129/src/compressed_tensors_nightly.egg-info → compressed-tensors-nightly-0.9.1.20250202}/PKG-INFO +1 -1
  2. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +23 -6
  3. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/quantization/quant_args.py +5 -6
  4. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/utils/helpers.py +2 -2
  5. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/utils/offload.py +7 -1
  6. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202/src/compressed_tensors_nightly.egg-info}/PKG-INFO +1 -1
  7. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/LICENSE +0 -0
  8. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/README.md +0 -0
  9. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/pyproject.toml +0 -0
  10. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/setup.cfg +0 -0
  11. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/setup.py +0 -0
  12. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/__init__.py +0 -0
  13. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/base.py +0 -0
  14. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/compressors/__init__.py +0 -0
  15. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/compressors/base.py +0 -0
  16. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/compressors/helpers.py +0 -0
  17. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  18. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  19. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  20. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  21. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  22. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  23. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  24. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  25. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  26. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  27. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  28. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  29. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/config/__init__.py +0 -0
  30. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/config/base.py +0 -0
  31. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/config/dense.py +0 -0
  32. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  33. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  34. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/linear/__init__.py +0 -0
  35. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  36. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/quantization/__init__.py +0 -0
  37. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  38. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  39. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  40. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  41. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  42. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  43. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/quantization/quant_config.py +0 -0
  44. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  45. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  46. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  47. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/registry/__init__.py +0 -0
  48. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/registry/registry.py +0 -0
  49. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/utils/__init__.py +0 -0
  50. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/utils/permutations_24.py +0 -0
  51. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/utils/permute.py +0 -0
  52. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  53. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  54. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors/version.py +0 -0
  55. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors_nightly.egg-info/SOURCES.txt +0 -0
  56. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors_nightly.egg-info/dependency_links.txt +0 -0
  57. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors_nightly.egg-info/requires.txt +0 -0
  58. {compressed-tensors-nightly-0.9.1.20250129 → compressed-tensors-nightly-0.9.1.20250202}/src/compressed_tensors_nightly.egg-info/top_level.txt +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.9.1.20250129
+Version: 0.9.1.20250202
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py
@@ -138,8 +138,20 @@ def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
     """
     Packs a tensor of quantized weights stored in int8 into int32s with padding
 
+    Pseudocode:
+     1. Shift wrt num_bits to convert to unsigned. num_bits=8
+        [1,2] -> [129, 130]
+     2. Pad to fill in 32 bits
+        [129, 130] -> [129, 130, 0, 0]
+     3. convert to binary align in order
+        [129, 130, 0, 0] -> 00000000 00000000 10000010 10000001
+     4. convert aligned binary to number
+        00000000000000001000001010000001 -> 33409
+     5. covert back to uint32
+        33409 -> 33409
+
     :param value: tensor to pack
-    :param num_bits: number of bits used to store underlying data
+    :param num_bits: number of bits used to store underlying data, must be at least 1
     :returns: packed int32 tensor
     """
     if value.dtype is not torch.int8:
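For readers following the new pseudocode, here is a small self-contained numpy sketch (illustrative variable names, not the package's own function) that reproduces the docstring's worked example for num_bits=8:

```python
# Standalone sketch of the packing pseudocode above for the [1, 2] example.
import math

import numpy as np

num_bits = 8
value = np.array([[1, 2]], dtype=np.int8)                        # quantized int8 weights

offset = 1 << (num_bits - 1)                                     # 128: shift to unsigned
unsigned = (value.astype(np.int32) + offset).astype(np.uint32)   # [[129, 130]]

pack_factor = 32 // num_bits                                     # 4 values per int32
packed_size = math.ceil(unsigned.shape[1] / pack_factor)         # 1
padding = packed_size * pack_factor - unsigned.shape[1]          # 2
unsigned = np.pad(unsigned, [(0, 0), (0, padding)])              # [[129, 130, 0, 0]]

packed = np.zeros((unsigned.shape[0], packed_size), dtype=np.uint32)
for i in range(pack_factor):
    packed |= unsigned[:, i::pack_factor] << (num_bits * i)

print(packed)  # [[33409]] == 0b00000000_00000000_10000010_10000001
```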
@@ -148,19 +160,22 @@ def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
     if num_bits > 8:
         raise ValueError("Packing is only supported for less than 8 bits")
 
+    if num_bits < 1:
+        raise ValueError(f"num_bits must be at least 1, got {num_bits}")
+
     # convert to unsigned for packing
-    offset = pow(2, num_bits) // 2
+    offset = 1 << (num_bits - 1)
     value = (value + offset).to(torch.uint8)
     value = value.cpu().numpy().astype(np.uint32)
     pack_factor = 32 // num_bits
 
     # pad input tensor and initialize packed output
     packed_size = math.ceil(value.shape[1] / pack_factor)
-    packed = np.zeros((value.shape[0], packed_size), dtype=np.uint32)
-    padding = packed.shape[1] * pack_factor - value.shape[1]
+    padding = packed_size * pack_factor - value.shape[1]
     value = np.pad(value, pad_width=[(0, 0), (0, padding)], constant_values=0)
 
     # pack values
+    packed = np.zeros((value.shape[0], packed_size), dtype=np.uint32)
    for i in range(pack_factor):
         packed |= value[:, i::pack_factor] << num_bits * i
 
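Two things change here besides the new num_bits lower-bound check: the offset switches from arithmetic to a bit shift, and the packed buffer is now allocated after padding is computed, which is behavior-preserving since the padding depends only on packed_size. A quick check (illustrative, not from the package) that the two offset forms agree for every supported width:

```python
# Sanity check that the shift-based offset matches the old arithmetic for 1..8 bits.
for num_bits in range(1, 9):
    assert 1 << (num_bits - 1) == pow(2, num_bits) // 2
print("offset expressions agree for num_bits in 1..8")
```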
@@ -174,7 +189,9 @@ def unpack_from_int32(
 ) -> torch.Tensor:
     """
     Unpacks a tensor of packed int32 weights into individual int8s, maintaining the
-    original their bit range
+    original bit range.
+
+    Return tensors in int8
 
     :param value: tensor to upack
     :param num_bits: number of bits to unpack each data point into
@@ -192,7 +209,7 @@ def unpack_from_int32(
     pack_factor = 32 // num_bits
 
     # unpack
-    mask = pow(2, num_bits) - 1
+    mask = (1 << num_bits) - 1
     unpacked = torch.zeros(
         (value.shape[0], value.shape[1] * pack_factor),
         device=value.device,
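To see the new mask expression in action, the following sketch (illustrative only, using plain torch rather than the package's unpack_from_int32) recovers the [1, 2] example from the packed value 33409:

```python
# Unpack the packed example value 33409 back into its int8 components (num_bits=8).
import torch

num_bits = 8
packed = torch.tensor([[33409]], dtype=torch.int32)
pack_factor = 32 // num_bits                 # 4 values per int32
mask = (1 << num_bits) - 1                   # 0xFF, same as pow(2, num_bits) - 1

unpacked = torch.zeros((1, pack_factor), dtype=torch.int32)
for i in range(pack_factor):
    unpacked[:, i::pack_factor] = (packed >> (num_bits * i)) & mask

offset = 1 << (num_bits - 1)                 # undo the unsigned shift
print((unpacked - offset).to(torch.int8))    # [[1, 2, -128, -128]]; trailing values are padding
```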
@@ -18,6 +18,7 @@ from typing import Any, Dict, Optional, Union
18
18
 
19
19
  import torch
20
20
  from compressed_tensors.utils import Aliasable
21
+ from compressed_tensors.utils.helpers import deprecated
21
22
  from pydantic import BaseModel, Field, field_validator, model_validator
22
23
 
23
24
 
@@ -123,12 +124,6 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
         ),
     )
 
-    def get_observer(self):
-        """
-        :return: torch quantization FakeQuantize built based on these QuantizationArgs
-        """
-        return self.observer
-
     @field_validator("type", mode="before")
     def validate_type(cls, value) -> QuantizationType:
         if isinstance(value, str):
@@ -250,6 +245,10 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
         else:
             raise ValueError(f"Invalid quantization type {self.type}")
 
+    @deprecated("QuantizationArgs.observer")
+    def get_observer(self) -> str:
+        return self.observer
+
 
 def round_to_quantized_type(
     tensor: torch.Tensor, args: QuantizationArgs
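Callers of the old accessor keep working, but it is now relocated, typed, and routed through the deprecation decorator. A hypothetical usage sketch (not taken from the package's tests; exact warning wording depends on the decorator):

```python
# Hypothetical caller: get_observer() still returns the observer name, but the
# @deprecated decorator should now point callers toward QuantizationArgs.observer.
import warnings

from compressed_tensors.quantization import QuantizationArgs

args = QuantizationArgs(num_bits=8)
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    assert args.get_observer() == args.observer
print([str(w.message) for w in caught])   # expect a deprecation notice mentioning the new field
```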
src/compressed_tensors/utils/helpers.py
@@ -170,8 +170,8 @@ def deprecated(future_name: Optional[str] = None, message: Optional[str] = None)
     """
     Decorator to mark functions as deprecated
 
-    :param new_function: Function called in place of depreciated function
-    :param message: Depreciation message, replaces default depreciation message
+    :param new_function: Function called in place of deprecated function
+    :param message: Deprecation message, replaces default deprecation message
     """
 
    def decorator(func: Callable[[Any], Any]):
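For context on the decorator whose docstring is corrected here, below is a generic sketch of a decorator with this signature. It is an assumption-laden illustration, not the package's implementation, which lives in compressed_tensors/utils/helpers.py and may differ in wording and details:

```python
# Generic deprecation decorator sketch matching the documented signature.
import warnings
from functools import wraps
from typing import Any, Callable, Optional


def deprecated(future_name: Optional[str] = None, message: Optional[str] = None):
    def decorator(func: Callable[..., Any]):
        default = f"{func.__name__} is deprecated" + (
            f", please use {future_name}" if future_name else ""
        )

        @wraps(func)
        def wrapper(*args, **kwargs):
            # Warn at call time, attributing the warning to the caller's frame.
            warnings.warn(message or default, DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)

        return wrapper

    return decorator
```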
src/compressed_tensors/utils/offload.py
@@ -26,6 +26,7 @@ Utilities associated with offloading functionality provided by `accelerate`.
 """
 
 import contextlib
+import warnings
 from functools import wraps
 from typing import Any, Callable, Dict, Literal, Optional, Union
 
@@ -200,9 +201,14 @@ def update_offload_parameter(
     """
     param = getattr(module, name)
     data = data.to(param.dtype)
+    if param.data.shape != data.shape:
+        warnings.warn(
+            f"Shape of parameter being updated {param.data.shape} does not match shape "
+            f"of update data {data.shape}"
+        )
 
     # copy data into onloaded parameter if applicable
-    if param.device != "meta":
+    if param.device != torch.device("meta"):
         param.data.copy_(data)
 
     # update offload dict
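Besides the new shape-mismatch warning, the device guard now compares against torch.device("meta") rather than the string "meta"; in current PyTorch releases a torch.device never compares equal to a bare string, so the old check could not detect meta parameters. A small illustration (not from the package):

```python
# Why the comparison change matters: device-to-string comparison is never equal.
import torch

meta_param = torch.nn.Parameter(torch.empty(2, 2, device="meta"))
print(meta_param.device != "meta")                  # True:  old check would still attempt the copy
print(meta_param.device != torch.device("meta"))    # False: new check correctly skips the copy
```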
src/compressed_tensors_nightly.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.9.1.20250129
+Version: 0.9.1.20250202
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.