compressed-tensors-nightly 0.4.0.20240630__py3-none-any.whl → 0.4.0.20240701__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/pack_quantized.py +48 -101
- {compressed_tensors_nightly-0.4.0.20240630.dist-info → compressed_tensors_nightly-0.4.0.20240701.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.4.0.20240630.dist-info → compressed_tensors_nightly-0.4.0.20240701.dist-info}/RECORD +6 -6
- {compressed_tensors_nightly-0.4.0.20240630.dist-info → compressed_tensors_nightly-0.4.0.20240701.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.4.0.20240630.dist-info → compressed_tensors_nightly-0.4.0.20240701.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.4.0.20240630.dist-info → compressed_tensors_nightly-0.4.0.20240701.dist-info}/top_level.txt +0 -0
@@ -29,13 +29,7 @@ from torch import Tensor
 from tqdm import tqdm
 
 
-__all__ = [
-    "PackedQuantizationCompressor",
-    "pack_4bit_ints",
-    "pack_8bit_ints",
-    "unpack_4bit_ints",
-    "unpack_8bit_ints",
-]
+__all__ = ["PackedQuantizationCompressor", "pack_to_int32", "unpack_from_int32"]
 
 _LOGGER: logging.Logger = logging.getLogger(__name__)
 
@@ -91,11 +85,7 @@ class PackedQuantizationCompressor(Compressor):
                             args=quant_args,
                             dtype=torch.int8,
                         )
-
-                    if quant_args.num_bits == 8:
-                        value = pack_8bit_ints(value.cpu())
-                    else:
-                        value = pack_4bit_ints(value.cpu())
+                    value = pack_to_int32(value.cpu(), quant_args.num_bits)
                     compressed_dict[merge_names(prefix, "weight_shape")] = shape
                     compressed_dict[merge_names(prefix, "weight_packed")] = value
                     continue
@@ -143,10 +133,7 @@ class PackedQuantizationCompressor(Compressor):
                 weight = weight_data["weight_packed"]
                 num_bits = weight_data["num_bits"]
                 original_shape = torch.Size(weight_data["weight_shape"])
-                if num_bits == 4:
-                    unpacked = unpack_4bit_ints(weight, original_shape)
-                else:
-                    unpacked = unpack_8bit_ints(weight, original_shape)
+                unpacked = unpack_from_int32(weight, num_bits, original_shape)
                 decompressed = dequantize(
                     x_q=unpacked,
                     scale=scale,
@@ -155,67 +142,50 @@ class PackedQuantizationCompressor(Compressor):
                 yield merge_names(weight_name, "weight"), decompressed
 
 
-def pack_8bit_ints(value: torch.Tensor) -> torch.Tensor:
-    """
-    Packs a tensor of int8 into int32s with padding
-
-    :param value: tensor to pack
-    :returns: packed int32 tensor
-    """
-    # need to convert to unsigned 8bit to use numpy's pack/unpack
-    value_uint = (value - 128).to(torch.uint8)
-    bits = np.unpackbits(value_uint, axis=-1, bitorder="little")
-    return _pack_bits(bits_to_pack=bits)
-
-
-def pack_4bit_ints(value: torch.Tensor) -> torch.Tensor:
+def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
     """
-    Packs a tensor of
+    Packs a tensor of quantized weights stored in int8 into int32s with padding
 
     :param value: tensor to pack
+    :param num_bits: number of bits used to store underlying data
     :returns: packed int32 tensor
     """
     if value.dtype is not torch.int8:
         raise ValueError("Tensor must be quantized to torch.int8 before packing")
 
-
-
-    bits = np.unpackbits(temp.numpy(), axis=-1, bitorder="little")
-    ranges = np.array([range(x, x + 4) for x in range(0, bits.shape[1], 8)]).flatten()
-    only_4_bits = bits[:, ranges]  # top 4 bits are 0 because we're really uint4
-    return _pack_bits(bits_to_pack=only_4_bits)
+    if num_bits > 8:
+        raise ValueError("Packing is only supported for less than 8 bits")
 
+    # convert to unsigned for packing
+    offset = pow(2, num_bits) // 2
+    value = (value + offset).to(torch.uint8)
+    value = value.cpu().numpy().astype(np.uint32)
+    pack_factor = 32 // num_bits
 
-def unpack_8bit_ints(value: torch.Tensor, shape: torch.Size) -> torch.Tensor:
-
-
+    # pad input tensor and initialize packed output
+    packed_size = math.ceil(value.shape[1] / pack_factor)
+    packed = np.zeros((value.shape[0], packed_size), dtype=np.uint32)
+    padding = packed.shape[1] * pack_factor - value.shape[1]
+    value = np.pad(value, pad_width=[(0, 0), (0, padding)], constant_values=0)
 
-
-
-
-    """
-    if value.dtype is not torch.int32:
-        raise ValueError(
-            f"Expected {torch.int32} but got {value.dtype}, Aborting unpack."
-        )
+    # pack values
+    for i in range(pack_factor):
+        packed |= value[:, i::pack_factor] << num_bits * i
 
-    #
-
-
-    bits = np.unpackbits(as_uint8, axis=-1, bitorder="little")
-    original_row_size = int(shape[1] * individual_depth)
-    bits = bits[:, :original_row_size]
-    bits = np.packbits(bits, axis=-1, bitorder="little")
-    final = (bits - 128).astype(np.int8)
-    return torch.from_numpy(final)
+    # convert back to signed and torch
+    packed = np.ascontiguousarray(packed).view(np.int32)
+    return torch.from_numpy(packed)
 
 
-def unpack_4bit_ints(value: torch.Tensor, shape: torch.Size) -> torch.Tensor:
+def unpack_from_int32(
+    value: torch.Tensor, num_bits: int, shape: torch.Size
+) -> torch.Tensor:
     """
-    Unpacks a tensor packed
-    original their
+    Unpacks a tensor of packed int32 weights into individual int8s, maintaining the
+    original their bit range
 
     :param value: tensor to upack
+    :param num_bits: number of bits to unpack each data point into
     :param shape: shape to unpack into, used to remove padding
     :returns: unpacked int8 tensor
     """
@@ -224,49 +194,26 @@ def unpack_4bit_ints(value: torch.Tensor, shape: torch.Size) -> torch.Tensor:
             f"Expected {torch.int32} but got {value.dtype}, Aborting unpack."
         )
 
-
-
-    as_uint8 = value.numpy().view(np.uint8)
-    bits = np.unpackbits(as_uint8, axis=-1, bitorder="little")
-    original_row_size = int(shape[1] * individual_depth)
-    bits = bits[:, :original_row_size]
+    if num_bits > 8:
+        raise ValueError("Unpacking is only supported for less than 8 bits")
 
-    #
-
-
-    bits_as_8bit = np.zeros(shape_8bit, dtype=np.uint8)
-    ranges = np.array([range(x, x + 4) for x in range(0, shape_8bit[1], 8)]).flatten()
-    bits_as_8bit[:, ranges] = bits
+    # convert packed input to unsigned numpy
+    value = value.numpy().view(np.uint32)
+    pack_factor = 32 // num_bits
 
-    #
-
+    # unpack
+    mask = pow(2, num_bits) - 1
+    unpacked = np.zeros((value.shape[0], value.shape[1] * pack_factor))
+    for i in range(pack_factor):
+        unpacked[:, i::pack_factor] = (value >> (num_bits * i)) & mask
 
-    #
-
-
+    # remove padding
+    original_row_size = int(shape[1])
+    unpacked = unpacked[:, :original_row_size]
 
-
-
-
-
-    """
-    Pack a tensor of bits to int32.
+    # bits are packed in unsigned format, reformat to signed
+    # update the value range from unsigned to signed
+    offset = pow(2, num_bits) // 2
+    unpacked = (unpacked.astype(np.int16) - offset).astype(np.int8)
 
-
-    """
-    # pad each row to fill a full 32bit int
-    pack_depth = 32
-    padding = (
-        math.ceil(bits_to_pack.shape[1] / pack_depth) * pack_depth
-        - bits_to_pack.shape[1]
-    )
-    padded_bits = np.pad(
-        bits_to_pack, pad_width=[(0, 0), (0, padding)], constant_values=0
-    )
-
-    # after packbits each uint8 is two packed uint4s
-    # then we keep the bit pattern the same but convert to int32
-    compressed = np.packbits(padded_bits, axis=-1, bitorder="little")
-    compressed = np.ascontiguousarray(compressed).view(np.int32)
-
-    return torch.from_numpy(compressed)
+    return torch.from_numpy(unpacked)
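Taken together, the hunks above collapse the separate 4-bit and 8-bit helpers into one generic pair: pack_to_int32 shifts values from signed to unsigned by 2**num_bits // 2, pads each row to a multiple of 32 // num_bits entries, and ORs the values into int32 words, while unpack_from_int32 reverses the process using the stored weight_shape. A minimal round-trip sketch of the new functions, assuming the 0.4.0.20240701 wheel is installed; the import path simply mirrors the file location compressed_tensors/compressors/pack_quantized.py shown above:

import torch
from compressed_tensors.compressors.pack_quantized import pack_to_int32, unpack_from_int32

# simulate a 4-bit quantized weight stored in int8 (values in [-8, 7])
weight = torch.randint(-8, 8, (64, 33), dtype=torch.int8)

# 32 // 4 = 8 values per int32 word; 33 columns pad up to 40, giving 5 words per row
packed = pack_to_int32(weight, num_bits=4)
assert packed.dtype == torch.int32 and packed.shape == (64, 5)

# unpacking uses the original shape to strip the padding and restore the signed range
unpacked = unpack_from_int32(packed, num_bits=4, shape=weight.shape)
assert torch.equal(unpacked, weight)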
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.4.0.20240630
+Version: 0.4.0.20240701
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -8,7 +8,7 @@ compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24S
 compressed_tensors/compressors/marlin_24.py,sha256=PULMP1fp1sNWz-xOxvM0JXhOrUbq6sPwOTscYSifgDw,9450
 compressed_tensors/compressors/model_compressor.py,sha256=t4dH7Yh637JV53VPyys-gkoMPJHGf_tlWWufLRyIdUM,13418
 compressed_tensors/compressors/naive_quantized.py,sha256=6_1wuTF96-lw-UzzrsiEX_ipciKiQQJoZ8uotVwtbyQ,5569
-compressed_tensors/compressors/pack_quantized.py,sha256=
+compressed_tensors/compressors/pack_quantized.py,sha256=tnhqvkko6fIaTywI2JNvh5lE2xXWKJ_hYShv_s6C9Vk,8506
 compressed_tensors/compressors/sparse_bitmask.py,sha256=kiDwBlFV0sJGLcIdDYxIiuF64ccgwDfqq1hWRQThYDc,8647
 compressed_tensors/compressors/utils/__init__.py,sha256=-mbGDZh1hd9T6u62Ht_iBIK255UmMg0f5bLkSs1f9Cc,731
 compressed_tensors/compressors/utils/helpers.py,sha256=4fq7KclSIK__jemCG9pwYlgWLrQjsaAMxhIrhjdw0BQ,1506
@@ -41,8 +41,8 @@ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85S
 compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
 compressed_tensors/utils/helpers.py,sha256=dt4uxSIeqvqDmeJBJ6UUVHEOnMI7EtMSzEDv6PRUu14,2266
 compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
-compressed_tensors_nightly-0.4.0.20240630.dist-info/LICENSE,sha256=
-compressed_tensors_nightly-0.4.0.20240630.dist-info/METADATA,sha256=
-compressed_tensors_nightly-0.4.0.20240630.dist-info/WHEEL,sha256=
-compressed_tensors_nightly-0.4.0.20240630.dist-info/top_level.txt,sha256=
-compressed_tensors_nightly-0.4.0.20240630.dist-info/RECORD,,
+compressed_tensors_nightly-0.4.0.20240701.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.4.0.20240701.dist-info/METADATA,sha256=01PuMUcrvra_BAJaUwOExROXU3KAyNCzOSZqPov7kEI,5668
+compressed_tensors_nightly-0.4.0.20240701.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+compressed_tensors_nightly-0.4.0.20240701.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.4.0.20240701.dist-info/RECORD,,