compressed-tensors-nightly 0.4.0.20240629__py3-none-any.whl → 0.4.0.20240701__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
compressed_tensors/compressors/pack_quantized.py
@@ -29,13 +29,7 @@ from torch import Tensor
  from tqdm import tqdm
 
 
- __all__ = [
-     "PackedQuantizationCompressor",
-     "pack_4bit_ints",
-     "pack_8bit_ints",
-     "unpack_4bit_ints",
-     "unpack_8bit_ints",
- ]
+ __all__ = ["PackedQuantizationCompressor", "pack_to_int32", "unpack_from_int32"]
 
  _LOGGER: logging.Logger = logging.getLogger(__name__)
 
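For callers, the export change above is the entire migration surface: the four width-specific helpers disappear and the bit width becomes an argument. A minimal before/after sketch, assuming the module path implied by the RECORD listing at the end of this diff (compressed_tensors.compressors.pack_quantized):

import torch
from compressed_tensors.compressors.pack_quantized import pack_to_int32

# int4-quantized values are stored in int8, range [-8, 7]
qweight = torch.randint(-8, 8, (4, 16), dtype=torch.int8)

# 0.4.0.20240629: packed = pack_4bit_ints(qweight) or pack_8bit_ints(qweight)
# 0.4.0.20240701: one entry point, width passed explicitly
packed = pack_to_int32(qweight, 4)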
@@ -91,11 +85,7 @@ class PackedQuantizationCompressor(Compressor):
      args=quant_args,
      dtype=torch.int8,
  )
-
- if quant_args.num_bits == 8:
-     value = pack_8bit_ints(value.cpu())
- else:
-     value = pack_4bit_ints(value.cpu())
+ value = pack_to_int32(value.cpu(), quant_args.num_bits)
  compressed_dict[merge_names(prefix, "weight_shape")] = shape
  compressed_dict[merge_names(prefix, "weight_packed")] = value
  continue
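The compress path no longer branches on bit width: pack_to_int32 derives its signed-to-unsigned conversion from num_bits instead of the hard-coded shifts in the removed helpers. A standalone check of that offset arithmetic (illustration only, not library code):

def unsigned_offset(num_bits: int) -> int:
    # half the representable range, matching offset = pow(2, num_bits) // 2 below
    return 2 ** num_bits // 2

assert unsigned_offset(4) == 8    # int4 [-8, 7] maps to uint [0, 15]
assert unsigned_offset(8) == 128  # int8 [-128, 127] maps to uint [0, 255]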
@@ -143,10 +133,7 @@ class PackedQuantizationCompressor(Compressor):
  weight = weight_data["weight_packed"]
  num_bits = weight_data["num_bits"]
  original_shape = torch.Size(weight_data["weight_shape"])
- if num_bits == 4:
-     unpacked = unpack_4bit_ints(weight, original_shape)
- else:
-     unpacked = unpack_8bit_ints(weight, original_shape)
+ unpacked = unpack_from_int32(weight, num_bits, original_shape)
  decompressed = dequantize(
      x_q=unpacked,
      scale=scale,
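Decompression likewise drops the branch: num_bits is read from the stored weight data and each value is recovered by shift-and-mask, mirroring the loop in unpack_from_int32 below. A minimal hand-rolled illustration for a single 32-bit word (not the library API):

num_bits = 4
mask = (1 << num_bits) - 1          # 0xF
fields = [3, 0, 15, 7, 1, 9, 2, 4]  # eight unsigned 4-bit values

word = 0
for i, v in enumerate(fields):  # field i occupies bits [4*i, 4*i + 4)
    word |= v << (num_bits * i)

assert [(word >> (num_bits * i)) & mask for i in range(8)] == fields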
@@ -155,67 +142,50 @@ class PackedQuantizationCompressor(Compressor):
  yield merge_names(weight_name, "weight"), decompressed
 
 
- def pack_8bit_ints(value: torch.Tensor) -> torch.Tensor:
-     """
-     Packs a tensor of int8 into int32s with padding
-
-     :param value: tensor to pack
-     :returns: packed int32 tensor
-     """
-     # need to convert to unsigned 8bit to use numpy's pack/unpack
-     value_uint = (value - 128).to(torch.uint8)
-     bits = np.unpackbits(value_uint, axis=-1, bitorder="little")
-     return _pack_bits(bits_to_pack=bits)
-
-
- def pack_4bit_ints(value: torch.Tensor) -> torch.Tensor:
+ def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
      """
-     Packs a tensor of int4 weights stored in int8 into int32s with padding
+     Packs a tensor of quantized weights stored in int8 into int32s with padding
 
      :param value: tensor to pack
+     :param num_bits: number of bits used to store underlying data
      :returns: packed int32 tensor
      """
      if value.dtype is not torch.int8:
          raise ValueError("Tensor must be quantized to torch.int8 before packing")
 
-     # need to convert to unsigned 8bit to use numpy's pack/unpack
-     temp = (value - 8).to(torch.uint8)
-     bits = np.unpackbits(temp.numpy(), axis=-1, bitorder="little")
-     ranges = np.array([range(x, x + 4) for x in range(0, bits.shape[1], 8)]).flatten()
-     only_4_bits = bits[:, ranges]  # top 4 bits are 0 because we're really uint4
-     return _pack_bits(bits_to_pack=only_4_bits)
+     if num_bits > 8:
+         raise ValueError("Packing is only supported for less than 8 bits")
 
+     # convert to unsigned for packing
+     offset = pow(2, num_bits) // 2
+     value = (value + offset).to(torch.uint8)
+     value = value.cpu().numpy().astype(np.uint32)
+     pack_factor = 32 // num_bits
 
- def unpack_8bit_ints(value: torch.Tensor, shape: torch.Size) -> torch.Tensor:
-     """
-     Unpacks a tensor packed int8 weights in int32
+     # pad input tensor and initialize packed output
+     packed_size = math.ceil(value.shape[1] / pack_factor)
+     packed = np.zeros((value.shape[0], packed_size), dtype=np.uint32)
+     padding = packed.shape[1] * pack_factor - value.shape[1]
+     value = np.pad(value, pad_width=[(0, 0), (0, padding)], constant_values=0)
 
-     :param value: tensor to upack
-     :param shape: shape to unpack into, used to remove padding
-     :returns: unpacked int8 tensor
-     """
-     if value.dtype is not torch.int32:
-         raise ValueError(
-             f"Expected {torch.int32} but got {value.dtype}, Aborting unpack."
-         )
+     # pack values
+     for i in range(pack_factor):
+         packed |= value[:, i::pack_factor] << num_bits * i
 
-     # unpack bits and undo padding to nearest int32 bits
-     individual_depth = 8
-     as_uint8 = value.numpy().view(np.uint8)
-     bits = np.unpackbits(as_uint8, axis=-1, bitorder="little")
-     original_row_size = int(shape[1] * individual_depth)
-     bits = bits[:, :original_row_size]
-     bits = np.packbits(bits, axis=-1, bitorder="little")
-     final = (bits - 128).astype(np.int8)
-     return torch.from_numpy(final)
+     # convert back to signed and torch
+     packed = np.ascontiguousarray(packed).view(np.int32)
+     return torch.from_numpy(packed)
 
 
- def unpack_4bit_ints(value: torch.Tensor, shape: torch.Size) -> torch.Tensor:
+ def unpack_from_int32(
+     value: torch.Tensor, num_bits: int, shape: torch.Size
+ ) -> torch.Tensor:
      """
-     Unpacks a tensor packed int4 weights into individual int8s, maintaining the
-     original their int4 range
+     Unpacks a tensor of packed int32 weights into individual int8s, maintaining the
+     original their bit range
 
      :param value: tensor to upack
+     :param num_bits: number of bits to unpack each data point into
      :param shape: shape to unpack into, used to remove padding
      :returns: unpacked int8 tensor
      """
@@ -224,49 +194,26 @@ def unpack_4bit_ints(value: torch.Tensor, shape: torch.Size) -> torch.Tensor:
          f"Expected {torch.int32} but got {value.dtype}, Aborting unpack."
      )
 
-     # unpack bits and undo padding to nearest int32 bits
-     individual_depth = 4
-     as_uint8 = value.numpy().view(np.uint8)
-     bits = np.unpackbits(as_uint8, axis=-1, bitorder="little")
-     original_row_size = int(shape[1] * individual_depth)
-     bits = bits[:, :original_row_size]
+     if num_bits > 8:
+         raise ValueError("Unpacking is only supported for less than 8 bits")
 
-     # reformat each packed uint4 to a uint8 by filling to top 4 bits with zeros
-     # (uint8 format is required by np.packbits)
-     shape_8bit = (bits.shape[0], bits.shape[1] * 2)
-     bits_as_8bit = np.zeros(shape_8bit, dtype=np.uint8)
-     ranges = np.array([range(x, x + 4) for x in range(0, shape_8bit[1], 8)]).flatten()
-     bits_as_8bit[:, ranges] = bits
+     # convert packed input to unsigned numpy
+     value = value.numpy().view(np.uint32)
+     pack_factor = 32 // num_bits
 
-     # repack the bits to uint8
-     repacked = np.packbits(bits_as_8bit, axis=-1, bitorder="little")
+     # unpack
+     mask = pow(2, num_bits) - 1
+     unpacked = np.zeros((value.shape[0], value.shape[1] * pack_factor))
+     for i in range(pack_factor):
+         unpacked[:, i::pack_factor] = (value >> (num_bits * i)) & mask
 
-     # bits are packed in unsigned format, reformat to signed
-     # update the value range from uint4 to int4
-     final = repacked.astype(np.int8) - 8
+     # remove padding
+     original_row_size = int(shape[1])
+     unpacked = unpacked[:, :original_row_size]
 
-     return torch.from_numpy(final)
-
-
- def _pack_bits(bits_to_pack: torch.Tensor) -> torch.Tensor:
-     """
-     Pack a tensor of bits to int32.
+     # bits are packed in unsigned format, reformat to signed
+     # update the value range from unsigned to signed
+     offset = pow(2, num_bits) // 2
+     unpacked = (unpacked.astype(np.int16) - offset).astype(np.int8)
 
-     :param bits_to_pack: tensor of bits to pack
-     """
-     # pad each row to fill a full 32bit int
-     pack_depth = 32
-     padding = (
-         math.ceil(bits_to_pack.shape[1] / pack_depth) * pack_depth
-         - bits_to_pack.shape[1]
-     )
-     padded_bits = np.pad(
-         bits_to_pack, pad_width=[(0, 0), (0, padding)], constant_values=0
-     )
-
-     # after packbits each uint8 is two packed uint4s
-     # then we keep the bit pattern the same but convert to int32
-     compressed = np.packbits(padded_bits, axis=-1, bitorder="little")
-     compressed = np.ascontiguousarray(compressed).view(np.int32)
-
-     return torch.from_numpy(compressed)
+     return torch.from_numpy(unpacked)
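Taken together, the two new helpers are inverses up to the zero padding that the shape argument strips off. A hedged round-trip sketch, again assuming the import path inferred from the RECORD entries below:

import torch
from compressed_tensors.compressors.pack_quantized import (
    pack_to_int32,
    unpack_from_int32,
)

# int4-quantized weights stored as int8; 11 columns forces padding to 16
qweight = torch.randint(-8, 8, (2, 11), dtype=torch.int8)

packed = pack_to_int32(qweight, num_bits=4)  # shape (2, 2), dtype torch.int32
restored = unpack_from_int32(packed, num_bits=4, shape=qweight.shape)
assert torch.equal(restored, qweight)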
compressed_tensors_nightly-0.4.0.20240629.dist-info/METADATA → compressed_tensors_nightly-0.4.0.20240701.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: compressed-tensors-nightly
- Version: 0.4.0.20240629
+ Version: 0.4.0.20240701
  Summary: Library for utilization of compressed safetensors of neural network models
  Home-page: https://github.com/neuralmagic/compressed-tensors
  Author: Neuralmagic, Inc.
compressed_tensors_nightly-0.4.0.20240629.dist-info/RECORD → compressed_tensors_nightly-0.4.0.20240701.dist-info/RECORD
@@ -8,7 +8,7 @@ compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24S
  compressed_tensors/compressors/marlin_24.py,sha256=PULMP1fp1sNWz-xOxvM0JXhOrUbq6sPwOTscYSifgDw,9450
  compressed_tensors/compressors/model_compressor.py,sha256=t4dH7Yh637JV53VPyys-gkoMPJHGf_tlWWufLRyIdUM,13418
  compressed_tensors/compressors/naive_quantized.py,sha256=6_1wuTF96-lw-UzzrsiEX_ipciKiQQJoZ8uotVwtbyQ,5569
- compressed_tensors/compressors/pack_quantized.py,sha256=ZRqqBVPB6B-nZQOSdu7WhKrKWIm2-ZVrUQHATxO2Boc,10297
+ compressed_tensors/compressors/pack_quantized.py,sha256=tnhqvkko6fIaTywI2JNvh5lE2xXWKJ_hYShv_s6C9Vk,8506
  compressed_tensors/compressors/sparse_bitmask.py,sha256=kiDwBlFV0sJGLcIdDYxIiuF64ccgwDfqq1hWRQThYDc,8647
  compressed_tensors/compressors/utils/__init__.py,sha256=-mbGDZh1hd9T6u62Ht_iBIK255UmMg0f5bLkSs1f9Cc,731
  compressed_tensors/compressors/utils/helpers.py,sha256=4fq7KclSIK__jemCG9pwYlgWLrQjsaAMxhIrhjdw0BQ,1506
@@ -41,8 +41,8 @@ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85S
  compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
  compressed_tensors/utils/helpers.py,sha256=dt4uxSIeqvqDmeJBJ6UUVHEOnMI7EtMSzEDv6PRUu14,2266
  compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
- compressed_tensors_nightly-0.4.0.20240629.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- compressed_tensors_nightly-0.4.0.20240629.dist-info/METADATA,sha256=6bwIx08fza4K-Xxr0Ok3qXY2IyiZMH0BY6v0q7Wg8Mo,5668
- compressed_tensors_nightly-0.4.0.20240629.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
- compressed_tensors_nightly-0.4.0.20240629.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
- compressed_tensors_nightly-0.4.0.20240629.dist-info/RECORD,,
+ compressed_tensors_nightly-0.4.0.20240701.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ compressed_tensors_nightly-0.4.0.20240701.dist-info/METADATA,sha256=01PuMUcrvra_BAJaUwOExROXU3KAyNCzOSZqPov7kEI,5668
+ compressed_tensors_nightly-0.4.0.20240701.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+ compressed_tensors_nightly-0.4.0.20240701.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+ compressed_tensors_nightly-0.4.0.20240701.dist-info/RECORD,,