compressed-tensors-nightly 0.4.0.20240630__py3-none-any.whl → 0.4.0.20240701__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/pack_quantized.py +48 -101
- {compressed_tensors_nightly-0.4.0.20240630.dist-info → compressed_tensors_nightly-0.4.0.20240701.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.4.0.20240630.dist-info → compressed_tensors_nightly-0.4.0.20240701.dist-info}/RECORD +6 -6
- {compressed_tensors_nightly-0.4.0.20240630.dist-info → compressed_tensors_nightly-0.4.0.20240701.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.4.0.20240630.dist-info → compressed_tensors_nightly-0.4.0.20240701.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.4.0.20240630.dist-info → compressed_tensors_nightly-0.4.0.20240701.dist-info}/top_level.txt +0 -0
@@ -29,13 +29,7 @@ from torch import Tensor
 from tqdm import tqdm
 
 
-__all__ = [
-    "PackedQuantizationCompressor",
-    "pack_4bit_ints",
-    "pack_8bit_ints",
-    "unpack_4bit_ints",
-    "unpack_8bit_ints",
-]
+__all__ = ["PackedQuantizationCompressor", "pack_to_int32", "unpack_from_int32"]
 
 _LOGGER: logging.Logger = logging.getLogger(__name__)
 
@@ -91,11 +85,7 @@ class PackedQuantizationCompressor(Compressor):
                             args=quant_args,
                             dtype=torch.int8,
                         )
-
-                    if quant_args.num_bits == 8:
-                        value = pack_8bit_ints(value.cpu())
-                    else:
-                        value = pack_4bit_ints(value.cpu())
+                    value = pack_to_int32(value.cpu(), quant_args.num_bits)
                     compressed_dict[merge_names(prefix, "weight_shape")] = shape
                     compressed_dict[merge_names(prefix, "weight_packed")] = value
                     continue
@@ -143,10 +133,7 @@ class PackedQuantizationCompressor(Compressor):
                 weight = weight_data["weight_packed"]
                 num_bits = weight_data["num_bits"]
                 original_shape = torch.Size(weight_data["weight_shape"])
-                if num_bits == 4:
-                    unpacked = unpack_4bit_ints(weight, original_shape)
-                else:
-                    unpacked = unpack_8bit_ints(weight, original_shape)
+                unpacked = unpack_from_int32(weight, num_bits, original_shape)
                 decompressed = dequantize(
                     x_q=unpacked,
                     scale=scale,
@@ -155,67 +142,50 @@ class PackedQuantizationCompressor(Compressor):
                 yield merge_names(weight_name, "weight"), decompressed
 
 
-def pack_8bit_ints(value: torch.Tensor) -> torch.Tensor:
-    """
-    Packs a tensor of int8 into int32s with padding
-
-    :param value: tensor to pack
-    :returns: packed int32 tensor
-    """
-    # need to convert to unsigned 8bit to use numpy's pack/unpack
-    value_uint = (value - 128).to(torch.uint8)
-    bits = np.unpackbits(value_uint, axis=-1, bitorder="little")
-    return _pack_bits(bits_to_pack=bits)
-
-
-def pack_4bit_ints(value: torch.Tensor) -> torch.Tensor:
+def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
     """
-    Packs a tensor of
+    Packs a tensor of quantized weights stored in int8 into int32s with padding
 
     :param value: tensor to pack
+    :param num_bits: number of bits used to store underlying data
     :returns: packed int32 tensor
     """
     if value.dtype is not torch.int8:
         raise ValueError("Tensor must be quantized to torch.int8 before packing")
 
-
-
-    bits = np.unpackbits(temp.numpy(), axis=-1, bitorder="little")
-    ranges = np.array([range(x, x + 4) for x in range(0, bits.shape[1], 8)]).flatten()
-    only_4_bits = bits[:, ranges]  # top 4 bits are 0 because we're really uint4
-    return _pack_bits(bits_to_pack=only_4_bits)
+    if num_bits > 8:
+        raise ValueError("Packing is only supported for less than 8 bits")
 
+    # convert to unsigned for packing
+    offset = pow(2, num_bits) // 2
+    value = (value + offset).to(torch.uint8)
+    value = value.cpu().numpy().astype(np.uint32)
+    pack_factor = 32 // num_bits
 
-def unpack_8bit_ints(value: torch.Tensor, shape: torch.Size) -> torch.Tensor:
-
-
+    # pad input tensor and initialize packed output
+    packed_size = math.ceil(value.shape[1] / pack_factor)
+    packed = np.zeros((value.shape[0], packed_size), dtype=np.uint32)
+    padding = packed.shape[1] * pack_factor - value.shape[1]
+    value = np.pad(value, pad_width=[(0, 0), (0, padding)], constant_values=0)
 
-
-
-
-    """
-    if value.dtype is not torch.int32:
-        raise ValueError(
-            f"Expected {torch.int32} but got {value.dtype}, Aborting unpack."
-        )
+    # pack values
+    for i in range(pack_factor):
+        packed |= value[:, i::pack_factor] << num_bits * i
 
-    #
-
-
-    bits = np.unpackbits(as_uint8, axis=-1, bitorder="little")
-    original_row_size = int(shape[1] * individual_depth)
-    bits = bits[:, :original_row_size]
-    bits = np.packbits(bits, axis=-1, bitorder="little")
-    final = (bits - 128).astype(np.int8)
-    return torch.from_numpy(final)
+    # convert back to signed and torch
+    packed = np.ascontiguousarray(packed).view(np.int32)
+    return torch.from_numpy(packed)
 
 
-def unpack_4bit_ints(value: torch.Tensor, shape: torch.Size) -> torch.Tensor:
+def unpack_from_int32(
+    value: torch.Tensor, num_bits: int, shape: torch.Size
+) -> torch.Tensor:
     """
-    Unpacks a tensor packed
-    original their
+    Unpacks a tensor of packed int32 weights into individual int8s, maintaining the
+    original their bit range
 
     :param value: tensor to upack
+    :param num_bits: number of bits to unpack each data point into
     :param shape: shape to unpack into, used to remove padding
     :returns: unpacked int8 tensor
     """
@@ -224,49 +194,26 @@ def unpack_4bit_ints(value: torch.Tensor, shape: torch.Size) -> torch.Tensor:
             f"Expected {torch.int32} but got {value.dtype}, Aborting unpack."
         )
 
-
-
-    as_uint8 = value.numpy().view(np.uint8)
-    bits = np.unpackbits(as_uint8, axis=-1, bitorder="little")
-    original_row_size = int(shape[1] * individual_depth)
-    bits = bits[:, :original_row_size]
+    if num_bits > 8:
+        raise ValueError("Unpacking is only supported for less than 8 bits")
 
-    #
-
-
-    bits_as_8bit = np.zeros(shape_8bit, dtype=np.uint8)
-    ranges = np.array([range(x, x + 4) for x in range(0, shape_8bit[1], 8)]).flatten()
-    bits_as_8bit[:, ranges] = bits
+    # convert packed input to unsigned numpy
+    value = value.numpy().view(np.uint32)
+    pack_factor = 32 // num_bits
 
-    #
-
+    # unpack
+    mask = pow(2, num_bits) - 1
+    unpacked = np.zeros((value.shape[0], value.shape[1] * pack_factor))
+    for i in range(pack_factor):
+        unpacked[:, i::pack_factor] = (value >> (num_bits * i)) & mask
 
-    #
-
-
+    # remove padding
+    original_row_size = int(shape[1])
+    unpacked = unpacked[:, :original_row_size]
 
-
-
-
-
-    """
-    Pack a tensor of bits to int32.
+    # bits are packed in unsigned format, reformat to signed
+    # update the value range from unsigned to signed
+    offset = pow(2, num_bits) // 2
+    unpacked = (unpacked.astype(np.int16) - offset).astype(np.int8)
 
-
-    """
-    # pad each row to fill a full 32bit int
-    pack_depth = 32
-    padding = (
-        math.ceil(bits_to_pack.shape[1] / pack_depth) * pack_depth
-        - bits_to_pack.shape[1]
-    )
-    padded_bits = np.pad(
-        bits_to_pack, pad_width=[(0, 0), (0, padding)], constant_values=0
-    )
-
-    # after packbits each uint8 is two packed uint4s
-    # then we keep the bit pattern the same but convert to int32
-    compressed = np.packbits(padded_bits, axis=-1, bitorder="little")
-    compressed = np.ascontiguousarray(compressed).view(np.int32)
-
-    return torch.from_numpy(compressed)
+    return torch.from_numpy(unpacked)
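Taken together, the hunks above collapse the separate 4-bit and 8-bit helpers into one generic pair: pack_to_int32 shifts values from signed to unsigned by 2**num_bits // 2, pads each row to a multiple of 32 // num_bits entries, and ORs the values into int32 words, while unpack_from_int32 reverses the process using the stored weight_shape. A minimal round-trip sketch of the new functions, assuming the 0.4.0.20240701 wheel is installed; the import path simply mirrors the file location compressed_tensors/compressors/pack_quantized.py shown above:

import torch
from compressed_tensors.compressors.pack_quantized import pack_to_int32, unpack_from_int32

# simulate a 4-bit quantized weight stored in int8 (values in [-8, 7])
weight = torch.randint(-8, 8, (64, 33), dtype=torch.int8)

# 32 // 4 = 8 values per int32 word; 33 columns pad up to 40, giving 5 words per row
packed = pack_to_int32(weight, num_bits=4)
assert packed.dtype == torch.int32 and packed.shape == (64, 5)

# unpacking uses the original shape to strip the padding and restore the signed range
unpacked = unpack_from_int32(packed, num_bits=4, shape=weight.shape)
assert torch.equal(unpacked, weight)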
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.4.0.20240630
+Version: 0.4.0.20240701
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -8,7 +8,7 @@ compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24S
 compressed_tensors/compressors/marlin_24.py,sha256=PULMP1fp1sNWz-xOxvM0JXhOrUbq6sPwOTscYSifgDw,9450
 compressed_tensors/compressors/model_compressor.py,sha256=t4dH7Yh637JV53VPyys-gkoMPJHGf_tlWWufLRyIdUM,13418
 compressed_tensors/compressors/naive_quantized.py,sha256=6_1wuTF96-lw-UzzrsiEX_ipciKiQQJoZ8uotVwtbyQ,5569
-compressed_tensors/compressors/pack_quantized.py,sha256=
+compressed_tensors/compressors/pack_quantized.py,sha256=tnhqvkko6fIaTywI2JNvh5lE2xXWKJ_hYShv_s6C9Vk,8506
 compressed_tensors/compressors/sparse_bitmask.py,sha256=kiDwBlFV0sJGLcIdDYxIiuF64ccgwDfqq1hWRQThYDc,8647
 compressed_tensors/compressors/utils/__init__.py,sha256=-mbGDZh1hd9T6u62Ht_iBIK255UmMg0f5bLkSs1f9Cc,731
 compressed_tensors/compressors/utils/helpers.py,sha256=4fq7KclSIK__jemCG9pwYlgWLrQjsaAMxhIrhjdw0BQ,1506
@@ -41,8 +41,8 @@ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85S
 compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
 compressed_tensors/utils/helpers.py,sha256=dt4uxSIeqvqDmeJBJ6UUVHEOnMI7EtMSzEDv6PRUu14,2266
 compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
-compressed_tensors_nightly-0.4.0.20240630.dist-info/LICENSE,sha256=
-compressed_tensors_nightly-0.4.0.20240630.dist-info/METADATA,sha256=
-compressed_tensors_nightly-0.4.0.20240630.dist-info/WHEEL,sha256=
-compressed_tensors_nightly-0.4.0.20240630.dist-info/top_level.txt,sha256=
-compressed_tensors_nightly-0.4.0.20240630.dist-info/RECORD,,
+compressed_tensors_nightly-0.4.0.20240701.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.4.0.20240701.dist-info/METADATA,sha256=01PuMUcrvra_BAJaUwOExROXU3KAyNCzOSZqPov7kEI,5668
+compressed_tensors_nightly-0.4.0.20240701.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+compressed_tensors_nightly-0.4.0.20240701.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.4.0.20240701.dist-info/RECORD,,