compressed-tensors 0.9.5a20250530__py3-none-any.whl → 0.9.5a20250602__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/quantization/lifecycle/forward.py +35 -24
- compressed_tensors/quantization/quant_args.py +1 -0
- compressed_tensors/quantization/utils/helpers.py +1 -2
- compressed_tensors/version.py +1 -1
- {compressed_tensors-0.9.5a20250530.dist-info → compressed_tensors-0.9.5a20250602.dist-info}/METADATA +1 -1
- {compressed_tensors-0.9.5a20250530.dist-info → compressed_tensors-0.9.5a20250602.dist-info}/RECORD +9 -9
- {compressed_tensors-0.9.5a20250530.dist-info → compressed_tensors-0.9.5a20250602.dist-info}/WHEEL +0 -0
- {compressed_tensors-0.9.5a20250530.dist-info → compressed_tensors-0.9.5a20250602.dist-info}/licenses/LICENSE +0 -0
- {compressed_tensors-0.9.5a20250530.dist-info → compressed_tensors-0.9.5a20250602.dist-info}/top_level.txt +0 -0
@@ -227,31 +227,42 @@ def _process_quantization(
|
|
227
227
|
perm = torch.argsort(g_idx)
|
228
228
|
x = safe_permute(x, perm, dim=1)
|
229
229
|
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
if do_quantize:
|
239
|
-
output[:, start:end] = _quantize(
|
240
|
-
x=x[:, start:end],
|
241
|
-
scale=sc,
|
242
|
-
zero_point=zp,
|
243
|
-
q_min=q_min,
|
244
|
-
q_max=q_max,
|
245
|
-
args=args,
|
246
|
-
dtype=dtype,
|
247
|
-
global_scale=global_scale,
|
248
|
-
)
|
230
|
+
x = torch.reshape(
|
231
|
+
x,
|
232
|
+
(
|
233
|
+
x.shape[0],
|
234
|
+
ceil(x.shape[1] / group_size),
|
235
|
+
group_size,
|
236
|
+
),
|
237
|
+
)
|
249
238
|
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
)
|
239
|
+
if do_quantize:
|
240
|
+
output = _quantize(
|
241
|
+
x=x,
|
242
|
+
scale=scale.unsqueeze(-1),
|
243
|
+
zero_point=zero_point.unsqueeze(-1) if zero_point is not None else None,
|
244
|
+
dtype=dtype,
|
245
|
+
global_scale=global_scale,
|
246
|
+
q_min=q_min,
|
247
|
+
q_max=q_max,
|
248
|
+
args=args,
|
249
|
+
)
|
250
|
+
|
251
|
+
if do_dequantize:
|
252
|
+
input = output if do_quantize else x
|
253
|
+
output = _dequantize(
|
254
|
+
x_q=input,
|
255
|
+
scale=scale.unsqueeze(-1),
|
256
|
+
zero_point=zero_point.unsqueeze(-1) if zero_point is not None else None,
|
257
|
+
global_scale=global_scale,
|
258
|
+
)
|
259
|
+
|
260
|
+
output = torch.reshape(
|
261
|
+
output,
|
262
|
+
(output.shape[0], output.shape[1] * output.shape[2]),
|
263
|
+
)
|
264
|
+
|
265
|
+
output = output.to(output_dtype)
|
255
266
|
|
256
267
|
if not is_column_order:
|
257
268
|
output = safe_permute(output, torch.argsort(perm), dim=1)
|
@@ -81,7 +81,7 @@ def calculate_qparams(
|
|
81
81
|
currently only applied/supported for Fp4
|
82
82
|
|
83
83
|
:return: tuple of the calculated scale(s) and zero point(s). For FP4, the calculated
|
84
|
-
scale
|
84
|
+
scale is of dtype FP8
|
85
85
|
"""
|
86
86
|
# based on the implementations for consuming quantized values,
|
87
87
|
# 0.0 must always be representable within the quantized range
|
@@ -490,7 +490,6 @@ def generate_global_scale(
|
|
490
490
|
attempts to use the entire FP8 dtype range while mapping a per-group max
|
491
491
|
to the FP4 max.
|
492
492
|
"""
|
493
|
-
scale_dtype = scale_data.dtype
|
494
493
|
tensor_amax = torch.abs(input_tensor.data).max().to(dtype)
|
495
494
|
global_scale = scale_data.max * quant_data.max / tensor_amax
|
496
495
|
return global_scale.to(dtype)
|
compressed_tensors/version.py
CHANGED
{compressed_tensors-0.9.5a20250530.dist-info → compressed_tensors-0.9.5a20250602.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: compressed-tensors
|
3
|
-
Version: 0.9.5a20250530
|
3
|
+
Version: 0.9.5a20250602
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
{compressed_tensors-0.9.5a20250530.dist-info → compressed_tensors-0.9.5a20250602.dist-info}/RECORD
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
|
2
2
|
compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
|
3
|
-
compressed_tensors/version.py,sha256=
|
3
|
+
compressed_tensors/version.py,sha256=HRt5ki0v2pIyEYRWy0pzJwoSKwmkkF4pTx8ox29EyvU,521
|
4
4
|
compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
|
5
5
|
compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
|
6
6
|
compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
|
@@ -26,17 +26,17 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
|
|
26
26
|
compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
|
27
27
|
compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn43Mn4CoV-q4JlTJ_o,4052
|
28
28
|
compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
|
29
|
-
compressed_tensors/quantization/quant_args.py,sha256=
|
29
|
+
compressed_tensors/quantization/quant_args.py,sha256=2OpiiSdl4KidzNmjx7J8UlQoAYmt5k5GdXv_73ELw0A,11823
|
30
30
|
compressed_tensors/quantization/quant_config.py,sha256=aFi6PKqmEX9iP9O8GVn3mEUjRDEwk_hOCbmmiq-j9oU,10198
|
31
31
|
compressed_tensors/quantization/quant_scheme.py,sha256=IDWa1GWUbUdWCo8j78Jz6svYF5hLz89J2PVYWBBnXRc,7102
|
32
32
|
compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
|
33
33
|
compressed_tensors/quantization/lifecycle/apply.py,sha256=-OKZ-FFFfIIoeGTrho8lXx6HVWZQp3Xkn3Q-G0hU-CM,18294
|
34
34
|
compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
|
35
|
-
compressed_tensors/quantization/lifecycle/forward.py,sha256=
|
35
|
+
compressed_tensors/quantization/lifecycle/forward.py,sha256=WFwvNebxXNUlpX5p1xG80oa8W9fz4-Xd6LCH_B_nptg,14881
|
36
36
|
compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
|
37
37
|
compressed_tensors/quantization/lifecycle/initialize.py,sha256=976sZ45ywGVzH1n4pyVhG7hnUBP1wKEWoo9cHrmKHxU,12522
|
38
38
|
compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
|
39
|
-
compressed_tensors/quantization/utils/helpers.py,sha256=
|
39
|
+
compressed_tensors/quantization/utils/helpers.py,sha256=UXs7mTItMdNyGh9NcsztO7qqxG1pmtNQqaAj_bsa7m8,17553
|
40
40
|
compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
|
41
41
|
compressed_tensors/registry/registry.py,sha256=0s15BxdGgzBv8RL4kUJCYcuDOFUh_KZYvNvLEeRqWTc,11956
|
42
42
|
compressed_tensors/transform/__init__.py,sha256=oa5VdrE-GtDYYceXNSwj5X_ropoXLLukm6Aufcc9WhY,747
|
@@ -50,8 +50,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
|
|
50
50
|
compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
|
51
51
|
compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
|
52
52
|
compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
|
53
|
-
compressed_tensors-0.9.5a20250530.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
54
|
-
compressed_tensors-0.9.
|
55
|
-
compressed_tensors-0.9.5a20250530.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
56
|
-
compressed_tensors-0.9.5a20250530.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
57
|
-
compressed_tensors-0.9.5a20250530.dist-info/RECORD,,
|
53
|
+
compressed_tensors-0.9.5a20250602.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
54
|
+
compressed_tensors-0.9.5a20250602.dist-info/METADATA,sha256=U1CBsVaqy32z2gjdWS4xVTMmlIAxk9BweHUhk51l338,7004
|
55
|
+
compressed_tensors-0.9.5a20250602.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
56
|
+
compressed_tensors-0.9.5a20250602.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
57
|
+
compressed_tensors-0.9.5a20250602.dist-info/RECORD,,
|
{compressed_tensors-0.9.5a20250530.dist-info → compressed_tensors-0.9.5a20250602.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|