compressed-tensors 0.9.5a20250528__py3-none-any.whl → 0.9.5a20250602__py3-none-any.whl

This diff shows the differences in content between two publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -227,31 +227,42 @@ def _process_quantization(
227
227
  perm = torch.argsort(g_idx)
228
228
  x = safe_permute(x, perm, dim=1)
229
229
 
230
- # TODO: experiment with vectorizing for loop for performance
231
- end = 0
232
- for index, group_count in enumerate(group_sizes):
233
- sc = scale[:, index].view(-1, 1)
234
- zp = zero_point[:, index].view(-1, 1) if zero_point is not None else None
235
-
236
- start = end
237
- end = start + group_count
238
- if do_quantize:
239
- output[:, start:end] = _quantize(
240
- x=x[:, start:end],
241
- scale=sc,
242
- zero_point=zp,
243
- q_min=q_min,
244
- q_max=q_max,
245
- args=args,
246
- dtype=dtype,
247
- global_scale=global_scale,
248
- )
230
+ x = torch.reshape(
231
+ x,
232
+ (
233
+ x.shape[0],
234
+ ceil(x.shape[1] / group_size),
235
+ group_size,
236
+ ),
237
+ )
249
238
 
250
- if do_dequantize:
251
- input = output[:, start:end] if do_quantize else x[:, start:end]
252
- output[:, start:end] = _dequantize(
253
- x_q=input, scale=sc, zero_point=zp, global_scale=global_scale
254
- )
239
+ if do_quantize:
240
+ output = _quantize(
241
+ x=x,
242
+ scale=scale.unsqueeze(-1),
243
+ zero_point=zero_point.unsqueeze(-1) if zero_point is not None else None,
244
+ dtype=dtype,
245
+ global_scale=global_scale,
246
+ q_min=q_min,
247
+ q_max=q_max,
248
+ args=args,
249
+ )
250
+
251
+ if do_dequantize:
252
+ input = output if do_quantize else x
253
+ output = _dequantize(
254
+ x_q=input,
255
+ scale=scale.unsqueeze(-1),
256
+ zero_point=zero_point.unsqueeze(-1) if zero_point is not None else None,
257
+ global_scale=global_scale,
258
+ )
259
+
260
+ output = torch.reshape(
261
+ output,
262
+ (output.shape[0], output.shape[1] * output.shape[2]),
263
+ )
264
+
265
+ output = output.to(output_dtype)
255
266
 
256
267
  if not is_column_order:
257
268
  output = safe_permute(output, torch.argsort(perm), dim=1)
@@ -53,6 +53,7 @@ class FP4_E2M1_DATA(FloatArgs):
53
53
  min = -6.0
54
54
 
55
55
  @staticmethod
56
+ @torch.compile
56
57
  def cast_to_fp4(x):
57
58
  sign = torch.sign(x)
58
59
  x = torch.abs(x)
@@ -81,7 +81,7 @@ def calculate_qparams(
81
81
  currently only applied/supported for Fp4
82
82
 
83
83
  :return: tuple of the calculated scale(s) and zero point(s). For FP4, the calculated
84
- scale if of dtype FP8
84
+ scale is of dtype FP8
85
85
  """
86
86
  # based on the implementations for consuming quantized values,
87
87
  # 0.0 must always be representable within the quantized range
@@ -490,7 +490,6 @@ def generate_global_scale(
490
490
  attempts to use the entire FP8 dtype range while mapping a per-group max
491
491
  to the FP4 max.
492
492
  """
493
- scale_dtype = scale_data.dtype
494
493
  tensor_amax = torch.abs(input_tensor.data).max().to(dtype)
495
494
  global_scale = scale_data.max * quant_data.max / tensor_amax
496
495
  return global_scale.to(dtype)
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.9.5.a20250528'
20
+ __version__ = version = '0.9.5.a20250602'
21
21
  __version_tuple__ = version_tuple = (0, 9, 5)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.9.5a20250528
3
+ Version: 0.9.5a20250602
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -1,6 +1,6 @@
1
1
  compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
2
2
  compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
3
- compressed_tensors/version.py,sha256=iHdCbvf5_sP-ylnF-60aPKldM3BLsLc1pARRzA74l60,521
3
+ compressed_tensors/version.py,sha256=HRt5ki0v2pIyEYRWy0pzJwoSKwmkkF4pTx8ox29EyvU,521
4
4
  compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
5
5
  compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
6
6
  compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -26,17 +26,17 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
26
26
  compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
27
27
  compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn43Mn4CoV-q4JlTJ_o,4052
28
28
  compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
29
- compressed_tensors/quantization/quant_args.py,sha256=huROC8fbY899EYa2MnEmujvcBeHYLpn-e8ZEViEFASo,11804
29
+ compressed_tensors/quantization/quant_args.py,sha256=2OpiiSdl4KidzNmjx7J8UlQoAYmt5k5GdXv_73ELw0A,11823
30
30
  compressed_tensors/quantization/quant_config.py,sha256=aFi6PKqmEX9iP9O8GVn3mEUjRDEwk_hOCbmmiq-j9oU,10198
31
31
  compressed_tensors/quantization/quant_scheme.py,sha256=IDWa1GWUbUdWCo8j78Jz6svYF5hLz89J2PVYWBBnXRc,7102
32
32
  compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
33
33
  compressed_tensors/quantization/lifecycle/apply.py,sha256=-OKZ-FFFfIIoeGTrho8lXx6HVWZQp3Xkn3Q-G0hU-CM,18294
34
34
  compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
35
- compressed_tensors/quantization/lifecycle/forward.py,sha256=65USJEtsp_n8X36L5y4g4ftMnhrQyRWbwKJ8RZMMiBo,14797
35
+ compressed_tensors/quantization/lifecycle/forward.py,sha256=WFwvNebxXNUlpX5p1xG80oa8W9fz4-Xd6LCH_B_nptg,14881
36
36
  compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
37
37
  compressed_tensors/quantization/lifecycle/initialize.py,sha256=976sZ45ywGVzH1n4pyVhG7hnUBP1wKEWoo9cHrmKHxU,12522
38
38
  compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
39
- compressed_tensors/quantization/utils/helpers.py,sha256=I-bJcMdBFXjIUQEpnxMMN_FfQyXjojpe5w7ZIKSZ5UU,17588
39
+ compressed_tensors/quantization/utils/helpers.py,sha256=UXs7mTItMdNyGh9NcsztO7qqxG1pmtNQqaAj_bsa7m8,17553
40
40
  compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
41
41
  compressed_tensors/registry/registry.py,sha256=0s15BxdGgzBv8RL4kUJCYcuDOFUh_KZYvNvLEeRqWTc,11956
42
42
  compressed_tensors/transform/__init__.py,sha256=oa5VdrE-GtDYYceXNSwj5X_ropoXLLukm6Aufcc9WhY,747
@@ -50,8 +50,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
50
50
  compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
51
51
  compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
52
52
  compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
53
- compressed_tensors-0.9.5a20250528.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
54
- compressed_tensors-0.9.5a20250528.dist-info/METADATA,sha256=diiEZExV1kI7i_tWkgGp3B1UPGtoNtTABx2BJZcHk8I,7004
55
- compressed_tensors-0.9.5a20250528.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
56
- compressed_tensors-0.9.5a20250528.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
57
- compressed_tensors-0.9.5a20250528.dist-info/RECORD,,
53
+ compressed_tensors-0.9.5a20250602.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
54
+ compressed_tensors-0.9.5a20250602.dist-info/METADATA,sha256=U1CBsVaqy32z2gjdWS4xVTMmlIAxk9BweHUhk51l338,7004
55
+ compressed_tensors-0.9.5a20250602.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
56
+ compressed_tensors-0.9.5a20250602.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
57
+ compressed_tensors-0.9.5a20250602.dist-info/RECORD,,