compressed-tensors 0.10.3a20250728__py3-none-any.whl → 0.10.3a20250731__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -61,6 +61,27 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
61
61
  "weight_global_scale",
62
62
  )
63
63
 
64
+ def compression_param_info(
65
+ self,
66
+ weight_shape: torch.Size,
67
+ quantization_args: Optional[QuantizationArgs] = None,
68
+ ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
69
+ """
70
+ Creates a dictionary of expected shapes and dtypes for each compression
71
+ parameter used by the compressor
72
+
73
+ :param weight_shape: uncompressed weight shape
74
+ :param quantization_args: quantization parameters for the weight
75
+ :return: dictionary mapping compressed parameter names to shape and dtype
76
+ """
77
+ output = {
78
+ "weight_packed": (
79
+ torch.Size((weight_shape[0], weight_shape[1] // 2)),
80
+ torch.uint8,
81
+ ),
82
+ }
83
+ return output
84
+
64
85
  def compress_weight(
65
86
  self,
66
87
  weight: Tensor,
@@ -257,13 +257,10 @@ def _process_quantization(
257
257
  QuantizationStrategy.GROUP,
258
258
  QuantizationStrategy.TENSOR_GROUP,
259
259
  ):
260
- n_dims = x.shape
261
- if len(n_dims) > 2:
262
- x = x.squeeze(0)
263
260
 
264
261
  output_dtype = dtype if dtype is not None else x.dtype
265
262
  output = torch.zeros_like(x).to(output_dtype)
266
- columns = output.shape[1]
263
+ columns = output.shape[-1]
267
264
 
268
265
  # TODO: make validation step for inputs
269
266
 
@@ -293,14 +290,12 @@ def _process_quantization(
293
290
  perm = torch.argsort(g_idx)
294
291
  x = safe_permute(x, perm, dim=1)
295
292
 
296
- x = torch.reshape(
297
- x,
298
- (
299
- x.shape[0],
300
- ceil(x.shape[1] / group_size),
301
- group_size,
302
- ),
293
+ # Maintain all dimensions apart from the last dim, which is divided by the group_size
294
+ reshaped_dims = (
295
+ ceil(x.shape[-1] / group_size),
296
+ group_size,
303
297
  )
298
+ x = x.unflatten(-1, reshaped_dims)
304
299
 
305
300
  if do_quantize:
306
301
  output = _quantize(
@@ -323,19 +318,12 @@ def _process_quantization(
323
318
  global_scale=global_scale,
324
319
  )
325
320
 
326
- output = torch.reshape(
327
- output,
328
- (output.shape[0], output.shape[1] * output.shape[2]),
329
- )
330
-
321
+ output = output.flatten(start_dim=-2)
331
322
  output = output.to(output_dtype)
332
323
 
333
324
  if not is_column_order:
334
325
  output = safe_permute(output, torch.argsort(perm), dim=1)
335
326
 
336
- if len(n_dims) > 2:
337
- output = output.unsqueeze(0)
338
-
339
327
  else: # covers channel, token and tensor strategies
340
328
  if do_quantize:
341
329
  output = _quantize(
@@ -175,20 +175,16 @@ def compute_dynamic_scales_and_zp(
175
175
  QuantizationStrategy.TENSOR_GROUP,
176
176
  QuantizationStrategy.GROUP,
177
177
  ):
178
- if len(value.shape) > 2:
179
- value = value.squeeze(0)
180
178
 
181
- dim = {0, 1}
182
- reduce_dims = tuple(idx for idx in range(3) if idx not in dim)
179
+ reduce_dims = -1
183
180
  keep_dims = False
184
- value = torch.reshape(
185
- value,
186
- (
187
- value.shape[0],
188
- math.ceil(value.shape[1] / args.group_size),
189
- args.group_size,
190
- ),
181
+
182
+ reshaped_dims = (
183
+ math.ceil(value.shape[-1] / args.group_size),
184
+ args.group_size,
191
185
  )
186
+ value = value.unflatten(-1, reshaped_dims)
187
+
192
188
  else:
193
189
  supported_strategies = (
194
190
  QuantizationStrategy.TOKEN,
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.10.3.a20250728'
20
+ __version__ = version = '0.10.3.a20250731'
21
21
  __version_tuple__ = version_tuple = (0, 10, 3)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.3a20250728
3
+ Version: 0.10.3a20250731
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -1,6 +1,6 @@
1
1
  compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
2
2
  compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
3
- compressed_tensors/version.py,sha256=EY3NpvLIsm31BPA-e32djbQIUYdm3sP8W28lHH72d0Y,523
3
+ compressed_tensors/version.py,sha256=cuOuj6FL5GE-iPKjLVFuRjlwW0_6uDC3tDxFkkHyXFg,523
4
4
  compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
5
5
  compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
6
6
  compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -9,7 +9,7 @@ compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=e-2n
9
9
  compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=KvaFBL_Q84LxRGJOV035M8OBoCkAx8kOkfphswgkKWk,745
10
10
  compressed_tensors/compressors/quantized_compressors/base.py,sha256=YGUMzbxekj_36ChgQnVZN6T8uDjXtGG1zfMIBGBLWco,10354
11
11
  compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=0ANDcuD8aXPqTYNPY6GnX9iS6eXJw6P0TzNV_rYS2l8,5369
12
- compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=Gw-lVzk5jrKUlM5UTCiJBmhM5gHzB9mn8r298MVUbDI,6395
12
+ compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=tKEaYom4SdMwZWg4MDMMMLNGTLgcVT20lPzewboVpMM,7145
13
13
  compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=47W1hFTi5YHVNKEWptzztsSutwI1kxy2Troh-NW1y14,11244
14
14
  compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=Atuz-OdEgn8OCUhx7Ovd6gXdyImAI186uCR-uR0t_Nk,737
15
15
  compressed_tensors/compressors/sparse_compressors/base.py,sha256=YNZWcHjDleAlqbgRZQ6oJf44MQb_UDNvJGOqhl26uFA,8098
@@ -32,11 +32,11 @@ compressed_tensors/quantization/quant_scheme.py,sha256=xk2LPn18tjS1PEOyf0WKvavBq
32
32
  compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
33
33
  compressed_tensors/quantization/lifecycle/apply.py,sha256=wM8mVcbKvZjBo18pSXMp28i30YWwUXJPSS7_HCakH9U,17892
34
34
  compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
35
- compressed_tensors/quantization/lifecycle/forward.py,sha256=V98jWzb3rfV91EC6kfzAyXtmnbLjNF01Rd_EHU2bLo8,17506
35
+ compressed_tensors/quantization/lifecycle/forward.py,sha256=lQwibkDGroJqONhP9ATZWwaZF9suPmCZMQEagFlFc94,17329
36
36
  compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
37
37
  compressed_tensors/quantization/lifecycle/initialize.py,sha256=BM7bR_uNa-Ex4T-roHonWiRaxCi5sFysXyl0cFh1ZVs,10257
38
38
  compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
39
- compressed_tensors/quantization/utils/helpers.py,sha256=Je96Wai9SOizbdE5ph0nsJ86zS96lE4fkf_9q9o2tpA,17212
39
+ compressed_tensors/quantization/utils/helpers.py,sha256=7a89X0kg6xDGplw6trOrkRQzMRPu-txY_qvEt07Vcgc,17036
40
40
  compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
41
41
  compressed_tensors/registry/registry.py,sha256=0s15BxdGgzBv8RL4kUJCYcuDOFUh_KZYvNvLEeRqWTc,11956
42
42
  compressed_tensors/transform/__init__.py,sha256=v2wfl4CMfA6KbD7Hxx_MbRev63y_6QLDlccZq-WTtdw,907
@@ -62,8 +62,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
62
62
  compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
63
63
  compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
64
64
  compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
65
- compressed_tensors-0.10.3a20250728.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
66
- compressed_tensors-0.10.3a20250728.dist-info/METADATA,sha256=rQbbrFahVspKPEfY86EpebdjgoYAtSyyH7JLOPTPcrg,7031
67
- compressed_tensors-0.10.3a20250728.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
- compressed_tensors-0.10.3a20250728.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
69
- compressed_tensors-0.10.3a20250728.dist-info/RECORD,,
65
+ compressed_tensors-0.10.3a20250731.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
66
+ compressed_tensors-0.10.3a20250731.dist-info/METADATA,sha256=1NCpfVbLTf6aGJ38rJz3Lmu9DptHpuYm5vTRxIB9PB8,7031
67
+ compressed_tensors-0.10.3a20250731.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
+ compressed_tensors-0.10.3a20250731.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
69
+ compressed_tensors-0.10.3a20250731.dist-info/RECORD,,