compressed-tensors 0.10.3a20250724__py3-none-any.whl → 0.10.3a20250731__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -61,6 +61,27 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
         "weight_global_scale",
     )
 
+    def compression_param_info(
+        self,
+        weight_shape: torch.Size,
+        quantization_args: Optional[QuantizationArgs] = None,
+    ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
+        """
+        Creates a dictionary of expected shapes and dtypes for each compression
+        parameter used by the compressor
+
+        :param weight_shape: uncompressed weight shape
+        :param quantization_args: quantization parameters for the weight
+        :return: dictionary mapping compressed parameter names to shape and dtype
+        """
+        output = {
+            "weight_packed": (
+                torch.Size((weight_shape[0], weight_shape[1] // 2)),
+                torch.uint8,
+            ),
+        }
+        return output
+
     def compress_weight(
         self,
         weight: Tensor,
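
The new compression_param_info advertises the packed layout: NVFP4 stores two 4-bit values per uint8 byte, so the packed weight keeps the row count and halves the column count. A minimal sketch of that shape contract (variable names here are illustrative, not part of the library):

import torch

# Hypothetical uncompressed weight shape; any even column count works.
weight_shape = torch.Size((4096, 4096))
packed_shape = torch.Size((weight_shape[0], weight_shape[1] // 2))
assert packed_shape == torch.Size((4096, 2048))  # two FP4 values per uint8 byte
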
@@ -112,17 +112,21 @@ def dequantize(
         if scale.shape[1] == 1:
             args = QuantizationArgs(strategy=QuantizationStrategy.CHANNEL)
         # Scale height matches input or is 1 -> group quantization across columns
-        #
+        #
         # Example 1: scale.shape[0] == 1
         # x_q: (4, 8), scale: (1, 4) -> 2 columns per group
         #
-        # Example 2: scale.shape[0] == x_q.shape[0]
+        # Example 2: scale.shape[0] == x_q.shape[0]
         # x_q: (4, 8), scale: (4, 4) -> 2 elements per group (per row)
         elif (scale.shape[0] == 1) or (scale.shape[0] == x_q.shape[0]):
             group_size = int(x_q.shape[1] / scale.shape[1])
-            args = QuantizationArgs(strategy=QuantizationStrategy.GROUP, group_size=group_size)
+            args = QuantizationArgs(
+                strategy=QuantizationStrategy.GROUP, group_size=group_size
+            )
         else:
-            args = QuantizationArgs(strategy=QuantizationStrategy.BLOCK, block_structure=scale.shape)
+            args = QuantizationArgs(
+                strategy=QuantizationStrategy.BLOCK, block_structure=scale.shape
+            )
     else:
         raise ValueError(
             f"Could not infer a quantization strategy from scale with {scale.ndim} "
@@ -253,13 +257,10 @@ def _process_quantization(
         QuantizationStrategy.GROUP,
         QuantizationStrategy.TENSOR_GROUP,
     ):
-        n_dims = x.shape
-        if len(n_dims) > 2:
-            x = x.squeeze(0)
 
         output_dtype = dtype if dtype is not None else x.dtype
         output = torch.zeros_like(x).to(output_dtype)
-        columns = output.shape[1]
+        columns = output.shape[-1]
 
         # TODO: make validation step for inputs
 
@@ -289,14 +290,12 @@ def _process_quantization(
             perm = torch.argsort(g_idx)
             x = safe_permute(x, perm, dim=1)
 
-        x = torch.reshape(
-            x,
-            (
-                x.shape[0],
-                ceil(x.shape[1] / group_size),
-                group_size,
-            ),
+        # Maintain all dimensions apart from the last dim, which is divided by the group_size
+        reshaped_dims = (
+            ceil(x.shape[-1] / group_size),
+            group_size,
         )
+        x = x.unflatten(-1, reshaped_dims)
 
         if do_quantize:
             output = _quantize(
@@ -319,19 +318,12 @@ def _process_quantization(
                 global_scale=global_scale,
             )
 
-        output = torch.reshape(
-            output,
-            (output.shape[0], output.shape[1] * output.shape[2]),
-        )
-
+        output = output.flatten(start_dim=-2)
         output = output.to(output_dtype)
 
         if not is_column_order:
             output = safe_permute(output, torch.argsort(perm), dim=1)
 
-        if len(n_dims) > 2:
-            output = output.unsqueeze(0)
-
     else:  # covers channel, token and tensor strategies
         if do_quantize:
             output = _quantize(
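
These two hunks replace the explicit squeeze(0)/reshape/unsqueeze(0) bookkeeping with unflatten and flatten, which only touch the last dimension and therefore work for any number of leading dimensions. A small round-trip sketch (shapes are illustrative):

from math import ceil

import torch

x = torch.randn(2, 4, 128)  # a 3-D input; no squeeze(0) needed anymore
group_size = 32
grouped = x.unflatten(-1, (ceil(x.shape[-1] / group_size), group_size))
assert grouped.shape == (2, 4, 4, 32)   # leading dims preserved
restored = grouped.flatten(start_dim=-2)
assert restored.shape == x.shape        # inverse of the unflatten above
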
@@ -185,27 +185,29 @@ def _initialize_scale_zero_point(
     elif quantization_args.strategy == QuantizationStrategy.BLOCK:
         # For block quantization, scale shape should match number of blocks - only for weights
         if quantization_args.block_structure is None:
-            raise ValueError("Block quantization requires block_structure to be specified")
+            raise ValueError(
+                "Block quantization requires block_structure to be specified"
+            )
         block_height, block_width = quantization_args.block_structure
         rows, cols = weight_shape[-2], weight_shape[-1]
         num_rows_blocks = math.ceil(rows / block_height)
         num_cols_blocks = math.ceil(cols / block_width)
-
+
         # Warn if dimensions don't divide evenly
         if rows % block_height != 0 or cols % block_width != 0:
             warnings.warn(
                 f"Block quantization: tensor shape {weight_shape} does not divide evenly "
                 f"by block structure {quantization_args.block_structure}. "
                 f"Some blocks will be incomplete which may affect quantization quality.",
-                UserWarning
+                UserWarning,
             )
-
+
         expected_shape = (num_rows_blocks, num_cols_blocks)
     elif quantization_args.strategy == QuantizationStrategy.BLOCK:
         warnings.warn(
             f"BLOCK quantization not supported for {base_name} activations. "
             f"Falling back to tensor-level quantization.",
-            UserWarning
+            UserWarning,
         )
         expected_shape = 1
 
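
For reference, the block-count arithmetic above rounds up, so a tensor that does not divide evenly still gets a full scale grid and triggers the warning. A standalone check (shapes chosen for illustration):

import math

rows, cols = 4096, 11000                 # illustrative weight shape
block_height, block_width = 128, 128
num_rows_blocks = math.ceil(rows / block_height)   # 32, divides evenly
num_cols_blocks = math.ceil(cols / block_width)    # 86, last block incomplete
assert (num_rows_blocks, num_cols_blocks) == (32, 86)
assert cols % block_width != 0           # this is the case that warns
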
@@ -64,8 +64,9 @@ class QuantizationScheme(BaseModel):
             raise ValueError("Cannot apply actorder to output activations")
 
         if (
-            inputs and weights
-            and weights.strategy == QuantizationStrategy.GROUP
+            inputs
+            and weights
+            and weights.strategy == QuantizationStrategy.GROUP
             and inputs.strategy == QuantizationStrategy.GROUP
             and weights.group_size != inputs.group_size
         ):
@@ -75,7 +76,7 @@ class QuantizationScheme(BaseModel):
                 "may complicate fused kernel implementations. Consider using "
                 "TENSOR_GROUP strategy for both or matching group sizes.",
                 UserWarning,
-                stacklevel=2
+                stacklevel=2,
             )
 
         return model
@@ -175,20 +175,16 @@ def compute_dynamic_scales_and_zp(
         QuantizationStrategy.TENSOR_GROUP,
         QuantizationStrategy.GROUP,
     ):
-        if len(value.shape) > 2:
-            value = value.squeeze(0)
 
-        dim = {0, 1}
-        reduce_dims = tuple(idx for idx in range(3) if idx not in dim)
+        reduce_dims = -1
         keep_dims = False
-        value = torch.reshape(
-            value,
-            (
-                value.shape[0],
-                math.ceil(value.shape[1] / args.group_size),
-                args.group_size,
-            ),
+
+        reshaped_dims = (
+            math.ceil(value.shape[-1] / args.group_size),
+            args.group_size,
         )
+        value = value.unflatten(-1, reshaped_dims)
+
     else:
         supported_strategies = (
             QuantizationStrategy.TOKEN,
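
compute_dynamic_scales_and_zp now uses the same unflatten pattern, and reduce_dims = -1 means the subsequent reduction collapses each group individually while keeping every leading dimension. A hedged sketch of the reduction geometry (the helper's actual reduction call is outside this hunk; amin/amax are assumed here for illustration):

import math

import torch

value = torch.randn(2, 8, 64)            # e.g. activations with a batch dim
group_size = 16
value = value.unflatten(-1, (math.ceil(value.shape[-1] / group_size), group_size))
min_vals = value.amin(dim=-1)            # reduce within each group of 16
max_vals = value.amax(dim=-1)
assert min_vals.shape == (2, 8, 4)       # one min/max per group, per leading dim
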
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import math
 from typing import Optional, Union
 
-import math
 import torch
 from compressed_tensors.transform import TransformArgs, TransformScheme
 from compressed_tensors.transform.factory.base import TransformBase, TransformFactory
@@ -103,7 +103,8 @@ class HadamardTransform(TransformBase):
 
         if self.args.inverse:
             weight = weight.T
-
-        return apply_transform_weight(
-            weight, value, self.args.location, self.module_type
-        ) / self._scale
+
+        return (
+            apply_transform_weight(weight, value, self.args.location, self.module_type)
+            / self._scale
+        )
@@ -18,6 +18,7 @@ from collections.abc import Generator
 from typing import Iterable, Tuple
 
 import torch
+from compressed_tensors.utils.internal import InternalModule
 
 
 _LOGGER: logging.Logger = logging.getLogger(__name__)
@@ -28,8 +29,6 @@ __all__ = [
     "match_named_parameters",
     "match_modules_set",
     "is_match",
-    "match_name",
-    "match_class",
 ]
 
 
@@ -83,13 +82,16 @@ def match_named_parameters(
     """
     unmatched_targets = set(targets)
     for module_name, module in model.named_modules():
+        if isinstance(module, InternalModule):
+            continue
+
         for param_name, param in module.named_parameters(recurse=False):
             param_fqn = f"{module_name}.{param_name}"
             for target in targets:
-                if match_name(param_fqn, target):
+                if _match_name(param_fqn, target):
                     unmatched_targets -= {target}
 
-            if not any(match_name(param_fqn, ign) for ign in ignore):
+            if not any(_match_name(param_fqn, ign) for ign in ignore):
                 yield param_fqn, module, param
 
     if warn_on_fail:
@@ -165,11 +167,14 @@ def match_modules_set(
 def is_match(name: str, module: torch.nn.Module, target: str) -> bool:
     """
     Returns true if either module name or module parent classes match against target
+    and the module is not an internal module
     """
-    return match_name(name, target) or match_class(module, target)
+    return not isinstance(module, InternalModule) and (
+        _match_name(name, target) or _match_class(module, target)
+    )
 
 
-def match_name(name: str, target: str) -> bool:
+def _match_name(name: str, target: str) -> bool:
     """
     Returns true if target string begins with "re:" and
     regex matches or if target string exactly matches name
@@ -180,7 +185,7 @@ def match_name(name: str, target: str) -> bool:
         return target == name
 
 
-def match_class(module: torch.nn.Module, target: str) -> bool:
+def _match_class(module: torch.nn.Module, target: str) -> bool:
     """
     Returns true if any torch parent class names match the target string exactly
     """
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.10.3.a20250724'
+__version__ = version = '0.10.3.a20250731'
 __version_tuple__ = version_tuple = (0, 10, 3)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.10.3a20250724
+Version: 0.10.3a20250731
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -1,6 +1,6 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=LKiXh8O_XB2unUsk0HmC-_PgpfbOswj5PZqtDHOPnRg,523
+compressed_tensors/version.py,sha256=cuOuj6FL5GE-iPKjLVFuRjlwW0_6uDC3tDxFkkHyXFg,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -9,7 +9,7 @@ compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=e-2n
 compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=KvaFBL_Q84LxRGJOV035M8OBoCkAx8kOkfphswgkKWk,745
 compressed_tensors/compressors/quantized_compressors/base.py,sha256=YGUMzbxekj_36ChgQnVZN6T8uDjXtGG1zfMIBGBLWco,10354
 compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=0ANDcuD8aXPqTYNPY6GnX9iS6eXJw6P0TzNV_rYS2l8,5369
-compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=Gw-lVzk5jrKUlM5UTCiJBmhM5gHzB9mn8r298MVUbDI,6395
+compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=tKEaYom4SdMwZWg4MDMMMLNGTLgcVT20lPzewboVpMM,7145
 compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=47W1hFTi5YHVNKEWptzztsSutwI1kxy2Troh-NW1y14,11244
 compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=Atuz-OdEgn8OCUhx7Ovd6gXdyImAI186uCR-uR0t_Nk,737
 compressed_tensors/compressors/sparse_compressors/base.py,sha256=YNZWcHjDleAlqbgRZQ6oJf44MQb_UDNvJGOqhl26uFA,8098
@@ -28,15 +28,15 @@ compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=yKTj_4lAy_pnXeTCyUADpyz2qAzJXYJU2P03NF_TP68,12835
 compressed_tensors/quantization/quant_config.py,sha256=w6sEEZGVGIF0Ub2r_cqRfZwbkBT8WzfY3ug52olmjGY,10049
-compressed_tensors/quantization/quant_scheme.py,sha256=qApRLsPxELe5S2qFv8OVyAZ5TpRL7gT35i4U3c9PAwI,8461
+compressed_tensors/quantization/quant_scheme.py,sha256=xk2LPn18tjS1PEOyf0WKvavBq3rzAVHFLB3H2mQQWnc,8473
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
 compressed_tensors/quantization/lifecycle/apply.py,sha256=wM8mVcbKvZjBo18pSXMp28i30YWwUXJPSS7_HCakH9U,17892
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
-compressed_tensors/quantization/lifecycle/forward.py,sha256=jT70Mbbu9pH10vu5ALVD7VWGoFdMEUpxmihGrf4frjM,17432
+compressed_tensors/quantization/lifecycle/forward.py,sha256=lQwibkDGroJqONhP9ATZWwaZF9suPmCZMQEagFlFc94,17329
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=3Vuj1a-Y7f_7QXagG7BAeAPnDGtbWGFJXBATg6eT-O0,10241
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=BM7bR_uNa-Ex4T-roHonWiRaxCi5sFysXyl0cFh1ZVs,10257
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
-compressed_tensors/quantization/utils/helpers.py,sha256=Je96Wai9SOizbdE5ph0nsJ86zS96lE4fkf_9q9o2tpA,17212
+compressed_tensors/quantization/utils/helpers.py,sha256=7a89X0kg6xDGplw6trOrkRQzMRPu-txY_qvEt07Vcgc,17036
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=0s15BxdGgzBv8RL4kUJCYcuDOFUh_KZYvNvLEeRqWTc,11956
 compressed_tensors/transform/__init__.py,sha256=v2wfl4CMfA6KbD7Hxx_MbRev63y_6QLDlccZq-WTtdw,907
@@ -46,7 +46,7 @@ compressed_tensors/transform/transform_config.py,sha256=A3RuLNDqBNEByQNeu40Kg7sI
 compressed_tensors/transform/transform_scheme.py,sha256=uGLC4avdbhrVqNC3-Eo0p7WzNRQK92Fpg0N9hWiuCRQ,1752
 compressed_tensors/transform/factory/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
 compressed_tensors/transform/factory/base.py,sha256=Zplf8QO-mFqGwDEhLdYL_afSu7v4nMa79oNhidRNPvY,5880
-compressed_tensors/transform/factory/hadamard.py,sha256=iJ2OyKitR2Duw0z5Jqj69GTih2C1WtHRXQCTtATaTtw,4180
+compressed_tensors/transform/factory/hadamard.py,sha256=B0BVjbF3y707MO6L2XfEoZJTQU965vU9dUPLOiUSXII,4193
 compressed_tensors/transform/factory/matrix_multiply.py,sha256=LdoV2E12HTucmUWcw7UKOpRNnL8QhOOIUnNVlpOpGiI,3925
 compressed_tensors/transform/factory/random_hadamard.py,sha256=nUhTlFa4ikSpcl4Umme71pnjMPgwYoGlwjKlU27UHZ4,1634
 compressed_tensors/transform/utils/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
@@ -56,14 +56,14 @@ compressed_tensors/transform/utils/matrix.py,sha256=FIHCUlpWVIIhdr3c6EbQec41JeiP
 compressed_tensors/utils/__init__.py,sha256=KZctuotCmX4byXhwDvSeXgp-Ny_awpziAX-WUkZfodI,853
 compressed_tensors/utils/helpers.py,sha256=Q3iRAa2XSdmmn4vSpUplnvKOmWwn4Clao9ZkPBHXtpI,12604
 compressed_tensors/utils/internal.py,sha256=7SSWgDoNFRnlfadwkoFhLW-T2jOc7Po_WzWv5h32Sa8,982
-compressed_tensors/utils/match.py,sha256=DjqTH-J9-E7ULVXPLV-HBRhdi07JhK-H90PbFK-DRAY,7017
+compressed_tensors/utils/match.py,sha256=ZVBPzrGYExq7-6RRUlU5XeCjl0ooLaNUoDO6Cgnn9cY,7220
 compressed_tensors/utils/offload.py,sha256=3XiBuWbUkBAt8v1t5i57qDcbB3VJQs_FDeayi-JzIWg,23896
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.10.3a20250724.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.10.3a20250724.dist-info/METADATA,sha256=ZH66sWeKBfvuLUe-ArnII1LYXG3UAEYUt6D6YPQ_W-M,7031
-compressed_tensors-0.10.3a20250724.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.10.3a20250724.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.10.3a20250724.dist-info/RECORD,,
+compressed_tensors-0.10.3a20250731.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.10.3a20250731.dist-info/METADATA,sha256=1NCpfVbLTf6aGJ38rJz3Lmu9DptHpuYm5vTRxIB9PB8,7031
+compressed_tensors-0.10.3a20250731.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.10.3a20250731.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.10.3a20250731.dist-info/RECORD,,