ai-edge-quantizer-nightly 0.4.0.dev20251027__py3-none-any.whl → 0.4.0.dev20251029__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +22 -25
  2. ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py +1 -1
  3. ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py +1 -2
  4. ai_edge_quantizer/algorithms/uniform_quantize/mse.py +5 -3
  5. ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py +1 -1
  6. ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +6 -11
  7. ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +18 -14
  8. ai_edge_quantizer/algorithms/uniform_quantize/octav.py +9 -5
  9. ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py +1 -2
  10. ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +39 -12
  11. ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +5 -2
  12. ai_edge_quantizer/algorithms/utils/common_utils.py +4 -7
  13. ai_edge_quantizer/default_policy.py +5 -13
  14. ai_edge_quantizer/qtyping.py +24 -3
  15. ai_edge_quantizer/quantizer_test.py +38 -2
  16. ai_edge_quantizer/recipe_manager_test.py +0 -6
  17. {ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info}/METADATA +1 -1
  18. {ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info}/RECORD +21 -21
  19. {ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info}/LICENSE +0 -0
  20. {ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info}/WHEEL +0 -0
  21. {ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info}/top_level.txt +0 -0
@@ -1165,39 +1165,36 @@ def init_tensor_min_max(
1165
1165
  A dictionary containing the min/max values for the tensor, or an empty
1166
1166
  dictionary if the tensor data is None.
1167
1167
  """
1168
- if tensor_data is None:
1168
+ weight_tensor_config = op_info.op_quant_config.weight_tensor_config
1169
+ if tensor_data is None or weight_tensor_config is None:
1169
1170
  return {}
1170
1171
  else:
1171
- weight_tensor_config = op_info.op_quant_config.weight_tensor_config
1172
- quantized_dim = None
1173
- if weight_tensor_config is not None and (
1174
- weight_tensor_config.granularity == qtyping.QuantGranularity.CHANNELWISE
1175
- ):
1172
+ # Get reduce dimension for min/max calculation based on quantization
1173
+ # granularity.
1174
+ granularity = weight_tensor_config.granularity
1175
+ if granularity == qtyping.QuantGranularity.TENSORWISE:
1176
+ reduce_dims = None
1177
+ keep_dims = True
1178
+ elif granularity == qtyping.QuantGranularity.CHANNELWISE:
1176
1179
  quantized_dim = common_utils.get_weight_quantized_dim(
1177
1180
  op_info, tensor_data, weight_tensor_config.granularity
1178
1181
  )
1179
- if (
1180
- weight_tensor_config is not None
1181
- and weight_tensor_config.granularity
1182
- == qtyping.QuantGranularity.BLOCKWISE
1183
- ):
1184
- reshaped_data, reduce_dims = (
1182
+ reduce_dims = common_utils.get_reduce_dims(
1183
+ quantized_dim, tensor_data.shape
1184
+ )
1185
+ keep_dims = True
1186
+ elif uniform_quantize_tensor.is_blockwise(granularity):
1187
+ tensor_data, reduce_dims = (
1185
1188
  uniform_quantize_tensor.reshape_data_for_blockwise(
1186
1189
  tensor_data,
1187
1190
  op_info.op_name,
1188
- weight_tensor_config.block_size,
1191
+ granularity,
1189
1192
  )
1190
1193
  )
1191
- return {
1192
- "min": np.min(reshaped_data, axis=reduce_dims, keepdims=False),
1193
- "max": np.max(reshaped_data, axis=reduce_dims, keepdims=False),
1194
- }
1195
-
1194
+ keep_dims = False
1196
1195
  else:
1197
- reduce_dims = common_utils.get_reduce_dims(
1198
- quantized_dim, tensor_data.shape
1199
- )
1200
- return {
1201
- "min": np.min(tensor_data, axis=reduce_dims, keepdims=True),
1202
- "max": np.max(tensor_data, axis=reduce_dims, keepdims=True),
1203
- }
1196
+ raise ValueError(f"Unsupported granularity: {granularity}")
1197
+ return {
1198
+ "min": np.min(tensor_data, axis=reduce_dims, keepdims=keep_dims),
1199
+ "max": np.max(tensor_data, axis=reduce_dims, keepdims=keep_dims),
1200
+ }
@@ -158,7 +158,7 @@ def get_tensor_quant_params(
158
158
  op_info, tensor_quant_config, tensor_content, tensor_qsv
159
159
  )
160
160
 
161
- if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
161
+ if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
162
162
  raise ValueError(
163
163
  "Blockwise quantization is not supported for dequantized weight"
164
164
  " recovery."
@@ -147,8 +147,7 @@ class HadamardRotationFullyConnectedTest(parameterized.TestCase):
147
147
  weight_tensor_config=_TensorQuantConfig(
148
148
  num_bits=8,
149
149
  symmetric=True,
150
- granularity=qtyping.QuantGranularity.BLOCKWISE,
151
- block_size=32,
150
+ granularity=qtyping.QuantGranularity.BLOCKWISE_32,
152
151
  ),
153
152
  ),
154
153
  )
@@ -55,7 +55,7 @@ def get_tensor_quant_params(
55
55
  ValueError: `tensor_qsv` must contain min/max values, or `tensor_content`
56
56
  must be provided so that they can be inferred.
57
57
  """
58
- if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
58
+ if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
59
59
  raise ValueError(
60
60
  "Blockwise quantization is not supported for MSE quantization."
61
61
  )
@@ -113,13 +113,15 @@ def get_tensor_quant_params(
113
113
  num_bits=tensor_quant_config.num_bits,
114
114
  symmetric=tensor_quant_config.symmetric,
115
115
  quantized_dimension=quantized_dim,
116
- block_size=tensor_quant_config.block_size,
116
+ block_size=uniform_quantize_tensor.extract_block_size_from_granularity(
117
+ tensor_quant_config.granularity
118
+ ),
117
119
  )
118
120
 
119
121
  quantized_vars = uniform_quantize_tensor.uniform_quantize(
120
122
  tensor_content,
121
123
  quant_params,
122
- tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE,
124
+ uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity),
123
125
  )
124
126
 
125
127
  return dataclasses.replace(quant_params, quantized_data=quantized_vars)
@@ -84,7 +84,7 @@ class MseQuantizeTest(parameterized.TestCase):
84
84
  tensor_quant_config=qtyping.TensorQuantizationConfig(
85
85
  num_bits=4,
86
86
  symmetric=True,
87
- granularity=qtyping.QuantGranularity.BLOCKWISE,
87
+ granularity=qtyping.QuantGranularity.BLOCKWISE_32,
88
88
  ),
89
89
  tensor_content=test_data,
90
90
  )
@@ -15,6 +15,7 @@
15
15
 
16
16
  """Performs naive min/max uniform quantization."""
17
17
 
18
+ import dataclasses
18
19
  from typing import Any, Optional
19
20
  import numpy as np
20
21
  from ai_edge_quantizer import qtyping
@@ -91,7 +92,9 @@ def get_tensor_quant_params(
91
92
  num_bits=tensor_quant_config.num_bits,
92
93
  symmetric=tensor_quant_config.symmetric,
93
94
  quantized_dimension=quantized_dim,
94
- block_size=tensor_quant_config.block_size,
95
+ block_size=uniform_quantize_tensor.extract_block_size_from_granularity(
96
+ tensor_quant_config.granularity
97
+ ),
95
98
  )
96
99
  if tensor_content is None:
97
100
  return quant_params
@@ -99,18 +102,10 @@ def get_tensor_quant_params(
99
102
  quantized_vars = uniform_quantize_tensor.uniform_quantize(
100
103
  tensor_content,
101
104
  quant_params,
102
- tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE,
105
+ uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity),
103
106
  )
104
107
  # Update with quantized values.
105
- return qtyping.UniformQuantParams(
106
- scale=scale,
107
- zero_point=zp,
108
- num_bits=tensor_quant_config.num_bits,
109
- symmetric=tensor_quant_config.symmetric,
110
- quantized_dimension=quantized_dim,
111
- quantized_data=quantized_vars,
112
- block_size=tensor_quant_config.block_size,
113
- )
108
+ return dataclasses.replace(quant_params, quantized_data=quantized_vars)
114
109
 
115
110
 
116
111
  # TODO: b/333731147 - Use named tuple to store min/max.
@@ -17,6 +17,7 @@ import os
17
17
  from typing import cast
18
18
 
19
19
  from absl.testing import parameterized
20
+ import ml_dtypes
20
21
  import numpy as np
21
22
 
22
23
  from tensorflow.python.platform import googletest
@@ -165,8 +166,7 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
165
166
  weight_tensor_config = _TensorQuantConfig(
166
167
  num_bits=4,
167
168
  symmetric=True,
168
- granularity=qtyping.QuantGranularity.BLOCKWISE,
169
- block_size=2,
169
+ granularity=qtyping.QuantGranularity.BLOCKWISE_32,
170
170
  )
171
171
  op_info = qtyping.OpInfo(
172
172
  op=fc_op,
@@ -176,28 +176,32 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
176
176
  weight_tensor_config=weight_tensor_config,
177
177
  ),
178
178
  )
179
- test_data = np.array([[-7, 7], [4, -4], [4, -4], [7, 7]])
179
+ test_data = np.random.uniform(low=-10, high=10, size=(4, 32)).astype(
180
+ np.float32
181
+ )
180
182
  quant_params = naive_min_max_quantize.get_tensor_quant_params(
181
183
  op_info=op_info,
182
184
  tensor_quant_config=weight_tensor_config,
183
185
  tensor_content=test_data,
184
186
  )
185
- scale = quant_params.scale
186
187
  zp = quant_params.zero_point
187
- expected_scale = np.array([
188
- [1],
189
- [0.5703125],
190
- [0.5703125],
191
- [1],
192
- ])
193
- expected_zp = np.zeros([4, 1])
194
- self.assertTrue(np.array_equal(zp, expected_zp))
195
- self.assertTrue(np.array_equal(scale, expected_scale))
188
+ self.assertEqual(zp.shape, (4, 1))
189
+ self.assertTrue(np.array_equal(zp, np.zeros([4, 1])))
190
+
191
+ self.assertEqual(quant_params.scale.shape, (4, 1))
192
+ expected_scales = np.max(np.abs(test_data), axis=1, keepdims=True) / 7.0
193
+ expected_scales = (
194
+ expected_scales.astype(ml_dtypes.bfloat16)
195
+ .astype(np.float16)
196
+ .astype(np.float32)
197
+ )
198
+ self.assertTrue(np.allclose(quant_params.scale, expected_scales, atol=1e-5))
199
+
196
200
  self.assertIsNotNone(quant_params.quantized_data)
197
201
  self.assertTupleEqual(
198
202
  cast(np.ndarray, quant_params.quantized_data).shape, test_data.shape
199
203
  )
200
- self.assertEqual(quant_params.block_size, 2)
204
+ self.assertEqual(quant_params.block_size, 32)
201
205
  self.assertEqual(quant_params.quantized_dimension, 1)
202
206
 
203
207
  def test_calibrate_ignores_inf_min_max(self):
@@ -131,12 +131,12 @@ def get_tensor_quant_params(
131
131
  quantized_dim = common_utils.get_weight_quantized_dim(
132
132
  op_info, tensor_content, tensor_quant_config.granularity
133
133
  )
134
- if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
134
+ if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
135
135
  reshaped_data, reduce_dims = (
136
136
  uniform_quantize_tensor.reshape_data_for_blockwise(
137
137
  tensor_content,
138
138
  op_info.op_name,
139
- tensor_quant_config.block_size,
139
+ tensor_quant_config.granularity,
140
140
  )
141
141
  )
142
142
  else:
@@ -154,7 +154,7 @@ def get_tensor_quant_params(
154
154
  # We created a new dimension in order to reduce properly for blockwise
155
155
  # quantization, so we need to reshape the clipping constants back to the
156
156
  # min/max shape for the next step.
157
- if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
157
+ if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
158
158
  clipping_constants = clipping_constants.reshape(tensor_min_max["min"].shape)
159
159
 
160
160
  zp, scale = uniform_quantize_tensor.tensor_zp_scale_from_min_max(
@@ -172,13 +172,17 @@ def get_tensor_quant_params(
172
172
  num_bits=tensor_quant_config.num_bits,
173
173
  symmetric=tensor_quant_config.symmetric,
174
174
  quantized_dimension=quantized_dim,
175
- block_size=tensor_quant_config.block_size,
175
+ block_size=uniform_quantize_tensor.extract_block_size_from_granularity(
176
+ tensor_quant_config.granularity
177
+ ),
176
178
  )
177
179
 
178
180
  quantized_vars = uniform_quantize_tensor.uniform_quantize(
179
181
  tensor_content,
180
182
  quant_params,
181
- tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE,
183
+ is_blockwise_quant=uniform_quantize_tensor.is_blockwise(
184
+ tensor_quant_config.granularity
185
+ ),
182
186
  )
183
187
 
184
188
  return dataclasses.replace(quant_params, quantized_data=quantized_vars)
@@ -196,8 +196,7 @@ class OctavQuantizeTest(parameterized.TestCase):
196
196
  tensor_config = qtyping.TensorQuantizationConfig(
197
197
  num_bits=4,
198
198
  symmetric=True,
199
- granularity=qtyping.QuantGranularity.BLOCKWISE,
200
- block_size=32,
199
+ granularity=qtyping.QuantGranularity.BLOCKWISE_32,
201
200
  )
202
201
  fc_op_info = qtyping.OpInfo(
203
202
  op=self._fc_op,
@@ -29,6 +29,11 @@ class IntType:
29
29
  signed: bool
30
30
 
31
31
 
32
+ def is_blockwise(granularity: qtyping.QuantGranularity) -> bool:
33
+ """Checks if the quantization granularity is blockwise."""
34
+ return "BLOCKWISE" in str(granularity)
35
+
36
+
32
37
  def get_quantized_range(qtype: IntType) -> tuple[float, float]:
33
38
  """Calculates range of the quantized type."""
34
39
  if qtype.signed:
@@ -40,6 +45,22 @@ def get_quantized_range(qtype: IntType) -> tuple[float, float]:
40
45
  return float(qmin), float(qmax)
41
46
 
42
47
 
48
+ def extract_block_size_from_granularity(
49
+ granularity: qtyping.QuantGranularity,
50
+ ) -> int:
51
+ """Get the block size for blockwise quantization."""
52
+ if granularity == qtyping.QuantGranularity.BLOCKWISE_32:
53
+ return 32
54
+ elif granularity == qtyping.QuantGranularity.BLOCKWISE_64:
55
+ return 64
56
+ elif granularity == qtyping.QuantGranularity.BLOCKWISE_128:
57
+ return 128
58
+ elif granularity == qtyping.QuantGranularity.BLOCKWISE_256:
59
+ return 256
60
+ else:
61
+ return 0
62
+
63
+
43
64
  def _round_and_clip(
44
65
  tensor: np.ndarray, qtype: IntType, narrow: bool
45
66
  ) -> np.ndarray:
@@ -157,14 +178,16 @@ def _get_tensor_shape_for_blockwise(
157
178
 
158
179
 
159
180
  def reshape_data_for_blockwise(
160
- tensor_data: np.ndarray, op_name: qtyping.TFLOperationName, block_size: int
181
+ tensor_data: np.ndarray,
182
+ op_name: qtyping.TFLOperationName,
183
+ granularity: qtyping.QuantGranularity,
161
184
  ) -> tuple[np.ndarray, int]:
162
185
  """Reshapes data for blockwise quantization.
163
186
 
164
187
  Args:
165
188
  tensor_data: The original tensor data.
166
189
  op_name: The name of the TFL op.
167
- block_size: The size of the block.
190
+ granularity: The quantization granularity for the tensor.
168
191
 
169
192
  Returns:
170
193
  A tuple containing the reshaped tensor data and the new reduce dimension.
@@ -172,11 +195,11 @@ def reshape_data_for_blockwise(
172
195
  quantized_dim = tfl_flatbuffer_utils.TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM[
173
196
  op_name
174
197
  ]
198
+ block_size = extract_block_size_from_granularity(granularity)
175
199
  new_shape = _get_tensor_shape_for_blockwise(
176
200
  tensor_data.shape, quantized_dim, block_size
177
201
  )
178
- reshaped_data = tensor_data.reshape(new_shape)
179
- return reshaped_data, quantized_dim + 1
202
+ return tensor_data.reshape(new_shape), quantized_dim + 1
180
203
 
181
204
 
182
205
  def _broadcast_scale_zp_for_blockwise(
@@ -233,21 +256,21 @@ def _broadcast_scale_zp_for_blockwise(
233
256
  def uniform_quantize(
234
257
  tensor_data: np.ndarray,
235
258
  quantization_params: qtyping.UniformQuantParams,
236
- is_blockwise: bool = False,
259
+ is_blockwise_quant: bool = False,
237
260
  ):
238
261
  """Uniform quantize a tensor.
239
262
 
240
263
  Args:
241
264
  tensor_data: The tensor to be quantized.
242
265
  quantization_params: The quantization parameters.
243
- is_blockwise: Whether the tensor is blockwise quantized.
266
+ is_blockwise_quant: Whether the tensor is blockwise quantized.
244
267
 
245
268
  Returns:
246
269
  The quantized tensor.
247
270
  """
248
271
  # The reshaping for blockwise quantization is unique hence we do this here
249
272
  # to avoid unexpected broadcast behavior downstream.
250
- if is_blockwise:
273
+ if is_blockwise_quant:
251
274
  quantization_params = _broadcast_scale_zp_for_blockwise(
252
275
  tensor_data, quantization_params
253
276
  )
@@ -381,10 +404,13 @@ def symmetric_quantize_bias_tensor(
381
404
  quantized_vars = uniform_quantize(bias_content, bias_quant_params)
382
405
  if check_error:
383
406
  dequantized_bias = uniform_dequantize(quantized_vars, bias_quant_params)
384
- quantization_error = np.abs(dequantized_bias - bias_content)
385
- if np.any(quantization_error > effective_output_scale):
407
+ max_quant_error = np.max(np.abs(dequantized_bias - bias_content))
408
+ error_tolerance = np.maximum(1e-6, np.max(effective_output_scale))
409
+ if max_quant_error > error_tolerance:
386
410
  raise ValueError(
387
- "Quantization error is too large for bias tensor quantization."
411
+ "Quantization error is too large for bias tensor quantization. Max"
412
+ f" quantization error is {max_quant_error}, which exceed"
413
+ f" the threshold {error_tolerance}"
388
414
  )
389
415
 
390
416
  # Save the int32 quantized bias as int64 if the input tensor is quantized to
@@ -432,6 +458,7 @@ def tensor_zp_scale_from_min_max(
432
458
  Returns:
433
459
  The zero point and scale of the tensor.
434
460
  """
461
+
435
462
  # TODO: b/332574603 - support unsigned data type.
436
463
  qtype = IntType(
437
464
  num_bits,
@@ -442,7 +469,7 @@ def tensor_zp_scale_from_min_max(
442
469
  pos_clipping_values = None if clipping_values is None else clipping_values
443
470
  neg_clipping_values = None if clipping_values is None else -clipping_values
444
471
 
445
- if granularity == qtyping.QuantGranularity.BLOCKWISE:
472
+ if is_blockwise(granularity):
446
473
  # Blockwise quantization uses float16 scale,
447
474
  # with 7 bit mantissa, so the maximum scale value is 65280 and maximum
448
475
  # representable range is [-65280 * (2 ** num_bits),
@@ -490,7 +517,7 @@ def tensor_zp_scale_from_min_max(
490
517
  zp = qmin - bound_min / scale
491
518
  zp = np.rint(zp)
492
519
 
493
- if granularity == qtyping.QuantGranularity.BLOCKWISE:
520
+ if is_blockwise(granularity):
494
521
  # Round the scale values to 7 bit mantissa.
495
522
  scale = (
496
523
  scale.astype(ml_dtypes.bfloat16).astype(np.float16).astype(np.float32)
@@ -222,7 +222,7 @@ class TensorUtilsTest(parameterized.TestCase):
222
222
  zero_point=np.array([-6]),
223
223
  symmetric=True,
224
224
  ),
225
- is_blockwise=True,
225
+ is_blockwise_quant=True,
226
226
  )
227
227
 
228
228
  @parameterized.parameters(
@@ -431,7 +431,10 @@ class TensorUtilsTest(parameterized.TestCase):
431
431
  )
432
432
  # This will result in quantized bias of 3e9, which is larger than int32 max.
433
433
  bias_tensor_data = np.array([3e7])
434
- with self.assertRaises(ValueError):
434
+ with self.assertRaisesRegex(
435
+ ValueError,
436
+ "Quantization error is too large for bias tensor quantization.",
437
+ ):
435
438
  uniform_quantize_tensor.symmetric_quantize_bias_tensor(
436
439
  bias_tensor_data,
437
440
  input_quant_config,
@@ -51,8 +51,9 @@ def check_subchannel_config(
51
51
  """Checks the op quantization config for subchannel quantization."""
52
52
  if (
53
53
  op_quant_config.weight_tensor_config is not None
54
- and op_quant_config.weight_tensor_config.granularity
55
- == qtyping.QuantGranularity.BLOCKWISE
54
+ and uniform_quantize_tensor.is_blockwise(
55
+ op_quant_config.weight_tensor_config.granularity
56
+ )
56
57
  ):
57
58
  if op_name not in _SUPPORTED_SUBCHANNEL_OPS:
58
59
  raise ValueError(f"Unsupported op for blockwise quantization: {op_name}.")
@@ -66,10 +67,6 @@ def check_subchannel_config(
66
67
  "Blockwise quantization does not support for asymmetric weight"
67
68
  " quantization."
68
69
  )
69
- if op_quant_config.weight_tensor_config.block_size <= 0:
70
- raise ValueError(
71
- "Blockwise quantization must have a non-zero block size."
72
- )
73
70
 
74
71
 
75
72
  def check_if_valid_op_config(
@@ -993,7 +990,7 @@ def get_weight_quantized_dim(
993
990
  quantized_dim = tfl_flatbuffer_utils.TFL_OP_TO_WEIGHT_QUANTIZED_DIM.get(
994
991
  op_info.op_name, None
995
992
  )
996
- elif granularity == qtyping.QuantGranularity.BLOCKWISE:
993
+ elif uniform_quantize_tensor.is_blockwise(granularity):
997
994
  quantized_dim = (
998
995
  tfl_flatbuffer_utils.TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM[
999
996
  op_info.op_name
@@ -61,9 +61,8 @@ DEFAULT_JSON_POLICY = """
61
61
  "weight_tensor_config": {
62
62
  "num_bits": 4,
63
63
  "symmetric": [true],
64
- "granularity": ["BLOCKWISE"],
65
- "dtype": "INT",
66
- "block_size": [32, 64, 96, 128, 256]
64
+ "granularity": ["BLOCKWISE_32", "BLOCKWISE_64", "BLOCKWISE_128", "BLOCKWISE_256"],
65
+ "dtype": "INT"
67
66
  },
68
67
  "explicit_dequantize": false,
69
68
  "compute_precision": "INTEGER"
@@ -320,16 +319,9 @@ def _unroll_json_config(
320
319
  "granularity": granularity,
321
320
  "dtype": json_config["weight_tensor_config"]["dtype"],
322
321
  }
323
- if "block_size" in json_config["weight_tensor_config"]:
324
- for block_size in json_config["weight_tensor_config"]["block_size"]:
325
- tensor_config["block_size"] = block_size
326
- weight_configs.append(
327
- qtyping.TensorQuantizationConfig.from_dict(tensor_config)
328
- )
329
- else:
330
- weight_configs.append(
331
- qtyping.TensorQuantizationConfig.from_dict(tensor_config)
332
- )
322
+ weight_configs.append(
323
+ qtyping.TensorQuantizationConfig.from_dict(tensor_config)
324
+ )
333
325
 
334
326
  if activation_configs:
335
327
  for activation_config in activation_configs:
@@ -112,7 +112,11 @@ class TensorDataType(str, enum.Enum):
112
112
  class QuantGranularity(str, enum.Enum):
113
113
  TENSORWISE = 'TENSORWISE'
114
114
  CHANNELWISE = 'CHANNELWISE'
115
- BLOCKWISE = 'BLOCKWISE'
115
+ # Blockwise quantization with various block sizes.
116
+ BLOCKWISE_32 = 'BLOCKWISE_32'
117
+ BLOCKWISE_64 = 'BLOCKWISE_64'
118
+ BLOCKWISE_128 = 'BLOCKWISE_128'
119
+ BLOCKWISE_256 = 'BLOCKWISE_256'
116
120
 
117
121
 
118
122
  class QuantTransformation(enum.Enum):
@@ -310,7 +314,6 @@ class TensorQuantizationConfig:
310
314
  granularity: Whether to perform per-tensor, per-channel or per-block
311
315
  quantization.
312
316
  dtype: The data type of the tensor.
313
- block_size: The block size for blockwise quantization, ignored otherwise.
314
317
  algorithm_key: The algorithm key to use for quantization.
315
318
  """
316
319
 
@@ -318,7 +321,6 @@ class TensorQuantizationConfig:
318
321
  symmetric: bool = True
319
322
  granularity: QuantGranularity = QuantGranularity.TENSORWISE
320
323
  dtype: TensorDataType = TensorDataType.INT
321
- block_size: int = 0
322
324
 
323
325
  def to_dict(self) -> dict[str, Any]:
324
326
  """Converts ActivationQuantizationConfig to dict."""
@@ -336,9 +338,28 @@ class TensorQuantizationConfig:
336
338
  def from_dict(cls, params: dict[str, Any]) -> 'TensorQuantizationConfig':
337
339
  """Converts a given dict to TensorQuantizationConfig."""
338
340
  params_copy = copy.deepcopy(params)
341
+ # Process block_size config from legacy recipe.
342
+ params_copy = _process_block_size(params_copy)
339
343
  return cls(**params_copy)
340
344
 
341
345
 
346
+ def _process_block_size(params: dict[str, Any]) -> dict[str, Any]:
347
+ """Processes block size in the params."""
348
+ block_size = params.pop('block_size', 0)
349
+ if block_size > 0:
350
+ if block_size == 32:
351
+ params['granularity'] = QuantGranularity.BLOCKWISE_32
352
+ elif block_size == 64:
353
+ params['granularity'] = QuantGranularity.BLOCKWISE_64
354
+ elif block_size == 128:
355
+ params['granularity'] = QuantGranularity.BLOCKWISE_128
356
+ elif block_size == 256:
357
+ params['granularity'] = QuantGranularity.BLOCKWISE_256
358
+ else:
359
+ raise ValueError(f'Unsupported block size: {block_size}')
360
+ return params
361
+
362
+
342
363
  @dataclasses.dataclass(frozen=True)
343
364
  class OpQuantizationConfig:
344
365
  """Configuration class to control the quantization process behavior.
@@ -309,6 +309,44 @@ class QuantizerTest(parameterized.TestCase):
309
309
  saved_recipe = json.load(json_file)
310
310
  self.assertEqual(saved_recipe, self._test_recipe)
311
311
 
312
+ def test_saved_legacy_recipe_lacks_block_size(self):
313
+ model_name = 'test_model'
314
+ legacy_recipe_path = os.path.join(
315
+ TEST_DATA_PREFIX_PATH,
316
+ 'recipes/dynamic_legacy_wi8_afp32_recipe.json',
317
+ )
318
+ self._quantizer.load_quantization_recipe(legacy_recipe_path)
319
+ result = self._quantizer.quantize()
320
+ result.save(self._tmp_save_path, model_name)
321
+ saved_recipe_path = os.path.join(
322
+ self._tmp_save_path, model_name + '_recipe.json'
323
+ )
324
+ with open(saved_recipe_path) as json_file:
325
+ saved_recipe = json.load(json_file)
326
+ with open(legacy_recipe_path) as json_file:
327
+ legacy_recipe = json.load(json_file)
328
+
329
+ self.assertNotEqual(saved_recipe, legacy_recipe)
330
+
331
+ # Verify that the default test recipe contains 'block_size'.
332
+ has_block_size = False
333
+ for config in legacy_recipe:
334
+ op_config = config.get('op_config')
335
+ if op_config:
336
+ weight_config = op_config.get('weight_tensor_config')
337
+ if weight_config and 'block_size' in weight_config:
338
+ has_block_size = True
339
+ break
340
+ self.assertTrue(has_block_size)
341
+
342
+ # Verify that the saved recipe does not have 'block_size'.
343
+ for config in saved_recipe:
344
+ op_config = config.get('op_config')
345
+ if op_config:
346
+ weight_config = op_config.get('weight_tensor_config')
347
+ if weight_config:
348
+ self.assertNotIn('block_size', weight_config)
349
+
312
350
  def test_save_no_quantize_raise_error(self):
313
351
  error_message = 'No quantized model to save.'
314
352
  with self.assertRaisesWithPredicateMatch(
@@ -535,14 +573,12 @@ class QuantizerMultiSignatureModelTest(parameterized.TestCase):
535
573
  'symmetric': False,
536
574
  'granularity': 'TENSORWISE',
537
575
  'dtype': 'INT',
538
- 'block_size': 0,
539
576
  },
540
577
  'weight_tensor_config': {
541
578
  'num_bits': 8,
542
579
  'symmetric': True,
543
580
  'granularity': 'CHANNELWISE',
544
581
  'dtype': 'INT',
545
- 'block_size': 0,
546
582
  },
547
583
  'compute_precision': 'INTEGER',
548
584
  'explicit_dequantize': False,
@@ -569,14 +569,12 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
569
569
  'symmetric': False,
570
570
  'granularity': _QuantGranularity.TENSORWISE,
571
571
  'dtype': 'INT',
572
- 'block_size': 0,
573
572
  },
574
573
  'weight_tensor_config': {
575
574
  'num_bits': 8,
576
575
  'symmetric': True,
577
576
  'granularity': _QuantGranularity.TENSORWISE,
578
577
  'dtype': 'INT',
579
- 'block_size': 0,
580
578
  },
581
579
  # WEIGHT_ONLY.
582
580
  'compute_precision': _ComputePrecision.INTEGER,
@@ -595,7 +593,6 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
595
593
  'num_bits': 8,
596
594
  'symmetric': True,
597
595
  'granularity': _QuantGranularity.TENSORWISE,
598
- 'block_size': 0,
599
596
  },
600
597
  # WEIGHT_ONLY.
601
598
  'compute_precision': _ComputePrecision.FLOAT,
@@ -614,7 +611,6 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
614
611
  'num_bits': 4,
615
612
  'symmetric': True,
616
613
  'granularity': _QuantGranularity.TENSORWISE,
617
- 'block_size': 0,
618
614
  },
619
615
  # WEIGHT_ONLY.
620
616
  'compute_precision': _ComputePrecision.FLOAT,
@@ -633,7 +629,6 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
633
629
  'num_bits': 6,
634
630
  'symmetric': True,
635
631
  'granularity': _QuantGranularity.TENSORWISE,
636
- 'block_size': 0,
637
632
  },
638
633
  # WEIGHT_ONLY.
639
634
  'compute_precision': _ComputePrecision.FLOAT,
@@ -652,7 +647,6 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
652
647
  'num_bits': 3,
653
648
  'symmetric': True,
654
649
  'granularity': _QuantGranularity.TENSORWISE,
655
- 'block_size': 0,
656
650
  },
657
651
  # WEIGHT_ONLY.
658
652
  'compute_precision': _ComputePrecision.FLOAT,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ai-edge-quantizer-nightly
3
- Version: 0.4.0.dev20251027
3
+ Version: 0.4.0.dev20251029
4
4
  Summary: A quantizer for advanced developers to quantize converted AI Edge models.
5
5
  Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
6
6
  Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -5,19 +5,19 @@ ai_edge_quantizer/algorithm_manager_api_test.py,sha256=w6bSONvXkX6bzXAGc0-7b6gND
5
5
  ai_edge_quantizer/calibrator.py,sha256=Sms7_AIHPH9G5xFaz5Ef3a5gPhxuIWQI8d2LUM8C96I,12071
6
6
  ai_edge_quantizer/calibrator_test.py,sha256=ZLzIMWB2FSFU4TOatDioYuwp_kLh8iSCefZ5_Q9FU7s,11900
7
7
  ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
8
- ai_edge_quantizer/default_policy.py,sha256=i_AcnIIElHqoJNc2jyJFEC2tYqfQ4Nvn4mQlTvormzk,11702
8
+ ai_edge_quantizer/default_policy.py,sha256=YcwwtVzoWUhjYgMtJ7b9f647740lURKteDOeJvwe17o,11384
9
9
  ai_edge_quantizer/model_modifier.py,sha256=U70JByv6CItP8tg4bdyMfX-R3UlwylAGSviZkF_FSAM,10468
10
10
  ai_edge_quantizer/model_modifier_test.py,sha256=CV4pgMEQkBJr_qbYR720TO8HBCutbEYLHptDHgdQMUE,7274
11
11
  ai_edge_quantizer/model_validator.py,sha256=Hj0_5o-Oa3dSlJ3ryVjRhvsyelHNyek1GrtG9buMczg,13153
12
12
  ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
13
13
  ai_edge_quantizer/params_generator.py,sha256=0w-sDGk84sVNkXoduon1wDqq30sGOHVgBVbdg44QVF4,20153
14
14
  ai_edge_quantizer/params_generator_test.py,sha256=RDYoRZDJfEZRtjlTAU2kZ_4t3JHOqEHxfJX9V4ETAhg,40597
15
- ai_edge_quantizer/qtyping.py,sha256=f2NRz4xqM-7gMe0QFpR4x2m5lzTJI3tmsT0cehO5Vsg,17232
15
+ ai_edge_quantizer/qtyping.py,sha256=y9KretGzUGztyLdmto2XV6U0cxrSrfLWP1UOVcwR4dY,18011
16
16
  ai_edge_quantizer/quantizer.py,sha256=teYeONdIS31IAY6ubLujCRi1t6lYAd0LkC8dRPxQdbw,18919
17
- ai_edge_quantizer/quantizer_test.py,sha256=9BVwt7oyM8IsSC7jN73nI0O-4MikBkymm_FigJnSeCM,27117
17
+ ai_edge_quantizer/quantizer_test.py,sha256=CqAT83gLWGIUacN7cAKxrefQ77-9MME4HzBKa421zdg,28446
18
18
  ai_edge_quantizer/recipe.py,sha256=MEkfQ2Sg3KAE9LAORHWcbjYNPg06EUbwc1d-VspQA2U,6461
19
19
  ai_edge_quantizer/recipe_manager.py,sha256=6l2uq8KL23KLu9OQDmPGkxrFiwHrdDB9xnn-ni8WdEM,15036
20
- ai_edge_quantizer/recipe_manager_test.py,sha256=qjgGUF-wggXnSXqZ5khmqrDMIQI5CShk52IVWTahq6s,36817
20
+ ai_edge_quantizer/recipe_manager_test.py,sha256=gYK3haUJ8-AISQvTI6tD-E-drJXQPSXPqBZdgpc5QTo,36595
21
21
  ai_edge_quantizer/recipe_test.py,sha256=QisyaTol8JRZFcGOGyee7QRCvqj5VbF4guKWdIoMUOE,6213
22
22
  ai_edge_quantizer/transformation_instruction_generator.py,sha256=O0U2aZcB8aXQgOV8r9g1rGNzDUiuI5Ta53XnxZbVffE,31576
23
23
  ai_edge_quantizer/transformation_instruction_generator_test.py,sha256=KW5-WoTTo9IqLEVnWxVC8ut8eWLi_91xfKgGqVQ9QDk,54635
@@ -28,22 +28,22 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCP
28
28
  ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
29
29
  ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=EqIHGEZ1LgUrTN7zf880RuAzEv3Qy7kgh5ivObJGHSo,22646
30
30
  ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
31
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=HF7aNccdDmCbZGZ21UxeO5UpSpQOLr3TiOEyLwWOVPQ,39888
31
+ ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=wrp4F2Wo9ammz_6VXFjXu04RMJV4_MxGfp4XyFMhZHc,39904
32
32
  ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=GGf_n3wIeg3GB_eGsmyNJ0fTcxgpeMMbugTMRONK6TQ,3553
33
- ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=BDdn_uBZakfHyzdMJPKadsOqxqyC-s6W2ZzFH99L4fE,8652
33
+ ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=VjBDxGxjITHJc7xJABqBbZt6_qhobtZAl2gnVQrYJgc,8652
34
34
  ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=sT5eX5TLZEHTtPfnSkCPDlS0sQxlTFWbCsbvOuj--yY,8889
35
35
  ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py,sha256=qxt9CPDcidVWIxp5nSWPN2hKKj1XZcsOOLBd2SYIvW0,14572
36
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py,sha256=mgv6aGIqQouxfA8_GacuGdOftvL75XBF1_h5tlCCYJQ,15468
37
- ai_edge_quantizer/algorithms/uniform_quantize/mse.py,sha256=qiIyzogATGVxjYwxzH0cZvgwPSPBJv_3y8NSumHZXTk,4561
38
- ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py,sha256=-_P4jQJ7gVo0FNSapP3sIGcnhwfjQHW1AKLfoiAlS_s,7142
39
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1sB2j1vlvvWDKyjcGvA_JLCpN2KbCmMslGCBUc4--V4,8461
40
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
41
- ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
42
- ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
43
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=0bLDAjCm5wxasGXKT3XiS4quk-zXlWK6JKb-14FQAd4,19570
44
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=0xOdoIWuEo9JlXvZdX_Gbq5lfwCwEcG7RwOxhXAFOOY,15939
36
+ ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py,sha256=1ejj5WS3GZwFk3qpsPiPS8jcmVS1-e7zRmvj2Nj8fKw,15440
37
+ ai_edge_quantizer/algorithms/uniform_quantize/mse.py,sha256=EP5yPw6khAhTo6VNTPXEE2aGKLfNnqz8COeJnTKaGWs,4641
38
+ ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py,sha256=-E1LIlxadckspltdgBWTiUzsiwbawSubndavHhWLt1g,7145
39
+ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=3HldmkAZv1EN0GeUWr574L9brknb569KB8i1iIGgcx0,8334
40
+ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=Eqa4OUqoCGywbHz-HxJ9dWRj9BKlVzJPuIhVzvrpdLM,8925
41
+ ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=-n-QZyp9y8WCy5FPSpXZXHfOA-p-RLvfSaCzAfhHiHI,7040
42
+ ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=6m2U-9JdNei0XzOORg2gt87TJdD0XHZ-z5h9c4g_TB4,9120
43
+ ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=ZU7QWZeN1KjdprJWWvfmSikz8ebhSH1aS1Cl7g1Qp0Q,20446
44
+ ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=eTrrc8AGaSf1Ytp5gsRONAZ94PHFJUTd4dGi5ZnKZjU,16038
45
45
  ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
46
- ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=4eAlGph6DDW18bUdoY0XcUoOXEr3P_3_W1ptidD8qK4,37611
46
+ ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=Q6BoDDR1flnmxLL2NZ1YrNCaL3uUbt76htW8aHO6ukE,37462
47
47
  ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
48
48
  ai_edge_quantizer/transformations/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
49
49
  ai_edge_quantizer/transformations/dequant_insert.py,sha256=sL1LHFVzBDSd9jgrzlHz38LWU0bwmVX7iBkaNcui0ts,3566
@@ -74,8 +74,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
74
74
  ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
75
75
  ai_edge_quantizer/utils/validation_utils.py,sha256=QTYyQ_HDVrFTGPIsrA240Lv8tUw1fwWp2fu9kTVISkE,6224
76
76
  ai_edge_quantizer/utils/validation_utils_test.py,sha256=lO51rGskhzpXePRdZMU87u_YO35_sDp9_eQ85CmupL4,4600
77
- ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
78
- ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info/METADATA,sha256=3tcuuP3R_QQoBgH0QbodInLmEq-GyrIQeBYOlVo4DrM,1508
79
- ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
80
- ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
81
- ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info/RECORD,,
77
+ ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
78
+ ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info/METADATA,sha256=WZuAdLSBsGAybfUkGXKpnL9dO6CbiueTPfDRvLJ0A7w,1508
79
+ ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
80
+ ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
81
+ ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info/RECORD,,