ai-edge-quantizer-nightly 0.4.0.dev20251029__py3-none-any.whl → 0.4.0.dev20251030__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21):
  1. ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +25 -22
  2. ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py +1 -1
  3. ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py +2 -1
  4. ai_edge_quantizer/algorithms/uniform_quantize/mse.py +3 -5
  5. ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py +1 -1
  6. ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +11 -6
  7. ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +14 -18
  8. ai_edge_quantizer/algorithms/uniform_quantize/octav.py +5 -9
  9. ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py +2 -1
  10. ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +9 -33
  11. ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +1 -1
  12. ai_edge_quantizer/algorithms/utils/common_utils.py +7 -4
  13. ai_edge_quantizer/default_policy.py +13 -5
  14. ai_edge_quantizer/qtyping.py +3 -24
  15. ai_edge_quantizer/quantizer_test.py +2 -38
  16. ai_edge_quantizer/recipe_manager_test.py +6 -0
  17. {ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251030.dist-info}/METADATA +1 -1
  18. {ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251030.dist-info}/RECORD +21 -21
  19. {ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251030.dist-info}/LICENSE +0 -0
  20. {ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251030.dist-info}/WHEEL +0 -0
  21. {ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251030.dist-info}/top_level.txt +0 -0
@@ -1165,36 +1165,39 @@ def init_tensor_min_max(
1165
1165
  A dictionary containing the min/max values for the tensor, or an empty
1166
1166
  dictionary if the tensor data is None.
1167
1167
  """
1168
- weight_tensor_config = op_info.op_quant_config.weight_tensor_config
1169
- if tensor_data is None or weight_tensor_config is None:
1168
+ if tensor_data is None:
1170
1169
  return {}
1171
1170
  else:
1172
- # Get reduce dimension for min/max calculation based on quantization
1173
- # granularity.
1174
- granularity = weight_tensor_config.granularity
1175
- if granularity == qtyping.QuantGranularity.TENSORWISE:
1176
- reduce_dims = None
1177
- keep_dims = True
1178
- elif granularity == qtyping.QuantGranularity.CHANNELWISE:
1171
+ weight_tensor_config = op_info.op_quant_config.weight_tensor_config
1172
+ quantized_dim = None
1173
+ if weight_tensor_config is not None and (
1174
+ weight_tensor_config.granularity == qtyping.QuantGranularity.CHANNELWISE
1175
+ ):
1179
1176
  quantized_dim = common_utils.get_weight_quantized_dim(
1180
1177
  op_info, tensor_data, weight_tensor_config.granularity
1181
1178
  )
1182
- reduce_dims = common_utils.get_reduce_dims(
1183
- quantized_dim, tensor_data.shape
1184
- )
1185
- keep_dims = True
1186
- elif uniform_quantize_tensor.is_blockwise(granularity):
1187
- tensor_data, reduce_dims = (
1179
+ if (
1180
+ weight_tensor_config is not None
1181
+ and weight_tensor_config.granularity
1182
+ == qtyping.QuantGranularity.BLOCKWISE
1183
+ ):
1184
+ reshaped_data, reduce_dims = (
1188
1185
  uniform_quantize_tensor.reshape_data_for_blockwise(
1189
1186
  tensor_data,
1190
1187
  op_info.op_name,
1191
- granularity,
1188
+ weight_tensor_config.block_size,
1192
1189
  )
1193
1190
  )
1194
- keep_dims = False
1191
+ return {
1192
+ "min": np.min(reshaped_data, axis=reduce_dims, keepdims=False),
1193
+ "max": np.max(reshaped_data, axis=reduce_dims, keepdims=False),
1194
+ }
1195
+
1195
1196
  else:
1196
- raise ValueError(f"Unsupported granularity: {granularity}")
1197
- return {
1198
- "min": np.min(tensor_data, axis=reduce_dims, keepdims=keep_dims),
1199
- "max": np.max(tensor_data, axis=reduce_dims, keepdims=keep_dims),
1200
- }
1197
+ reduce_dims = common_utils.get_reduce_dims(
1198
+ quantized_dim, tensor_data.shape
1199
+ )
1200
+ return {
1201
+ "min": np.min(tensor_data, axis=reduce_dims, keepdims=True),
1202
+ "max": np.max(tensor_data, axis=reduce_dims, keepdims=True),
1203
+ }
@@ -158,7 +158,7 @@ def get_tensor_quant_params(
158
158
  op_info, tensor_quant_config, tensor_content, tensor_qsv
159
159
  )
160
160
 
161
- if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
161
+ if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
162
162
  raise ValueError(
163
163
  "Blockwise quantization is not supported for dequantized weight"
164
164
  " recovery."
@@ -147,7 +147,8 @@ class HadamardRotationFullyConnectedTest(parameterized.TestCase):
147
147
  weight_tensor_config=_TensorQuantConfig(
148
148
  num_bits=8,
149
149
  symmetric=True,
150
- granularity=qtyping.QuantGranularity.BLOCKWISE_32,
150
+ granularity=qtyping.QuantGranularity.BLOCKWISE,
151
+ block_size=32,
151
152
  ),
152
153
  ),
153
154
  )
@@ -55,7 +55,7 @@ def get_tensor_quant_params(
55
55
  ValueError: `tensor_qsv` must contain min/max values, or `tensor_content`
56
56
  must be provided so that they can be inferred.
57
57
  """
58
- if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
58
+ if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
59
59
  raise ValueError(
60
60
  "Blockwise quantization is not supported for MSE quantization."
61
61
  )
@@ -113,15 +113,13 @@ def get_tensor_quant_params(
113
113
  num_bits=tensor_quant_config.num_bits,
114
114
  symmetric=tensor_quant_config.symmetric,
115
115
  quantized_dimension=quantized_dim,
116
- block_size=uniform_quantize_tensor.extract_block_size_from_granularity(
117
- tensor_quant_config.granularity
118
- ),
116
+ block_size=tensor_quant_config.block_size,
119
117
  )
120
118
 
121
119
  quantized_vars = uniform_quantize_tensor.uniform_quantize(
122
120
  tensor_content,
123
121
  quant_params,
124
- uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity),
122
+ tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE,
125
123
  )
126
124
 
127
125
  return dataclasses.replace(quant_params, quantized_data=quantized_vars)
@@ -84,7 +84,7 @@ class MseQuantizeTest(parameterized.TestCase):
84
84
  tensor_quant_config=qtyping.TensorQuantizationConfig(
85
85
  num_bits=4,
86
86
  symmetric=True,
87
- granularity=qtyping.QuantGranularity.BLOCKWISE_32,
87
+ granularity=qtyping.QuantGranularity.BLOCKWISE,
88
88
  ),
89
89
  tensor_content=test_data,
90
90
  )
@@ -15,7 +15,6 @@
15
15
 
16
16
  """Performs naive min/max uniform quantization."""
17
17
 
18
- import dataclasses
19
18
  from typing import Any, Optional
20
19
  import numpy as np
21
20
  from ai_edge_quantizer import qtyping
@@ -92,9 +91,7 @@ def get_tensor_quant_params(
92
91
  num_bits=tensor_quant_config.num_bits,
93
92
  symmetric=tensor_quant_config.symmetric,
94
93
  quantized_dimension=quantized_dim,
95
- block_size=uniform_quantize_tensor.extract_block_size_from_granularity(
96
- tensor_quant_config.granularity
97
- ),
94
+ block_size=tensor_quant_config.block_size,
98
95
  )
99
96
  if tensor_content is None:
100
97
  return quant_params
@@ -102,10 +99,18 @@ def get_tensor_quant_params(
102
99
  quantized_vars = uniform_quantize_tensor.uniform_quantize(
103
100
  tensor_content,
104
101
  quant_params,
105
- uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity),
102
+ tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE,
106
103
  )
107
104
  # Update with quantized values.
108
- return dataclasses.replace(quant_params, quantized_data=quantized_vars)
105
+ return qtyping.UniformQuantParams(
106
+ scale=scale,
107
+ zero_point=zp,
108
+ num_bits=tensor_quant_config.num_bits,
109
+ symmetric=tensor_quant_config.symmetric,
110
+ quantized_dimension=quantized_dim,
111
+ quantized_data=quantized_vars,
112
+ block_size=tensor_quant_config.block_size,
113
+ )
109
114
 
110
115
 
111
116
  # TODO: b/333731147 - Use named tuple to store min/max.
@@ -17,7 +17,6 @@ import os
17
17
  from typing import cast
18
18
 
19
19
  from absl.testing import parameterized
20
- import ml_dtypes
21
20
  import numpy as np
22
21
 
23
22
  from tensorflow.python.platform import googletest
@@ -166,7 +165,8 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
166
165
  weight_tensor_config = _TensorQuantConfig(
167
166
  num_bits=4,
168
167
  symmetric=True,
169
- granularity=qtyping.QuantGranularity.BLOCKWISE_32,
168
+ granularity=qtyping.QuantGranularity.BLOCKWISE,
169
+ block_size=2,
170
170
  )
171
171
  op_info = qtyping.OpInfo(
172
172
  op=fc_op,
@@ -176,32 +176,28 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
176
176
  weight_tensor_config=weight_tensor_config,
177
177
  ),
178
178
  )
179
- test_data = np.random.uniform(low=-10, high=10, size=(4, 32)).astype(
180
- np.float32
181
- )
179
+ test_data = np.array([[-7, 7], [4, -4], [4, -4], [7, 7]])
182
180
  quant_params = naive_min_max_quantize.get_tensor_quant_params(
183
181
  op_info=op_info,
184
182
  tensor_quant_config=weight_tensor_config,
185
183
  tensor_content=test_data,
186
184
  )
185
+ scale = quant_params.scale
187
186
  zp = quant_params.zero_point
188
- self.assertEqual(zp.shape, (4, 1))
189
- self.assertTrue(np.array_equal(zp, np.zeros([4, 1])))
190
-
191
- self.assertEqual(quant_params.scale.shape, (4, 1))
192
- expected_scales = np.max(np.abs(test_data), axis=1, keepdims=True) / 7.0
193
- expected_scales = (
194
- expected_scales.astype(ml_dtypes.bfloat16)
195
- .astype(np.float16)
196
- .astype(np.float32)
197
- )
198
- self.assertTrue(np.allclose(quant_params.scale, expected_scales, atol=1e-5))
199
-
187
+ expected_scale = np.array([
188
+ [1],
189
+ [0.5703125],
190
+ [0.5703125],
191
+ [1],
192
+ ])
193
+ expected_zp = np.zeros([4, 1])
194
+ self.assertTrue(np.array_equal(zp, expected_zp))
195
+ self.assertTrue(np.array_equal(scale, expected_scale))
200
196
  self.assertIsNotNone(quant_params.quantized_data)
201
197
  self.assertTupleEqual(
202
198
  cast(np.ndarray, quant_params.quantized_data).shape, test_data.shape
203
199
  )
204
- self.assertEqual(quant_params.block_size, 32)
200
+ self.assertEqual(quant_params.block_size, 2)
205
201
  self.assertEqual(quant_params.quantized_dimension, 1)
206
202
 
207
203
  def test_calibrate_ignores_inf_min_max(self):
@@ -131,12 +131,12 @@ def get_tensor_quant_params(
131
131
  quantized_dim = common_utils.get_weight_quantized_dim(
132
132
  op_info, tensor_content, tensor_quant_config.granularity
133
133
  )
134
- if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
134
+ if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
135
135
  reshaped_data, reduce_dims = (
136
136
  uniform_quantize_tensor.reshape_data_for_blockwise(
137
137
  tensor_content,
138
138
  op_info.op_name,
139
- tensor_quant_config.granularity,
139
+ tensor_quant_config.block_size,
140
140
  )
141
141
  )
142
142
  else:
@@ -154,7 +154,7 @@ def get_tensor_quant_params(
154
154
  # We created a new dimension in order to reduce properly for blockwise
155
155
  # quantization, so we need to reshape the clipping constants back to the
156
156
  # min/max shape for the next step.
157
- if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
157
+ if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
158
158
  clipping_constants = clipping_constants.reshape(tensor_min_max["min"].shape)
159
159
 
160
160
  zp, scale = uniform_quantize_tensor.tensor_zp_scale_from_min_max(
@@ -172,17 +172,13 @@ def get_tensor_quant_params(
172
172
  num_bits=tensor_quant_config.num_bits,
173
173
  symmetric=tensor_quant_config.symmetric,
174
174
  quantized_dimension=quantized_dim,
175
- block_size=uniform_quantize_tensor.extract_block_size_from_granularity(
176
- tensor_quant_config.granularity
177
- ),
175
+ block_size=tensor_quant_config.block_size,
178
176
  )
179
177
 
180
178
  quantized_vars = uniform_quantize_tensor.uniform_quantize(
181
179
  tensor_content,
182
180
  quant_params,
183
- is_blockwise_quant=uniform_quantize_tensor.is_blockwise(
184
- tensor_quant_config.granularity
185
- ),
181
+ tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE,
186
182
  )
187
183
 
188
184
  return dataclasses.replace(quant_params, quantized_data=quantized_vars)
@@ -196,7 +196,8 @@ class OctavQuantizeTest(parameterized.TestCase):
196
196
  tensor_config = qtyping.TensorQuantizationConfig(
197
197
  num_bits=4,
198
198
  symmetric=True,
199
- granularity=qtyping.QuantGranularity.BLOCKWISE_32,
199
+ granularity=qtyping.QuantGranularity.BLOCKWISE,
200
+ block_size=32,
200
201
  )
201
202
  fc_op_info = qtyping.OpInfo(
202
203
  op=self._fc_op,
@@ -29,11 +29,6 @@ class IntType:
29
29
  signed: bool
30
30
 
31
31
 
32
- def is_blockwise(granularity: qtyping.QuantGranularity) -> bool:
33
- """Checks if the quantization granularity is blockwise."""
34
- return "BLOCKWISE" in str(granularity)
35
-
36
-
37
32
  def get_quantized_range(qtype: IntType) -> tuple[float, float]:
38
33
  """Calculates range of the quantized type."""
39
34
  if qtype.signed:
@@ -45,22 +40,6 @@ def get_quantized_range(qtype: IntType) -> tuple[float, float]:
45
40
  return float(qmin), float(qmax)
46
41
 
47
42
 
48
- def extract_block_size_from_granularity(
49
- granularity: qtyping.QuantGranularity,
50
- ) -> int:
51
- """Get the block size for blockwise quantization."""
52
- if granularity == qtyping.QuantGranularity.BLOCKWISE_32:
53
- return 32
54
- elif granularity == qtyping.QuantGranularity.BLOCKWISE_64:
55
- return 64
56
- elif granularity == qtyping.QuantGranularity.BLOCKWISE_128:
57
- return 128
58
- elif granularity == qtyping.QuantGranularity.BLOCKWISE_256:
59
- return 256
60
- else:
61
- return 0
62
-
63
-
64
43
  def _round_and_clip(
65
44
  tensor: np.ndarray, qtype: IntType, narrow: bool
66
45
  ) -> np.ndarray:
@@ -178,16 +157,14 @@ def _get_tensor_shape_for_blockwise(
178
157
 
179
158
 
180
159
  def reshape_data_for_blockwise(
181
- tensor_data: np.ndarray,
182
- op_name: qtyping.TFLOperationName,
183
- granularity: qtyping.QuantGranularity,
160
+ tensor_data: np.ndarray, op_name: qtyping.TFLOperationName, block_size: int
184
161
  ) -> tuple[np.ndarray, int]:
185
162
  """Reshapes data for blockwise quantization.
186
163
 
187
164
  Args:
188
165
  tensor_data: The original tensor data.
189
166
  op_name: The name of the TFL op.
190
- granularity: The quantization granularity for the tensor.
167
+ block_size: The size of the block.
191
168
 
192
169
  Returns:
193
170
  A tuple containing the reshaped tensor data and the new reduce dimension.
@@ -195,11 +172,11 @@ def reshape_data_for_blockwise(
195
172
  quantized_dim = tfl_flatbuffer_utils.TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM[
196
173
  op_name
197
174
  ]
198
- block_size = extract_block_size_from_granularity(granularity)
199
175
  new_shape = _get_tensor_shape_for_blockwise(
200
176
  tensor_data.shape, quantized_dim, block_size
201
177
  )
202
- return tensor_data.reshape(new_shape), quantized_dim + 1
178
+ reshaped_data = tensor_data.reshape(new_shape)
179
+ return reshaped_data, quantized_dim + 1
203
180
 
204
181
 
205
182
  def _broadcast_scale_zp_for_blockwise(
@@ -256,21 +233,21 @@ def _broadcast_scale_zp_for_blockwise(
256
233
  def uniform_quantize(
257
234
  tensor_data: np.ndarray,
258
235
  quantization_params: qtyping.UniformQuantParams,
259
- is_blockwise_quant: bool = False,
236
+ is_blockwise: bool = False,
260
237
  ):
261
238
  """Uniform quantize a tensor.
262
239
 
263
240
  Args:
264
241
  tensor_data: The tensor to be quantized.
265
242
  quantization_params: The quantization parameters.
266
- is_blockwise_quant: Whether the tensor is blockwise quantized.
243
+ is_blockwise: Whether the tensor is blockwise quantized.
267
244
 
268
245
  Returns:
269
246
  The quantized tensor.
270
247
  """
271
248
  # The reshaping for blockwise quantization is unique hence we do this here
272
249
  # to avoid unexpected broadcast behavior downstream.
273
- if is_blockwise_quant:
250
+ if is_blockwise:
274
251
  quantization_params = _broadcast_scale_zp_for_blockwise(
275
252
  tensor_data, quantization_params
276
253
  )
@@ -458,7 +435,6 @@ def tensor_zp_scale_from_min_max(
458
435
  Returns:
459
436
  The zero point and scale of the tensor.
460
437
  """
461
-
462
438
  # TODO: b/332574603 - support unsigned data type.
463
439
  qtype = IntType(
464
440
  num_bits,
@@ -469,7 +445,7 @@ def tensor_zp_scale_from_min_max(
469
445
  pos_clipping_values = None if clipping_values is None else clipping_values
470
446
  neg_clipping_values = None if clipping_values is None else -clipping_values
471
447
 
472
- if is_blockwise(granularity):
448
+ if granularity == qtyping.QuantGranularity.BLOCKWISE:
473
449
  # Blockwise quantization uses float16 scale,
474
450
  # with 7 bit mantissa, so the maximum scale value is 65280 and maximum
475
451
  # representable range is [-65280 * (2 ** num_bits),
@@ -517,7 +493,7 @@ def tensor_zp_scale_from_min_max(
517
493
  zp = qmin - bound_min / scale
518
494
  zp = np.rint(zp)
519
495
 
520
- if is_blockwise(granularity):
496
+ if granularity == qtyping.QuantGranularity.BLOCKWISE:
521
497
  # Round the scale values to 7 bit mantissa.
522
498
  scale = (
523
499
  scale.astype(ml_dtypes.bfloat16).astype(np.float16).astype(np.float32)
@@ -222,7 +222,7 @@ class TensorUtilsTest(parameterized.TestCase):
222
222
  zero_point=np.array([-6]),
223
223
  symmetric=True,
224
224
  ),
225
- is_blockwise_quant=True,
225
+ is_blockwise=True,
226
226
  )
227
227
 
228
228
  @parameterized.parameters(
@@ -51,9 +51,8 @@ def check_subchannel_config(
51
51
  """Checks the op quantization config for subchannel quantization."""
52
52
  if (
53
53
  op_quant_config.weight_tensor_config is not None
54
- and uniform_quantize_tensor.is_blockwise(
55
- op_quant_config.weight_tensor_config.granularity
56
- )
54
+ and op_quant_config.weight_tensor_config.granularity
55
+ == qtyping.QuantGranularity.BLOCKWISE
57
56
  ):
58
57
  if op_name not in _SUPPORTED_SUBCHANNEL_OPS:
59
58
  raise ValueError(f"Unsupported op for blockwise quantization: {op_name}.")
@@ -67,6 +66,10 @@ def check_subchannel_config(
67
66
  "Blockwise quantization does not support for asymmetric weight"
68
67
  " quantization."
69
68
  )
69
+ if op_quant_config.weight_tensor_config.block_size <= 0:
70
+ raise ValueError(
71
+ "Blockwise quantization must have a non-zero block size."
72
+ )
70
73
 
71
74
 
72
75
  def check_if_valid_op_config(
@@ -990,7 +993,7 @@ def get_weight_quantized_dim(
990
993
  quantized_dim = tfl_flatbuffer_utils.TFL_OP_TO_WEIGHT_QUANTIZED_DIM.get(
991
994
  op_info.op_name, None
992
995
  )
993
- elif uniform_quantize_tensor.is_blockwise(granularity):
996
+ elif granularity == qtyping.QuantGranularity.BLOCKWISE:
994
997
  quantized_dim = (
995
998
  tfl_flatbuffer_utils.TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM[
996
999
  op_info.op_name
@@ -61,8 +61,9 @@ DEFAULT_JSON_POLICY = """
61
61
  "weight_tensor_config": {
62
62
  "num_bits": 4,
63
63
  "symmetric": [true],
64
- "granularity": ["BLOCKWISE_32", "BLOCKWISE_64", "BLOCKWISE_128", "BLOCKWISE_256"],
65
- "dtype": "INT"
64
+ "granularity": ["BLOCKWISE"],
65
+ "dtype": "INT",
66
+ "block_size": [32, 64, 96, 128, 256]
66
67
  },
67
68
  "explicit_dequantize": false,
68
69
  "compute_precision": "INTEGER"
@@ -319,9 +320,16 @@ def _unroll_json_config(
319
320
  "granularity": granularity,
320
321
  "dtype": json_config["weight_tensor_config"]["dtype"],
321
322
  }
322
- weight_configs.append(
323
- qtyping.TensorQuantizationConfig.from_dict(tensor_config)
324
- )
323
+ if "block_size" in json_config["weight_tensor_config"]:
324
+ for block_size in json_config["weight_tensor_config"]["block_size"]:
325
+ tensor_config["block_size"] = block_size
326
+ weight_configs.append(
327
+ qtyping.TensorQuantizationConfig.from_dict(tensor_config)
328
+ )
329
+ else:
330
+ weight_configs.append(
331
+ qtyping.TensorQuantizationConfig.from_dict(tensor_config)
332
+ )
325
333
 
326
334
  if activation_configs:
327
335
  for activation_config in activation_configs:
@@ -112,11 +112,7 @@ class TensorDataType(str, enum.Enum):
112
112
  class QuantGranularity(str, enum.Enum):
113
113
  TENSORWISE = 'TENSORWISE'
114
114
  CHANNELWISE = 'CHANNELWISE'
115
- # Blockwise quantization with various block sizes.
116
- BLOCKWISE_32 = 'BLOCKWISE_32'
117
- BLOCKWISE_64 = 'BLOCKWISE_64'
118
- BLOCKWISE_128 = 'BLOCKWISE_128'
119
- BLOCKWISE_256 = 'BLOCKWISE_256'
115
+ BLOCKWISE = 'BLOCKWISE'
120
116
 
121
117
 
122
118
  class QuantTransformation(enum.Enum):
@@ -314,6 +310,7 @@ class TensorQuantizationConfig:
314
310
  granularity: Whether to perform per-tensor, per-channel or per-block
315
311
  quantization.
316
312
  dtype: The data type of the tensor.
313
+ block_size: The block size for blockwise quantization, ignored otherwise.
317
314
  algorithm_key: The algorithm key to use for quantization.
318
315
  """
319
316
 
@@ -321,6 +318,7 @@ class TensorQuantizationConfig:
321
318
  symmetric: bool = True
322
319
  granularity: QuantGranularity = QuantGranularity.TENSORWISE
323
320
  dtype: TensorDataType = TensorDataType.INT
321
+ block_size: int = 0
324
322
 
325
323
  def to_dict(self) -> dict[str, Any]:
326
324
  """Converts ActivationQuantizationConfig to dict."""
@@ -338,28 +336,9 @@ class TensorQuantizationConfig:
338
336
  def from_dict(cls, params: dict[str, Any]) -> 'TensorQuantizationConfig':
339
337
  """Converts a given dict to TensorQuantizationConfig."""
340
338
  params_copy = copy.deepcopy(params)
341
- # Process block_size config from legacy recipe.
342
- params_copy = _process_block_size(params_copy)
343
339
  return cls(**params_copy)
344
340
 
345
341
 
346
- def _process_block_size(params: dict[str, Any]) -> dict[str, Any]:
347
- """Processes block size in the params."""
348
- block_size = params.pop('block_size', 0)
349
- if block_size > 0:
350
- if block_size == 32:
351
- params['granularity'] = QuantGranularity.BLOCKWISE_32
352
- elif block_size == 64:
353
- params['granularity'] = QuantGranularity.BLOCKWISE_64
354
- elif block_size == 128:
355
- params['granularity'] = QuantGranularity.BLOCKWISE_128
356
- elif block_size == 256:
357
- params['granularity'] = QuantGranularity.BLOCKWISE_256
358
- else:
359
- raise ValueError(f'Unsupported block size: {block_size}')
360
- return params
361
-
362
-
363
342
  @dataclasses.dataclass(frozen=True)
364
343
  class OpQuantizationConfig:
365
344
  """Configuration class to control the quantization process behavior.
@@ -309,44 +309,6 @@ class QuantizerTest(parameterized.TestCase):
309
309
  saved_recipe = json.load(json_file)
310
310
  self.assertEqual(saved_recipe, self._test_recipe)
311
311
 
312
- def test_saved_legacy_recipe_lacks_block_size(self):
313
- model_name = 'test_model'
314
- legacy_recipe_path = os.path.join(
315
- TEST_DATA_PREFIX_PATH,
316
- 'recipes/dynamic_legacy_wi8_afp32_recipe.json',
317
- )
318
- self._quantizer.load_quantization_recipe(legacy_recipe_path)
319
- result = self._quantizer.quantize()
320
- result.save(self._tmp_save_path, model_name)
321
- saved_recipe_path = os.path.join(
322
- self._tmp_save_path, model_name + '_recipe.json'
323
- )
324
- with open(saved_recipe_path) as json_file:
325
- saved_recipe = json.load(json_file)
326
- with open(legacy_recipe_path) as json_file:
327
- legacy_recipe = json.load(json_file)
328
-
329
- self.assertNotEqual(saved_recipe, legacy_recipe)
330
-
331
- # Verify that the default test recipe contains 'block_size'.
332
- has_block_size = False
333
- for config in legacy_recipe:
334
- op_config = config.get('op_config')
335
- if op_config:
336
- weight_config = op_config.get('weight_tensor_config')
337
- if weight_config and 'block_size' in weight_config:
338
- has_block_size = True
339
- break
340
- self.assertTrue(has_block_size)
341
-
342
- # Verify that the saved recipe does not have 'block_size'.
343
- for config in saved_recipe:
344
- op_config = config.get('op_config')
345
- if op_config:
346
- weight_config = op_config.get('weight_tensor_config')
347
- if weight_config:
348
- self.assertNotIn('block_size', weight_config)
349
-
350
312
  def test_save_no_quantize_raise_error(self):
351
313
  error_message = 'No quantized model to save.'
352
314
  with self.assertRaisesWithPredicateMatch(
@@ -573,12 +535,14 @@ class QuantizerMultiSignatureModelTest(parameterized.TestCase):
573
535
  'symmetric': False,
574
536
  'granularity': 'TENSORWISE',
575
537
  'dtype': 'INT',
538
+ 'block_size': 0,
576
539
  },
577
540
  'weight_tensor_config': {
578
541
  'num_bits': 8,
579
542
  'symmetric': True,
580
543
  'granularity': 'CHANNELWISE',
581
544
  'dtype': 'INT',
545
+ 'block_size': 0,
582
546
  },
583
547
  'compute_precision': 'INTEGER',
584
548
  'explicit_dequantize': False,
@@ -569,12 +569,14 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
569
569
  'symmetric': False,
570
570
  'granularity': _QuantGranularity.TENSORWISE,
571
571
  'dtype': 'INT',
572
+ 'block_size': 0,
572
573
  },
573
574
  'weight_tensor_config': {
574
575
  'num_bits': 8,
575
576
  'symmetric': True,
576
577
  'granularity': _QuantGranularity.TENSORWISE,
577
578
  'dtype': 'INT',
579
+ 'block_size': 0,
578
580
  },
579
581
  # WEIGHT_ONLY.
580
582
  'compute_precision': _ComputePrecision.INTEGER,
@@ -593,6 +595,7 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
593
595
  'num_bits': 8,
594
596
  'symmetric': True,
595
597
  'granularity': _QuantGranularity.TENSORWISE,
598
+ 'block_size': 0,
596
599
  },
597
600
  # WEIGHT_ONLY.
598
601
  'compute_precision': _ComputePrecision.FLOAT,
@@ -611,6 +614,7 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
611
614
  'num_bits': 4,
612
615
  'symmetric': True,
613
616
  'granularity': _QuantGranularity.TENSORWISE,
617
+ 'block_size': 0,
614
618
  },
615
619
  # WEIGHT_ONLY.
616
620
  'compute_precision': _ComputePrecision.FLOAT,
@@ -629,6 +633,7 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
629
633
  'num_bits': 6,
630
634
  'symmetric': True,
631
635
  'granularity': _QuantGranularity.TENSORWISE,
636
+ 'block_size': 0,
632
637
  },
633
638
  # WEIGHT_ONLY.
634
639
  'compute_precision': _ComputePrecision.FLOAT,
@@ -647,6 +652,7 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
647
652
  'num_bits': 3,
648
653
  'symmetric': True,
649
654
  'granularity': _QuantGranularity.TENSORWISE,
655
+ 'block_size': 0,
650
656
  },
651
657
  # WEIGHT_ONLY.
652
658
  'compute_precision': _ComputePrecision.FLOAT,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ai-edge-quantizer-nightly
3
- Version: 0.4.0.dev20251029
3
+ Version: 0.4.0.dev20251030
4
4
  Summary: A quantizer for advanced developers to quantize converted AI Edge models.
5
5
  Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
6
6
  Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -5,19 +5,19 @@ ai_edge_quantizer/algorithm_manager_api_test.py,sha256=w6bSONvXkX6bzXAGc0-7b6gND
5
5
  ai_edge_quantizer/calibrator.py,sha256=Sms7_AIHPH9G5xFaz5Ef3a5gPhxuIWQI8d2LUM8C96I,12071
6
6
  ai_edge_quantizer/calibrator_test.py,sha256=ZLzIMWB2FSFU4TOatDioYuwp_kLh8iSCefZ5_Q9FU7s,11900
7
7
  ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
8
- ai_edge_quantizer/default_policy.py,sha256=YcwwtVzoWUhjYgMtJ7b9f647740lURKteDOeJvwe17o,11384
8
+ ai_edge_quantizer/default_policy.py,sha256=i_AcnIIElHqoJNc2jyJFEC2tYqfQ4Nvn4mQlTvormzk,11702
9
9
  ai_edge_quantizer/model_modifier.py,sha256=U70JByv6CItP8tg4bdyMfX-R3UlwylAGSviZkF_FSAM,10468
10
10
  ai_edge_quantizer/model_modifier_test.py,sha256=CV4pgMEQkBJr_qbYR720TO8HBCutbEYLHptDHgdQMUE,7274
11
11
  ai_edge_quantizer/model_validator.py,sha256=Hj0_5o-Oa3dSlJ3ryVjRhvsyelHNyek1GrtG9buMczg,13153
12
12
  ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
13
13
  ai_edge_quantizer/params_generator.py,sha256=0w-sDGk84sVNkXoduon1wDqq30sGOHVgBVbdg44QVF4,20153
14
14
  ai_edge_quantizer/params_generator_test.py,sha256=RDYoRZDJfEZRtjlTAU2kZ_4t3JHOqEHxfJX9V4ETAhg,40597
15
- ai_edge_quantizer/qtyping.py,sha256=y9KretGzUGztyLdmto2XV6U0cxrSrfLWP1UOVcwR4dY,18011
15
+ ai_edge_quantizer/qtyping.py,sha256=f2NRz4xqM-7gMe0QFpR4x2m5lzTJI3tmsT0cehO5Vsg,17232
16
16
  ai_edge_quantizer/quantizer.py,sha256=teYeONdIS31IAY6ubLujCRi1t6lYAd0LkC8dRPxQdbw,18919
17
- ai_edge_quantizer/quantizer_test.py,sha256=CqAT83gLWGIUacN7cAKxrefQ77-9MME4HzBKa421zdg,28446
17
+ ai_edge_quantizer/quantizer_test.py,sha256=9BVwt7oyM8IsSC7jN73nI0O-4MikBkymm_FigJnSeCM,27117
18
18
  ai_edge_quantizer/recipe.py,sha256=MEkfQ2Sg3KAE9LAORHWcbjYNPg06EUbwc1d-VspQA2U,6461
19
19
  ai_edge_quantizer/recipe_manager.py,sha256=6l2uq8KL23KLu9OQDmPGkxrFiwHrdDB9xnn-ni8WdEM,15036
20
- ai_edge_quantizer/recipe_manager_test.py,sha256=gYK3haUJ8-AISQvTI6tD-E-drJXQPSXPqBZdgpc5QTo,36595
20
+ ai_edge_quantizer/recipe_manager_test.py,sha256=qjgGUF-wggXnSXqZ5khmqrDMIQI5CShk52IVWTahq6s,36817
21
21
  ai_edge_quantizer/recipe_test.py,sha256=QisyaTol8JRZFcGOGyee7QRCvqj5VbF4guKWdIoMUOE,6213
22
22
  ai_edge_quantizer/transformation_instruction_generator.py,sha256=O0U2aZcB8aXQgOV8r9g1rGNzDUiuI5Ta53XnxZbVffE,31576
23
23
  ai_edge_quantizer/transformation_instruction_generator_test.py,sha256=KW5-WoTTo9IqLEVnWxVC8ut8eWLi_91xfKgGqVQ9QDk,54635
@@ -28,22 +28,22 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCP
28
28
  ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
29
29
  ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=EqIHGEZ1LgUrTN7zf880RuAzEv3Qy7kgh5ivObJGHSo,22646
30
30
  ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
31
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=wrp4F2Wo9ammz_6VXFjXu04RMJV4_MxGfp4XyFMhZHc,39904
31
+ ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=HF7aNccdDmCbZGZ21UxeO5UpSpQOLr3TiOEyLwWOVPQ,39888
32
32
  ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=GGf_n3wIeg3GB_eGsmyNJ0fTcxgpeMMbugTMRONK6TQ,3553
33
- ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=VjBDxGxjITHJc7xJABqBbZt6_qhobtZAl2gnVQrYJgc,8652
33
+ ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=BDdn_uBZakfHyzdMJPKadsOqxqyC-s6W2ZzFH99L4fE,8652
34
34
  ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=sT5eX5TLZEHTtPfnSkCPDlS0sQxlTFWbCsbvOuj--yY,8889
35
35
  ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py,sha256=qxt9CPDcidVWIxp5nSWPN2hKKj1XZcsOOLBd2SYIvW0,14572
36
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py,sha256=1ejj5WS3GZwFk3qpsPiPS8jcmVS1-e7zRmvj2Nj8fKw,15440
37
- ai_edge_quantizer/algorithms/uniform_quantize/mse.py,sha256=EP5yPw6khAhTo6VNTPXEE2aGKLfNnqz8COeJnTKaGWs,4641
38
- ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py,sha256=-E1LIlxadckspltdgBWTiUzsiwbawSubndavHhWLt1g,7145
39
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=3HldmkAZv1EN0GeUWr574L9brknb569KB8i1iIGgcx0,8334
40
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=Eqa4OUqoCGywbHz-HxJ9dWRj9BKlVzJPuIhVzvrpdLM,8925
41
- ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=-n-QZyp9y8WCy5FPSpXZXHfOA-p-RLvfSaCzAfhHiHI,7040
42
- ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=6m2U-9JdNei0XzOORg2gt87TJdD0XHZ-z5h9c4g_TB4,9120
43
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=ZU7QWZeN1KjdprJWWvfmSikz8ebhSH1aS1Cl7g1Qp0Q,20446
44
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=eTrrc8AGaSf1Ytp5gsRONAZ94PHFJUTd4dGi5ZnKZjU,16038
36
+ ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py,sha256=mgv6aGIqQouxfA8_GacuGdOftvL75XBF1_h5tlCCYJQ,15468
37
+ ai_edge_quantizer/algorithms/uniform_quantize/mse.py,sha256=qiIyzogATGVxjYwxzH0cZvgwPSPBJv_3y8NSumHZXTk,4561
38
+ ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py,sha256=-_P4jQJ7gVo0FNSapP3sIGcnhwfjQHW1AKLfoiAlS_s,7142
39
+ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1sB2j1vlvvWDKyjcGvA_JLCpN2KbCmMslGCBUc4--V4,8461
40
+ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
41
+ ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
42
+ ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
43
+ ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=frGVia4J2cWxQZzo2bT8MxWnEF5zGLIHfn4nnrWt_no,19746
44
+ ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=NjqvNYZBZYhaZFaWD2dmCNTQ92MmznqV-_kCCJ6UB-0,16032
45
45
  ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
46
- ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=Q6BoDDR1flnmxLL2NZ1YrNCaL3uUbt76htW8aHO6ukE,37462
46
+ ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=4eAlGph6DDW18bUdoY0XcUoOXEr3P_3_W1ptidD8qK4,37611
47
47
  ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
48
48
  ai_edge_quantizer/transformations/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
49
49
  ai_edge_quantizer/transformations/dequant_insert.py,sha256=sL1LHFVzBDSd9jgrzlHz38LWU0bwmVX7iBkaNcui0ts,3566
@@ -74,8 +74,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
74
74
  ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
75
75
  ai_edge_quantizer/utils/validation_utils.py,sha256=QTYyQ_HDVrFTGPIsrA240Lv8tUw1fwWp2fu9kTVISkE,6224
76
76
  ai_edge_quantizer/utils/validation_utils_test.py,sha256=lO51rGskhzpXePRdZMU87u_YO35_sDp9_eQ85CmupL4,4600
77
- ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
78
- ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info/METADATA,sha256=WZuAdLSBsGAybfUkGXKpnL9dO6CbiueTPfDRvLJ0A7w,1508
79
- ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
80
- ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
81
- ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info/RECORD,,
77
+ ai_edge_quantizer_nightly-0.4.0.dev20251030.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
78
+ ai_edge_quantizer_nightly-0.4.0.dev20251030.dist-info/METADATA,sha256=kEyzUto7tG3yMLuozS764ypS7pHLWGJbsqVFiZkNsWA,1508
79
+ ai_edge_quantizer_nightly-0.4.0.dev20251030.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
80
+ ai_edge_quantizer_nightly-0.4.0.dev20251030.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
81
+ ai_edge_quantizer_nightly-0.4.0.dev20251030.dist-info/RECORD,,