ai-edge-quantizer-nightly 0.4.0.dev20250909__py3-none-any.whl → 0.4.0.dev20250911__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -309,13 +309,28 @@ def _materialize_bias_for_conv_ops(
309
309
  bias_tensor,
310
310
  graph_info.buffers,
311
311
  )
312
- bias_quant_params = (
313
- uniform_quantize_tensor.symmetric_quantize_bias_tensor(
314
- bias_content,
315
- op_tensor_params[op_input_index].consumers[0].parameters,
316
- op_tensor_params[op_weight_index].consumers[0].parameters,
317
- )
312
+ input_consumer_params = (
313
+ op_tensor_params[op_input_index].consumers[0].parameters
314
+ )
315
+ weight_consumer_params = (
316
+ op_tensor_params[op_weight_index].consumers[0].parameters
318
317
  )
318
+ try:
319
+ # Bias quantization uses a fixed quantization scale:
320
+ # input_scale * weight_scale. To avoid hidden numerical errors, we check
321
+ # the quantization error in bias quantization.
322
+ bias_quant_params = (
323
+ uniform_quantize_tensor.symmetric_quantize_bias_tensor(
324
+ bias_content,
325
+ input_consumer_params,
326
+ weight_consumer_params,
327
+ )
328
+ )
329
+ except ValueError as e:
330
+ raise ValueError(
331
+ f"Failed to quantize bias tensor for op {op_info.op_name} with op"
332
+ f" id {op_info.subgraph_op_index}."
333
+ ) from e
319
334
  # We only quantize bias under SRQ. Setting is_constant=True for SRQ only
320
335
  # to avoid quantizing bias for DRQ and weight-only cases.
321
336
  is_constant = (
@@ -305,6 +305,7 @@ def symmetric_quantize_bias_tensor(
305
305
  bias_content: np.ndarray,
306
306
  input_tensor_quant_params: qtyping.UniformQuantParams,
307
307
  weight_tensor_quant_params: qtyping.UniformQuantParams,
308
+ check_error: bool = True,
308
309
  ) -> qtyping.UniformQuantParams:
309
310
  """Quantize bias tensor (symmetrically, i.e., zero_point = 0).
310
311
 
@@ -316,6 +317,12 @@ def symmetric_quantize_bias_tensor(
316
317
  bias_content: The bias content.
317
318
  input_tensor_quant_params: The quantization parameters of input tensor.
318
319
  weight_tensor_quant_params: The quantization parameters of weight tensor.
320
+ check_error: Whether to check if the quantization error (the difference
321
+ between the original and dequantized bias) is larger than the quantization
322
+ scale. This check is important because bias quantization parameters are
323
+ fixed (bias_scale = input_scale * weight_scale), which can lead to large
324
+ quantization errors. Raising an error when the quantization error is
325
+ larger than the scale helps to identify unexpected numerical issues.
319
326
 
320
327
  Returns:
321
328
  The quantized bias tensor.
@@ -343,6 +350,14 @@ def symmetric_quantize_bias_tensor(
343
350
  )
344
351
 
345
352
  quantized_vars = uniform_quantize(bias_content, bias_quant_params)
353
+ if check_error:
354
+ dequantized_bias = uniform_dequantize(quantized_vars, bias_quant_params)
355
+ quantization_error = np.abs(dequantized_bias - bias_content)
356
+ if np.any(quantization_error > effective_output_scale):
357
+ raise ValueError(
358
+ "Quantization error is too large for bias tensor quantization."
359
+ )
360
+
346
361
  # Save the int32 quantized bias as int64 if the input tensor is quantized to
347
362
  # 16 bits. This is to assume the matmul is using int64 accumulator (safe from
348
363
  # overflow). For accelerators with int32 accumulator, it is safe to cast int64
@@ -352,7 +352,7 @@ class TensorUtilsTest(parameterized.TestCase):
352
352
  self.assertEqual(quantized_bias.dtype, np.int64)
353
353
  self.assertSequenceEqual(
354
354
  list(quantized_bias.flatten()),
355
- list(quantized_bias.astype(np.int32).flatten()), # pytype: disable=attribute-error
355
+ list(quantized_bias.astype(np.int32).flatten()),
356
356
  )
357
357
 
358
358
  bias_quant_config = dataclasses.replace(
@@ -368,6 +368,30 @@ class TensorUtilsTest(parameterized.TestCase):
368
368
  list(bias_quant_config.quantized_data.flatten()), # pytype: disable=attribute-error
369
369
  )
370
370
 
371
+ def test_quantize_bias_tensor_raises_error_for_large_quantization_error(self):
372
+ input_quant_config = qtyping.UniformQuantParams(
373
+ scale=np.array([0.1]),
374
+ zero_point=np.array([10]),
375
+ num_bits=8,
376
+ symmetric=False,
377
+ quantized_dimension=None,
378
+ )
379
+ weight_quant_config = qtyping.UniformQuantParams(
380
+ scale=np.array([0.1]),
381
+ zero_point=np.array([-1]),
382
+ num_bits=8,
383
+ symmetric=True,
384
+ quantized_dimension=None,
385
+ )
386
+ # This will result in quantized bias of 3e9, which is larger than int32 max.
387
+ bias_tensor_data = np.array([3e7])
388
+ with self.assertRaises(ValueError):
389
+ uniform_quantize_tensor.symmetric_quantize_bias_tensor(
390
+ bias_tensor_data,
391
+ input_quant_config,
392
+ weight_quant_config,
393
+ )
394
+
371
395
  @parameterized.parameters((8, True), (16, False))
372
396
  def test_tensor_zp_scale_from_min_max(self, num_bits, symmetric):
373
397
  min_val = np.min(self._test_data, keepdims=True)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ai-edge-quantizer-nightly
3
- Version: 0.4.0.dev20250909
3
+ Version: 0.4.0.dev20250911
4
4
  Summary: A quantizer for advanced developers to quantize converted AI Edge models.
5
5
  Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
6
6
  Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -28,7 +28,7 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCP
28
28
  ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
29
29
  ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=EqIHGEZ1LgUrTN7zf880RuAzEv3Qy7kgh5ivObJGHSo,22646
30
30
  ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
31
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=TQQxkxeAngrZO6ro6RjOeJAieWHIgK4hrACtbU0-Buk,35919
31
+ ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=TUxqc3cG66H77Rz0N3ynFnKKmFySDUAExK--3-VS7a4,36487
32
32
  ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=GGf_n3wIeg3GB_eGsmyNJ0fTcxgpeMMbugTMRONK6TQ,3553
33
33
  ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=BDdn_uBZakfHyzdMJPKadsOqxqyC-s6W2ZzFH99L4fE,8652
34
34
  ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=sT5eX5TLZEHTtPfnSkCPDlS0sQxlTFWbCsbvOuj--yY,8889
@@ -38,8 +38,8 @@ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1
38
38
  ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
39
39
  ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
40
40
  ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
41
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=_WfxguP9dVsWFjUc2l8cvvx6kK_PEiUS5HU1vfC7d5c,17554
42
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=OH1etR0btp2ep7PIP4uc7LCOydrouVBeeBJzkIkMF-k,13625
41
+ ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=vsvBGEGFEEUP4kXRUh9hMpVXjsMBpfs6UDk8m4BNGTs,18375
42
+ ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=Ympigz0BGcaO5x3OozxNxrRAGiF0to6V_HXAcxNNEpI,14399
43
43
  ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
44
44
  ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=4eAlGph6DDW18bUdoY0XcUoOXEr3P_3_W1ptidD8qK4,37611
45
45
  ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
@@ -70,8 +70,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
70
70
  ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
71
71
  ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
72
72
  ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
73
- ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
74
- ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info/METADATA,sha256=-_Cqm5vEJXyhzkezt-hEOtZOZJ8kOtv8-0oUkMad7aQ,1508
75
- ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
76
- ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
77
- ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info/RECORD,,
73
+ ai_edge_quantizer_nightly-0.4.0.dev20250911.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
74
+ ai_edge_quantizer_nightly-0.4.0.dev20250911.dist-info/METADATA,sha256=xgvGhyCt3HZFAZEVfl8gcPEw1nyxFqTXMQMd7nxy0lY,1508
75
+ ai_edge_quantizer_nightly-0.4.0.dev20250911.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
76
+ ai_edge_quantizer_nightly-0.4.0.dev20250911.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
77
+ ai_edge_quantizer_nightly-0.4.0.dev20250911.dist-info/RECORD,,