ai-edge-quantizer-nightly 0.4.0.dev20250908__py3-none-any.whl → 0.4.0.dev20250910__py3-none-any.whl

@@ -309,13 +309,28 @@ def _materialize_bias_for_conv_ops(
         bias_tensor,
         graph_info.buffers,
     )
-    bias_quant_params = (
-        uniform_quantize_tensor.symmetric_quantize_bias_tensor(
-            bias_content,
-            op_tensor_params[op_input_index].consumers[0].parameters,
-            op_tensor_params[op_weight_index].consumers[0].parameters,
-        )
+    input_consumer_params = (
+        op_tensor_params[op_input_index].consumers[0].parameters
+    )
+    weight_consumer_params = (
+        op_tensor_params[op_weight_index].consumers[0].parameters
     )
+    try:
+      # Bias quantization uses a fixed quantization scale:
+      # input_scale * weight_scale. To avoid hidden numerics errors, we check
+      # the quantization error in bias quantization.
+      bias_quant_params = (
+          uniform_quantize_tensor.symmetric_quantize_bias_tensor(
+              bias_content,
+              input_consumer_params,
+              weight_consumer_params,
+          )
+      )
+    except ValueError as e:
+      raise ValueError(
+          f"Failed to quantize bias tensor for op {op_info.op_name} with op"
+          f" id {op_info.subgraph_op_index}."
+      ) from e
     # We only quantize bias under SRQ. Setting is_constant=True for SRQ only
     # to avoid quantizing bias for DRQ and weight-only cases.
     is_constant = (
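Why the bias scale is fixed: in a statically-quantized matmul, the input is represented as input_scale * (q_x - zero_point) and the symmetric weights as weight_scale * q_w, so every product accumulated by the integer kernel is in units of input_scale * weight_scale. A bias added inside the accumulator must therefore be pre-quantized with exactly that scale; it cannot be chosen to suit the bias values themselves. A minimal numpy sketch of this arithmetic, with made-up scales and values (not the library's API):

```python
import numpy as np

# Illustrative quantization parameters (assumed, not from the package).
s_in, z_in = 0.05, 3   # input scale and zero point
s_w = 0.02             # weight scale; symmetric, so zero point is 0

x = np.array([1.0, -0.5])   # real-valued input
w = np.array([0.3, 0.7])    # real-valued weights
b = 0.123                   # real-valued bias

q_x = np.round(x / s_in).astype(np.int32) + z_in
q_w = np.round(w / s_w).astype(np.int32)
# The bias scale is forced to s_in * s_w so it can join the accumulator.
q_b = int(np.round(b / (s_in * s_w)))

# Integer accumulation: every term is in units of s_in * s_w.
acc = np.sum((q_x - z_in) * q_w) + q_b
print(acc * (s_in * s_w))  # 0.073
print(x @ w + b)           # 0.073, matches up to rounding error
```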
@@ -305,6 +305,7 @@ def symmetric_quantize_bias_tensor(
     bias_content: np.ndarray,
     input_tensor_quant_params: qtyping.UniformQuantParams,
     weight_tensor_quant_params: qtyping.UniformQuantParams,
+    check_error: bool = True,
 ) -> qtyping.UniformQuantParams:
   """Quantize bias tensor (symmetrically, i.e., zero_point = 0).
 
@@ -316,6 +317,12 @@ def symmetric_quantize_bias_tensor(
     bias_content: The bias content.
     input_tensor_quant_params: The quantization parameters of input tensor.
     weight_tensor_quant_params: The quantization parameters of weight tensor.
+    check_error: Whether to check if the quantization error (the difference
+      between the original and dequantized bias) is larger than the quantization
+      scale. This check is important because bias quantization parameters are
+      fixed (bias_scale = input_scale * weight_scale), which can lead to large
+      quantization errors. Raising an error when the quantization error is
+      larger than the scale helps to identify unexpected numerical issues.
 
   Returns:
     The quantized bias tensor.
@@ -330,7 +337,8 @@ def symmetric_quantize_bias_tensor(
 
   # symmetric
   bias_zp = np.zeros_like(effective_output_scale, dtype=np.int32)
-  bias_number_bits = 64 if input_tensor_quant_params.num_bits == 16 else 32
+  # Fixed to 32 bits since most accelerators use an int32 accumulator.
+  bias_number_bits = 32
   symmetric = True
   quantized_dimension = None if len(effective_output_scale) == 1 else 0
   bias_quant_params = qtyping.UniformQuantParams(
@@ -342,6 +350,21 @@ def symmetric_quantize_bias_tensor(
   )
 
   quantized_vars = uniform_quantize(bias_content, bias_quant_params)
+  if check_error:
+    dequantized_bias = uniform_dequantize(quantized_vars, bias_quant_params)
+    quantization_error = np.abs(dequantized_bias - bias_content)
+    if np.any(quantization_error > effective_output_scale):
+      raise ValueError(
+          "Quantization error is too large for bias tensor quantization."
+      )
+
+  # Save the int32 quantized bias as int64 if the input tensor is quantized to
+  # 16 bits. This assumes the matmul uses an int64 accumulator (safe from
+  # overflow). For accelerators with an int32 accumulator, it is safe to cast
+  # int64 back to int32.
+  if input_tensor_quant_params.num_bits == 16:
+    quantized_vars = quantized_vars.astype(np.int64)
+    bias_number_bits = 64
 
   # UniformQuantParams is a frozen dataclass, so it needs to be recreated.
   return qtyping.UniformQuantParams(
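A step the comments leave implicit: with round-to-nearest, the error of quantizing an in-range value is at most half a scale step, so an error larger than one full scale step can only come from clamping to the integer range (e.g., int32 saturation). A self-contained sketch of the check under that assumption, using a hypothetical helper rather than the library's uniform_quantize/uniform_dequantize pair:

```python
import numpy as np


def quantize_bias_checked(
    bias: np.ndarray, input_scale: float, weight_scale: float
) -> np.ndarray:
  """Sketch: symmetric int32 bias quantization with an error check."""
  scale = input_scale * weight_scale  # fixed bias scale
  info = np.iinfo(np.int32)
  q = np.clip(np.round(bias / scale), info.min, info.max).astype(np.int32)
  # Round-to-nearest keeps the error <= scale / 2; anything above one full
  # scale step means the value was clamped, i.e., a hidden numerics problem.
  error = np.abs(q.astype(np.float64) * scale - bias)
  if np.any(error > scale):
    raise ValueError("Quantization error is too large for bias tensor.")
  return q


quantize_bias_checked(np.array([0.123]), 0.05, 0.02)  # fine
# quantize_bias_checked(np.array([3e7]), 0.1, 0.1)    # raises ValueError
```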
@@ -15,8 +15,11 @@
 
 """Tests for tensor_utils."""
 
+import dataclasses
+
 from absl.testing import parameterized
 import numpy as np
+
 from tensorflow.python.platform import googletest
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor
@@ -276,7 +279,10 @@ class TensorUtilsTest(parameterized.TestCase):
     )
 
   @parameterized.parameters(
-      (8, 8, True, True), (8, 4, False, True), (16, 8, True, False)
+      (8, 8, True, True),
+      (8, 4, False, True),
+      (16, 8, True, False),
+      (16, 8, True, True),
   )
   def test_quantize_bias_tensor(
       self,
@@ -334,6 +340,26 @@ class TensorUtilsTest(parameterized.TestCase):
     self.assertSequenceAlmostEqual(
         list(dequantized_bias.flatten()), list(bias_tensor_data), places=5
     )
+
+    if activation_num_bits == 16:
+      # Check that it is safe to cast the int64 bias to int32. The int32
+      # quantized bias is saved as int64 when the input tensor is quantized to
+      # 16 bits, assuming the matmul uses an int64 accumulator (safe from
+      # overflow). For accelerators with an int32 accumulator, it is safe to
+      # cast int64 back to int32.
+      quantized_bias = bias_quant_config.quantized_data
+      self.assertIsNotNone(quantized_bias)
+      self.assertEqual(quantized_bias.dtype, np.int64)
+      self.assertSequenceEqual(
+          list(quantized_bias.flatten()),
+          list(quantized_bias.astype(np.int32).flatten()),
+      )
+
+      bias_quant_config = dataclasses.replace(
+          bias_quant_config,
+          num_bits=32,
+      )
+
     expected_quantized_data = uniform_quantize_tensor.uniform_quantize(
         bias_tensor_data, bias_quant_config
     )
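The dataclasses.replace call in the hunk above is the standard way to "modify" a frozen dataclass such as qtyping.UniformQuantParams: attribute assignment raises FrozenInstanceError, so the test rebuilds the config with num_bits=32 before comparing against a fresh int32 quantization. The pattern in isolation, with a stand-in class:

```python
import dataclasses


@dataclasses.dataclass(frozen=True)
class Params:  # stand-in for qtyping.UniformQuantParams
  num_bits: int
  symmetric: bool = True


p = Params(num_bits=64)
# p.num_bits = 32  # would raise dataclasses.FrozenInstanceError
p32 = dataclasses.replace(p, num_bits=32)  # new instance; other fields copied
assert p32 == Params(num_bits=32, symmetric=True)
```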
@@ -342,6 +368,30 @@ class TensorUtilsTest(parameterized.TestCase):
         list(bias_quant_config.quantized_data.flatten()),  # pytype: disable=attribute-error
     )
 
+  def test_quantize_bias_tensor_raises_error_for_large_quantization_error(self):
+    input_quant_config = qtyping.UniformQuantParams(
+        scale=np.array([0.1]),
+        zero_point=np.array([10]),
+        num_bits=8,
+        symmetric=False,
+        quantized_dimension=None,
+    )
+    weight_quant_config = qtyping.UniformQuantParams(
+        scale=np.array([0.1]),
+        zero_point=np.array([-1]),
+        num_bits=8,
+        symmetric=True,
+        quantized_dimension=None,
+    )
+    # This results in a quantized bias of 3e9, which is larger than int32 max.
+    bias_tensor_data = np.array([3e7])
+    with self.assertRaises(ValueError):
+      uniform_quantize_tensor.symmetric_quantize_bias_tensor(
+          bias_tensor_data,
+          input_quant_config,
+          weight_quant_config,
+      )
+
   @parameterized.parameters((8, True), (16, False))
   def test_tensor_zp_scale_from_min_max(self, num_bits, symmetric):
     min_val = np.min(self._test_data, keepdims=True)
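The magnitudes in this test check out by hand: bias_scale = 0.1 * 0.1 = 0.01, so the ideal quantized bias is 3e7 / 0.01 = 3e9, above the int32 maximum of 2,147,483,647. Clamping leaves a dequantized bias of roughly 2.15e7, an error of about 8.5e6, which dwarfs the 0.01 scale and triggers the new ValueError. In numbers (assuming round-to-nearest with int32 clamping, as sketched earlier):

```python
import numpy as np

bias_scale = 0.1 * 0.1                     # input_scale * weight_scale = 0.01
q_ideal = 3e7 / bias_scale                 # 3e9 > int32 max (~2.147e9)
q_clamped = np.iinfo(np.int32).max         # value int32 quantization clamps to
error = abs(q_clamped * bias_scale - 3e7)  # ~8.5e6, far above the 0.01 scale
assert q_ideal > np.iinfo(np.int32).max and error > bias_scale
```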
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-quantizer-nightly
-Version: 0.4.0.dev20250908
+Version: 0.4.0.dev20250910
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -28,7 +28,7 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCP
 ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
 ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=EqIHGEZ1LgUrTN7zf880RuAzEv3Qy7kgh5ivObJGHSo,22646
 ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
-ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=TQQxkxeAngrZO6ro6RjOeJAieWHIgK4hrACtbU0-Buk,35919
+ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=TUxqc3cG66H77Rz0N3ynFnKKmFySDUAExK--3-VS7a4,36487
 ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=GGf_n3wIeg3GB_eGsmyNJ0fTcxgpeMMbugTMRONK6TQ,3553
 ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=BDdn_uBZakfHyzdMJPKadsOqxqyC-s6W2ZzFH99L4fE,8652
 ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=sT5eX5TLZEHTtPfnSkCPDlS0sQxlTFWbCsbvOuj--yY,8889
@@ -38,8 +38,8 @@ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1
 ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
 ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
 ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
-ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=uCREMXi0U2ckhXXfgGVzwSgjFZc0IbtnFU-OjlG9IO8,17146
-ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=7kHluzpteMv36hFD6LD_qnwwMoE1GKUP4bGmGMFbOdA,12755
+ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=vsvBGEGFEEUP4kXRUh9hMpVXjsMBpfs6UDk8m4BNGTs,18375
+ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=Ympigz0BGcaO5x3OozxNxrRAGiF0to6V_HXAcxNNEpI,14399
 ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
 ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=4eAlGph6DDW18bUdoY0XcUoOXEr3P_3_W1ptidD8qK4,37611
 ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
@@ -70,8 +70,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
 ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
 ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
-ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/METADATA,sha256=vInM6iV-Us0yFEQmmlZz0uUwrJKgF-ZP747A2lLzoGc,1508
-ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
-ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/RECORD,,
+ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info/METADATA,sha256=OHvvjpu55-8eASitbDgp6fKhpBkVhF-AXT652QFhswg,1508
+ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info/RECORD,,