ai-edge-quantizer-nightly 0.4.0.dev20250908__py3-none-any.whl → 0.4.0.dev20250909__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py
@@ -330,7 +330,8 @@ def symmetric_quantize_bias_tensor(
 
   # symmetric
   bias_zp = np.zeros_like(effective_output_scale, dtype=np.int32)
-  bias_number_bits = 64 if input_tensor_quant_params.num_bits == 16 else 32
+  # Fixed to 32 bits since most of the accelerators use int32 accumulator.
+  bias_number_bits = 32
   symmetric = True
   quantized_dimension = None if len(effective_output_scale) == 1 else 0
   bias_quant_params = qtyping.UniformQuantParams(
@@ -342,6 +343,13 @@ def symmetric_quantize_bias_tensor(
   )
 
   quantized_vars = uniform_quantize(bias_content, bias_quant_params)
+  # Save the int32 quantized bias as int64 if the input tensor is quantized to
+  # 16 bits. This is to assume the matmul is using int64 accumulator (safe from
+  # overflow). For accelerators with int32 accumulator, it is safe to cast int64
+  # back to int32.
+  if input_tensor_quant_params.num_bits == 16:
+    quantized_vars = quantized_vars.astype(np.int64)
+    bias_number_bits = 64
 
   # UniformQuantParams is frozen dataclass, need to recreate.
   return qtyping.UniformQuantParams(
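To make the intent of this change concrete: the bias is still quantized into the int32 value range; only the storage dtype (and the reported num_bits) is widened to int64 when the input activation is 16-bit, so a runtime with an int32 accumulator can cast the bias back without loss. The sketch below mirrors that behavior with a hypothetical helper and made-up scale values; it is not the ai_edge_quantizer API.

    import numpy as np

    # Hypothetical helper mirroring the new bias path; not the package's API.
    def quantize_bias_sketch(bias, effective_scale, activation_num_bits):
      q = np.round(bias / effective_scale)
      # Clamp to the int32 range, as in a 32-bit symmetric quantization.
      q = np.clip(q, np.iinfo(np.int32).min, np.iinfo(np.int32).max).astype(np.int32)
      if activation_num_bits == 16:
        # Stored as int64 for an int64-accumulator matmul; values still fit in int32.
        return q.astype(np.int64), 64
      return q, 32

    bias = np.array([0.05, -1.25, 3.0], dtype=np.float32)
    quantized, num_bits = quantize_bias_sketch(bias, effective_scale=1e-4, activation_num_bits=16)
    assert quantized.dtype == np.int64 and num_bits == 64
    # Casting back to int32 is lossless because the values never exceed the int32 range.
    assert np.array_equal(quantized, quantized.astype(np.int32).astype(np.int64))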
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py
@@ -15,8 +15,11 @@
 
 """Tests for tensor_utils."""
 
+import dataclasses
+
 from absl.testing import parameterized
 import numpy as np
+
 from tensorflow.python.platform import googletest
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor
@@ -276,7 +279,10 @@ class TensorUtilsTest(parameterized.TestCase):
   )
 
   @parameterized.parameters(
-      (8, 8, True, True), (8, 4, False, True), (16, 8, True, False)
+      (8, 8, True, True),
+      (8, 4, False, True),
+      (16, 8, True, False),
+      (16, 8, True, True),
   )
   def test_quantize_bias_tensor(
       self,
@@ -334,6 +340,26 @@ class TensorUtilsTest(parameterized.TestCase):
     self.assertSequenceAlmostEqual(
         list(dequantized_bias.flatten()), list(bias_tensor_data), places=5
     )
+
+    if activation_num_bits == 16:
+      # Check if it is safe to cast int64 bias to int32. We save the int32
+      # quantized bias as int64 if the input tensor is quantized to 16 bits.
+      # This is to assume the matmul is using int64 accumulator (safe from
+      # overflow). For accelerators with int32 accumulator, it is safe to cast
+      # int64 back to int32.
+      quantized_bias = bias_quant_config.quantized_data
+      self.assertIsNotNone(quantized_bias)
+      self.assertEqual(quantized_bias.dtype, np.int64)
+      self.assertSequenceEqual(
+          list(quantized_bias.flatten()),
+          list(quantized_bias.astype(np.int32).flatten()),  # pytype: disable=attribute-error
+      )
+
+      bias_quant_config = dataclasses.replace(
+          bias_quant_config,
+          num_bits=32,
+      )
+
     expected_quantized_data = uniform_quantize_tensor.uniform_quantize(
         bias_tensor_data, bias_quant_config
     )
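The dataclasses.replace call in the test exists because UniformQuantParams is a frozen dataclass: its fields cannot be reassigned, so the test builds a copy with num_bits reset to 32 before recomputing the expected quantized data. A minimal sketch of that pattern, using a stand-in class rather than the real qtyping.UniformQuantParams:

    import dataclasses

    import numpy as np

    # Stand-in for illustration only; the real type is qtyping.UniformQuantParams.
    @dataclasses.dataclass(frozen=True)
    class FakeQuantParams:
      num_bits: int
      quantized_data: np.ndarray

    params = FakeQuantParams(num_bits=64, quantized_data=np.array([1, 2, 3], dtype=np.int64))
    # params.num_bits = 32  # would raise dataclasses.FrozenInstanceError
    params_int32 = dataclasses.replace(params, num_bits=32)
    assert params_int32.num_bits == 32 and params.num_bits == 64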
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-quantizer-nightly
-Version: 0.4.0.dev20250908
+Version: 0.4.0.dev20250909
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -38,8 +38,8 @@ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1
 ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
 ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
 ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
-ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=uCREMXi0U2ckhXXfgGVzwSgjFZc0IbtnFU-OjlG9IO8,17146
-ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=7kHluzpteMv36hFD6LD_qnwwMoE1GKUP4bGmGMFbOdA,12755
+ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=_WfxguP9dVsWFjUc2l8cvvx6kK_PEiUS5HU1vfC7d5c,17554
+ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=OH1etR0btp2ep7PIP4uc7LCOydrouVBeeBJzkIkMF-k,13625
 ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
 ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=4eAlGph6DDW18bUdoY0XcUoOXEr3P_3_W1ptidD8qK4,37611
 ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
@@ -70,8 +70,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
 ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
 ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
-ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/METADATA,sha256=vInM6iV-Us0yFEQmmlZz0uUwrJKgF-ZP747A2lLzoGc,1508
-ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
-ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/RECORD,,
+ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info/METADATA,sha256=-_Cqm5vEJXyhzkezt-hEOtZOZJ8kOtv8-0oUkMad7aQ,1508
+ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info/RECORD,,