ai-edge-quantizer-nightly 0.4.0.dev20250908__py3-none-any.whl → 0.4.0.dev20250910__py3-none-any.whl

@@ -309,13 +309,28 @@ def _materialize_bias_for_conv_ops(
         bias_tensor,
         graph_info.buffers,
     )
-    bias_quant_params = (
-        uniform_quantize_tensor.symmetric_quantize_bias_tensor(
-            bias_content,
-            op_tensor_params[op_input_index].consumers[0].parameters,
-            op_tensor_params[op_weight_index].consumers[0].parameters,
-        )
+    input_consumer_params = (
+        op_tensor_params[op_input_index].consumers[0].parameters
+    )
+    weight_consumer_params = (
+        op_tensor_params[op_weight_index].consumers[0].parameters
     )
+    try:
+      # Bias quantization uses a fixed quantization scale:
+      # input_scale * weight_scale. To avoid hidden numerics errors, we check
+      # the quantization error in bias quantization.
+      bias_quant_params = (
+          uniform_quantize_tensor.symmetric_quantize_bias_tensor(
+              bias_content,
+              input_consumer_params,
+              weight_consumer_params,
+          )
+      )
+    except ValueError as e:
+      raise ValueError(
+          f"Failed to quantize bias tensor for op {op_info.op_name} with op"
+          f" id {op_info.subgraph_op_index}."
+      ) from e
     # We only quantize bias under SRQ. Setting is_constant=True for SRQ only
     # to avoid quantizing bias for DRQ and weight-only cases.
     is_constant = (
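Why the bias scale is fixed: in a statically-quantized matmul, the input is represented as input_scale * (q_x - zero_point) and the symmetric weights as weight_scale * q_w, so every product accumulated by the integer kernel is in units of input_scale * weight_scale. A bias added inside the accumulator must therefore be pre-quantized with exactly that scale; it cannot be chosen to suit the bias values themselves. A minimal numpy sketch of this arithmetic, with made-up scales and values (not the library's API):

```python
import numpy as np

# Illustrative quantization parameters (assumed, not from the package).
s_in, z_in = 0.05, 3   # input scale and zero point
s_w = 0.02             # weight scale; symmetric, so zero point is 0

x = np.array([1.0, -0.5])   # real-valued input
w = np.array([0.3, 0.7])    # real-valued weights
b = 0.123                   # real-valued bias

q_x = np.round(x / s_in).astype(np.int32) + z_in
q_w = np.round(w / s_w).astype(np.int32)
# The bias scale is forced to s_in * s_w so it can join the accumulator.
q_b = int(np.round(b / (s_in * s_w)))

# Integer accumulation: every term is in units of s_in * s_w.
acc = np.sum((q_x - z_in) * q_w) + q_b
print(acc * (s_in * s_w))  # 0.073
print(x @ w + b)           # 0.073, matches up to rounding error
```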
@@ -305,6 +305,7 @@ def symmetric_quantize_bias_tensor(
     bias_content: np.ndarray,
     input_tensor_quant_params: qtyping.UniformQuantParams,
     weight_tensor_quant_params: qtyping.UniformQuantParams,
+    check_error: bool = True,
 ) -> qtyping.UniformQuantParams:
   """Quantize bias tensor (symmetrically, i.e., zero_point = 0).
 
@@ -316,6 +317,12 @@ def symmetric_quantize_bias_tensor(
     bias_content: The bias content.
     input_tensor_quant_params: The quantization parameters of input tensor.
     weight_tensor_quant_params: The quantization parameters of weight tensor.
+    check_error: Whether to check if the quantization error (the difference
+      between the original and dequantized bias) is larger than the quantization
+      scale. This check is important because bias quantization parameters are
+      fixed (bias_scale = input_scale * weight_scale), which can lead to large
+      quantization errors. Raising an error when the quantization error is
+      larger than the scale helps to identify unexpected numerical issues.
 
   Returns:
     The quantized bias tensor.
@@ -330,7 +337,8 @@ def symmetric_quantize_bias_tensor(
 
   # symmetric
   bias_zp = np.zeros_like(effective_output_scale, dtype=np.int32)
-  bias_number_bits = 64 if input_tensor_quant_params.num_bits == 16 else 32
+  # Fixed to 32 bits since most accelerators use an int32 accumulator.
+  bias_number_bits = 32
   symmetric = True
   quantized_dimension = None if len(effective_output_scale) == 1 else 0
   bias_quant_params = qtyping.UniformQuantParams(
@@ -342,6 +350,21 @@ def symmetric_quantize_bias_tensor(
   )
 
   quantized_vars = uniform_quantize(bias_content, bias_quant_params)
+  if check_error:
+    dequantized_bias = uniform_dequantize(quantized_vars, bias_quant_params)
+    quantization_error = np.abs(dequantized_bias - bias_content)
+    if np.any(quantization_error > effective_output_scale):
+      raise ValueError(
+          "Quantization error is too large for bias tensor quantization."
+      )
+
+  # Save the int32 quantized bias as int64 if the input tensor is quantized to
+  # 16 bits. This assumes the matmul uses an int64 accumulator (safe from
+  # overflow). For accelerators with an int32 accumulator, it is safe to cast
+  # int64 back to int32.
+  if input_tensor_quant_params.num_bits == 16:
+    quantized_vars = quantized_vars.astype(np.int64)
+    bias_number_bits = 64
 
   # UniformQuantParams is a frozen dataclass, so it needs to be recreated.
   return qtyping.UniformQuantParams(
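A step the comments leave implicit: with round-to-nearest, the error of quantizing an in-range value is at most half a scale step, so an error larger than one full scale step can only come from clamping to the integer range (e.g., int32 saturation). A self-contained sketch of the check under that assumption, using a hypothetical helper rather than the library's uniform_quantize/uniform_dequantize pair:

```python
import numpy as np


def quantize_bias_checked(
    bias: np.ndarray, input_scale: float, weight_scale: float
) -> np.ndarray:
  """Sketch: symmetric int32 bias quantization with an error check."""
  scale = input_scale * weight_scale  # fixed bias scale
  info = np.iinfo(np.int32)
  q = np.clip(np.round(bias / scale), info.min, info.max).astype(np.int32)
  # Round-to-nearest keeps the error <= scale / 2; anything above one full
  # scale step means the value was clamped, i.e., a hidden numerics problem.
  error = np.abs(q.astype(np.float64) * scale - bias)
  if np.any(error > scale):
    raise ValueError("Quantization error is too large for bias tensor.")
  return q


quantize_bias_checked(np.array([0.123]), 0.05, 0.02)  # fine
# quantize_bias_checked(np.array([3e7]), 0.1, 0.1)    # raises ValueError
```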
@@ -15,8 +15,11 @@
 
 """Tests for tensor_utils."""
 
+import dataclasses
+
 from absl.testing import parameterized
 import numpy as np
+
 from tensorflow.python.platform import googletest
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor
@@ -276,7 +279,10 @@ class TensorUtilsTest(parameterized.TestCase):
     )
 
   @parameterized.parameters(
-      (8, 8, True, True), (8, 4, False, True), (16, 8, True, False)
+      (8, 8, True, True),
+      (8, 4, False, True),
+      (16, 8, True, False),
+      (16, 8, True, True),
   )
   def test_quantize_bias_tensor(
       self,
@@ -334,6 +340,26 @@ class TensorUtilsTest(parameterized.TestCase):
     self.assertSequenceAlmostEqual(
         list(dequantized_bias.flatten()), list(bias_tensor_data), places=5
     )
+
+    if activation_num_bits == 16:
+      # Check that it is safe to cast the int64 bias to int32. The int32
+      # quantized bias is saved as int64 when the input tensor is quantized to
+      # 16 bits, assuming the matmul uses an int64 accumulator (safe from
+      # overflow). For accelerators with an int32 accumulator, it is safe to
+      # cast int64 back to int32.
+      quantized_bias = bias_quant_config.quantized_data
+      self.assertIsNotNone(quantized_bias)
+      self.assertEqual(quantized_bias.dtype, np.int64)
+      self.assertSequenceEqual(
+          list(quantized_bias.flatten()),
+          list(quantized_bias.astype(np.int32).flatten()),
+      )
+
+      bias_quant_config = dataclasses.replace(
+          bias_quant_config,
+          num_bits=32,
+      )
+
     expected_quantized_data = uniform_quantize_tensor.uniform_quantize(
         bias_tensor_data, bias_quant_config
     )
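The dataclasses.replace call in the hunk above is the standard way to "modify" a frozen dataclass such as qtyping.UniformQuantParams: attribute assignment raises FrozenInstanceError, so the test rebuilds the config with num_bits=32 before comparing against a fresh int32 quantization. The pattern in isolation, with a stand-in class:

```python
import dataclasses


@dataclasses.dataclass(frozen=True)
class Params:  # stand-in for qtyping.UniformQuantParams
  num_bits: int
  symmetric: bool = True


p = Params(num_bits=64)
# p.num_bits = 32  # would raise dataclasses.FrozenInstanceError
p32 = dataclasses.replace(p, num_bits=32)  # new instance; other fields copied
assert p32 == Params(num_bits=32, symmetric=True)
```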
@@ -342,6 +368,30 @@ class TensorUtilsTest(parameterized.TestCase):
         list(bias_quant_config.quantized_data.flatten()),  # pytype: disable=attribute-error
     )
 
+  def test_quantize_bias_tensor_raises_error_for_large_quantization_error(self):
+    input_quant_config = qtyping.UniformQuantParams(
+        scale=np.array([0.1]),
+        zero_point=np.array([10]),
+        num_bits=8,
+        symmetric=False,
+        quantized_dimension=None,
+    )
+    weight_quant_config = qtyping.UniformQuantParams(
+        scale=np.array([0.1]),
+        zero_point=np.array([-1]),
+        num_bits=8,
+        symmetric=True,
+        quantized_dimension=None,
+    )
+    # This results in a quantized bias of 3e9, which is larger than int32 max.
+    bias_tensor_data = np.array([3e7])
+    with self.assertRaises(ValueError):
+      uniform_quantize_tensor.symmetric_quantize_bias_tensor(
+          bias_tensor_data,
+          input_quant_config,
+          weight_quant_config,
+      )
+
   @parameterized.parameters((8, True), (16, False))
   def test_tensor_zp_scale_from_min_max(self, num_bits, symmetric):
     min_val = np.min(self._test_data, keepdims=True)
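The magnitudes in this test check out by hand: bias_scale = 0.1 * 0.1 = 0.01, so the ideal quantized bias is 3e7 / 0.01 = 3e9, above the int32 maximum of 2,147,483,647. Clamping leaves a dequantized bias of roughly 2.15e7, an error of about 8.5e6, which dwarfs the 0.01 scale and triggers the new ValueError. In numbers (assuming round-to-nearest with int32 clamping, as sketched earlier):

```python
import numpy as np

bias_scale = 0.1 * 0.1                     # input_scale * weight_scale = 0.01
q_ideal = 3e7 / bias_scale                 # 3e9 > int32 max (~2.147e9)
q_clamped = np.iinfo(np.int32).max         # value int32 quantization clamps to
error = abs(q_clamped * bias_scale - 3e7)  # ~8.5e6, far above the 0.01 scale
assert q_ideal > np.iinfo(np.int32).max and error > bias_scale
```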
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-quantizer-nightly
-Version: 0.4.0.dev20250908
+Version: 0.4.0.dev20250910
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -28,7 +28,7 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCP
 ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
 ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=EqIHGEZ1LgUrTN7zf880RuAzEv3Qy7kgh5ivObJGHSo,22646
 ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
-ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=TQQxkxeAngrZO6ro6RjOeJAieWHIgK4hrACtbU0-Buk,35919
+ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=TUxqc3cG66H77Rz0N3ynFnKKmFySDUAExK--3-VS7a4,36487
 ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=GGf_n3wIeg3GB_eGsmyNJ0fTcxgpeMMbugTMRONK6TQ,3553
 ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=BDdn_uBZakfHyzdMJPKadsOqxqyC-s6W2ZzFH99L4fE,8652
 ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=sT5eX5TLZEHTtPfnSkCPDlS0sQxlTFWbCsbvOuj--yY,8889
@@ -38,8 +38,8 @@ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1
 ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
 ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
 ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
-ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=uCREMXi0U2ckhXXfgGVzwSgjFZc0IbtnFU-OjlG9IO8,17146
-ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=7kHluzpteMv36hFD6LD_qnwwMoE1GKUP4bGmGMFbOdA,12755
+ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=vsvBGEGFEEUP4kXRUh9hMpVXjsMBpfs6UDk8m4BNGTs,18375
+ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=Ympigz0BGcaO5x3OozxNxrRAGiF0to6V_HXAcxNNEpI,14399
 ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
 ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=4eAlGph6DDW18bUdoY0XcUoOXEr3P_3_W1ptidD8qK4,37611
 ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
@@ -70,8 +70,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
 ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
 ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
-ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/METADATA,sha256=vInM6iV-Us0yFEQmmlZz0uUwrJKgF-ZP747A2lLzoGc,1508
-ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
-ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/RECORD,,
+ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info/METADATA,sha256=OHvvjpu55-8eASitbDgp6fKhpBkVhF-AXT652QFhswg,1508
+ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info/RECORD,,