PyPI - ai-edge-quantizer-nightly - Versions diffs - 0.3.0.dev20250806__py3-none-any.whl → 0.3.0.dev20250807__py3-none-any.whl - Mend

ai-edge-quantizer-nightly 0.3.0.dev20250806__py3-none-any.whl → 0.3.0.dev20250807__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py CHANGED Viewed

@@ -166,6 +166,7 @@ def min_max_calibrate(
     tensor_content_map: dict[str, np.ndarray],
     inputs_to_ignore: Optional[list[int]] = None,
     outputs_to_ignore: Optional[list[int]] = None,
+    valid_range: tuple[float, float] = (-3e38, 3e38),
 ) -> dict[str, qtyping.QSV]:
   """Collect quantization statistics variable (QSV, e.g., min/max) for the op.
@@ -175,11 +176,18 @@ def min_max_calibrate(
     tensor_content_map: A map of tensor name to tensor content.
     inputs_to_ignore: Input tensor indices to ignore.
     outputs_to_ignore: Output tensor indices to ignore.
+    valid_range: The valid range for tensor content, excluding the boundaries.
+      Tensor values outside this range are ignored during calibration. Defaults
+      to an approximate bfloat16 range. This range is chosen to address issues
+      with `padv2` where a bfloat16 -inf padding constant can cause problems.
+      Values exceeding this range can lead to quantization issues and are
+      therefore excluded from min/max calibration.
   Returns:
     A dictionary with key as tensor name and value as the collected QSV.
   """
   op_qsvs = {}
+  min_val, max_val = valid_range
   def _collect_activation_tensor_min_max(tensor_idx):
     tensor = graph_info.subgraph_tensors[tensor_idx]
@@ -191,9 +199,16 @@ def min_max_calibrate(
       return
     tensor_name = tfl_flatbuffer_utils.get_tensor_name(tensor)
     tensor_content = tensor_content_map[tensor_name]
+    qsv_shape = (1,) * tensor_content.ndim
+    filter_mask = (tensor_content > min_val) & (tensor_content < max_val)
+    if np.any(filter_mask):
+      tensor_content = tensor_content[filter_mask]
+    # Reshape is needed to ensure the scalar min/max have the same number of
+    # dimensions as the input tensor array, for compatibility with subsequent
+    # operations.
     op_qsvs[tensor_name] = {
-        "min": np.min(tensor_content, axis=None, keepdims=True),
-        "max": np.max(tensor_content, axis=None, keepdims=True),
+        "min": np.min(tensor_content, axis=None).reshape(qsv_shape),
+        "max": np.max(tensor_content, axis=None).reshape(qsv_shape),
     }
   inputs_to_ignore = inputs_to_ignore or []

ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py CHANGED Viewed

@@ -200,6 +200,41 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
     self.assertEqual(quant_params.block_size, 2)
     self.assertEqual(quant_params.quantized_dimension, 1)
+  def test_calibrate_ignores_inf_min_max(self):
+    """Tests that calibration ignores infinity values."""
+    # Sample input/output data for the fc op.
+    input_tensor_name = "sequential/flatten/Reshape"
+    output_tensor_name = (
+        "sequential/dense/MatMul;sequential/dense/Relu;sequential/dense/BiasAdd"
+    )
+    bloat16_inf = 3.39e38
+    tensor_content_map = {
+        input_tensor_name: np.array(
+            [[-np.inf, 1.0, 5.0, np.inf, bloat16_inf]], dtype=np.float32
+        ),
+        output_tensor_name: np.array(
+            [[6.0, 7.0, -bloat16_inf, 9.0, np.inf]], dtype=np.float32
+        ),
+    }
+    # Read from Model Explorer.
+    subgraph0 = self._test_model.subgraphs[0]
+    fc_op = subgraph0.operators[3]
+    op_qsvs = naive_min_max_quantize.min_max_calibrate(
+        fc_op,
+        self._graph_info,
+        tensor_content_map,
+        inputs_to_ignore=[1, 2],  # Ignore weight and bias.
+        outputs_to_ignore=[],
+    )
+    self.assertIn(input_tensor_name, op_qsvs)
+    self.assertEqual(op_qsvs[input_tensor_name]["min"], 1.0)
+    self.assertEqual(op_qsvs[input_tensor_name]["max"], 5.0)
+    self.assertIn(output_tensor_name, op_qsvs)
+    self.assertEqual(op_qsvs[output_tensor_name]["min"], 6.0)
+    self.assertEqual(op_qsvs[output_tensor_name]["max"], 9.0)
 if __name__ == "__main__":
   googletest.main()

ai_edge_quantizer/quantizer.py CHANGED Viewed

@@ -18,8 +18,10 @@
 from collections.abc import Iterable
 import dataclasses
 import json
+import logging
 import os
 from typing import Any, Optional, Union
 from ai_edge_quantizer import algorithm_manager
 from ai_edge_quantizer import calibrator
 from ai_edge_quantizer import default_policy
@@ -57,49 +59,61 @@ class QuantizationResult:
   recipe: _QuantRecipe
   quantized_model: Optional[bytearray]
-  def save(self, save_folder: str, model_name: str) -> None:
+  def save(
+      self, save_folder: str, model_name: str, overwrite: bool = False
+  ) -> None:
     """Saves the quantized model and the quantization recipe.
     Args:
       save_folder: Path to the folder to save the quantized model and the
         quantization recipe.
       model_name: Name of the model.
+      overwrite: Whether to overwrite the model if it already exists.
     Raises:
       RuntimeError: If no quantized model is available.
-      FileExistsError: If the model already exists in the folder.
     """
-    if self.quantized_model is None:
-      raise RuntimeError(
-          'No quantized model to save. Make sure .quantize() is called.'
-      )
+    if not gfile.Exists(save_folder):
+      gfile.MakeDirs(save_folder)
     model_save_path = os.path.join(save_folder, f'{model_name}.tflite')
-    if gfile.Exists(model_save_path):
-      raise FileExistsError(
-          f'The model {model_save_path} already exists in the folder.'
-      )
-    with gfile.GFile(model_save_path, 'wb') as output_file_handle:
-      output_file_handle.write(self.quantized_model)
+    self.export_model(model_save_path, overwrite)
-    recipe = json.dumps(self.recipe)
     recipe_save_path = os.path.join(save_folder, model_name + '_recipe.json')
+    recipe = json.dumps(self.recipe)
     with gfile.GFile(recipe_save_path, 'w') as output_file_handle:
       output_file_handle.write(recipe)
-  def export_model(self, filepath: str) -> None:
+  def export_model(self, filepath: str, overwrite: bool = False) -> None:
     """Exports the quantized model to a .tflite flatbuffer.
     Args:
       filepath: Path (including file name) that the exported model should be
         serialized to.
+      overwrite: Whether to overwrite the model if it already exists.
     Raises:
       RuntimeError: If no quantized model is available.
+      ValueError: If the model already exists in the folder and overwrite is
+        False.
     """
     if self.quantized_model is None:
       raise RuntimeError(
           'No quantized model to save. Make sure .quantize() is called.'
       )
+    if gfile.Exists(filepath):
+      if overwrite:
+        logging.warning(
+            'The model %s already exists in the folder. Overwriting the model'
+            ' since overwrite=True.',
+            filepath,
+        )
+      else:
+        raise ValueError(
+            f'The model {filepath} already exists in the folder. Please'
+            ' consider change the model name or specify overwrite=True to'
+            ' overwrite the model if needed.'
+        )
     with gfile.GFile(filepath, 'wb') as output_file_handle:
       output_file_handle.write(self.quantized_model)

{ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-quantizer-nightly
-Version: 0.3.0.dev20250806
+Version: 0.3.0.dev20250807
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI

{ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info}/RECORD RENAMED Viewed

@@ -13,7 +13,7 @@ ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgI
 ai_edge_quantizer/params_generator.py,sha256=hcgMHJlERZERUyIAEi6AHJcLJ8gsKIBAEojzFFz-tqk,20098
 ai_edge_quantizer/params_generator_test.py,sha256=RDYoRZDJfEZRtjlTAU2kZ_4t3JHOqEHxfJX9V4ETAhg,40597
 ai_edge_quantizer/qtyping.py,sha256=t7S5wTjg6VFmKYzeM_qymu36tT18rnkmY-4YOXMQxzc,16722
-ai_edge_quantizer/quantizer.py,sha256=g3DMqFMrMpt9jQttCE0WcdNbMtk0JZnmN5MmCHrNdyM,13202
+ai_edge_quantizer/quantizer.py,sha256=WeKwhh8cYZ07DUwvS0S1EdNzEZSfPODlynqIBvJ-Br4,13624
 ai_edge_quantizer/quantizer_test.py,sha256=K_HBA56JkFI3HL8VLWCqGEfC0ISh5ldMKoNyBdGRAJg,20368
 ai_edge_quantizer/recipe.py,sha256=FR0uJceumZrnle2VRSOQZ1uXup4S1cTYKRH-N53mWRo,2919
 ai_edge_quantizer/recipe_manager.py,sha256=qcGUD7e7BISKdsY9WH2rdaRR3acmzSA5qMezGNbzlpo,8931
@@ -34,8 +34,8 @@ ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha
 ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=sT5eX5TLZEHTtPfnSkCPDlS0sQxlTFWbCsbvOuj--yY,8889
 ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py,sha256=U3h5scCHSOdqHA-pb1C3pNgwumT4ydGbtkCSM0ORhrs,12740
 ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py,sha256=5VUxlaKP1jz4HV-LcKxXMMtmb6eWamq0A6qWJd63cR4,10179
-ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=8_tNLTbOWTKId4DfHBjkOR9RvELUyIpxlGxKu7tv5Ko,7556
-ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=zoF_EHjYqsKkuev8wfuutIITEmp_maa70IpJI_Df3ck,7431
+ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1sB2j1vlvvWDKyjcGvA_JLCpN2KbCmMslGCBUc4--V4,8461
+ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
 ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
 ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
 ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=0d5XdNbjR2pVsAc-gWX3ik_pAIL-bZ-zemEz_jS2d0c,16531
@@ -68,8 +68,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
 ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
 ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
-ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info/METADATA,sha256=xTO8cST-KD3qLcf9Ts-E51tmKAOQJscoUJ983f-owr0,1528
-ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
-ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info/RECORD,,
+ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info/METADATA,sha256=zDKLqFrxvrjKJM46l8OaqwP7YriogP6dqsvY_8Y3O-I,1528
+ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info/RECORD,,

{ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info}/LICENSE RENAMED Viewed

File without changes

{ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info}/WHEEL RENAMED Viewed

File without changes

{ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info}/top_level.txt RENAMED Viewed

File without changes

ai-edge-quantizer-nightly 0.3.0.dev20250806__py3-none-any.whl → 0.3.0.dev20250807__py3-none-any.whl

ai-edge-quantizer-nightly 0.3.0.dev20250806__py3-none-any.whl → 0.3.0.dev20250807__py3-none-any.whl