ai-edge-quantizer-nightly 0.1.0.dev20250415__py3-none-any.whl → 0.5.0.dev20260103__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithm_manager.py +158 -0
- ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py +2 -2
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +489 -53
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py +29 -2
- ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py +4 -6
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py +414 -0
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py +440 -0
- ai_edge_quantizer/algorithms/uniform_quantize/mse.py +127 -0
- ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py +195 -0
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +48 -42
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +53 -14
- ai_edge_quantizer/algorithms/uniform_quantize/octav.py +32 -18
- ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py +92 -38
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +248 -13
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +126 -6
- ai_edge_quantizer/algorithms/utils/common_utils.py +142 -53
- ai_edge_quantizer/calibrator.py +11 -60
- ai_edge_quantizer/calibrator_test.py +4 -73
- ai_edge_quantizer/default_policy.py +61 -26
- ai_edge_quantizer/model_modifier.py +97 -7
- ai_edge_quantizer/model_modifier_test.py +81 -1
- ai_edge_quantizer/model_validator.py +31 -8
- ai_edge_quantizer/params_generator.py +17 -10
- ai_edge_quantizer/params_generator_test.py +2 -7
- ai_edge_quantizer/qtyping.py +86 -6
- ai_edge_quantizer/quantizer.py +166 -21
- ai_edge_quantizer/quantizer_test.py +284 -16
- ai_edge_quantizer/recipe.py +154 -42
- ai_edge_quantizer/recipe_manager.py +158 -1
- ai_edge_quantizer/recipe_manager_test.py +146 -32
- ai_edge_quantizer/recipe_test.py +93 -17
- ai_edge_quantizer/transformation_instruction_generator.py +118 -13
- ai_edge_quantizer/transformation_instruction_generator_test.py +163 -27
- ai_edge_quantizer/transformation_performer.py +55 -25
- ai_edge_quantizer/transformation_performer_test.py +127 -5
- ai_edge_quantizer/transformations/duplicate_buffer.py +2 -1
- ai_edge_quantizer/transformations/duplicate_tensor.py +1 -0
- ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation.py +299 -0
- ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation_test.py +244 -0
- ai_edge_quantizer/transformations/insert_hadamard_rotation.py +186 -0
- ai_edge_quantizer/transformations/insert_hadamard_rotation_test.py +200 -0
- ai_edge_quantizer/transformations/quantize_tensor.py +17 -32
- ai_edge_quantizer/transformations/quantize_tensor_test.py +1 -1
- ai_edge_quantizer/transformations/transformation_utils.py +129 -6
- ai_edge_quantizer/transformations/transformation_utils_test.py +65 -3
- ai_edge_quantizer/utils/calibration_utils.py +263 -1
- ai_edge_quantizer/utils/calibration_utils_test.py +173 -3
- ai_edge_quantizer/utils/constrained_ops_utils.py +111 -0
- ai_edge_quantizer/utils/constrained_ops_utils_test.py +50 -0
- ai_edge_quantizer/utils/test_utils.py +75 -2
- ai_edge_quantizer/utils/tfl_flatbuffer_utils.py +39 -6
- ai_edge_quantizer/utils/tfl_interpreter_utils.py +87 -15
- ai_edge_quantizer/utils/tfl_interpreter_utils_test.py +29 -2
- ai_edge_quantizer/utils/validation_utils.py +114 -4
- ai_edge_quantizer/utils/validation_utils_test.py +80 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250415.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/METADATA +14 -4
- ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/RECORD +81 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250415.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/WHEEL +1 -1
- ai_edge_quantizer/transformations/emulated_subchannel.py +0 -363
- ai_edge_quantizer/transformations/emulated_subchannel_test.py +0 -212
- ai_edge_quantizer_nightly-0.1.0.dev20250415.dist-info/RECORD +0 -73
- {ai_edge_quantizer_nightly-0.1.0.dev20250415.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/licenses}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250415.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/top_level.txt +0 -0
ai_edge_quantizer/calibrator_test.py

@@ -103,58 +103,6 @@ class CalibratorTest(googletest.TestCase):
     model_tensor_qsvs = self._calibrator.get_model_qsvs()
     self.assertEmpty(model_tensor_qsvs)
 
-  def test_calibrator_initialize_qsv(self):
-    _add_default_int8xint8_integer_recipe(self._recipe_manager)
-    # Overwrite the single op to fc
-    self._recipe_manager.add_quantization_config(
-        regex=".*Stateful.*",
-        operation_name=qtyping.TFLOperationName.FULLY_CONNECTED,
-        algorithm_key=_AlgorithmName.MIN_MAX_UNIFORM_QUANT,
-        op_config=qtyping.OpQuantizationConfig(
-            weight_tensor_config=_TENSOR_QUANT_CONFIG(
-                num_bits=4,
-                granularity=qtyping.QuantGranularity.CHANNELWISE,
-            ),
-            compute_precision=_ComputePrecision.INTEGER,
-        ),
-    )
-    self._calibrator._initialize_model_qsvs(self._recipe_manager)
-    model_tensor_qsvs = self._calibrator.get_model_qsvs()
-
-    self.assertLen(model_tensor_qsvs, 4)
-    self.assertIn("serving_default_input_1:0", model_tensor_qsvs)  # input
-    input_qsv = model_tensor_qsvs["serving_default_input_1:0"]
-    self.assertEmpty(input_qsv)
-
-    self.assertIn("sequential/dense/MatMul", model_tensor_qsvs)  # weight
-    weight_tensor_qsv = model_tensor_qsvs["sequential/dense/MatMul"]
-    mins_maxs_shape = (16, 1)
-    self.assertTupleEqual(weight_tensor_qsv["min"].shape, mins_maxs_shape)
-    self.assertAlmostEqual(weight_tensor_qsv["min"][0][0], -0.40436327)
-    self.assertTupleEqual(weight_tensor_qsv["max"].shape, mins_maxs_shape)
-    self.assertAlmostEqual(weight_tensor_qsv["max"][0][0], 0.46138108)
-
-    self.assertIn(
-        "sequential/dense/BiasAdd/ReadVariableOp", model_tensor_qsvs
-    )  # bias
-    bias_tensor_qsv = model_tensor_qsvs[
-        "sequential/dense/BiasAdd/ReadVariableOp"
-    ]
-    mins_maxs_shape = (16,)
-    self.assertTupleEqual(bias_tensor_qsv["min"].shape, mins_maxs_shape)
-    self.assertAlmostEqual(bias_tensor_qsv["min"][0], -0.26978338)
-    self.assertTupleEqual(bias_tensor_qsv["max"].shape, mins_maxs_shape)
-    # Here bias min/max will be the same as each element is a scalar
-    # Bias will be quantized with input_scale * weight_scale.
-    self.assertSequenceEqual(
-        list(bias_tensor_qsv["max"].flatten()),
-        list(bias_tensor_qsv["min"].flatten()),
-    )
-
-    self.assertIn("StatefulPartitionedCall:0", model_tensor_qsvs)  # output
-    output_qsv = model_tensor_qsvs["StatefulPartitionedCall:0"]
-    self.assertEmpty(output_qsv)
-
   def test_calibrate_single_fc_success(self):
     _add_default_int8xint8_integer_recipe(self._recipe_manager)
     self._calibrator.calibrate(
@@ -162,7 +110,7 @@ class CalibratorTest(googletest.TestCase):
     )
     model_tensor_qsvs = self._calibrator.get_model_qsvs()
 
-    self.assertLen(model_tensor_qsvs,
+    self.assertLen(model_tensor_qsvs, 2)
     self.assertIn("serving_default_input_1:0", model_tensor_qsvs)  # input
     input_qsv = model_tensor_qsvs["serving_default_input_1:0"]
     self.assertSequenceAlmostEqual(
@@ -171,19 +119,6 @@ class CalibratorTest(googletest.TestCase):
     self.assertSequenceAlmostEqual(
         input_qsv["max"].flatten(), [TEST_MAX_VAL], delta=1e-5
     )
-
-    self.assertIn("sequential/dense/MatMul", model_tensor_qsvs)  # weight
-    weight_qsv = model_tensor_qsvs["sequential/dense/MatMul"]
-    self.assertSequenceAlmostEqual(weight_qsv["min"].flatten(), [-0.49114203])
-    self.assertSequenceAlmostEqual(weight_qsv["max"].flatten(), [0.4903704])
-
-    self.assertIn(
-        "sequential/dense/BiasAdd/ReadVariableOp", model_tensor_qsvs
-    )  # bias
-    bias_qsv = model_tensor_qsvs["sequential/dense/BiasAdd/ReadVariableOp"]
-    self.assertSequenceAlmostEqual(bias_qsv["min"].flatten(), [-0.38401994])
-    self.assertSequenceAlmostEqual(bias_qsv["max"].flatten(), [0.31727126])
-
     self.assertIn("StatefulPartitionedCall:0", model_tensor_qsvs)  # output
     output_qsv = model_tensor_qsvs["StatefulPartitionedCall:0"]
     # Relu, only check the min
@@ -249,15 +184,11 @@ class CalibratorAlreadyQuantizedModelTest(googletest.TestCase):
     )
     _ = calibrator.Calibrator(test_model_path)
 
-  def
+  def test_check_is_quantized_model_succeeds_when_model_is_quantized(self):
     test_model_path = os.path.join(
         TEST_DATA_PREFIX_PATH, "tests/models/mnist_quantized.tflite"
     )
-    with self.assertRaisesRegex(
-        ValueError,
-        "The input model for calibration is not a float model.",
-    ):
-      _ = calibrator.Calibrator(test_model_path)
+    _ = calibrator.Calibrator(test_model_path)
 
 
 class CalibratorToyGemma2Test(googletest.TestCase):
@@ -302,7 +233,7 @@ class CalibratorToyGemma2Test(googletest.TestCase):
         self._toy_gemma2_calibration_dataset,
         model_recipe_manager=recipe_mngr,
     )
-    self.assertLen(calib.get_model_qsvs(),
+    self.assertLen(calib.get_model_qsvs(), 202)
 
 
 if __name__ == "__main__":
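With this release the calibrator keeps QSVs (quantization statistical values) only for activation tensors, so the weight/bias assertions above are deleted and the expected counts shrink. For orientation, a minimal sketch of walking a QSV dict of the shape these tests assert (tensor names and values are illustrative only, not package fixtures):

```python
import numpy as np

# Shape asserted by the tests: tensor name -> {"min": array, "max": array},
# with an empty dict for tensors that have no recorded statistics yet.
qsvs = {
    "serving_default_input_1:0": {
        "min": np.array([-1.0]),
        "max": np.array([1.0]),
    },
    "StatefulPartitionedCall:0": {},
}

for name, stats in qsvs.items():
    if not stats:
        print(name, "no statistics recorded")
        continue
    print(name, "min", stats["min"].min(), "max", stats["max"].max())
```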
ai_edge_quantizer/default_policy.py

@@ -19,9 +19,9 @@ import collections
 import copy
 import json
 from typing import Any, Union
+from ai_edge_litert.tools import flatbuffer_utils
 from ai_edge_quantizer import qtyping
 from ai_edge_litert import schema_py_generated as schema  # pylint:disable=g-direct-tensorflow-import
-from tensorflow.lite.tools import flatbuffer_utils  # pylint: disable=g-direct-tensorflow-import
 
 _TFLOpName = qtyping.TFLOperationName
 _OpQuantizationConfig = qtyping.OpQuantizationConfig
@@ -61,9 +61,8 @@ DEFAULT_JSON_POLICY = """
       "weight_tensor_config": {
         "num_bits": 4,
         "symmetric": [true],
-        "granularity": ["
-        "dtype": "INT"
-        "block_size": [32, 64, 96, 128, 256]
+        "granularity": ["BLOCKWISE_32", "BLOCKWISE_64", "BLOCKWISE_128", "BLOCKWISE_256"],
+        "dtype": "INT"
       },
       "explicit_dequantize": false,
       "compute_precision": "INTEGER"
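The policy now encodes the block size directly in the granularity token (`BLOCKWISE_32` through `BLOCKWISE_256`) rather than in a separate `block_size` list. A small sketch of splitting such a token back into kind and size; `parse_granularity` is our illustration, not an API of the package:

```python
def parse_granularity(token: str) -> tuple[str, int | None]:
    """Splits a policy granularity token such as 'BLOCKWISE_32'.

    Returns the granularity kind plus the block size, or None when the
    token carries no size (e.g. 'CHANNELWISE').
    """
    kind, sep, size = token.rpartition("_")
    if sep and size.isdigit():
        return kind, int(size)
    return token, None


assert parse_granularity("BLOCKWISE_32") == ("BLOCKWISE", 32)
assert parse_granularity("CHANNELWISE") == ("CHANNELWISE", None)
```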
@@ -178,12 +177,30 @@ DEFAULT_JSON_POLICY = """
       "INPUT",
       "OUTPUT",
       "SLICE",
-      "EMBEDDING_LOOKUP",
       "SUM",
+      "SELECT",
       "SELECT_V2",
       "DYNAMIC_UPDATE_SLICE",
       "SELECT_V2",
-      "STABLEHLO_COMPOSITE"
+      "STABLEHLO_COMPOSITE",
+      "PAD",
+      "MAX_POOL_2D",
+      "RESIZE_BILINEAR",
+      "RESIZE_NEAREST_NEIGHBOR",
+      "GATHER_ND",
+      "PACK",
+      "UNPACK",
+      "DIV",
+      "BROADCAST_TO",
+      "SQRT",
+      "GATHER",
+      "MAXIMUM",
+      "PADV2",
+      "REDUCE_MIN",
+      "EQUAL",
+      "NOT_EQUAL",
+      "MIRROR_PAD",
+      "RELU"
     ],
     "static_wi8_ai8": [
       "ADD",
@@ -209,15 +226,36 @@ DEFAULT_JSON_POLICY = """
       "INPUT",
       "OUTPUT",
       "SLICE",
-      "EMBEDDING_LOOKUP",
       "SUM",
+      "SELECT",
       "SELECT_V2",
       "DYNAMIC_UPDATE_SLICE",
       "SELECT_V2",
-      "STABLEHLO_COMPOSITE"
+      "STABLEHLO_COMPOSITE",
+      "PAD",
+      "SQUARED_DIFFERENCE",
+      "MAX_POOL_2D",
+      "RESIZE_BILINEAR",
+      "RESIZE_NEAREST_NEIGHBOR",
+      "GATHER_ND",
+      "PACK",
+      "UNPACK",
+      "DIV",
+      "BROADCAST_TO",
+      "SQRT",
+      "GATHER",
+      "HARD_SWISH",
+      "MAXIMUM",
+      "PADV2",
+      "REDUCE_MIN",
+      "EQUAL",
+      "NOT_EQUAL",
+      "MIRROR_PAD",
+      "SPACE_TO_DEPTH",
+      "RELU"
     ],
-    "static_wi4_ai8": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT"
-    "static_wi4_ai16": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT"
+    "static_wi4_ai8": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT"],
+    "static_wi4_ai16": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT"],
     "dynamic_wi8_afp32": [
       "BATCH_MATMUL",
       "CONV_2D",
@@ -240,6 +278,11 @@ DEFAULT_JSON_POLICY = """
   }
 }
 """
+QUANTIZABLE_COMPOSITES = [
+    "od" + "ml.npu_call",
+    "od" + "ml.rms_norm",
+    "od" + "ml.l2_norm",
+]
 
 
 def _unroll_json_config(
@@ -280,16 +323,9 @@ def _unroll_json_config(
           "granularity": granularity,
           "dtype": json_config["weight_tensor_config"]["dtype"],
       }
-
-
-
-      weight_configs.append(
-          qtyping.TensorQuantizationConfig.from_dict(tensor_config)
-      )
-    else:
-      weight_configs.append(
-          qtyping.TensorQuantizationConfig.from_dict(tensor_config)
-      )
+      weight_configs.append(
+          qtyping.TensorQuantizationConfig.from_dict(tensor_config)
+      )
 
   if activation_configs:
     for activation_config in activation_configs:
@@ -317,10 +353,10 @@ def _unroll_json_config(
 
 
 # TODO: b/401024954 - Have a better way to specify recipes based on op options.
-def is_conditionally_unquantized(
+def is_non_quantizable_composite_op(
     op: Union[schema.Operator, schema.OperatorT],
 ) -> bool:
-  """Checks if the operator is
+  """Checks if the operator is a non-quantizable composite op.
 
   We may want to quantize an op only when its has certain options.
   Policies/recipes
@@ -335,10 +371,9 @@ def is_conditionally_unquantized(
   if opts := flatbuffer_utils.get_options_as(
       op, schema.StableHLOCompositeOptionsT
   ):
-    name = opts.name
-
-
-    return ("od" + "ml.npu_call") not in name.decode("utf-8")
+    name = opts.name.decode("utf-8")
+    if name not in QUANTIZABLE_COMPOSITES:
+      return True
 
   return False
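The check is also generalized: instead of special-casing `odml.npu_call` inline, any StableHLO composite whose name is absent from `QUANTIZABLE_COMPOSITES` is now reported as non-quantizable. A self-contained sketch of that allowlist logic with the flatbuffer plumbing stubbed out (the helper name and the bytes-or-None argument are our simplification):

```python
# Mirrors QUANTIZABLE_COMPOSITES above; the "od" + "ml..." concatenation is
# kept verbatim in the source, presumably to dodge a string-match check.
QUANTIZABLE_COMPOSITES = ["odml.npu_call", "odml.rms_norm", "odml.l2_norm"]


def is_non_quantizable_composite(composite_name: bytes | None) -> bool:
    """composite_name stands in for StableHLOCompositeOptionsT.name.

    None models an operator that is not a StableHLO composite at all, which
    the real function also treats as quantizable (it returns False).
    """
    if composite_name is None:
        return False
    return composite_name.decode("utf-8") not in QUANTIZABLE_COMPOSITES


assert not is_non_quantizable_composite(b"odml.rms_norm")
assert is_non_quantizable_composite(b"vendor.custom_block")
assert not is_non_quantizable_composite(None)
```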
ai_edge_quantizer/model_modifier.py

@@ -17,15 +17,21 @@
 
 from collections.abc import Sequence
 import copy
+import logging
 
 import numpy as np
 
+from ai_edge_litert.tools import flatbuffer_utils
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer import transformation_instruction_generator
 from ai_edge_quantizer import transformation_performer
 from ai_edge_quantizer.utils import tfl_flatbuffer_utils
+from ai_edge_quantizer.utils import tfl_interpreter_utils
+from ai_edge_litert import interpreter as tfl  # pylint: disable=g-direct-tensorflow-import
 from ai_edge_litert import schema_py_generated  # pylint: disable=g-direct-tensorflow-import
-
+
+
+_DEQUANT_SUFFIX = "_dequant"
 
 
 class ModelModifier:
@@ -104,11 +110,95 @@ class ModelModifier:
         instructions, quantized_model, tensor_processing_order
     )
     constant_buffer_size = self._process_constant_map(quantized_model)
-    # we leave
-
-
-
-
+    # we leave 256MB for the model architecture.
+    serialize_fun = (
+        self._serialize_large_model
+        if constant_buffer_size > 2**31 - 2**28
+        else self._serialize_small_model
+    )
+    serialized_quantized_model = serialize_fun(quantized_model)
+
+    # Update signature defs if dequant is inserted before output.
+    if self._has_dequant_before_output(instructions):
+      quantized_model = self._update_signature_defs_for_dequant_output(
+          quantized_model, serialized_quantized_model
+      )
+      serialized_quantized_model = serialize_fun(quantized_model)
+
+    return serialized_quantized_model
+
+  def _update_signature_defs_for_dequant_output(
+      self, model: schema_py_generated.ModelT, serialized_model: bytearray
+  ):
+    """Updates the signature definitions in the model.
+
+    This function is called when a dequantize operation is inserted before
+    an output tensor. It updates the tensor index in the signature
+    definitions to point to the newly inserted dequantize output tensor.
+
+    Args:
+      model: The TFlite ModelT object.
+      serialized_model: The serialized bytearray of the TFlite model.
+
+    Returns:
+      The updated TFlite ModelT object.
+    """
+    interpreter = tfl.Interpreter(model_content=bytes(serialized_model))
+
+    for signature_def in model.signatureDefs:
+      signature_key = signature_def.signatureKey.decode("utf-8")
+      logging.info("Signature = %s", signature_key)
+      subgraph_idx = tfl_interpreter_utils.get_signature_main_subgraph_index(
+          interpreter, signature_key
+      )
+      output_details = interpreter.get_signature_runner(
+          signature_key
+      ).get_output_details()
+      subgraph = model.subgraphs[subgraph_idx]
+      graph_info = qtyping.GraphInfo(subgraph.tensors, model.buffers)
+
+      for output in subgraph.outputs:
+        tensor_name = tfl_flatbuffer_utils.get_tensor_name(
+            graph_info.subgraph_tensors[output]
+        )
+        logging.info("\tOutput tensor = `%s`", tensor_name)
+
+        for signature_name, tensor_details in output_details.items():
+          if tensor_details["name"] + _DEQUANT_SUFFIX == tensor_name:
+            logging.info(
+                "\t\tfound tensor mapping: `%s`->`%s` for signature name: `%s`",
+                tensor_details["name"],
+                tensor_name,
+                signature_name,
+            )
+            for signature_item in signature_def.outputs:
+              if signature_item.name.decode("utf-8") == signature_name:
+                signature_item.tensorIndex = output
+                logging.info(
+                    "\t\t\tswapped tensor index: %s->%s",
+                    tensor_details["index"],
+                    output,
+                )
+                break
+            break
+
+    return model
+
+  def _has_dequant_before_output(
+      self, instructions: dict[str, qtyping.TensorTransformationInsts]
+  ) -> bool:
+    """Check if the model has dequant insert to output."""
+    for tensor_name, tensor_trans_insts in instructions.items():
+      for instr in tensor_trans_insts.instructions:
+        if (
+            qtyping.QuantTransformation.ADD_DEQUANTIZE == instr.transformation
+            and instr.consumers == [-1]
+        ):
+          logging.info(
+              "Found dequant insert to output for tensor: %s", tensor_name
+          )
+          return True
+    return False
 
   def _process_constant_map(
       self, quantized_model: schema_py_generated.ModelT
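The cutover `constant_buffer_size > 2**31 - 2**28` compares against 1,879,048,192 bytes: the 2 GiB flatbuffer ceiling minus the 256 MB that the comment reserves for the model architecture. A quick sketch of the same dispatch with stand-in serializers:

```python
# 2 GiB signed-offset flatbuffer cap minus 256 MiB of headroom, per the
# "we leave 256MB for the model architecture" comment in the diff.
LARGE_MODEL_THRESHOLD = 2**31 - 2**28
assert LARGE_MODEL_THRESHOLD == 1_879_048_192


def pick_serializer(constant_buffer_size: int) -> str:
    # Stand-ins for _serialize_large_model / _serialize_small_model.
    return "large" if constant_buffer_size > LARGE_MODEL_THRESHOLD else "small"


assert pick_serializer(2**30) == "small"          # 1 GiB of constants
assert pick_serializer(2**31 - 2**27) == "large"  # within 128 MiB of the cap
```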
@@ -142,7 +232,7 @@ class ModelModifier:
     remainder = len(bytearr) % 16
     if remainder != 0:
       padding_size = 16 - remainder
-      bytearr.extend(b
+      bytearr.extend(b"\0" * padding_size)
 
   # TODO: b/333797307 - support > 2GB output model
   def _serialize_large_model(
ai_edge_quantizer/model_modifier_test.py

@@ -19,13 +19,13 @@ import os
 import tracemalloc
 from tensorflow.python.platform import googletest
 from absl.testing import parameterized
+from ai_edge_litert.tools import flatbuffer_utils
 from ai_edge_quantizer import model_modifier
 from ai_edge_quantizer import params_generator
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer import recipe_manager
 from ai_edge_quantizer.utils import test_utils
 from ai_edge_quantizer.utils import tfl_flatbuffer_utils
-from tensorflow.lite.tools import flatbuffer_utils  # pylint: disable=g-direct-tensorflow-import
 
 TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile('.')
 
@@ -125,6 +125,86 @@ class ModelModifierTest(parameterized.TestCase):
     loosen_mem_use_factor = 4.5
     self.assertLess(mem_peak / len(self._model_content), loosen_mem_use_factor)
 
+  def test_has_dequant_before_output_true(self):
+    instructions = {
+        'tensor1': qtyping.TensorTransformationInsts(
+            'tensor1',
+            0,
+            instructions=[
+                qtyping.TransformationInst(
+                    transformation=qtyping.QuantTransformation.ADD_DEQUANTIZE,
+                    tensor_id=0,
+                    producer=0,
+                    consumers=[-1],
+                )
+            ],
+        )
+    }
+    self.assertTrue(
+        self._model_modifier._has_dequant_before_output(instructions)
+    )
+
+  def test_has_dequant_before_output_false(self):
+    instructions = {
+        'tensor1': qtyping.TensorTransformationInsts(
+            'tensor1',
+            0,
+            instructions=[
+                qtyping.TransformationInst(
+                    transformation=qtyping.QuantTransformation.ADD_DEQUANTIZE,
+                    tensor_id=0,
+                    producer=0,
+                    consumers=[1],
+                )
+            ],
+        )
+    }
+    self.assertFalse(
+        self._model_modifier._has_dequant_before_output(instructions)
+    )
+
+  def test_pad_bytearray(self):
+    arr = bytearray(b'\x01\x02\x03')
+    self._model_modifier._pad_bytearray(arr)
+    self.assertLen(arr, 16)
+    self.assertEqual(arr, b'\x01\x02\x03' + b'\0' * 13)
+
+    arr = bytearray(b'\x01' * 16)
+    self._model_modifier._pad_bytearray(arr)
+    self.assertLen(arr, 16)
+
+    arr = bytearray(b'\x01' * 17)
+    self._model_modifier._pad_bytearray(arr)
+    self.assertLen(arr, 32)
+
+
+class ModelModifierTestWithSignature(parameterized.TestCase):
+
+  def setUp(self):
+    super().setUp()
+    self._model_path = os.path.join(
+        TEST_DATA_PREFIX_PATH,
+        'tests/models/single_fc.tflite',
+    )
+    self._model_content: bytes = tfl_flatbuffer_utils.get_model_content(
+        self._model_path
+    )
+    self._model_modifier = model_modifier.ModelModifier(self._model_content)
+
+  def test_update_signature_defs_for_dequant_output_succeeds(self):
+    # This is a simplified test that only checks if the function runs without
+    # crashing and returns a model. A more thorough test with a model
+    # with a known signature was added in `quantizer_test`.
+    model_bytearray = flatbuffer_utils.read_model_from_bytearray(
+        self._model_content
+    )
+    updated_model = (
+        self._model_modifier._update_signature_defs_for_dequant_output(
+            model_bytearray, bytearray(self._model_content)
+        )
+    )
+    self.assertIsNotNone(updated_model)
+
 
 if __name__ == '__main__':
   googletest.main()
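`test_pad_bytearray` pins down the 16-byte alignment contract (3 pads to 16, exact multiples stay untouched, 17 pads to 32). A free-standing sketch of a helper that satisfies those assertions; the real method is `ModelModifier._pad_bytearray`:

```python
def pad_bytearray(bytearr: bytearray) -> None:
    """Zero-pads a buffer in place to the next multiple of 16 bytes."""
    remainder = len(bytearr) % 16
    if remainder != 0:
        bytearr.extend(b"\0" * (16 - remainder))


arr = bytearray(b"\x01\x02\x03")
pad_bytearray(arr)
assert arr == b"\x01\x02\x03" + b"\0" * 13  # 3 -> 16

arr = bytearray(b"\x01" * 16)
pad_bytearray(arr)
assert len(arr) == 16  # already aligned, unchanged

arr = bytearray(b"\x01" * 17)
pad_bytearray(arr)
assert len(arr) == 32  # 17 -> 32
```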
ai_edge_quantizer/model_validator.py

@@ -25,7 +25,7 @@ from typing import Any, Optional, Union
 import numpy as np
 
 from ai_edge_quantizer.utils import tfl_interpreter_utils as utils
-
+import os  # tensorflow.python.platform.gfile  # pylint: disable=g-direct-tensorflow-import
 
 
 _DEFAULT_SIGNATURE_KEY = utils.DEFAULT_SIGNATURE_KEY
@@ -118,7 +118,8 @@ class ComparisonResult:
     for name in utils.get_input_tensor_names(
         self._reference_model, signature_key
     ):
-
+      if name in result:
+        input_tensor_results[name] = result.pop(name)
 
     output_tensor_results = {}
     for name in utils.get_output_tensor_names(
@@ -136,7 +137,8 @@ class ComparisonResult:
         self._reference_model,
         subgraph_index,
     ):
-
+      if name in result:
+        constant_tensor_results[name] = result.pop(name)
 
     self._comparison_results[signature_key] = SingleSignatureComparisonResult(
         error_metric=error_metric,
@@ -192,7 +194,7 @@ class ComparisonResult:
     result_save_path = os.path.join(
         save_folder, model_name + '_comparison_result.json'
     )
-    with
+    with open(result_save_path, 'w') as output_file_handle:
       output_file_handle.write(json.dumps(result))
 
     # TODO: b/365578554 - Remove after ME is updated to use the new json format.
@@ -204,7 +206,7 @@ class ComparisonResult:
     json_save_path = os.path.join(
         save_folder, model_name + '_comparison_result_me_input.json'
     )
-    with
+    with open(json_save_path, 'w') as output_file_handle:
       output_file_handle.write(json_object)
@@ -214,6 +216,7 @@ def _setup_validation_interpreter(
     signature_key: Optional[str],
     use_xnnpack: bool,
     num_threads: int,
+    preserve_all_tensors: bool = True,
 ) -> tuple[Any, int, dict[str, Any]]:
   """Setup the interpreter for validation given a signature key.
 
@@ -224,13 +227,17 @@ def _setup_validation_interpreter(
       model only has one signature, this can be set to None.
     use_xnnpack: Whether to use xnnpack for the interpreter.
     num_threads: The number of threads to use for the interpreter.
+    preserve_all_tensors: Whether to preserve all tensors.
 
   Returns:
     A tuple of interpreter, subgraph_index and tensor_name_to_details.
   """
 
   interpreter = utils.create_tfl_interpreter(
-      tflite_model=model,
+      tflite_model=model,
+      use_xnnpack=use_xnnpack,
+      num_threads=num_threads,
+      preserve_all_tensors=preserve_all_tensors,
   )
   utils.invoke_interpreter_signature(
       interpreter, signature_input, signature_key
@@ -255,6 +262,7 @@ def compare_model(
     compare_fn: Callable[[Any, Any], float],
     use_xnnpack: bool = True,
     num_threads: int = 16,
+    validate_output_tensors_only: bool = False,
 ) -> ComparisonResult:
   """Compares model tensors over a model signature using the compare_fn.
 
@@ -275,10 +283,13 @@ def compare_model(
       single float value.
     use_xnnpack: Whether to use xnnpack for the interpreter.
     num_threads: The number of threads to use for the interpreter.
+    validate_output_tensors_only: If True, only compare output tensors.
+      Otherwise, compare all tensors.
 
   Returns:
     A ComparisonResult object.
   """
+  preserve_all_tensors = not validate_output_tensors_only
   model_comparion_result = ComparisonResult(reference_model, target_model)
   for signature_key, signature_inputs in test_data.items():
     comparison_results = {}
@@ -291,6 +302,7 @@ def compare_model(
             signature_key,
             use_xnnpack,
             num_threads,
+            preserve_all_tensors=preserve_all_tensors,
         )
     )
     targ_interpreter, targ_subgraph_index, targ_tensor_name_to_details = (
@@ -300,12 +312,23 @@ def compare_model(
             signature_key,
             use_xnnpack,
             num_threads,
+            preserve_all_tensors=preserve_all_tensors,
         )
     )
-    # Compare the cached tensor
-
+    # Compare the cached tensor value
+    tensor_names_to_compare = (
+        utils.get_output_tensor_names(reference_model, signature_key)
+        if validate_output_tensors_only
+        else list(ref_tensor_name_to_details.keys())
+    )
+
+    for tensor_name in tensor_names_to_compare:
+      detail = ref_tensor_name_to_details[tensor_name]
       if detail['dtype'] == np.object_:
         continue
+      # Ignore tensors where any dimension of the shape is 0.
+      if not np.all(detail['shape']):
+        continue
       if tensor_name in targ_tensor_name_to_details:
        if tensor_name not in comparison_results:
           comparison_results[tensor_name] = []
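The new `validate_output_tensors_only` flag disables `preserve_all_tensors` and restricts the comparison to signature outputs, trading per-tensor coverage for speed and memory. A hypothetical call sketch; the model bytes, dataset, and metric below are placeholders rather than package fixtures:

```python
import numpy as np


def mean_squared_difference(ref, target) -> float:
    """A simple compare_fn candidate: lower is better."""
    ref = np.asarray(ref, dtype=np.float64)
    target = np.asarray(target, dtype=np.float64)
    return float(np.mean((ref - target) ** 2))


# Assumed call shape, based only on the signature visible in this diff:
# result = model_validator.compare_model(
#     reference_model=float_model_bytes,
#     target_model=quantized_model_bytes,
#     test_data={"serving_default": [{"input_1": sample_batch}]},
#     compare_fn=mean_squared_difference,
#     validate_output_tensors_only=True,  # new in this release
# )
```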
ai_edge_quantizer/params_generator.py

@@ -35,12 +35,12 @@ class ParamsGenerator:
   def __init__(self, float_tflite: Union[str, bytes]):
     self.flatbuffer_model = tfl_flatbuffer_utils.read_model(float_tflite)
 
-    if not tfl_flatbuffer_utils.is_float_model(self.flatbuffer_model):
-
-
-
-
-
+    # if not tfl_flatbuffer_utils.is_float_model(self.flatbuffer_model):
+    #   raise ValueError(
+    #       'The input model for quantization parameters generation is not a'
+    #       ' float model. Please check the model (e.g., if it is already'
+    #       ' quantized).'
+    #   )
     self._check_tensor_names_are_unique()
     self.buffer_to_tensors: dict[int, list[Any]] = (
         tfl_flatbuffer_utils.buffer_to_tensors(self.flatbuffer_model)
@@ -78,8 +78,6 @@ class ParamsGenerator:
     skip_subgraphs = set()
     op_codes = self.flatbuffer_model.operatorCodes
     for sg_ind, subgraph in enumerate(self.flatbuffer_model.subgraphs):
-      if sg_ind in skip_subgraphs:
-        continue
 
       graph_info = qtyping.GraphInfo(
           subgraph.tensors, self.flatbuffer_model.buffers
@@ -109,7 +107,10 @@ class ParamsGenerator:
         algorithm_name, op_quant_config = (
            model_recipe_manager.get_quantization_configs(op_key, op_scope)
         )
-
+
+        if sg_ind in skip_subgraphs or policy.is_non_quantizable_composite_op(
+            op
+        ):
           algorithm_name = algorithm_manager.AlgorithmName.NO_QUANTIZE
 
         if algorithm_name == algorithm_manager.AlgorithmName.NO_QUANTIZE:
@@ -408,7 +409,11 @@ class ParamsGenerator:
     buffers_to_duplicate = []
     tensor_names_to_duplicate = []
     for buffer_idx, tensors in self.buffer_to_tensors.items():
-      if
+      # TODO: b/458797890 - Investigate if skipping buffer_idx == 0 is a
+      # correct fix, or if it just covers up a deeper issue. This is only
+      # required when statically quantizing models that have already been
+      # quantized dynamically.
+      if not tensors or buffer_idx == 0:
         continue
       # Check if any of the tensors needs to be duplicated.
       for tensor in tensors:
@@ -508,6 +513,8 @@ def _compatible_tensor_params(
   float_source_transformations = [
       _QuantTrans.ADD_QUANTIZE,
       _QuantTrans.NO_QUANTIZE,
+      _QuantTrans.INSERT_HADAMARD_ROTATION,
+      _QuantTrans.INSERT_DECOMPOSED_HADAMARD_ROTATION,
   ]
   quantized_source_transformations = [
       _QuantTrans.QUANTIZE_TENSOR,