PyPI - ai-edge-quantizer-nightly - Versions diffs - 0.4.0.dev20251002__py3-none-any.whl → 0.4.0.dev20251004__py3-none-any.whl - Mend

ai-edge-quantizer-nightly 0.4.0.dev20251002__py3-none-any.whl → 0.4.0.dev20251004__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

ai_edge_quantizer/model_modifier.py CHANGED Viewed

@@ -17,6 +17,7 @@
 from collections.abc import Sequence
 import copy
+import logging
 import numpy as np
@@ -24,10 +25,15 @@ from ai_edge_quantizer import qtyping
 from ai_edge_quantizer import transformation_instruction_generator
 from ai_edge_quantizer import transformation_performer
 from ai_edge_quantizer.utils import tfl_flatbuffer_utils
+from ai_edge_quantizer.utils import tfl_interpreter_utils
+from ai_edge_litert import interpreter as tfl  # pylint: disable=g-direct-tensorflow-import
 from ai_edge_litert import schema_py_generated  # pylint: disable=g-direct-tensorflow-import
 from tensorflow.lite.tools import flatbuffer_utils  # pylint: disable=g-direct-tensorflow-import
+_DEQUANT_SUFFIX = "_dequant"
 class ModelModifier:
   """Model Modifier class that produce the final quantized TFlite model."""
@@ -105,10 +111,94 @@ class ModelModifier:
     )
     constant_buffer_size = self._process_constant_map(quantized_model)
     # we leave 256MB for the model architecture.
-    if constant_buffer_size > 2**31 - 2**28:
-      return self._serialize_large_model(quantized_model)
-    else:
-      return self._serialize_small_model(quantized_model)
+    serialize_fun = (
+        self._serialize_large_model
+        if constant_buffer_size > 2**31 - 2**28
+        else self._serialize_small_model
+    )
+    serialized_quantized_model = serialize_fun(quantized_model)
+    # Update signature defs if dequant is inserted before output.
+    if self._has_dequant_before_output(instructions):
+      quantized_model = self._update_signature_defs_for_dequant_output(
+          quantized_model, serialized_quantized_model
+      )
+      serialized_quantized_model = serialize_fun(quantized_model)
+    return serialized_quantized_model
+  def _update_signature_defs_for_dequant_output(
+      self, model: schema_py_generated.ModelT, serialized_model: bytearray
+  ):
+    """Updates the signature definitions in the model.
+    This function is called when a dequantize operation is inserted before
+    an output tensor. It updates the tensor index in the signature
+    definitions to point to the newly inserted dequantize output tensor.
+    Args:
+      model: The TFlite ModelT object.
+      serialized_model: The serialized bytearray of the TFlite model.
+    Returns:
+      The updated TFlite ModelT object.
+    """
+    interpreter = tfl.Interpreter(model_content=bytes(serialized_model))
+    for signature_def in model.signatureDefs:
+      signature_key = signature_def.signatureKey.decode("utf-8")
+      logging.info("Signature = %s", signature_key)
+      subgraph_idx = tfl_interpreter_utils.get_signature_main_subgraph_index(
+          interpreter, signature_key
+      )
+      output_details = interpreter.get_signature_runner(
+          signature_key
+      ).get_output_details()
+      subgraph = model.subgraphs[subgraph_idx]
+      graph_info = qtyping.GraphInfo(subgraph.tensors, model.buffers)
+      for output in subgraph.outputs:
+        tensor_name = tfl_flatbuffer_utils.get_tensor_name(
+            graph_info.subgraph_tensors[output]
+        )
+        logging.info("\tOutput tensor = `%s`", tensor_name)
+        for signature_name, tensor_details in output_details.items():
+          if tensor_details["name"] + _DEQUANT_SUFFIX == tensor_name:
+            logging.info(
+                "\t\tfound tensor mapping: `%s`->`%s` for signature name: `%s`",
+                tensor_details["name"],
+                tensor_name,
+                signature_name,
+            )
+            for signature_item in signature_def.outputs:
+              if signature_item.name.decode("utf-8") == signature_name:
+                signature_item.tensorIndex = output
+                logging.info(
+                    "\t\t\tswapped tensor index: %s->%s",
+                    tensor_details["index"],
+                    output,
+                )
+                break
+            break
+    return model
+  def _has_dequant_before_output(
+      self, instructions: dict[str, qtyping.TensorTransformationInsts]
+  ) -> bool:
+    """Check if the model has dequant insert to output."""
+    for tensor_name, tensor_trans_insts in instructions.items():
+      for instr in tensor_trans_insts.instructions:
+        if (
+            qtyping.QuantTransformation.ADD_DEQUANTIZE == instr.transformation
+            and instr.consumers == [-1]
+        ):
+          logging.info(
+              "Found dequant insert to output for tensor: %s", tensor_name
+          )
+          return True
+    return False
   def _process_constant_map(
       self, quantized_model: schema_py_generated.ModelT
@@ -142,7 +232,7 @@ class ModelModifier:
     remainder = len(bytearr) % 16
     if remainder != 0:
       padding_size = 16 - remainder
-      bytearr.extend(b'\0' * padding_size)
+      bytearr.extend(b"\0" * padding_size)
   # TODO: b/333797307 - support > 2GB output model
   def _serialize_large_model(

ai_edge_quantizer/model_modifier_test.py CHANGED Viewed

@@ -125,6 +125,86 @@ class ModelModifierTest(parameterized.TestCase):
     loosen_mem_use_factor = 4.5
     self.assertLess(mem_peak / len(self._model_content), loosen_mem_use_factor)
+  def test_has_dequant_before_output_true(self):
+    instructions = {
+        'tensor1': qtyping.TensorTransformationInsts(
+            'tensor1',
+            0,
+            instructions=[
+                qtyping.TransformationInst(
+                    transformation=qtyping.QuantTransformation.ADD_DEQUANTIZE,
+                    tensor_id=0,
+                    producer=0,
+                    consumers=[-1],
+                )
+            ],
+        )
+    }
+    self.assertTrue(
+        self._model_modifier._has_dequant_before_output(instructions)
+    )
+  def test_has_dequant_before_output_false(self):
+    instructions = {
+        'tensor1': qtyping.TensorTransformationInsts(
+            'tensor1',
+            0,
+            instructions=[
+                qtyping.TransformationInst(
+                    transformation=qtyping.QuantTransformation.ADD_DEQUANTIZE,
+                    tensor_id=0,
+                    producer=0,
+                    consumers=[1],
+                )
+            ],
+        )
+    }
+    self.assertFalse(
+        self._model_modifier._has_dequant_before_output(instructions)
+    )
+  def test_pad_bytearray(self):
+    arr = bytearray(b'\x01\x02\x03')
+    self._model_modifier._pad_bytearray(arr)
+    self.assertLen(arr, 16)
+    self.assertEqual(arr, b'\x01\x02\x03' + b'\0' * 13)
+    arr = bytearray(b'\x01' * 16)
+    self._model_modifier._pad_bytearray(arr)
+    self.assertLen(arr, 16)
+    arr = bytearray(b'\x01' * 17)
+    self._model_modifier._pad_bytearray(arr)
+    self.assertLen(arr, 32)
+class ModelModifierTestWithSignature(parameterized.TestCase):
+  def setUp(self):
+    super().setUp()
+    self._model_path = os.path.join(
+        TEST_DATA_PREFIX_PATH,
+        'tests/models/single_fc.tflite',
+    )
+    self._model_content: bytes = tfl_flatbuffer_utils.get_model_content(
+        self._model_path
+    )
+    self._model_modifier = model_modifier.ModelModifier(self._model_content)
+  def test_update_signature_defs_for_dequant_output_succeeds(self):
+    # This is a simplified test that only checks if the function runs without
+    # crashing and returns a model. A more thorough test with a model
+    # with a known signature was added in `quantizer_test`.
+    model_bytearray = flatbuffer_utils.read_model_from_bytearray(
+        self._model_content
+    )
+    updated_model = (
+        self._model_modifier._update_signature_defs_for_dequant_output(
+            model_bytearray, bytearray(self._model_content)
+        )
+    )
+    self.assertIsNotNone(updated_model)
 if __name__ == '__main__':
   googletest.main()

ai_edge_quantizer/quantizer_test.py CHANGED Viewed

@@ -51,6 +51,30 @@ def _get_calibration_data(num_samples: int = 16):
   return calibration_data
+def _is_all_signature_defs_inputs_float(model_content: bytes):
+  tfl_interpreter = tfl_interpreter_utils.create_tfl_interpreter(model_content)
+  for signature_key in tfl_interpreter.get_signature_list():
+    input_details = tfl_interpreter.get_signature_runner(
+        signature_key
+    ).get_input_details()
+    for tensor_details in input_details.values():
+      if tensor_details['dtype'] != np.float32:
+        return False
+  return True
+def _is_all_signature_defs_outputs_float(model_content: bytes):
+  tfl_interpreter = tfl_interpreter_utils.create_tfl_interpreter(model_content)
+  for signature_key in tfl_interpreter.get_signature_list():
+    output_details = tfl_interpreter.get_signature_runner(
+        signature_key
+    ).get_output_details()
+    for tensor_details in output_details.values():
+      if tensor_details['dtype'] != np.float32:
+        return False
+  return True
 class QuantizerTest(parameterized.TestCase):
   def setUp(self):
@@ -547,21 +571,21 @@ class QuantizerToyGemma2Test(parameterized.TestCase):
         'signature_1': [{
             'cache_0': _RNG.random(size=(1, 100, 4, 4), dtype=np.float32),
             'cache_1': _RNG.random(size=(1, 100, 4, 4), dtype=np.float32),
-            'positions': _RNG.integers(low=0, high=10, size=(1, 100)).astype(
-                np.int32
+            'positions': (
+                _RNG.integers(low=0, high=10, size=(1, 100)).astype(np.int32)
             ),
-            'tokens': _RNG.integers(low=0, high=10, size=(1, 100)).astype(
-                np.int32
+            'tokens': (
+                _RNG.integers(low=0, high=10, size=(1, 100)).astype(np.int32)
             ),
         }],
         'signature_2': [{
             'cache_0': _RNG.random(size=(1, 100, 4, 4), dtype=np.float32),
             'cache_1': _RNG.random(size=(1, 100, 4, 4), dtype=np.float32),
-            'positions': _RNG.integers(low=0, high=10, size=(1, 100)).astype(
-                np.int32
+            'positions': (
+                _RNG.integers(low=0, high=10, size=(1, 100)).astype(np.int32)
             ),
-            'tokens': _RNG.integers(low=0, high=10, size=(1, 100)).astype(
-                np.int32
+            'tokens': (
+                _RNG.integers(low=0, high=10, size=(1, 100)).astype(np.int32)
             ),
         }],
     }
@@ -578,8 +602,8 @@ class QuantizerToyGemma2Test(parameterized.TestCase):
     )
     self._quantizer.update_quantization_recipe(
-        regex='StatefulPartitionedCall',
-        operation_name=qtyping.TFLOperationName.FULLY_CONNECTED,
+        regex='.*',
+        operation_name=qtyping.TFLOperationName.OUTPUT,
         algorithm_key=_AlgorithmName.NO_QUANTIZE,
     )
@@ -591,6 +615,90 @@ class QuantizerToyGemma2Test(parameterized.TestCase):
     self._quantizer.quantize(calib_result)
     self.assertIsNotNone(self._quantizer._result.quantized_model)
+  def test_toy_gemma2_update_signature_defs_succeeds(self):
+    self.assertTrue(
+        _is_all_signature_defs_outputs_float(
+            open(self._test_model_path, 'rb').read()
+        )
+    )
+    calib_result = self._quantizer.calibrate(
+        self._toy_gemma2_calibration_dataset
+    )
+    self.assertIsNotNone(calib_result)
+    self._quantizer.quantize(calib_result)
+    self.assertIsNotNone(self._quantizer._result.quantized_model)
+    self.assertTrue(
+        _is_all_signature_defs_outputs_float(
+            self._quantizer._result.quantized_model
+        )
+    )
+class QuantizerFullyConnectedTest(parameterized.TestCase):
+  def setUp(self):
+    super().setUp()
+    self._tmp_save_path = self.create_tempdir().full_path
+    self._test_model_path = os.path.join(
+        TEST_DATA_PREFIX_PATH,
+        'tests/models/single_fc.tflite',
+    )
+    self._test_recipe_path = os.path.join(
+        TEST_DATA_PREFIX_PATH,
+        'recipes/default_a8w8_recipe.json',
+    )
+    with open(self._test_recipe_path) as json_file:
+      self._test_recipe = json.load(json_file)
+    self._quantizer = quantizer.Quantizer(
+        self._test_model_path, self._test_recipe_path
+    )
+    self._quantizer.update_quantization_recipe(
+        regex='.*',
+        operation_name=qtyping.TFLOperationName.INPUT,
+        algorithm_key=_AlgorithmName.NO_QUANTIZE,
+    )
+    self._quantizer.update_quantization_recipe(
+        regex='.*',
+        operation_name=qtyping.TFLOperationName.OUTPUT,
+        algorithm_key=_AlgorithmName.NO_QUANTIZE,
+    )
+  def test_fully_connected_quantization_succeeds(self):
+    calib_result = self._quantizer.calibrate(
+        tfl_interpreter_utils.create_random_normal_input_data(
+            self._test_model_path, num_samples=4
+        )
+    )
+    self.assertIsNotNone(calib_result)
+    self._quantizer.quantize(calib_result)
+    self.assertIsNotNone(self._quantizer._result.quantized_model)
+  def test_fully_connected_quantization_update_signature_defs_succeeds(self):
+    model_content = open(self._test_model_path, 'rb').read()
+    self.assertTrue(_is_all_signature_defs_inputs_float(model_content))
+    self.assertTrue(_is_all_signature_defs_outputs_float(model_content))
+    calib_result = self._quantizer.calibrate(
+        tfl_interpreter_utils.create_random_normal_input_data(
+            self._test_model_path, num_samples=4
+        )
+    )
+    self.assertIsNotNone(calib_result)
+    quant_result = self._quantizer.quantize(calib_result)
+    self.assertIsNotNone(quant_result.quantized_model)
+    self.assertTrue(
+        _is_all_signature_defs_inputs_float(quant_result.quantized_model)
+    )
+    self.assertTrue(
+        _is_all_signature_defs_outputs_float(quant_result.quantized_model)
+    )
 if __name__ == '__main__':
   googletest.main()

{ai_edge_quantizer_nightly-0.4.0.dev20251002.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251004.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-quantizer-nightly
-Version: 0.4.0.dev20251002
+Version: 0.4.0.dev20251004
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI

{ai_edge_quantizer_nightly-0.4.0.dev20251002.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251004.dist-info}/RECORD RENAMED Viewed

@@ -6,15 +6,15 @@ ai_edge_quantizer/calibrator.py,sha256=Sms7_AIHPH9G5xFaz5Ef3a5gPhxuIWQI8d2LUM8C9
 ai_edge_quantizer/calibrator_test.py,sha256=ZLzIMWB2FSFU4TOatDioYuwp_kLh8iSCefZ5_Q9FU7s,11900
 ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
 ai_edge_quantizer/default_policy.py,sha256=6eJA0eX5Npv8lw_0EDS5iPldInoURQKEDhDZ272VG1Q,11770
-ai_edge_quantizer/model_modifier.py,sha256=teGa8I6kGvn6TQY6Xv53YFIc_pQEhNvM9Zb4bvhezyw,7110
-ai_edge_quantizer/model_modifier_test.py,sha256=cJd04SLOG-fQZZNZPcisoBLx3cLtWEwGqUBbLb-pif4,4751
+ai_edge_quantizer/model_modifier.py,sha256=U70JByv6CItP8tg4bdyMfX-R3UlwylAGSviZkF_FSAM,10468
+ai_edge_quantizer/model_modifier_test.py,sha256=CV4pgMEQkBJr_qbYR720TO8HBCutbEYLHptDHgdQMUE,7274
 ai_edge_quantizer/model_validator.py,sha256=Hj0_5o-Oa3dSlJ3ryVjRhvsyelHNyek1GrtG9buMczg,13153
 ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
 ai_edge_quantizer/params_generator.py,sha256=0w-sDGk84sVNkXoduon1wDqq30sGOHVgBVbdg44QVF4,20153
 ai_edge_quantizer/params_generator_test.py,sha256=RDYoRZDJfEZRtjlTAU2kZ_4t3JHOqEHxfJX9V4ETAhg,40597
 ai_edge_quantizer/qtyping.py,sha256=7aEMPA4qr4CGD3NXtZgG2fDoQX5NzK9jwSv1yWNqQV4,17149
 ai_edge_quantizer/quantizer.py,sha256=ckAEOnnBxuCKZuvlzdChevCKPuE-IeDPHCNtFTWr250,17857
-ai_edge_quantizer/quantizer_test.py,sha256=m6f4ayyaF3yQb9i4V0aFAbmGw0OKZ2Zam1RoTPh-u24,22917
+ai_edge_quantizer/quantizer_test.py,sha256=bh4IowxRF249p_XKIKQ0f17PmeDddfcOUzvQ2ht1L0E,26530
 ai_edge_quantizer/recipe.py,sha256=MEkfQ2Sg3KAE9LAORHWcbjYNPg06EUbwc1d-VspQA2U,6461
 ai_edge_quantizer/recipe_manager.py,sha256=6l2uq8KL23KLu9OQDmPGkxrFiwHrdDB9xnn-ni8WdEM,15036
 ai_edge_quantizer/recipe_manager_test.py,sha256=qjgGUF-wggXnSXqZ5khmqrDMIQI5CShk52IVWTahq6s,36817
@@ -74,8 +74,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
 ai_edge_quantizer/utils/validation_utils.py,sha256=yJH9Cvepr_XWn-3Hsh91j7HuC5iLQHAyskyQ48bGNoc,4797
 ai_edge_quantizer/utils/validation_utils_test.py,sha256=1sblJWHLTYTbn1Qi9rwnrREOSXRy5KwHAWSwgI1e_aU,3697
-ai_edge_quantizer_nightly-0.4.0.dev20251002.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ai_edge_quantizer_nightly-0.4.0.dev20251002.dist-info/METADATA,sha256=gx_gBIYVh7XDUrBl-uDmPRRRrawHIroH_14pjZmhL4w,1508
-ai_edge_quantizer_nightly-0.4.0.dev20251002.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_quantizer_nightly-0.4.0.dev20251002.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
-ai_edge_quantizer_nightly-0.4.0.dev20251002.dist-info/RECORD,,
+ai_edge_quantizer_nightly-0.4.0.dev20251004.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.4.0.dev20251004.dist-info/METADATA,sha256=xDd1vh-k3X8Sl3r__T0Wjeo6tfeqzBGf5t4xjG16plk,1508
+ai_edge_quantizer_nightly-0.4.0.dev20251004.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_quantizer_nightly-0.4.0.dev20251004.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.4.0.dev20251004.dist-info/RECORD,,

{ai_edge_quantizer_nightly-0.4.0.dev20251002.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251004.dist-info}/LICENSE RENAMED Viewed

File without changes

{ai_edge_quantizer_nightly-0.4.0.dev20251002.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251004.dist-info}/WHEEL RENAMED Viewed

File without changes

{ai_edge_quantizer_nightly-0.4.0.dev20251002.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251004.dist-info}/top_level.txt RENAMED Viewed

File without changes

ai-edge-quantizer-nightly 0.4.0.dev20251002__py3-none-any.whl → 0.4.0.dev20251004__py3-none-any.whl

ai-edge-quantizer-nightly 0.4.0.dev20251002__py3-none-any.whl → 0.4.0.dev20251004__py3-none-any.whl