PyPI - ai-edge-quantizer-nightly - Versions diffs - 0.4.0.dev20250829__py3-none-any.whl → 0.4.0.dev20250831__py3-none-any.whl - Mend

ai-edge-quantizer-nightly 0.4.0.dev20250829__py3-none-any.whl → 0.4.0.dev20250831__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

ai_edge_quantizer/transformation_instruction_generator.py CHANGED Viewed

@@ -23,10 +23,13 @@ from collections.abc import Iterator
 import dataclasses
 from typing import Optional
 from ai_edge_quantizer import qtyping
+from ai_edge_quantizer.algorithms.utils import common_utils
+from ai_edge_quantizer.utils import constrained_ops_utils
 from ai_edge_quantizer.utils import tfl_flatbuffer_utils
 from ai_edge_litert import schema_py_generated  # pylint: disable=g-direct-tensorflow-import
+_OpQuantConstraint = common_utils.OpQuantConstraint
 _QuantTransformation = qtyping.QuantTransformation
@@ -165,6 +168,16 @@ class TransformationInstructionsGenerator:
     else:
       self.flatbuffer_model = tfl_flatbuffer_utils.read_model(float_tflite)
       self._create_tensor_name_to_graph_info_map()
+    self._same_as_input_scale_ops = (
+        constrained_ops_utils.get_constrained_op_list(
+            _OpQuantConstraint.SAME_AS_INPUT_SCALE
+        )
+    )
+    self._same_as_output_scale_ops = (
+        constrained_ops_utils.get_constrained_op_list(
+            _OpQuantConstraint.SAME_AS_OUTPUT_SCALE
+        )
+    )
   @dataclasses.dataclass(frozen=True)
   class TensorGraphInfo:
@@ -506,6 +519,89 @@ class TransformationInstructionsGenerator:
       ):
         instructions.pop(i)
+  def _is_valid_quantize_requantize_pair(
+      self,
+      instr_0: qtyping.TransformationInst,
+      instr_1: qtyping.TransformationInst,
+  ) -> bool:
+    """Checks if the two instructions form a valid quantize and requantize pair."""
+    return (
+        instr_0.transformation == _QuantTransformation.QUANTIZE_TENSOR
+        and instr_1.transformation == _QuantTransformation.ADD_QUANTIZE
+        and instr_0.consumers == instr_1.consumers
+    )
+  def _is_op_constrained(
+      self, subgraph_id: int, op_index: int
+  ) -> bool:
+    """Checks if the op has same as input or output scale constraints."""
+    op_name = tfl_flatbuffer_utils.get_op_name_by_index(
+        self.flatbuffer_model, subgraph_id, op_index
+    )
+    return (
+        op_name in self._same_as_input_scale_ops
+        or op_name in self._same_as_output_scale_ops
+    )
+  def _are_quant_params_compatible(
+      self,
+      params_0: qtyping.UniformQuantParams,
+      params_1: qtyping.UniformQuantParams,
+  ) -> bool:
+    """Checks if quant params are the same except for the scale and zero point."""
+    ignore_set = {"scale", "zero_point"}
+    for field_info in dataclasses.fields(qtyping.UniformQuantParams):
+      field_name = field_info.name
+      if field_name in ignore_set:
+        continue
+      if getattr(params_0, field_name) != getattr(params_1, field_name):
+        return False
+    return True
+  def _eliminate_requantization_for_nonconstrained_provider(
+      self, tensor_trans_insts: qtyping.TensorTransformationInsts
+  ) -> None:
+    """Removes requantization for tensors with a non-constrained provider.
+    Fuses [QUANTIZE_TENSOR, ADD_QUANTIZE] instructions when a tensor has a
+    provider op without same as input/ouput scale constrains. Quant params from
+    the second instruction are copied to the first one and ADD_QUANTIZE is
+    removed.
+    Args:
+      tensor_trans_insts: Transformation instructions for a tensor.
+    """
+    instructions = tensor_trans_insts.instructions
+    if instructions is None or len(instructions) != 2:
+      return
+    instr_0, instr_1 = instructions
+    params_0 = instr_0.parameters
+    params_1 = instr_1.parameters
+    producer_op_index = instr_0.producer
+    if (
+        not isinstance(params_0, qtyping.UniformQuantParams)
+        or not isinstance(params_1, qtyping.UniformQuantParams)
+        or not self._is_valid_quantize_requantize_pair(instr_0, instr_1)
+        or not self._are_quant_params_compatible(params_0, params_1)
+        # To avoid fusion when subgraph inputs connected to the main subgraph
+        # (e.g. while_body), we skip all tensors with no producer.
+        or producer_op_index == -1
+        # Can't apply fusion to tensors with a constrained producer since that
+        # will break the constraint.
+        or self._is_op_constrained(
+            tensor_trans_insts.subgraph_id, producer_op_index
+        )
+    ):
+      return
+    # Fuse the quantize and requantize.
+    instr_0.parameters = dataclasses.replace(
+        params_0, scale=params_1.scale, zero_point=params_1.zero_point
+    )
+    # Remove the requantize instruction.
+    instructions.pop(1)
   def _quant_params_to_transformation_insts(
       self,
       param: qtyping.TensorTransformationParams,
@@ -578,6 +674,12 @@ class TransformationInstructionsGenerator:
     # will raise an error if the instructions are not valid.
     self._check_tensor_transformation_instructions_valid(tensor_trans_insts)
+    # Remove unnecessary [QUANTIZE_TENSOR, ADD_QUANTIZE] pairs for tensors with
+    # providers without same as input/output scale constraints.
+    self._eliminate_requantization_for_nonconstrained_provider(
+        tensor_trans_insts
+    )
     return tensor_trans_insts
   def _split_instructions_by_tensor_duplication(

ai_edge_quantizer/transformation_instruction_generator_test.py CHANGED Viewed

@@ -15,7 +15,9 @@
 """Tests for instruction_generator."""
+from collections.abc import Sequence
 import os
+from typing import Optional
 import numpy as np
@@ -1337,5 +1339,166 @@ class InstructionGeneratorTest(parameterized.TestCase):
       )
+class EliminateUnnecessaryRequantizationTest(parameterized.TestCase):
+  def setUp(self):
+    super().setUp()
+    self.ins_gen = instruction_generator.TransformationInstructionsGenerator(
+        os.path.join(TEST_DATA_PREFIX_PATH, "tests/models/conv_fc_mnist.tflite")
+    )
+  def _get_test_instruction(
+      self,
+      transformation: qtyping.QuantTransformation,
+      producer: int = -1,
+      consumers: Optional[Sequence[int]] = None,
+      qparams: Optional[qtyping.UniformQuantParams] = None,
+  ) -> qtyping.TransformationInst:
+    if consumers is None:
+      consumers = []
+    if qparams is None:
+      qparams = qtyping.UniformQuantParams(
+          num_bits=8,
+          quantized_dimension=None,
+          scale=np.array([1]),
+          zero_point=np.array([0]),
+      )
+    return qtyping.TransformationInst(
+        transformation=transformation,
+        producer=producer,
+        consumers=consumers,
+        parameters=qparams,
+        # Dummy values below.
+        tensor_id=0,
+    )
+  def _create_test_insts(
+      self, instructions: list[qtyping.TransformationInst]
+  ) -> qtyping.TensorTransformationInsts:
+    return qtyping.TensorTransformationInsts(
+        tensor_name="test_tensor", subgraph_id=0, instructions=instructions
+    )
+  def test_no_fusion_when_too_few_instructions(self):
+    tensor_insts = self._create_test_insts([
+        self._get_test_instruction(_QTransf.QUANTIZE_TENSOR),
+    ])
+    self.ins_gen._eliminate_requantization_for_nonconstrained_provider(
+        tensor_insts
+    )
+    self.assertLen(tensor_insts.instructions, 1)
+  def test_no_fusion_when_too_many_instructions(self):
+    tensor_insts = self._create_test_insts([
+        self._get_test_instruction(_QTransf.QUANTIZE_TENSOR),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE),
+        self._get_test_instruction(_QTransf.ADD_DEQUANTIZE),
+    ])
+    self.ins_gen._eliminate_requantization_for_nonconstrained_provider(
+        tensor_insts
+    )
+    self.assertLen(tensor_insts.instructions, 3)
+  def test_no_fusion_when_invalid_transformation_pair(self):
+    tensor_insts = self._create_test_insts([
+        self._get_test_instruction(_QTransf.ADD_DEQUANTIZE),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE),
+    ])
+    self.ins_gen._eliminate_requantization_for_nonconstrained_provider(
+        tensor_insts
+    )
+    self.assertLen(tensor_insts.instructions, 2)
+  def test_no_fusion_when_consumers_mismatch(self):
+    tensor_insts = self._create_test_insts([
+        self._get_test_instruction(_QTransf.QUANTIZE_TENSOR, consumers=[0]),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[1]),
+    ])
+    self.ins_gen._eliminate_requantization_for_nonconstrained_provider(
+        tensor_insts
+    )
+    self.assertLen(tensor_insts.instructions, 2)
+  def test_no_fusion_when_no_producer(self):
+    producer = -1
+    tensor_insts = self._create_test_insts([
+        self._get_test_instruction(_QTransf.QUANTIZE_TENSOR, producer),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE, producer),
+    ])
+    self.ins_gen._eliminate_requantization_for_nonconstrained_provider(
+        tensor_insts
+    )
+    self.assertLen(tensor_insts.instructions, 2)
+  def test_no_fusion_when_quant_params_are_incompatible(self):
+    params_8_bits = qtyping.UniformQuantParams(
+        8, None, np.array([1]), np.array([0])
+    )
+    params_16_bits = qtyping.UniformQuantParams(
+        16, None, np.array([1]), np.array([0])
+    )
+    tensor_insts = self._create_test_insts([
+        self._get_test_instruction(
+            _QTransf.QUANTIZE_TENSOR, qparams=params_8_bits
+        ),
+        self._get_test_instruction(
+            _QTransf.ADD_QUANTIZE, qparams=params_16_bits
+        ),
+    ])
+    self.ins_gen._eliminate_requantization_for_nonconstrained_provider(
+        tensor_insts
+    )
+    self.assertLen(tensor_insts.instructions, 2)
+  def test_no_fusion_when_producer_constrained(self):
+    # Reshape op (op index 2) has same as input scale constraint.
+    tensor_insts = self._create_test_insts([
+        self._get_test_instruction(_QTransf.QUANTIZE_TENSOR, producer=2),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE, producer=2),
+    ])
+    self.ins_gen._eliminate_requantization_for_nonconstrained_provider(
+        tensor_insts
+    )
+    self.assertLen(tensor_insts.instructions, 2)
+  def test_fusion_succeeds(self):
+    producer = 0
+    consumers = [1]
+    params_0 = qtyping.UniformQuantParams(
+        num_bits=8,
+        quantized_dimension=None,
+        scale=np.array([1]),
+        zero_point=np.array([0]),
+    )
+    params_1 = qtyping.UniformQuantParams(
+        num_bits=8,
+        quantized_dimension=None,
+        scale=np.array([2]),
+        zero_point=np.array([1]),
+    )
+    inst_0 = self._get_test_instruction(
+        _QTransf.QUANTIZE_TENSOR, producer, consumers, params_0
+    )
+    inst_1 = self._get_test_instruction(
+        _QTransf.ADD_QUANTIZE, producer, consumers, params_1
+    )
+    tensor_insts = self._create_test_insts([inst_0, inst_1])
+    self.ins_gen._eliminate_requantization_for_nonconstrained_provider(
+        tensor_insts
+    )
+    self.assertLen(tensor_insts.instructions, 1)
+    result_inst = tensor_insts.instructions[0]
+    self.assertEqual(result_inst.transformation, _QTransf.QUANTIZE_TENSOR)
+    result_params = result_inst.parameters
+    # Explicitly narrow the type for pytype.
+    if not isinstance(result_params, qtyping.UniformQuantParams):
+      self.fail("Fused instruction parameters are not UniformQuantParams")
+    self.assertEqual(result_params.scale, params_1.scale)
+    self.assertEqual(result_params.zero_point, params_1.zero_point)
 if __name__ == "__main__":
   googletest.main()

ai_edge_quantizer/utils/tfl_flatbuffer_utils.py CHANGED Viewed

@@ -342,3 +342,12 @@ def get_op_side_effect_subgraphs(
     return [opts.decompositionSubgraphIndex]
   # Can add other nested ops here (control flow ops, etc).
   return []
+def get_op_name_by_index(
+    flatbuffer_model: Any, subgraph_id: int, op_index: int
+) -> str:
+  """Get the op name from the flatbuffer model."""
+  op = flatbuffer_model.subgraphs[subgraph_id].operators[op_index]
+  builtin_code = flatbuffer_model.operatorCodes[op.opcodeIndex].builtinCode
+  return TFL_OP_CODE_TO_NAME[builtin_code]

{ai_edge_quantizer_nightly-0.4.0.dev20250829.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250831.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-quantizer-nightly
-Version: 0.4.0.dev20250829
+Version: 0.4.0.dev20250831
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI

{ai_edge_quantizer_nightly-0.4.0.dev20250829.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250831.dist-info}/RECORD RENAMED Viewed

@@ -19,8 +19,8 @@ ai_edge_quantizer/recipe.py,sha256=MEkfQ2Sg3KAE9LAORHWcbjYNPg06EUbwc1d-VspQA2U,6
 ai_edge_quantizer/recipe_manager.py,sha256=6dgbE-IZfEetzXH3p3Qm_9eQutNDOpZnMpiaLTbP-ZQ,14744
 ai_edge_quantizer/recipe_manager_test.py,sha256=H-B75vwPN5ND-nUa3pOXizeHTv4mufPiC5cL_OlDIYU,34040
 ai_edge_quantizer/recipe_test.py,sha256=GKuo6N65wKLS2xwSpjd-BWWeVRpF1zc7Yt7phSMYSxA,5905
-ai_edge_quantizer/transformation_instruction_generator.py,sha256=iMGXy7_ufqgQRzu4drAfO31VGdze35peEFh1BMZlVHk,27714
-ai_edge_quantizer/transformation_instruction_generator_test.py,sha256=Zw3EOSnvzjuB4NWeo129eJZxK_EHno9oF9OtEQ-0dnM,48905
+ai_edge_quantizer/transformation_instruction_generator.py,sha256=O0U2aZcB8aXQgOV8r9g1rGNzDUiuI5Ta53XnxZbVffE,31576
+ai_edge_quantizer/transformation_instruction_generator_test.py,sha256=KW5-WoTTo9IqLEVnWxVC8ut8eWLi_91xfKgGqVQ9QDk,54635
 ai_edge_quantizer/transformation_performer.py,sha256=o4J6OUbI0dLoobVYjkOFw5Po3yH0gZJXrfuTIYais4o,13029
 ai_edge_quantizer/transformation_performer_test.py,sha256=xk6A3LStCyPclN51--9uO7XjSxNfZmpdfvrzOL0maNM,20349
 ai_edge_quantizer/algorithms/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
@@ -64,14 +64,14 @@ ai_edge_quantizer/utils/calibration_utils_test.py,sha256=4BlksXl7b4yptL8xPR67hmJ
 ai_edge_quantizer/utils/constrained_ops_utils.py,sha256=EAITCf7Ku_PFZcw3K-wd-8hGbyuRd5W5UtNdGvalwAE,4478
 ai_edge_quantizer/utils/constrained_ops_utils_test.py,sha256=6k_AqfB-NmiLkW5WwEV5NSuswFWky2sL0xBGmV6Fdwk,1756
 ai_edge_quantizer/utils/test_utils.py,sha256=a4Nk-wbeB09dFjTDZiA0K67d26j5DD0UDH_GIVmVG_4,8685
-ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=RL6oq6FzZj-xV0Zgh0UBn7-fOQaRXSxZ-PPG_LmtyUY,11384
+ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=aNtL4dpWH5uGGGlaygnMDkh5llTstbgs5ZxO0JkH5VQ,11718
 ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py,sha256=K1SbK8q92qYVtiVj0I0GtugsPTkpIpEKv9zakvFV_Sc,8555
 ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOihexmizeJqt4SQcET9aA,14925
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
 ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
 ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
-ai_edge_quantizer_nightly-0.4.0.dev20250829.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ai_edge_quantizer_nightly-0.4.0.dev20250829.dist-info/METADATA,sha256=fg5k0J7zQJc0ufSBvuidEZKz57iydiIhRI4teV-7AZI,1535
-ai_edge_quantizer_nightly-0.4.0.dev20250829.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_quantizer_nightly-0.4.0.dev20250829.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
-ai_edge_quantizer_nightly-0.4.0.dev20250829.dist-info/RECORD,,
+ai_edge_quantizer_nightly-0.4.0.dev20250831.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.4.0.dev20250831.dist-info/METADATA,sha256=TwazFRbRa2j0kWXJB38Tz5tH0ZCeujk2wCBKsnSdk9I,1535
+ai_edge_quantizer_nightly-0.4.0.dev20250831.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_quantizer_nightly-0.4.0.dev20250831.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.4.0.dev20250831.dist-info/RECORD,,

{ai_edge_quantizer_nightly-0.4.0.dev20250829.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250831.dist-info}/LICENSE RENAMED Viewed

File without changes

{ai_edge_quantizer_nightly-0.4.0.dev20250829.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250831.dist-info}/WHEEL RENAMED Viewed

File without changes

{ai_edge_quantizer_nightly-0.4.0.dev20250829.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250831.dist-info}/top_level.txt RENAMED Viewed

File without changes

ai-edge-quantizer-nightly 0.4.0.dev20250829__py3-none-any.whl → 0.4.0.dev20250831__py3-none-any.whl

ai-edge-quantizer-nightly 0.4.0.dev20250829__py3-none-any.whl → 0.4.0.dev20250831__py3-none-any.whl