ai-edge-quantizer-nightly 0.0.1.dev20250302__py3-none-any.whl → 0.5.0.dev20260103__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithm_manager.py +224 -0
- ai_edge_quantizer/algorithm_manager_api_test.py +7 -0
- ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py +2 -2
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +643 -20
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py +29 -2
- ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py +29 -35
- ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py +35 -12
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py +414 -0
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py +440 -0
- ai_edge_quantizer/algorithms/uniform_quantize/mse.py +127 -0
- ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py +195 -0
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +54 -168
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +54 -17
- ai_edge_quantizer/algorithms/uniform_quantize/octav.py +188 -0
- ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py +240 -0
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +260 -13
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +152 -5
- ai_edge_quantizer/algorithms/utils/common_utils.py +142 -54
- ai_edge_quantizer/calibrator.py +58 -94
- ai_edge_quantizer/calibrator_test.py +5 -74
- ai_edge_quantizer/default_policy.py +108 -16
- ai_edge_quantizer/model_modifier.py +132 -8
- ai_edge_quantizer/model_modifier_test.py +81 -1
- ai_edge_quantizer/model_validator.py +38 -10
- ai_edge_quantizer/model_validator_test.py +2 -1
- ai_edge_quantizer/params_generator.py +230 -47
- ai_edge_quantizer/params_generator_test.py +366 -261
- ai_edge_quantizer/qtyping.py +92 -6
- ai_edge_quantizer/quantizer.py +167 -23
- ai_edge_quantizer/quantizer_test.py +288 -26
- ai_edge_quantizer/recipe.py +156 -21
- ai_edge_quantizer/recipe_manager.py +158 -1
- ai_edge_quantizer/recipe_manager_test.py +146 -32
- ai_edge_quantizer/recipe_test.py +93 -17
- ai_edge_quantizer/transformation_instruction_generator.py +313 -46
- ai_edge_quantizer/transformation_instruction_generator_test.py +449 -27
- ai_edge_quantizer/transformation_performer.py +112 -58
- ai_edge_quantizer/transformation_performer_test.py +176 -4
- ai_edge_quantizer/transformations/duplicate_buffer.py +46 -0
- ai_edge_quantizer/transformations/duplicate_buffer_test.py +106 -0
- ai_edge_quantizer/transformations/duplicate_tensor.py +62 -0
- ai_edge_quantizer/transformations/duplicate_tensor_test.py +131 -0
- ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation.py +299 -0
- ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation_test.py +244 -0
- ai_edge_quantizer/transformations/insert_hadamard_rotation.py +186 -0
- ai_edge_quantizer/transformations/insert_hadamard_rotation_test.py +200 -0
- ai_edge_quantizer/transformations/quantize_tensor.py +24 -44
- ai_edge_quantizer/transformations/quantize_tensor_test.py +3 -2
- ai_edge_quantizer/transformations/transformation_utils.py +157 -11
- ai_edge_quantizer/transformations/transformation_utils_test.py +96 -2
- ai_edge_quantizer/utils/calibration_utils.py +263 -1
- ai_edge_quantizer/utils/calibration_utils_test.py +173 -3
- ai_edge_quantizer/utils/constrained_ops_utils.py +111 -0
- ai_edge_quantizer/utils/constrained_ops_utils_test.py +50 -0
- ai_edge_quantizer/utils/test_utils.py +191 -58
- ai_edge_quantizer/utils/tfl_flatbuffer_utils.py +96 -50
- ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py +20 -0
- ai_edge_quantizer/utils/tfl_interpreter_utils.py +138 -5
- ai_edge_quantizer/utils/tfl_interpreter_utils_test.py +29 -2
- ai_edge_quantizer/utils/validation_utils.py +114 -4
- ai_edge_quantizer/utils/validation_utils_test.py +80 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/METADATA +13 -3
- ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/RECORD +81 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/WHEEL +1 -1
- ai_edge_quantizer/transformations/emulated_subchannel.py +0 -363
- ai_edge_quantizer/transformations/emulated_subchannel_test.py +0 -212
- ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info/RECORD +0 -67
- {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/licenses}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/top_level.txt +0 -0
ai_edge_quantizer/transformation_instruction_generator_test.py
@@ -15,7 +15,9 @@
 
 """Tests for instruction_generator."""
 
+from collections.abc import Sequence
 import os
+from typing import Optional
 
 import numpy as np
 
@@ -27,6 +29,8 @@ from ai_edge_quantizer.utils import test_utils
 
 TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile(".")
 
+_QTransf = qtyping.QuantTransformation
+
 
 class InstructionGeneratorTest(parameterized.TestCase):
 
@@ -951,33 +955,6 @@ class InstructionGeneratorTest(parameterized.TestCase):
         instructions["StatefulPartitionedCall:0"], output_transformation
     )
 
-  def test_raise_error_on_op_replacement_transformation_is_not_unique(self):
-    test_model_path = os.path.join(
-        TEST_DATA_PREFIX_PATH, "tests/models/insert_dequant_test.tflite"
-    )
-    quant_parameters = {}
-    quant_parameters["tfl.quantize"] = qtyping.TensorTransformationParams(
-        "tfl.quantize",
-        qtyping.OpToTensorParams(
-            subgraph_op_id=0,
-            transformations=[
-                qtyping.QuantTransformation.ADD_DEQUANTIZE,
-                qtyping.QuantTransformation.EMULATED_SUBCHANNEL,
-            ],
-            parameters=qtyping.UniformQuantParams(
-                8, None, np.array([1]), np.array([0])
-            ),
-        ),
-        [],
-    )
-    ins_gen = instruction_generator.TransformationInstructionsGenerator(
-        test_model_path
-    )
-    with self.assertRaisesRegex(
-        ValueError, "op replacement transformation can not be combined"
-    ):
-      ins_gen.quant_params_to_transformation_insts(quant_parameters)
-
   def test_raise_error_on_no_quant_conflict(self):
     test_model_path = os.path.join(
         TEST_DATA_PREFIX_PATH, "tests/models/insert_dequant_test.tflite"
@@ -1077,6 +1054,451 @@ class InstructionGeneratorTest(parameterized.TestCase):
     self.assertLen(instructions, 1)
     self.assertEqual(instructions["tfl.quantize"], expected_instructions)
 
+  def test_instruction_generator_keeps_buffer_duplication_as_first_transformation(
+      self,
+  ):
+    test_tensor_name = "test_tensor"
+
+    dummy_quant_params = qtyping.UniformQuantParams(
+        8, None, np.array([1]), np.array([0])
+    )
+    consumer_params_1 = qtyping.OpToTensorParams(
+        subgraph_op_id=0,
+        transformations=[
+            qtyping.QuantTransformation.DUPLICATE_BUFFER,
+            qtyping.QuantTransformation.ADD_QUANTIZE,
+        ],
+        parameters=dummy_quant_params,
+    )
+    consumer_params_2 = qtyping.OpToTensorParams(
+        subgraph_op_id=2,
+        transformations=[
+            qtyping.QuantTransformation.DUPLICATE_BUFFER,
+            qtyping.QuantTransformation.ADD_QUANTIZE,
+            qtyping.QuantTransformation.ADD_DEQUANTIZE,
+        ],
+        parameters=dummy_quant_params,
+    )
+
+    quant_parameters = {
+        test_tensor_name: qtyping.TensorTransformationParams(
+            tensor_name=test_tensor_name,
+            producer=None,
+            consumers=[consumer_params_1, consumer_params_2],
+        ),
+    }
+    instruction_gen = (
+        instruction_generator.TransformationInstructionsGenerator()
+    )
+    # _tensor_name_to_graph_info has to have an entry for the test tensor for
+    # `quant_params_to_transformation_insts` to work. But the values do not
+    # matter for this test.
+    instruction_gen._tensor_name_to_graph_info[test_tensor_name] = (
+        instruction_generator.TransformationInstructionsGenerator.TensorGraphInfo(
+            tensor_id=1,
+            subgraph_id=0,
+            producer=0,
+            consumers=[2],
+        )
+    )
+    instructions = instruction_gen.quant_params_to_transformation_insts(
+        quant_parameters
+    )
+    self.assertLen(instructions, 1)
+    instructions = instructions[test_tensor_name].instructions
+    self.assertGreater(len(instructions), 1)
+    self.assertEqual(instructions[0].transformation, _QTransf.DUPLICATE_BUFFER)
+    self.assertNotIn(_QTransf.DUPLICATE_BUFFER, instructions[1:])
+
+  def _get_test_instruction(self, transformation, consumers=None):
+    if consumers is None:
+      consumers = []
+    return qtyping.TransformationInst(
+        transformation=transformation,
+        consumers=consumers,
+        # Dummy values below.
+        tensor_id=0,
+        producer=None,
+        parameters=None,
+    )
+
+  def test__remove_last_tensor_duplication_succeeds(self):
+    tensor_instructions = qtyping.TensorTransformationInsts(
+        tensor_name="test_tensor",
+        subgraph_id=0,
+        instructions=[
+            self._get_test_instruction(_QTransf.DUPLICATE_TENSOR),
+            self._get_test_instruction(_QTransf.ADD_QUANTIZE),
+            self._get_test_instruction(_QTransf.DUPLICATE_TENSOR),
+            self._get_test_instruction(_QTransf.ADD_DEQUANTIZE),
+        ],
+    )
+    instruction_gen = (
+        instruction_generator.TransformationInstructionsGenerator()
+    )
+    instruction_gen._remove_last_tensor_duplication(tensor_instructions)
+
+    self.assertLen(tensor_instructions.instructions, 3)
+    expected_transformations = [
+        _QTransf.DUPLICATE_TENSOR,
+        _QTransf.ADD_QUANTIZE,
+        _QTransf.ADD_DEQUANTIZE,
+    ]
+    got_transformations = [
+        instruction.transformation
+        for instruction in tensor_instructions.instructions
+    ]
+    self.assertEqual(got_transformations, expected_transformations)
+
+  def test__remove_unnecessary_buffer_duplication_succeeds(
+      self,
+  ):
+    instructions = [
+        self._get_test_instruction(_QTransf.DUPLICATE_TENSOR, consumers=[1]),
+        self._get_test_instruction(_QTransf.DUPLICATE_BUFFER, consumers=[1]),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE),
+        self._get_test_instruction(_QTransf.DUPLICATE_BUFFER, consumers=[1]),
+        self._get_test_instruction(_QTransf.ADD_DEQUANTIZE),
+        self._get_test_instruction(_QTransf.DUPLICATE_BUFFER, consumers=[2]),
+        self._get_test_instruction(_QTransf.DUPLICATE_TENSOR, consumers=[3, 4]),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE),
+        self._get_test_instruction(_QTransf.DUPLICATE_BUFFER, consumers=[3, 4]),
+    ]
+    tensor_instructions = qtyping.TensorTransformationInsts(
+        tensor_name="test_tensor",
+        subgraph_id=0,
+        instructions=instructions,
+    )
+    instruction_gen = (
+        instruction_generator.TransformationInstructionsGenerator()
+    )
+    instruction_gen._remove_unnecessary_buffer_duplication(tensor_instructions)
+
+    self.assertLen(tensor_instructions.instructions, 6)
+    expected_transformations = [
+        _QTransf.DUPLICATE_TENSOR,
+        _QTransf.ADD_QUANTIZE,
+        _QTransf.ADD_DEQUANTIZE,
+        _QTransf.DUPLICATE_BUFFER,
+        _QTransf.DUPLICATE_TENSOR,
+        _QTransf.ADD_QUANTIZE,
+    ]
+    got_transformations = [
+        instruction.transformation
+        for instruction in tensor_instructions.instructions
+    ]
+    self.assertEqual(got_transformations, expected_transformations)
+
+  def test__instruction_generator_removes_unnecessary_tensor_and_buffer_duplication(
+      self,
+  ):
+    test_model_path = os.path.join(
+        TEST_DATA_PREFIX_PATH,
+        "tests/models/constant_tensor_and_buffer_only_sharing_weight_fcs.tflite",
+    )
+    params_4_bits = qtyping.UniformQuantParams(
+        4, None, np.array([1]), np.array([0])
+    )
+    params_8_bits = qtyping.UniformQuantParams(
+        8, None, np.array([1]), np.array([0])
+    )
+    quant_parameters = {}
+    # Two FCs share a weight tensor `arith.constant`.
+    quant_parameters["arith.constant"] = qtyping.TensorTransformationParams(
+        tensor_name="arith.constant",
+        producer=None,
+        consumers=[
+            qtyping.OpToTensorParams(
+                subgraph_op_id=0,
+                transformations=[
+                    _QTransf.DUPLICATE_TENSOR,
+                    _QTransf.DUPLICATE_BUFFER,  # Expected to be removed.
+                    _QTransf.QUANTIZE_TENSOR,
+                ],
+                parameters=params_8_bits,
+            ),
+            qtyping.OpToTensorParams(
+                subgraph_op_id=1,
+                transformations=[
+                    _QTransf.DUPLICATE_TENSOR,  # Expected to be removed.
+                    _QTransf.DUPLICATE_BUFFER,
+                    _QTransf.QUANTIZE_TENSOR,
+                ],
+                parameters=params_4_bits,
+            ),
+        ],
+    )
+    instruction_gen = instruction_generator.TransformationInstructionsGenerator(
+        test_model_path
+    )
+    instructions = instruction_gen.quant_params_to_transformation_insts(
+        quant_parameters
+    )
+
+    def get_expected_instruction(transformation, consumers, params):
+      return qtyping.TransformationInst(
+          transformation=transformation,
+          consumers=consumers,
+          tensor_id=1,
+          producer=-1,
+          parameters=params,
+      )
+
+    expected_instructions = qtyping.TensorTransformationInsts(
+        tensor_name="arith.constant",
+        subgraph_id=0,
+        instructions=[
+            get_expected_instruction(
+                _QTransf.DUPLICATE_TENSOR, consumers=[0], params=params_8_bits
+            ),
+            get_expected_instruction(
+                _QTransf.DUPLICATE_BUFFER, consumers=[1], params=params_4_bits
+            ),
+            get_expected_instruction(
+                _QTransf.QUANTIZE_TENSOR, consumers=[0], params=params_8_bits
+            ),
+            get_expected_instruction(
+                _QTransf.QUANTIZE_TENSOR, consumers=[1], params=params_4_bits
+            ),
+        ],
+    )
+    self.assertLen(instructions, 1)
+    self.assertEqual(instructions["arith.constant"], expected_instructions)
+
+  def test__split_instructions_by_tensor_duplication_returns_expected_subsets(
+      self,
+  ):
+    instructions = [
+        self._get_test_instruction(_QTransf.DUPLICATE_TENSOR, consumers=[1, 2, 3]),  # pylint: disable=line-too-long
+        self._get_test_instruction(_QTransf.DUPLICATE_TENSOR, consumers=[4]),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[1, 2]),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[3]),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[4]),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[5]),
+    ]
+    tensor_instructions = qtyping.TensorTransformationInsts(
+        tensor_name="test_tensor", subgraph_id=0, instructions=instructions
+    )
+    instruction_gen = (
+        instruction_generator.TransformationInstructionsGenerator()
+    )
+    got = instruction_gen._split_instructions_by_tensor_duplication(
+        tensor_instructions
+    )
+    expected = [
+        [self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[5])],
+        [
+            self._get_test_instruction(_QTransf.DUPLICATE_TENSOR, consumers=[1, 2, 3]),  # pylint: disable=line-too-long
+            self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[1, 2]),
+            self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[3]),
+        ],
+        [
+            self._get_test_instruction(_QTransf.DUPLICATE_TENSOR, consumers=[4]),  # pylint: disable=line-too-long
+            self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[4]),
+        ],
+    ]
+    self.assertEqual(got, expected)
+
+  def test__check_tensor_transformation_instructions_valid_succeeds_on_q_dq_with_duplication(
+      self,
+  ):
+    instructions = [
+        self._get_test_instruction(_QTransf.DUPLICATE_TENSOR, consumers=[1]),
+        self._get_test_instruction(_QTransf.NO_QUANTIZE, consumers=[1]),
+        self._get_test_instruction(_QTransf.QUANTIZE_TENSOR, consumers=[2]),
+    ]
+    tensor_instructions = qtyping.TensorTransformationInsts(
+        tensor_name="test_tensor", subgraph_id=0, instructions=instructions
+    )
+    instruction_gen = (
+        instruction_generator.TransformationInstructionsGenerator()
+    )
+    instruction_gen._check_tensor_transformation_instructions_valid(
+        tensor_instructions
+    )
+
+  def test__check_tensor_transformation_instructions_valid_fails_when_q_noq_wo_duplication(
+      self,
+  ):
+    tensor_instructions = qtyping.TensorTransformationInsts(
+        tensor_name="test_tensor",
+        subgraph_id=0,
+        instructions=[
+            self._get_test_instruction(_QTransf.NO_QUANTIZE, consumers=[1]),
+            self._get_test_instruction(_QTransf.QUANTIZE_TENSOR, consumers=[2]),
+        ],
+    )
+    instruction_gen = (
+        instruction_generator.TransformationInstructionsGenerator()
+    )
+    with self.assertRaisesRegex(
+        ValueError, "can not be both quantized and unquantized"
+    ):
+      instruction_gen._check_tensor_transformation_instructions_valid(
+          tensor_instructions
+      )
+
+
+class EliminateUnnecessaryRequantizationTest(parameterized.TestCase):
+
+  def setUp(self):
+    super().setUp()
+    self.ins_gen = instruction_generator.TransformationInstructionsGenerator(
+        os.path.join(TEST_DATA_PREFIX_PATH, "tests/models/conv_fc_mnist.tflite")
+    )
+
+  def _get_test_instruction(
+      self,
+      transformation: qtyping.QuantTransformation,
+      producer: int = -1,
+      consumers: Optional[Sequence[int]] = None,
+      qparams: Optional[qtyping.UniformQuantParams] = None,
+  ) -> qtyping.TransformationInst:
+    if consumers is None:
+      consumers = []
+    if qparams is None:
+      qparams = qtyping.UniformQuantParams(
+          num_bits=8,
+          quantized_dimension=None,
+          scale=np.array([1]),
+          zero_point=np.array([0]),
+      )
+    return qtyping.TransformationInst(
+        transformation=transformation,
+        producer=producer,
+        consumers=consumers,
+        parameters=qparams,
+        # Dummy values below.
+        tensor_id=0,
+    )
+
+  def _create_test_insts(
+      self, instructions: list[qtyping.TransformationInst]
+  ) -> qtyping.TensorTransformationInsts:
+    return qtyping.TensorTransformationInsts(
+        tensor_name="test_tensor", subgraph_id=0, instructions=instructions
+    )
+
+  def test_no_fusion_when_too_few_instructions(self):
+    tensor_insts = self._create_test_insts([
+        self._get_test_instruction(_QTransf.QUANTIZE_TENSOR),
+    ])
+    self.ins_gen._eliminate_requantization_for_nonconstrained_provider(
+        tensor_insts
+    )
+    self.assertLen(tensor_insts.instructions, 1)
+
+  def test_no_fusion_when_too_many_instructions(self):
+    tensor_insts = self._create_test_insts([
+        self._get_test_instruction(_QTransf.QUANTIZE_TENSOR),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE),
+        self._get_test_instruction(_QTransf.ADD_DEQUANTIZE),
+    ])
+    self.ins_gen._eliminate_requantization_for_nonconstrained_provider(
+        tensor_insts
+    )
+    self.assertLen(tensor_insts.instructions, 3)
+
+  def test_no_fusion_when_invalid_transformation_pair(self):
+    tensor_insts = self._create_test_insts([
+        self._get_test_instruction(_QTransf.ADD_DEQUANTIZE),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE),
+    ])
+    self.ins_gen._eliminate_requantization_for_nonconstrained_provider(
+        tensor_insts
+    )
+    self.assertLen(tensor_insts.instructions, 2)
+
+  def test_no_fusion_when_consumers_mismatch(self):
+    tensor_insts = self._create_test_insts([
+        self._get_test_instruction(_QTransf.QUANTIZE_TENSOR, consumers=[0]),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[1]),
+    ])
+    self.ins_gen._eliminate_requantization_for_nonconstrained_provider(
+        tensor_insts
+    )
+    self.assertLen(tensor_insts.instructions, 2)
+
+  def test_no_fusion_when_no_producer(self):
+    producer = -1
+    tensor_insts = self._create_test_insts([
+        self._get_test_instruction(_QTransf.QUANTIZE_TENSOR, producer),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE, producer),
+    ])
+    self.ins_gen._eliminate_requantization_for_nonconstrained_provider(
+        tensor_insts
+    )
+    self.assertLen(tensor_insts.instructions, 2)
+
+  def test_no_fusion_when_quant_params_are_incompatible(self):
+    params_8_bits = qtyping.UniformQuantParams(
+        8, None, np.array([1]), np.array([0])
+    )
+    params_16_bits = qtyping.UniformQuantParams(
+        16, None, np.array([1]), np.array([0])
+    )
+    tensor_insts = self._create_test_insts([
+        self._get_test_instruction(
+            _QTransf.QUANTIZE_TENSOR, qparams=params_8_bits
+        ),
+        self._get_test_instruction(
+            _QTransf.ADD_QUANTIZE, qparams=params_16_bits
+        ),
+    ])
+    self.ins_gen._eliminate_requantization_for_nonconstrained_provider(
+        tensor_insts
+    )
+    self.assertLen(tensor_insts.instructions, 2)
+
+  def test_no_fusion_when_producer_constrained(self):
+    # Reshape op (op index 2) has same as input scale constraint.
+    tensor_insts = self._create_test_insts([
+        self._get_test_instruction(_QTransf.QUANTIZE_TENSOR, producer=2),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE, producer=2),
+    ])
+    self.ins_gen._eliminate_requantization_for_nonconstrained_provider(
+        tensor_insts
+    )
+    self.assertLen(tensor_insts.instructions, 2)
+
+  def test_fusion_succeeds(self):
+    producer = 0
+    consumers = [1]
+    params_0 = qtyping.UniformQuantParams(
+        num_bits=8,
+        quantized_dimension=None,
+        scale=np.array([1]),
+        zero_point=np.array([0]),
+    )
+    params_1 = qtyping.UniformQuantParams(
+        num_bits=8,
+        quantized_dimension=None,
+        scale=np.array([2]),
+        zero_point=np.array([1]),
+    )
+    inst_0 = self._get_test_instruction(
+        _QTransf.QUANTIZE_TENSOR, producer, consumers, params_0
+    )
+    inst_1 = self._get_test_instruction(
+        _QTransf.ADD_QUANTIZE, producer, consumers, params_1
+    )
+    tensor_insts = self._create_test_insts([inst_0, inst_1])
+    self.ins_gen._eliminate_requantization_for_nonconstrained_provider(
+        tensor_insts
+    )
+
+    self.assertLen(tensor_insts.instructions, 1)
+    result_inst = tensor_insts.instructions[0]
+    self.assertEqual(result_inst.transformation, _QTransf.QUANTIZE_TENSOR)
+
+    result_params = result_inst.parameters
+    # Explicitly narrow the type for pytype.
+    if not isinstance(result_params, qtyping.UniformQuantParams):
+      self.fail("Fused instruction parameters are not UniformQuantParams")
+
+    self.assertEqual(result_params.scale, params_1.scale)
+    self.assertEqual(result_params.zero_point, params_1.zero_point)
+
 
 if __name__ == "__main__":
   googletest.main()