PyPI - ai-edge-quantizer-nightly - Versions diffs - 0.1.0.dev20250404__py3-none-any.whl → 0.1.0.dev20250406__py3-none-any.whl - Mend

ai-edge-quantizer-nightly 0.1.0.dev20250404py3-none-any.whl → 0.1.0.dev20250406py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

ai_edge_quantizer/qtyping.py CHANGED Viewed

@@ -111,6 +111,8 @@ class QuantTransformation(enum.Enum):
   EMULATED_SUBCHANNEL = 4
   # Duplicate the buffer.
   DUPLICATE_BUFFER = 5
+  # Duplicate the tensor.
+  DUPLICATE_TENSOR = 6
 @dataclasses.dataclass(frozen=True)

ai_edge_quantizer/transformation_instruction_generator.py CHANGED Viewed

@@ -27,6 +27,9 @@ from ai_edge_quantizer.utils import tfl_flatbuffer_utils
 from ai_edge_litert import schema_py_generated  # pylint: disable=g-direct-tensorflow-import
+_QuantTransformation = qtyping.QuantTransformation
 # When a tensor has no producer, we'll assign -1 to the producer field
 # When a tensor is a graph output, we'll also include a -1 in the consumer list
 def check_horizontal_optimization(
@@ -454,6 +457,53 @@ class TransformationInstructionsGenerator:
       transformations.insert(0, producer_trans_rule)
     return transformations
+  def _remove_last_tensor_duplication(
+      self, tensor_trans_insts: qtyping.TensorTransformationInsts
+  ) -> None:
+    """Remove the last tensor duplication so the original tensor can be reused."""
+    instructions = tensor_trans_insts.instructions
+    if not instructions:
+      return
+    for i in range(len(instructions) - 1, -1, -1):
+      if (
+          instructions[i].transformation
+          == _QuantTransformation.DUPLICATE_TENSOR
+      ):
+        instructions.pop(i)
+        return
+  def _remove_unnecessary_buffer_duplication(
+      self, tensor_trans_insts: qtyping.TensorTransformationInsts
+  ) -> None:
+    """Remove buffer duplications that come after a tensor duplication.
+    When a tensor is duplicated, a new buffer is created for it. Therefore,
+    buffer duplication transformation that comes after it is unnecessary.
+    Args:
+      tensor_trans_insts: Transformation instructions for a tensor.
+    """
+    instructions = tensor_trans_insts.instructions
+    if not instructions:
+      return
+    # Find all consumers that have a tensor duplication.
+    consumers_with_tensor_duplication = set()
+    for instr in instructions:
+      if instr.transformation == _QuantTransformation.DUPLICATE_TENSOR:
+        consumers_with_tensor_duplication.update(instr.consumers)
+    if not consumers_with_tensor_duplication:
+      return
+    # Remove a buffer duplication that comes with a tensor duplication.
+    for i in range(len(instructions) - 1, -1, -1):
+      instr = instructions[i]
+      if (
+          instr.transformation == _QuantTransformation.DUPLICATE_BUFFER
+          and consumers_with_tensor_duplication.issuperset(instr.consumers)
+      ):
+        instructions.pop(i)
   def _quant_params_to_transformation_insts(
       self,
       param: qtyping.TensorTransformationParams,
@@ -510,27 +560,119 @@ class TransformationInstructionsGenerator:
     # Adding other consumers rules.
     transformations += other_consumer_transformations
     tensor_trans_insts.instructions = transformations
+    # Now, when all optimizations are done, we can remove the last tensor
+    # duplication instruction, so the original tensor can be reused.
+    self._remove_last_tensor_duplication(tensor_trans_insts)
+    # With the tensor duplication instructions finalized, we can remove
+    # unnecessary buffer duplications applied to the same duplicated tensors.
+    # This is not a part of a vertical optimization because vertical
+    # optimization only works between producers & consumers, and this is between
+    # the consumers only. Also this can't be done during the params generation
+    # because removing last tensor duplication has to happen first.
+    self._remove_unnecessary_buffer_duplication(tensor_trans_insts)
     # Check the generated transformation instructions are valid, the function
     # will raise an error if the instructions are not valid.
     self._check_tensor_transformation_instructions_valid(tensor_trans_insts)
     return tensor_trans_insts
-  def _check_tensor_transformation_instructions_valid(
-      self, instructions: qtyping.TensorTransformationInsts
-  ):
-    """Check if the tensor transformation instructions are valid.
+  def _split_instructions_by_tensor_duplication(
+      self,
+      instructions: qtyping.TensorTransformationInsts,
+  ) -> list[list[qtyping.TransformationInst]]:
+    """Split the instructions into subsets by tensor duplication.
+    Splits the instructions into subsets based on which tensor (original or one
+    of duplicated ones) they will be applied to.
+    The first subset is for the original tensor. The following subsets are for
+    the duplicated tensors. The order of instructions in each subset is
+    preserved.
+    Enforced constraints for each duplicated tensor's instructions subset:
+    1. The first instruction must be a `DUPLICATE_TENSOR` one.
+    2. No other `DUPLICATE_TENSOR` instructions can be present.
+    For the following instructions:
+      [
+          (transformation=DUPLICATE_TENSOR, consumers=[1, 2, 3]),
+          (transformation=DUPLICATE_TENSOR, consumers=[4]),
+          (transformation=T1, consumers=[1, 2]),
+          (transformation=T2, consumers=[3]),
+          (transformation=T3, consumers=[4]),
+          (transformation=T4, consumers=[5])
+      ]
+    `instruction_subsets` will be:
+      [
+          [(transformation=T4, consumers=[5])],
+          [
+              (transformation=DUPLICATE_TENSOR, consumers=[1, 2, 3]),
+              (transformation=T1, consumers=[1, 2]),
+              (transformation=T2, consumers=[3])
+          ],
+          [
+              (transformation=DUPLICATE_TENSOR, consumers=[4]),
+              (transformation=T3, consumers=[4])
+          ]
+      ],
     Args:
       instructions: Transformation instructions for a tensor.
+    Returns:
+      A list of subsets of transformation instructions, where the first subset
+      is for the original tensor, and the following subsets are for the
+      duplicated tensors.
     Raises:
-      ValueError: If the instructions are not valid.
+      ValueError: If DUPLICATE_TENSOR is found and it's not the first
+      transformation for its consumers.
+    """
+    original_tensor_subset_idx = 0
+    instruction_subsets = [[]]
+    consumer_to_subset_idx = {}
+    for instruction in instructions.instructions:
+      if instruction.transformation == _QuantTransformation.DUPLICATE_TENSOR:
+        instruction_subsets.append([instruction])
+        subset_idx = len(instruction_subsets) - 1
+        for consumer in instruction.consumers:
+          if consumer in consumer_to_subset_idx:
+            raise ValueError(
+                f"Tensor {instructions.tensor_name} : duplicate tensor should"
+                " be the first instruction for its consumers."
+            )
+          else:
+            consumer_to_subset_idx[consumer] = subset_idx
+      else:
+        first_consumer = instruction.consumers[0]
+        if first_consumer not in consumer_to_subset_idx:
+          consumer_to_subset_idx[first_consumer] = original_tensor_subset_idx
+        subset_idx = consumer_to_subset_idx[first_consumer]
+        instruction_subsets[subset_idx].append(instruction)
+    return instruction_subsets
+  def _check_subset_of_tensor_transformation_instructions_valid(
+      self,
+      instructions: Optional[list[qtyping.TransformationInst]],
+      tensor_name: str,
+  ):
+    """Check if a subset of tensor transformation instructions is valid.
+    Args:
+      instructions: A subset of transformation instructions for a tensor.
+      tensor_name: The name of the tensor.
+    Raises:
+      ValueError: If the subset of instructions is not valid.
     """
     is_tensor_unquantized = False
     is_tensor_quantized = False
     is_operator_emulated = False
-    for instruction in instructions.instructions:
+    for instruction in instructions:
       transform_type = instruction.transformation
       if transform_type == qtyping.QuantTransformation.NO_QUANTIZE:
         is_tensor_unquantized = True
@@ -543,14 +685,36 @@ class TransformationInstructionsGenerator:
         is_operator_emulated = True
     if is_tensor_unquantized and is_tensor_quantized:
       raise ValueError(
-          "Tensor %s can not be both quantized and unquantized"
-          % instructions.tensor_name
+          "Tensor %s can not be both quantized and unquantized" % tensor_name
       )
-    if is_operator_emulated and len(instructions.instructions) > 1:
+    if is_operator_emulated and len(instructions) > 1:
       raise ValueError(
           "Tensor %s : op replacement transformation can not be combined with"
-          " other transformations."
-          % instructions.tensor_name
+          " other transformations." % tensor_name
+      )
+  def _check_tensor_transformation_instructions_valid(
+      self,
+      instructions: qtyping.TensorTransformationInsts,
+  ):
+    """Check if the tensor transformation instructions are valid.
+    Args:
+      instructions: Transformation instructions for a tensor.
+    Raises:
+      ValueError: If the instructions are not valid.
+    """
+    # Split the instructions into subsets based on which tensor (original or one
+    # of duplicated ones) they will be applied to.
+    instruction_subsets = self._split_instructions_by_tensor_duplication(
+        instructions
+    )
+    # Check that each subset of instructions is valid.
+    for instruction_subset in instruction_subsets:
+      self._check_subset_of_tensor_transformation_instructions_valid(
+          instruction_subset,
+          instructions.tensor_name,
       )
   def quant_params_to_transformation_insts(

ai_edge_quantizer/transformation_instruction_generator_test.py CHANGED Viewed

@@ -27,6 +27,8 @@ from ai_edge_quantizer.utils import test_utils
 TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile(".")
+_QTransf = qtyping.QuantTransformation
 class InstructionGeneratorTest(parameterized.TestCase):
@@ -1130,14 +1132,236 @@ class InstructionGeneratorTest(parameterized.TestCase):
     self.assertLen(instructions, 1)
     instructions = instructions[test_tensor_name].instructions
     self.assertGreater(len(instructions), 1)
-    self.assertEqual(
-        instructions[0].transformation,
-        qtyping.QuantTransformation.DUPLICATE_BUFFER,
+    self.assertEqual(instructions[0].transformation, _QTransf.DUPLICATE_BUFFER)
+    self.assertNotIn(_QTransf.DUPLICATE_BUFFER, instructions[1:])
+  def _get_test_instruction(self, transformation, consumers=None):
+    if consumers is None:
+      consumers = []
+    return qtyping.TransformationInst(
+        transformation=transformation,
+        consumers=consumers,
+        # Dummy values below.
+        tensor_id=0,
+        producer=None,
+        parameters=None,
+    )
+  def test__remove_last_tensor_duplication_succeeds(self):
+    tensor_instructions = qtyping.TensorTransformationInsts(
+        tensor_name="test_tensor",
+        subgraph_id=0,
+        instructions=[
+            self._get_test_instruction(_QTransf.DUPLICATE_TENSOR),
+            self._get_test_instruction(_QTransf.ADD_QUANTIZE),
+            self._get_test_instruction(_QTransf.DUPLICATE_TENSOR),
+            self._get_test_instruction(_QTransf.ADD_DEQUANTIZE),
+        ],
+    )
+    instruction_gen = (
+        instruction_generator.TransformationInstructionsGenerator()
+    )
+    instruction_gen._remove_last_tensor_duplication(tensor_instructions)
+    self.assertLen(tensor_instructions.instructions, 3)
+    expected_transformations = [
+        _QTransf.DUPLICATE_TENSOR,
+        _QTransf.ADD_QUANTIZE,
+        _QTransf.ADD_DEQUANTIZE,
+    ]
+    got_transformations = [
+        instruction.transformation
+        for instruction in tensor_instructions.instructions
+    ]
+    self.assertEqual(got_transformations, expected_transformations)
+  def test__remove_unnecessary_buffer_duplication_succeeds(
+      self,
+  ):
+    instructions = [
+        self._get_test_instruction(_QTransf.DUPLICATE_TENSOR, consumers=[1]),
+        self._get_test_instruction(_QTransf.DUPLICATE_BUFFER, consumers=[1]),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE),
+        self._get_test_instruction(_QTransf.DUPLICATE_BUFFER, consumers=[1]),
+        self._get_test_instruction(_QTransf.ADD_DEQUANTIZE),
+        self._get_test_instruction(_QTransf.DUPLICATE_BUFFER, consumers=[2]),
+        self._get_test_instruction(_QTransf.DUPLICATE_TENSOR, consumers=[3, 4]),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE),
+        self._get_test_instruction(_QTransf.DUPLICATE_BUFFER, consumers=[3, 4]),
+    ]
+    tensor_instructions = qtyping.TensorTransformationInsts(
+        tensor_name="test_tensor",
+        subgraph_id=0,
+        instructions=instructions,
+    )
+    instruction_gen = (
+        instruction_generator.TransformationInstructionsGenerator()
+    )
+    instruction_gen._remove_unnecessary_buffer_duplication(tensor_instructions)
+    self.assertLen(tensor_instructions.instructions, 6)
+    expected_transformations = [
+        _QTransf.DUPLICATE_TENSOR,
+        _QTransf.ADD_QUANTIZE,
+        _QTransf.ADD_DEQUANTIZE,
+        _QTransf.DUPLICATE_BUFFER,
+        _QTransf.DUPLICATE_TENSOR,
+        _QTransf.ADD_QUANTIZE,
+    ]
+    got_transformations = [
+        instruction.transformation
+        for instruction in tensor_instructions.instructions
+    ]
+    self.assertEqual(got_transformations, expected_transformations)
+  def test__instruction_generator_removes_unnecessary_tensor_and_buffer_duplication(
+      self,
+  ):
+    test_model_path = os.path.join(
+        TEST_DATA_PREFIX_PATH,
+        "tests/models/constant_tensor_and_buffer_only_sharing_weight_fcs.tflite",
+    )
+    params_4_bits = qtyping.UniformQuantParams(
+        4, None, np.array([1]), np.array([0])
+    )
+    params_8_bits = qtyping.UniformQuantParams(
+        8, None, np.array([1]), np.array([0])
+    )
+    quant_parameters = {}
+    # Two FCs share a weight tensor `arith.constant`.
+    quant_parameters["arith.constant"] = qtyping.TensorTransformationParams(
+        tensor_name="arith.constant",
+        producer=None,
+        consumers=[
+            qtyping.OpToTensorParams(
+                subgraph_op_id=0,
+                transformations=[
+                    _QTransf.DUPLICATE_TENSOR,
+                    _QTransf.DUPLICATE_BUFFER,  # Expected to be removed.
+                    _QTransf.QUANTIZE_TENSOR,
+                ],
+                parameters=params_8_bits,
+            ),
+            qtyping.OpToTensorParams(
+                subgraph_op_id=1,
+                transformations=[
+                    _QTransf.DUPLICATE_TENSOR,  # Expected to be removed.
+                    _QTransf.DUPLICATE_BUFFER,
+                    _QTransf.QUANTIZE_TENSOR,
+                ],
+                parameters=params_4_bits,
+            ),
+        ],
+    )
+    instruction_gen = instruction_generator.TransformationInstructionsGenerator(
+        test_model_path
+    )
+    instructions = instruction_gen.quant_params_to_transformation_insts(
+        quant_parameters
     )
-    self.assertNotIn(
-        qtyping.QuantTransformation.DUPLICATE_BUFFER,
-        instructions[1:],
+    def get_expected_instruction(transformation, consumers, params):
+      return qtyping.TransformationInst(
+          transformation=transformation,
+          consumers=consumers,
+          tensor_id=1,
+          producer=-1,
+          parameters=params,
+      )
+    expected_instructions = qtyping.TensorTransformationInsts(
+        tensor_name="arith.constant",
+        subgraph_id=0,
+        instructions=[
+            get_expected_instruction(
+                _QTransf.DUPLICATE_TENSOR, consumers=[0], params=params_8_bits
+            ),
+            get_expected_instruction(
+                _QTransf.DUPLICATE_BUFFER, consumers=[1], params=params_4_bits
+            ),
+            get_expected_instruction(
+                _QTransf.QUANTIZE_TENSOR, consumers=[0], params=params_8_bits
+            ),
+            get_expected_instruction(
+                _QTransf.QUANTIZE_TENSOR, consumers=[1], params=params_4_bits
+            ),
+        ],
     )
+    self.assertLen(instructions, 1)
+    self.assertEqual(instructions["arith.constant"], expected_instructions)
+  def test__split_instructions_by_tensor_duplication_returns_expected_subsets(
+      self,
+  ):
+    instructions = [
+        self._get_test_instruction(_QTransf.DUPLICATE_TENSOR, consumers=[1, 2, 3]),  # pylint: disable=line-too-long
+        self._get_test_instruction(_QTransf.DUPLICATE_TENSOR, consumers=[4]),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[1, 2]),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[3]),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[4]),
+        self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[5]),
+    ]
+    tensor_instructions = qtyping.TensorTransformationInsts(
+        tensor_name="test_tensor", subgraph_id=0, instructions=instructions
+    )
+    instruction_gen = (
+        instruction_generator.TransformationInstructionsGenerator()
+    )
+    got = instruction_gen._split_instructions_by_tensor_duplication(
+        tensor_instructions
+    )
+    expected = [
+        [self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[5])],
+        [
+            self._get_test_instruction(_QTransf.DUPLICATE_TENSOR, consumers=[1, 2, 3]),  # pylint: disable=line-too-long
+            self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[1, 2]),
+            self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[3]),
+        ],
+        [
+            self._get_test_instruction(_QTransf.DUPLICATE_TENSOR, consumers=[4]),  # pylint: disable=line-too-long
+            self._get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[4]),
+        ],
+    ]
+    self.assertEqual(got, expected)
+  def test__check_tensor_transformation_instructions_valid_succeeds_on_q_dq_with_duplication(
+      self,
+  ):
+    instructions = [
+        self._get_test_instruction(_QTransf.DUPLICATE_TENSOR, consumers=[1]),
+        self._get_test_instruction(_QTransf.NO_QUANTIZE, consumers=[1]),
+        self._get_test_instruction(_QTransf.QUANTIZE_TENSOR, consumers=[2]),
+    ]
+    tensor_instructions = qtyping.TensorTransformationInsts(
+        tensor_name="test_tensor", subgraph_id=0, instructions=instructions
+    )
+    instruction_gen = (
+        instruction_generator.TransformationInstructionsGenerator()
+    )
+    instruction_gen._check_tensor_transformation_instructions_valid(
+        tensor_instructions
+    )
+  def test__check_tensor_transformation_instructions_valid_fails_when_q_noq_wo_duplication(
+      self,
+  ):
+    tensor_instructions = qtyping.TensorTransformationInsts(
+        tensor_name="test_tensor",
+        subgraph_id=0,
+        instructions=[
+            self._get_test_instruction(_QTransf.NO_QUANTIZE, consumers=[1]),
+            self._get_test_instruction(_QTransf.QUANTIZE_TENSOR, consumers=[2]),
+        ],
+    )
+    instruction_gen = (
+        instruction_generator.TransformationInstructionsGenerator()
+    )
+    with self.assertRaisesRegex(
+        ValueError, "can not be both quantized and unquantized"
+    ):
+      instruction_gen._check_tensor_transformation_instructions_valid(
+          tensor_instructions
+      )
 if __name__ == "__main__":

ai_edge_quantizer/transformation_performer.py CHANGED Viewed

@@ -23,6 +23,7 @@ import numpy as np
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer.transformations import dequant_insert
 from ai_edge_quantizer.transformations import duplicate_buffer
+from ai_edge_quantizer.transformations import duplicate_tensor
 from ai_edge_quantizer.transformations import emulated_subchannel
 from ai_edge_quantizer.transformations import quant_insert
 from ai_edge_quantizer.transformations import quantize_tensor
@@ -76,6 +77,9 @@ class TransformationPerformer:
         qtyping.QuantTransformation.DUPLICATE_BUFFER: (
             duplicate_buffer.duplicate_buffer
         ),
+        qtyping.QuantTransformation.DUPLICATE_TENSOR: (
+            duplicate_tensor.duplicate_tensor
+        ),
     }
     # transformations are separated into two categories:
     # op_insertion_transformations are transformations that only insert ops
@@ -86,6 +90,7 @@ class TransformationPerformer:
         qtyping.QuantTransformation.QUANTIZE_TENSOR,
         qtyping.QuantTransformation.ADD_QUANTIZE,
         qtyping.QuantTransformation.DUPLICATE_BUFFER,
+        qtyping.QuantTransformation.DUPLICATE_TENSOR,
     ])
     self._op_replacement_transformations = set(
         [qtyping.QuantTransformation.EMULATED_SUBCHANNEL]
@@ -132,42 +137,47 @@ class TransformationPerformer:
       transformations: list[qtyping.TransformationInst],
       subgraph_id: int,
       trans_info: qtyping.TransformationInfo,
-  ):
-    """Update the instructions after the graph is modified.
+  ) -> None:
+    """Update the instructions in-place after the graph is modified.
-    After an op is inserted, the topology is changed and this may impact the
-    following transformation to be applied. So we need to update instructions
-    that have yet to be applied.
+    After an op is inserted or a tensor is duplicated, the topology is changed
+    and this may impact the following transformation to be applied. So we need
+    to update instructions that have yet to be applied.
     Args:
-      prev_transformation_index: the index of the last applied transformation
-      transformations: the list of transformations we're applying
-      subgraph_id: the subgraph where the provided instrucitons belongs to
-      trans_info: transformation info returned by a transformation
-    Returns:
-      None, modifies the transformation in place
+      prev_transformation_index: The index of the last applied transformation.
+      transformations: The list of transformations we're applying.
+      subgraph_id: The subgraph where the provided instructions belong to.
+      trans_info: Transformation info returned by a transformation.
     """
-    # if no ops were added, then no need for update
-    if trans_info.num_ops_added == 0:
-      return
     prev_transformation = transformations[prev_transformation_index]
-    self._added_op_id_map[subgraph_id].append(
-        trans_info.op_id + trans_info.num_ops_added - 1
+    is_prev_not_duplicate_tensor = (
+        prev_transformation.transformation
+        != qtyping.QuantTransformation.DUPLICATE_TENSOR
     )
+    was_op_added = trans_info.num_ops_added > 0
+    if not was_op_added and is_prev_not_duplicate_tensor:
+      return
+    if was_op_added:
+      self._added_op_id_map[subgraph_id].append(
+          trans_info.op_id + trans_info.num_ops_added - 1
+      )
     for transformations_index in range(
         prev_transformation_index + 1, len(transformations)
     ):
       transformation = transformations[transformations_index]
       for consumer_index in transformation.consumers:
-        # if the consumer need to use newly added ops, then the new added op
+        # If the consumer needs to use newly added ops, then the new added op
         # index needs to be outside of the range of the original op ids.
         if consumer_index in prev_transformation.consumers:
-          transformation.producer = (
-              len(self._original_op_id_map[subgraph_id])
-              + len(self._added_op_id_map[subgraph_id])
-              - 1
-          )
+          if was_op_added:
+            transformation.producer = (
+                len(self._original_op_id_map[subgraph_id])
+                + len(self._added_op_id_map[subgraph_id])
+                - 1
+            )
           transformation.tensor_id = trans_info.output_tensor_id
   def _apply_single_transformation(

ai_edge_quantizer/transformation_performer_test.py CHANGED Viewed

@@ -15,6 +15,7 @@
 """Tests for transformation_performer."""
+import copy
 import os
 import numpy as np
@@ -26,6 +27,9 @@ from ai_edge_quantizer import transformation_performer
 from ai_edge_quantizer.utils import test_utils
 from ai_edge_quantizer.utils import tfl_flatbuffer_utils
+_QTransf = qtyping.QuantTransformation
 TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile(".")
@@ -267,6 +271,52 @@ class TransformationPerformerTest(parameterized.TestCase):
         expected_added_op_id_map,
     )
+  def test__update_instructions_updates_tensor_id_after_duplicate_tensor(self):
+    def get_test_instruction(transformation, consumers):
+      return qtyping.TransformationInst(
+          transformation=transformation,
+          consumers=consumers,
+          # Dummy values below.
+          tensor_id=0,
+          producer=0,
+          parameters=qtyping.UniformQuantParams(
+              8, None, np.array([1]), np.array([0])
+          ),
+      )
+    instructions = [
+        get_test_instruction(_QTransf.DUPLICATE_TENSOR, consumers=[1]),
+        get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[1]),
+        get_test_instruction(_QTransf.ADD_DEQUANTIZE, consumers=[1]),
+        get_test_instruction(_QTransf.QUANTIZE_TENSOR, consumers=[2]),
+    ]
+    # Simulate a situation as if the first instruction (duplicate tensor) was
+    # applied.
+    subgraph_id = 0
+    duplicated_tensor_id = 13
+    prev_trans_idx = 0
+    trans_info = qtyping.TransformationInfo(
+        # Copy of what duplicate_tensor.py returns.
+        op_id=0,
+        num_ops_added=0,
+        output_tensor_id=duplicated_tensor_id,
+    )
+    self._transformation_performer._create_op_id_map(self._test_model)
+    self._transformation_performer._update_instructions(
+        prev_trans_idx, instructions, subgraph_id, trans_info
+    )
+    # Expecting the ops with the same consumers as in the DUPLICATE_TENSOR
+    # instruction to use the new tensor id.
+    expected_instructions = copy.deepcopy(instructions)
+    expected_instructions[1].tensor_id = duplicated_tensor_id
+    expected_instructions[2].tensor_id = duplicated_tensor_id
+    self.assertSequenceEqual(instructions, expected_instructions)
+    # Expecting no change to the op id map.
+    self.assertListEqual(
+        self._transformation_performer._added_op_id_map,
+        [[]],
+    )
   def test_transform_graph(self):
     """test for transform_graph."""
     instructions = {

ai_edge_quantizer/transformations/duplicate_tensor.py ADDED Viewed

@@ -0,0 +1,61 @@
+# Copyright 2024 The AI Edge Quantizer Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Duplicate tensor transformation."""
+from ai_edge_quantizer import qtyping
+from ai_edge_quantizer.transformations import transformation_utils
+from ai_edge_quantizer.utils import tfl_flatbuffer_utils
+def duplicate_tensor(
+    transformation_input: transformation_utils.TransformationInput,
+) -> qtyping.TransformationInfo:
+  """Duplicates the tensor."""
+  tensor_id = transformation_input.tensor_id
+  subgraph = transformation_input.subgraph
+  tensor = subgraph.tensors[tensor_id]
+  tensor_name = tfl_flatbuffer_utils.get_tensor_name(tensor)
+  buffer_data = transformation_input.buffers[tensor.buffer].data
+  if buffer_data is None:
+    raise ValueError(
+        'Duplicate Tensor transformation supports only constant tensors.'
+        f' Tensor {tensor_name} is not constant.'
+    )
+  new_tensor_id = transformation_utils.add_new_constant_tensor(
+      tensor_name=f'{tensor_name}_duplicated',
+      data=buffer_data,
+      tensor_type=tensor.type,
+      tensor_shape=tensor.shape,
+      subgraph=subgraph,
+      buffers=transformation_input.buffers,
+  )
+  # Update the tensor name to avoid a name collision in case the tensor is
+  # duplicated multiple times.
+  subgraph.tensors[new_tensor_id].name += f'_{new_tensor_id}'
+  # Update the consumers' input tensor id to the duplicated tensor id.
+  # Assuming transformation_input to contain all and only consumers that are
+  # supposed to use this new duplicated tensor.
+  for consumer in transformation_input.consumers:
+    consumer_inputs = subgraph.operators[consumer].inputs
+    for i in range(len(consumer_inputs)):
+      if consumer_inputs[i] == tensor_id:
+        consumer_inputs[i] = new_tensor_id
+        break
+  return qtyping.TransformationInfo(
+      op_id=0, num_ops_added=0, output_tensor_id=new_tensor_id
+  )

ai_edge_quantizer/transformations/duplicate_tensor_test.py ADDED Viewed

@@ -0,0 +1,131 @@
+# Copyright 2024 The AI Edge Quantizer Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import os
+import numpy as np
+from tensorflow.python.platform import googletest
+from ai_edge_quantizer import qtyping
+from ai_edge_quantizer.transformations import duplicate_tensor
+from ai_edge_quantizer.transformations import transformation_utils
+from ai_edge_quantizer.utils import test_utils
+from ai_edge_quantizer.utils import tfl_flatbuffer_utils
+# Base directory that test data paths (such as the .tflite model loaded in
+# setUp) are joined onto.
+TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile('..')
+class DuplicateTensorTest(googletest.TestCase):
+  """Tests for duplicate_tensor.duplicate_tensor."""
+  def setUp(self):
+    """Loads the weight_sharing_fcs.tflite test model into self.model."""
+    super().setUp()
+    model_path = os.path.join(
+        TEST_DATA_PREFIX_PATH, 'tests/models/weight_sharing_fcs.tflite'
+    )
+    self.model = tfl_flatbuffer_utils.read_model(model_path)
+  def _get_transformation_input(
+      self,
+      subgraph_idx: int,
+      tensor_idx: int,
+      consumers: list[int],
+  ) -> transformation_utils.TransformationInput:
+    """Builds a TransformationInput for a tensor of the loaded test model.
+
+    Args:
+      subgraph_idx: Index into self.model.subgraphs of the subgraph to use.
+      tensor_idx: Id of the tensor the transformation is applied to.
+      consumers: Ids of the ops that should consume the transformed tensor.
+
+    Returns:
+      A TransformationInput backed by the model's buffers and op codes, with
+      producer set to -1 and fixed 8-bit placeholder quantization params.
+    """
+    return transformation_utils.TransformationInput(
+        tensor_id=tensor_idx,
+        buffers=self.model.buffers,
+        consumers=consumers,
+        # Dummy params below.
+        op_codes=self.model.operatorCodes,
+        subgraph=self.model.subgraphs[subgraph_idx],
+        producer=-1,
+        quant_params=qtyping.UniformQuantParams(
+            num_bits=8,
+            quantized_dimension=None,
+            scale=np.ones(1),
+            zero_point=np.zeros(1),
+        ),
+    )
+  def test_constant_tensor_is_correctly_duplicated(self):
+    """Checks the new tensor, its buffer, and the rewired consumer input."""
+    # Duplicate the FC weight tensor in the second subgraph for the first FC.
+    subgraph_idx = 1
+    fc1_op_idx = 0
+    prev_weight_tensor_idx = 1
+    subgraph = self.model.subgraphs[subgraph_idx]
+    # Remember which input slot of the op holds the weight so the rewiring can
+    # be checked after the transformation.
+    weight_idx_in_op_inputs = list(subgraph.operators[fc1_op_idx].inputs).index(
+        prev_weight_tensor_idx
+    )
+    # Snapshot the state before the transformation for later comparison.
+    prev_num_tensors = len(subgraph.tensors)
+    prev_buffer_id = subgraph.tensors[prev_weight_tensor_idx].buffer
+    prev_num_buffers = len(self.model.buffers)
+    transformation_input = self._get_transformation_input(
+        subgraph_idx, prev_weight_tensor_idx, consumers=[fc1_op_idx]
+    )
+    transformation_info = duplicate_tensor.duplicate_tensor(
+        transformation_input
+    )
+    self.assertEqual(transformation_info.op_id, 0)
+    self.assertEqual(transformation_info.num_ops_added, 0)
+    # Check that a new tensor and buffer were added.
+    self.assertLen(subgraph.tensors, prev_num_tensors + 1)
+    self.assertLen(self.model.buffers, prev_num_buffers + 1)
+    # Check that the duplicated tensor is the last tensor in the subgraph.
+    weight_tensor_idx = transformation_info.output_tensor_id
+    self.assertEqual(weight_tensor_idx, len(subgraph.tensors) - 1)
+    # Compare tensors.
+    original_tensor = subgraph.tensors[prev_weight_tensor_idx]
+    original_tensor_name = tfl_flatbuffer_utils.get_tensor_name(original_tensor)
+    duplicated_tensor = subgraph.tensors[weight_tensor_idx]
+    self.assertEqual(
+        duplicated_tensor.name,
+        f'{original_tensor_name}_duplicated_{weight_tensor_idx}',
+    )
+    self.assertEqual(duplicated_tensor.type, original_tensor.type)
+    self.assertTrue(np.all(duplicated_tensor.shape == original_tensor.shape))
+    # Check that the new buffer is used by the duplicated tensor.
+    new_buffer_id = len(self.model.buffers) - 1
+    self.assertEqual(duplicated_tensor.buffer, new_buffer_id)
+    # Check that the new buffer has the same data as the original one.
+    self.assertTrue(
+        np.all(
+            np.frombuffer(
+                self.model.buffers[new_buffer_id].data,
+                dtype=np.float32,
+            )
+            == np.frombuffer(
+                self.model.buffers[prev_buffer_id].data,
+                dtype=np.float32,
+            )
+        )
+    )
+    # Check that first FC input tensor id was updated.
+    self.assertEqual(
+        subgraph.operators[fc1_op_idx].inputs[weight_idx_in_op_inputs],
+        weight_tensor_idx,
+    )
+  def test_duplicate_tensor_raises_error_when_tensor_is_not_constant(self):
+    """Checks that duplicating a tensor without buffer data raises ValueError."""
+    # Duplicate the FC input tensor in the second subgraph.
+    subgraph_idx = 1
+    input_tensor_idx = 0
+    transformation_input = self._get_transformation_input(
+        subgraph_idx, input_tensor_idx, consumers=[0]
+    )
+    with self.assertRaisesRegex(
+        ValueError,
+        'Duplicate Tensor transformation supports only constant tensors.',
+    ):
+      duplicate_tensor.duplicate_tensor(transformation_input)
+# Run the test suite when this file is executed directly.
+if __name__ == '__main__':
+  googletest.main()

ai_edge_quantizer/transformations/transformation_utils.py CHANGED Viewed

@@ -16,7 +16,7 @@
 """Utility functions for graph transformations."""
 import dataclasses
-from typing import Union
+from typing import Optional, Union
 import numpy as np
@@ -98,6 +98,7 @@ def add_new_constant_tensor(
     tensor_type: schema_py_generated.TensorType,
     subgraph: schema_py_generated.SubGraphT,
     buffers: list[schema_py_generated.BufferT],
+    tensor_shape: Optional[list[int]] = None,
 ) -> int:
   """Add a new constant tensor to the model.
@@ -107,6 +108,8 @@ def add_new_constant_tensor(
     tensor_type: The type of the new tensor.
     subgraph: The subgraph where the new tensor is added.
     buffers: The buffers of the model.
+    tensor_shape: The shape of the new tensor. If not provided, the shape of the
+      data will be used.
   Returns:
     The index of the new tensor in the subgraph.
@@ -114,7 +117,9 @@ def add_new_constant_tensor(
   new_buffer_id = add_new_constant_buffer(data, buffers)
   new_tensor = schema_py_generated.TensorT()
-  new_tensor.shape = data.shape
+  if tensor_shape is None:
+    tensor_shape = data.shape
+  new_tensor.shape = tensor_shape
   new_tensor.buffer = new_buffer_id
   new_tensor.type = tensor_type
   new_tensor.name = tensor_name

{ai_edge_quantizer_nightly-0.1.0.dev20250404.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-quantizer-nightly
-Version: 0.1.0.dev20250404
+Version: 0.1.0.dev20250406
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI

{ai_edge_quantizer_nightly-0.1.0.dev20250404.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info}/RECORD RENAMED Viewed

@@ -12,17 +12,17 @@ ai_edge_quantizer/model_validator.py,sha256=fRNz0jO54cthPTibsCuViUXUuFRHl_fbvEiC
 ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
 ai_edge_quantizer/params_generator.py,sha256=46XDjnP4R3m4xsoXNp7brv0sNQPdQMg217_CbEl-Wgg,15780
 ai_edge_quantizer/params_generator_test.py,sha256=9WTUl87XqbM4NruX5ypLuVRtuhcw-CmxndsMOUzZ92Q,43171
-ai_edge_quantizer/qtyping.py,sha256=UBZ3HgO8IDLY6VJmO05rGtFv_idMD3Os3WWsnriA0NA,15235
+ai_edge_quantizer/qtyping.py,sha256=FqelZu7j0fGBRSCv_VVsuf3VmbfVlYJGgsjvdMXGgaw,15284
 ai_edge_quantizer/quantizer.py,sha256=g3DMqFMrMpt9jQttCE0WcdNbMtk0JZnmN5MmCHrNdyM,13202
 ai_edge_quantizer/quantizer_test.py,sha256=K_HBA56JkFI3HL8VLWCqGEfC0ISh5ldMKoNyBdGRAJg,20368
 ai_edge_quantizer/recipe.py,sha256=FR0uJceumZrnle2VRSOQZ1uXup4S1cTYKRH-N53mWRo,2919
 ai_edge_quantizer/recipe_manager.py,sha256=qcGUD7e7BISKdsY9WH2rdaRR3acmzSA5qMezGNbzlpo,8931
 ai_edge_quantizer/recipe_manager_test.py,sha256=LulVxsYp6TBGFI2PLCUCd4VsFq8ELpC7kMNkUjsLgbo,32230
 ai_edge_quantizer/recipe_test.py,sha256=Fg_sfxovI2fRjk5qdu18ghOvXdUvhDR1TxbE0GHDczc,3381
-ai_edge_quantizer/transformation_instruction_generator.py,sha256=WkECCO85lLs4cEnjZF5eVGbtuul4P8N77gUxUCK9ESY,21605
-ai_edge_quantizer/transformation_instruction_generator_test.py,sha256=-6ycvqpLoAMRvHuLiAwNBZNhsWqwWTsi9tqFxV9Gfq0,41218
-ai_edge_quantizer/transformation_performer.py,sha256=y7kBTwXO_ORTBiijBv3y-L85Y-NwaDEIx3_OdI0uhUI,11551
-ai_edge_quantizer/transformation_performer_test.py,sha256=m3V6nd6jsjd6jVId5wTBNuyDB2h2p4tHlMWhlnomlJo,13341
+ai_edge_quantizer/transformation_instruction_generator.py,sha256=R7A90Qj6iQQROrznXmXLJd-5yXq0PRHbLOdNY51dEu4,27913
+ai_edge_quantizer/transformation_instruction_generator_test.py,sha256=E0QSDCav6N6izlJ-a1ZJOsb2VEUxuxBmTbt0-EgDdxY,49890
+ai_edge_quantizer/transformation_performer.py,sha256=PIrylVhuWZCpnXEl7qSw2BlxRrY7lqj6aQvagJVCVts,11989
+ai_edge_quantizer/transformation_performer_test.py,sha256=n9xI6QMqvrj9KUul2LuObIsF7YdLSqgMg4X6d4BkFP8,15219
 ai_edge_quantizer/algorithms/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
 ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
 ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
@@ -46,13 +46,15 @@ ai_edge_quantizer/transformations/dequant_insert.py,sha256=sL1LHFVzBDSd9jgrzlHz3
 ai_edge_quantizer/transformations/dequant_insert_test.py,sha256=NJ18PnG71_AvUPz3Cr_TmG6URMeBfa7IiDDyddfTkKQ,10830
 ai_edge_quantizer/transformations/duplicate_buffer.py,sha256=sEod0EtmcHX0VDqBCI4BYCX9CSRyDtx2vmjtOentFiY,1743
 ai_edge_quantizer/transformations/duplicate_buffer_test.py,sha256=YYWl3Q5WF60s8T8pLzzA8TCSxz-i7dqc03dJt1LtMw4,3880
+ai_edge_quantizer/transformations/duplicate_tensor.py,sha256=HF1uuKFm5kFF6X0XUpdYlLPoikSRd7pIPK1oxN7TuHY,2455
+ai_edge_quantizer/transformations/duplicate_tensor_test.py,sha256=s-RqSxNBMfVJyCunXz2eb7-KA6UiBmbOmL7phLslENQ,5056
 ai_edge_quantizer/transformations/emulated_subchannel.py,sha256=HVaRxoC8PCAvy3xeMv3OIymukUy_yW1zK0xN8Ann6I4,13602
 ai_edge_quantizer/transformations/emulated_subchannel_test.py,sha256=gZP6u9NdPXl7s19qB_Un8evou9ZZV6I9Gy0E1rdobHM,7722
 ai_edge_quantizer/transformations/quant_insert.py,sha256=jn6HsJaV-sqBiFPY-Aqbd64t8zgcYVkEkZI375x_FWY,3958
 ai_edge_quantizer/transformations/quant_insert_test.py,sha256=X9ptPDvJCFkR5tejKnD1SlHFGPazQTW-wNNMV9MEAuw,10107
 ai_edge_quantizer/transformations/quantize_tensor.py,sha256=y6As38mTzhva50YvNQ7p0SFpuWet3LPqFwE3qIO0gEQ,8231
 ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=mHLO3_MRt36A8-ZN8ADn5tBBJlqjTWa7ZUN8Mmu5Rcw,9116
-ai_edge_quantizer/transformations/transformation_utils.py,sha256=R42OIbzwQ7JYJ-Qt46jsqwb6u4MfDGiIPCRZCUGLVCw,4664
+ai_edge_quantizer/transformations/transformation_utils.py,sha256=5w0fG6TP362elTHs-JZokl24fuK4Gv6DGyIpybQYb3g,4885
 ai_edge_quantizer/transformations/transformation_utils_test.py,sha256=xH64SF3UHDh84vYbt-WvmXNjM-Jg-mefES1ACO1tkqw,6269
 ai_edge_quantizer/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
 ai_edge_quantizer/utils/calibration_utils.py,sha256=1Fj9MIO6aLZIRgyd4axvZN4S_O64nB_-Miu1WP664js,2536
@@ -64,8 +66,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=x2xA2CFPpe_2trcV8v5xGaBE
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=Op3JxtOqlrjzmYF18jnnstL1k9xiY9kKJ8S2vklKGkc,11327
 ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
 ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
-ai_edge_quantizer_nightly-0.1.0.dev20250404.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ai_edge_quantizer_nightly-0.1.0.dev20250404.dist-info/METADATA,sha256=oVrRVQkgq4t-L9CuYTqEu-S3S6TufssD059_ZtP6WMQ,1527
-ai_edge_quantizer_nightly-0.1.0.dev20250404.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_quantizer_nightly-0.1.0.dev20250404.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
-ai_edge_quantizer_nightly-0.1.0.dev20250404.dist-info/RECORD,,
+ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info/METADATA,sha256=W7h2q3SS2TX0imvGdEIJiCocHydrTH813QV1behoKQU,1527
+ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info/RECORD,,

{ai_edge_quantizer_nightly-0.1.0.dev20250404.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info}/LICENSE RENAMED Viewed

File without changes

{ai_edge_quantizer_nightly-0.1.0.dev20250404.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info}/WHEEL RENAMED Viewed

File without changes

{ai_edge_quantizer_nightly-0.1.0.dev20250404.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info}/top_level.txt RENAMED Viewed

File without changes

ai-edge-quantizer-nightly 0.1.0.dev20250404__py3-none-any.whl → 0.1.0.dev20250406__py3-none-any.whl

ai-edge-quantizer-nightly 0.1.0.dev20250404py3-none-any.whl → 0.1.0.dev20250406py3-none-any.whl