ai-edge-quantizer-nightly 0.0.1.dev20250302__py3-none-any.whl → 0.5.0.dev20260103__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithm_manager.py +224 -0
- ai_edge_quantizer/algorithm_manager_api_test.py +7 -0
- ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py +2 -2
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +643 -20
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py +29 -2
- ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py +29 -35
- ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py +35 -12
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py +414 -0
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py +440 -0
- ai_edge_quantizer/algorithms/uniform_quantize/mse.py +127 -0
- ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py +195 -0
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +54 -168
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +54 -17
- ai_edge_quantizer/algorithms/uniform_quantize/octav.py +188 -0
- ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py +240 -0
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +260 -13
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +152 -5
- ai_edge_quantizer/algorithms/utils/common_utils.py +142 -54
- ai_edge_quantizer/calibrator.py +58 -94
- ai_edge_quantizer/calibrator_test.py +5 -74
- ai_edge_quantizer/default_policy.py +108 -16
- ai_edge_quantizer/model_modifier.py +132 -8
- ai_edge_quantizer/model_modifier_test.py +81 -1
- ai_edge_quantizer/model_validator.py +38 -10
- ai_edge_quantizer/model_validator_test.py +2 -1
- ai_edge_quantizer/params_generator.py +230 -47
- ai_edge_quantizer/params_generator_test.py +366 -261
- ai_edge_quantizer/qtyping.py +92 -6
- ai_edge_quantizer/quantizer.py +167 -23
- ai_edge_quantizer/quantizer_test.py +288 -26
- ai_edge_quantizer/recipe.py +156 -21
- ai_edge_quantizer/recipe_manager.py +158 -1
- ai_edge_quantizer/recipe_manager_test.py +146 -32
- ai_edge_quantizer/recipe_test.py +93 -17
- ai_edge_quantizer/transformation_instruction_generator.py +313 -46
- ai_edge_quantizer/transformation_instruction_generator_test.py +449 -27
- ai_edge_quantizer/transformation_performer.py +112 -58
- ai_edge_quantizer/transformation_performer_test.py +176 -4
- ai_edge_quantizer/transformations/duplicate_buffer.py +46 -0
- ai_edge_quantizer/transformations/duplicate_buffer_test.py +106 -0
- ai_edge_quantizer/transformations/duplicate_tensor.py +62 -0
- ai_edge_quantizer/transformations/duplicate_tensor_test.py +131 -0
- ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation.py +299 -0
- ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation_test.py +244 -0
- ai_edge_quantizer/transformations/insert_hadamard_rotation.py +186 -0
- ai_edge_quantizer/transformations/insert_hadamard_rotation_test.py +200 -0
- ai_edge_quantizer/transformations/quantize_tensor.py +24 -44
- ai_edge_quantizer/transformations/quantize_tensor_test.py +3 -2
- ai_edge_quantizer/transformations/transformation_utils.py +157 -11
- ai_edge_quantizer/transformations/transformation_utils_test.py +96 -2
- ai_edge_quantizer/utils/calibration_utils.py +263 -1
- ai_edge_quantizer/utils/calibration_utils_test.py +173 -3
- ai_edge_quantizer/utils/constrained_ops_utils.py +111 -0
- ai_edge_quantizer/utils/constrained_ops_utils_test.py +50 -0
- ai_edge_quantizer/utils/test_utils.py +191 -58
- ai_edge_quantizer/utils/tfl_flatbuffer_utils.py +96 -50
- ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py +20 -0
- ai_edge_quantizer/utils/tfl_interpreter_utils.py +138 -5
- ai_edge_quantizer/utils/tfl_interpreter_utils_test.py +29 -2
- ai_edge_quantizer/utils/validation_utils.py +114 -4
- ai_edge_quantizer/utils/validation_utils_test.py +80 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/METADATA +13 -3
- ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/RECORD +81 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/WHEEL +1 -1
- ai_edge_quantizer/transformations/emulated_subchannel.py +0 -363
- ai_edge_quantizer/transformations/emulated_subchannel_test.py +0 -212
- ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info/RECORD +0 -67
- {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/licenses}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/top_level.txt +0 -0
--- ai_edge_quantizer/transformation_performer.py
+++ ai_edge_quantizer/transformation_performer.py
@@ -15,10 +15,17 @@
 
 """Python manager for transformations to be applied to TFlite models."""
 
+from collections.abc import Sequence
+from typing import Optional
+
 import numpy as np
+
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer.transformations import dequant_insert
-from ai_edge_quantizer.transformations import emulated_subchannel
+from ai_edge_quantizer.transformations import duplicate_buffer
+from ai_edge_quantizer.transformations import duplicate_tensor
+from ai_edge_quantizer.transformations import insert_decomposed_hadamard_rotation
+from ai_edge_quantizer.transformations import insert_hadamard_rotation
 from ai_edge_quantizer.transformations import quant_insert
 from ai_edge_quantizer.transformations import quantize_tensor
 from ai_edge_quantizer.transformations import transformation_utils
@@ -65,9 +72,21 @@ class TransformationPerformer:
             quantize_tensor.quantize_tensor
         ),
         qtyping.QuantTransformation.EMULATED_SUBCHANNEL: (
-            emulated_subchannel.emulated_subchannel
+            transformation_utils.raise_deprecated_error
         ),
         qtyping.QuantTransformation.ADD_QUANTIZE: quant_insert.insert_quant,
+        qtyping.QuantTransformation.DUPLICATE_BUFFER: (
+            duplicate_buffer.duplicate_buffer
+        ),
+        qtyping.QuantTransformation.DUPLICATE_TENSOR: (
+            duplicate_tensor.duplicate_tensor
+        ),
+        qtyping.QuantTransformation.INSERT_HADAMARD_ROTATION: (
+            insert_hadamard_rotation.insert_hadamard_rotation
+        ),
+        qtyping.QuantTransformation.INSERT_DECOMPOSED_HADAMARD_ROTATION: (
+            insert_decomposed_hadamard_rotation.insert_decomposed_hadamard_rotation
+        ),
     }
     # transformations are seprated in two categories:
     # op_insertion_transformations are transformations that only insert ops
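Every value in this registration dict follows one call contract, which is what lets the four new transformations slot in without any change to the dispatch code. A minimal sketch of that contract, inferred from this diff (the no-op function below is hypothetical, not a real registry entry):

    # Sketch of the shared transformation interface; `noop_transformation`
    # is hypothetical and not part of the package.
    from ai_edge_quantizer import qtyping
    from ai_edge_quantizer.transformations import transformation_utils


    def noop_transformation(
        transformation_input: transformation_utils.TransformationInput,
    ) -> qtyping.TransformationInfo:
      """Leaves the graph untouched and only reports bookkeeping info."""
      return qtyping.TransformationInfo(
          op_id=0,  # Position where ops were inserted (none here).
          num_ops_added=0,  # The performer uses this to shift later op ids.
          output_tensor_id=transformation_input.tensor_id,
      )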
@@ -77,6 +96,10 @@ class TransformationPerformer:
         qtyping.QuantTransformation.ADD_DEQUANTIZE,
         qtyping.QuantTransformation.QUANTIZE_TENSOR,
         qtyping.QuantTransformation.ADD_QUANTIZE,
+        qtyping.QuantTransformation.DUPLICATE_BUFFER,
+        qtyping.QuantTransformation.DUPLICATE_TENSOR,
+        qtyping.QuantTransformation.INSERT_HADAMARD_ROTATION,
+        qtyping.QuantTransformation.INSERT_DECOMPOSED_HADAMARD_ROTATION,
     ])
     self._op_replacement_transformations = set(
         [qtyping.QuantTransformation.EMULATED_SUBCHANNEL]
@@ -123,44 +146,81 @@ class TransformationPerformer:
       transformations: list[qtyping.TransformationInst],
       subgraph_id: int,
       trans_info: qtyping.TransformationInfo,
-  ):
-    """Update the instructions after the graph is modified.
+  ) -> None:
+    """Update the instructions in-place after the graph is modified.
 
-    After an op is inserted, the topology is changed and this may impact the
-    following transformation to be applied. So we need to update instructions
-    that have yet to be applied.
+    After an op is inserted or a tensor is duplicated, the topology is changed
+    and this may impact the following transformation to be applied. So we need
+    to update instructions that have yet to be applied.
 
     Args:
-      prev_transformation_index:
-      transformations:
-      subgraph_id:
-      trans_info:
-
-    Returns:
-      None, modifies the transformation in place
+      prev_transformation_index: The index of the last applied transformation.
+      transformations: The list of transformations we're applying.
+      subgraph_id: The subgraph where the provided instructions belong to.
+      trans_info: Transformation info returned by a transformation.
     """
-    # if no ops were added, then no need for update
-    if trans_info.num_ops_added == 0:
-      return
     prev_transformation = transformations[prev_transformation_index]
-    self._added_op_id_map[subgraph_id].append(
-        trans_info.op_id + trans_info.num_ops_added - 1
+    is_prev_not_duplicate_tensor = (
+        prev_transformation.transformation
+        != qtyping.QuantTransformation.DUPLICATE_TENSOR
     )
+    was_op_added = trans_info.num_ops_added > 0
+    if not was_op_added and is_prev_not_duplicate_tensor:
+      return
+
+    if was_op_added:
+      self._added_op_id_map[subgraph_id].append(
+          trans_info.op_id + trans_info.num_ops_added - 1
+      )
+
     for transformations_index in range(
         prev_transformation_index + 1, len(transformations)
     ):
       transformation = transformations[transformations_index]
       for consumer_index in transformation.consumers:
-        #
+        # If the consumer needs to use newly added ops, then the new added op
         # index needs to be outside of the range of the orignal op ids.
         if consumer_index in prev_transformation.consumers:
-          transformation.producer = (
-              len(self._original_op_id_map[subgraph_id])
-              + len(self._added_op_id_map[subgraph_id])
-              - 1
-          )
+          if was_op_added:
+            transformation.producer = (
+                len(self._original_op_id_map[subgraph_id])
+                + len(self._added_op_id_map[subgraph_id])
+                - 1
+            )
           transformation.tensor_id = trans_info.output_tensor_id
 
+  def _get_updated_producer_id(
+      self, original_producer_id: int, subgraph_id: int
+  ) -> int:
+    """Update the producer of a transformation instruction."""
+    if original_producer_id is None or original_producer_id < 0:
+      producer = -1
+    elif original_producer_id < len(self._original_op_id_map[subgraph_id]):
+      producer = self._original_op_id_map[subgraph_id][original_producer_id]
+    else:
+      # If the producer id is not in the original op map, it's an added op,
+      # go the added op map to find the producer.
+      producer = self._added_op_id_map[subgraph_id][
+          original_producer_id - len(self._original_op_id_map[subgraph_id])
+      ]
+    return producer
+
+  def _get_updated_consumer_ids(
+      self,
+      original_consumer_ids: list[int],
+      subgraph_id: int,
+  ) -> list[int]:
+    """Update the consumers of a transformation instruction."""
+    consumers = []
+    for original_op_id in original_consumer_ids:
+      new_consumer_id = (
+          -1
+          if original_op_id == -1
+          else self._original_op_id_map[subgraph_id][original_op_id]
+      )
+      consumers.append(new_consumer_id)
+    return consumers
+
   def _apply_single_transformation(
       self,
       transformation_inst: qtyping.TensorTransformationInsts,
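The two new helpers centralize index arithmetic that previously lived inline in _apply_single_transformation: ids below len(_original_op_id_map[subgraph]) are original ops looked up at their shifted positions, while anything at or above that bound indexes into _added_op_id_map. A standalone sketch of the arithmetic, with hypothetical map contents:

    # Standalone sketch of the producer-id remapping; map values are
    # hypothetical examples.
    original_op_id_map = [0, 3, 6]   # Ops 0..2 after shifts from insertions.
    added_op_id_map = [1, 2, 4, 5]   # Current positions of four inserted ops.


    def get_updated_producer_id(original_producer_id):
      if original_producer_id is None or original_producer_id < 0:
        return -1  # Graph input: no producer op.
      if original_producer_id < len(original_op_id_map):
        return original_op_id_map[original_producer_id]
      # Larger ids refer to ops added by earlier transformations.
      return added_op_id_map[original_producer_id - len(original_op_id_map)]


    assert get_updated_producer_id(1) == 3  # Original op 1 moved to index 3.
    assert get_updated_producer_id(4) == 2  # Added op 4 - 3 = 1 sits at index 2.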
@@ -179,28 +239,12 @@ class TransformationPerformer:
       None, update the transformation_inst & tflite_model in place
     """
     instruction = transformation_inst.instructions[transformation_index]
-    if instruction.producer is None or instruction.producer < 0:
-      producer = -1
-    elif instruction.producer < len(
-        self._original_op_id_map[transformation_inst.subgraph_id]
-    ):
-      producer = self._original_op_id_map[transformation_inst.subgraph_id][
-          instruction.producer
-      ]
-    else:
-      # if the producer id is not in the original op map, it's an added op,
-      # go the corresponding new maps
-      producer = self._added_op_id_map[transformation_inst.subgraph_id][
-          instruction.producer
-          - len(self._original_op_id_map[transformation_inst.subgraph_id])
-      ]
-    consumers = []
-    for original_op_id in instruction.consumers:
-      consumers.append(
-          self._original_op_id_map[transformation_inst.subgraph_id][
-              original_op_id
-          ]
-      )
+    producer = self._get_updated_producer_id(
+        instruction.producer, transformation_inst.subgraph_id
+    )
+    consumers = self._get_updated_consumer_ids(
+        instruction.consumers, transformation_inst.subgraph_id
+    )
     trans_info = self._transformation_registration[instruction.transformation](
         transformation_utils.TransformationInput(
             instruction.tensor_id,
@@ -220,7 +264,12 @@
     )
     self._update_op_id_map(
         transformation_inst.subgraph_id,
-        min(instruction.consumers),
+        # The added op must be right before the most immediate consumer, unless
+        # the consumer is the graph output (id=-1), then use the producer's
+        # index instead.
+        min(instruction.consumers)
+        if min(instruction.consumers) >= 0
+        else instruction.producer + 1,
         trans_info.num_ops_added,
     )
 
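The conditional added above picks the insertion slot passed to _update_op_id_map. A small sketch of the rule, with hypothetical ids:

    # Sketch of the insertion-slot rule added above; ids are hypothetical.
    def insertion_index(consumers, producer):
      # New ops go right before their earliest consumer; if the tensor feeds a
      # graph output (consumer id -1), fall back to just after the producer.
      return min(consumers) if min(consumers) >= 0 else producer + 1


    assert insertion_index([2, 5], producer=1) == 2   # Before earliest consumer.
    assert insertion_index([-1], producer=3) == 4     # Output tensor case.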
@@ -260,19 +309,24 @@
       self,
       transformation_instructions: dict[str, qtyping.TensorTransformationInsts],
       tflite_model: schema_py_generated.ModelT,
-  ):
-    """Apply all the transformations to the given tflite_model.
+      tensor_processing_order: Optional[Sequence[str]] = None,
+  ) -> None:
+    """Apply all transformations to the given tflite_model in place.
 
     Args:
-      transformation_instructions: a dict of transformation instructions
-        grouped by tensors, produced by transformation_instruction_generator
-      tflite_model: the tflite model to apply quantization on
-
-    Returns:
-      None, modifies the tflite_model in place
+      transformation_instructions: Mapping from tensor name to its
+        transformation instructions, produced by
+        transformation_instruction_generator.
+      tflite_model: The tflite model to apply quantization to.
+      tensor_processing_order: The order of tensors to process. If not provided,
+        the order will be inferred from `transformation_instructions`.
     """
     self._original_op_id_map = []
     self._added_op_id_map = []
     self._create_op_id_map(tflite_model)
-    for transformation_inst in transformation_instructions.values():
-      self._apply_transformations(transformation_inst, tflite_model)
+    if tensor_processing_order is None:
+      tensor_processing_order = transformation_instructions.keys()
+    for tensor_name in tensor_processing_order:
+      self._apply_transformations(
+          transformation_instructions[tensor_name], tflite_model
+      )
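Callers that need a deterministic or constraint-driven order can now pass it explicitly; by default the dict's own iteration order is used. A usage sketch, assuming `instructions` and `model` come from the existing quantizer pipeline:

    # Usage sketch for the new argument; `instructions` and `model` are
    # assumed to come from the existing quantizer pipeline.
    from ai_edge_quantizer import transformation_performer

    performer = transformation_performer.TransformationPerformer()
    performer.transform_graph(
        instructions, model, tensor_processing_order=sorted(instructions)
    )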
--- ai_edge_quantizer/transformation_performer_test.py
+++ ai_edge_quantizer/transformation_performer_test.py
@@ -15,6 +15,7 @@
 
 """Tests for transformation_performer."""
 
+import copy
 import os
 
 import numpy as np
@@ -26,6 +27,9 @@ from ai_edge_quantizer import transformation_performer
 from ai_edge_quantizer.utils import test_utils
 from ai_edge_quantizer.utils import tfl_flatbuffer_utils
 
+_QTransf = qtyping.QuantTransformation
+
+
 TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile(".")
 
 
@@ -108,6 +112,32 @@ class TransformationPerformerTest(parameterized.TestCase):
     for index, op_id in enumerate(op_id_map[0]):
       self.assertEqual(op_id, index)
 
+  def test_update_op_id_map_not_changing_value_single_op_model(self):
+    """test for _update_op_id_map."""
+    model = tfl_flatbuffer_utils.read_model(
+        os.path.join(
+            TEST_DATA_PREFIX_PATH, "tests/models/single_fc_bias.tflite"
+        )
+    )
+    self._transformation_performer._create_op_id_map(model)
+    instruction = qtyping.TransformationInst(
+        transformation=qtyping.QuantTransformation.QUANTIZE_TENSOR,
+        tensor_id=0,
+        producer=0,
+        consumers=[-1],
+        parameters=qtyping.UniformQuantParams(
+            8, None, np.array([1]), np.array([0])
+        ),
+    )
+    producer = self._transformation_performer._get_updated_producer_id(
+        instruction.producer, 0
+    )
+    consumers = self._transformation_performer._get_updated_consumer_ids(
+        instruction.consumers, 0
+    )
+    self.assertEqual(producer, 0)
+    self.assertEqual(consumers, [-1])
+
   @parameterized.named_parameters(
       dict(
           testcase_name="test_no_update",
@@ -267,6 +297,52 @@
         expected_added_op_id_map,
     )
 
+  def test_update_instructions_updates_tensor_id_after_duplicate_tensor(self):
+    def get_test_instruction(transformation, consumers):
+      return qtyping.TransformationInst(
+          transformation=transformation,
+          consumers=consumers,
+          # Dummy values below.
+          tensor_id=0,
+          producer=0,
+          parameters=qtyping.UniformQuantParams(
+              8, None, np.array([1]), np.array([0])
+          ),
+      )
+
+    instructions = [
+        get_test_instruction(_QTransf.DUPLICATE_TENSOR, consumers=[1]),
+        get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[1]),
+        get_test_instruction(_QTransf.ADD_DEQUANTIZE, consumers=[1]),
+        get_test_instruction(_QTransf.QUANTIZE_TENSOR, consumers=[2]),
+    ]
+    # Simulate a situation as if the first instruction (duplicate tensor) was
+    # applied.
+    subgraph_id = 0
+    duplicated_tensor_id = 13
+    prev_trans_idx = 0
+    trans_info = qtyping.TransformationInfo(
+        # Copy of what duplicate_tensor.py returns.
+        op_id=0,
+        num_ops_added=0,
+        output_tensor_id=duplicated_tensor_id,
+    )
+    self._transformation_performer._create_op_id_map(self._test_model)
+    self._transformation_performer._update_instructions(
+        prev_trans_idx, instructions, subgraph_id, trans_info
+    )
+    # Expecting the ops with the same consumers as in the DUPLICATE_TENSOR
+    # instruction to use the new tensor id.
+    expected_instructions = copy.deepcopy(instructions)
+    expected_instructions[1].tensor_id = duplicated_tensor_id
+    expected_instructions[2].tensor_id = duplicated_tensor_id
+    self.assertSequenceEqual(instructions, expected_instructions)
+    # Expecting no change to the op id map.
+    self.assertListEqual(
+        self._transformation_performer._added_op_id_map,
+        [[]],
+    )
+
   def test_transform_graph(self):
     """test for transform_graph."""
     instructions = {
@@ -275,6 +351,8 @@
         tensor_name="sequential/conv2d/Relu;sequential/conv2d/BiasAdd;"
         + "sequential/conv2d/Conv2D;sequential/conv2d/BiasAdd/ReadVariableOp1",
         subgraph_id=0,
+        # Conv2d: op_id=0, output_tensor_id=7.
+        # This should add two sequential dequants after the conv2d.
         instructions=[
             qtyping.TransformationInst(
                 transformation=qtyping.QuantTransformation.ADD_DEQUANTIZE,
@@ -299,6 +377,8 @@
     "sequential/average_pooling2d/AvgPool": qtyping.TensorTransformationInsts(
         tensor_name="sequential/average_pooling2d/AvgPool",
         subgraph_id=0,
+        # Avg_pool: op_id=1, output_tensor_id=8.
+        # This should add two sequential dequants after the avg_pool.
         instructions=[
             qtyping.TransformationInst(
                 transformation=qtyping.QuantTransformation.ADD_DEQUANTIZE,
@@ -326,19 +406,111 @@
     )
     self.assertLen(self._test_model.subgraphs, 1)
     self.assertLen(self._test_model.subgraphs[0].operators, 10)
+    # The original model has 13 tensors, each dequant adds 1 tensor.
     self.assertLen(self._test_model.subgraphs[0].tensors, 17)
+    # Check that the dequant opcode is added to the model.
     self.assertEqual(
         self._test_model.subgraphs[0].operators[1].opcodeIndex,
         len(self._test_model.operatorCodes) - 1,
     )
+    # Conv2d, dequant, dequant, avgpool, dequant, dequant, etc.
+    expected_builtin_op_order = [3, 6, 6, 1, 6, 6, 22, 9, 9, 25]
+    for i, op in enumerate(self._test_model.subgraphs[0].operators):
+      op_code = self._test_model.operatorCodes[op.opcodeIndex].builtinCode
+      self.assertEqual(op_code, expected_builtin_op_order[i])
+    # Check that the first dequant input is connected to the conv2d output.
+    self.assertEqual(self._test_model.subgraphs[0].operators[1].inputs[0], 7)
+    # Output is a new tensor just added.
+    self.assertEqual(self._test_model.subgraphs[0].operators[1].outputs[0], 13)
+    # Second dequant has new tensors.
     self.assertEqual(self._test_model.subgraphs[0].operators[2].inputs[0], 13)
     self.assertEqual(self._test_model.subgraphs[0].operators[2].outputs[0], 14)
-    self.assertEqual(
-        self._test_model.subgraphs[0].operators[3].inputs[0],
-        14,
-    )
+    # Avgpool's input is second dequant's output.
+    self.assertEqual(self._test_model.subgraphs[0].operators[3].inputs[0], 14)
+    # Avgpool's output remains the same.
     self.assertEqual(self._test_model.subgraphs[0].operators[3].outputs[0], 8)
+    # Third dequant's output is a new tensor.
     self.assertEqual(self._test_model.subgraphs[0].operators[4].outputs[0], 15)
+    # Fourth dequant.
+    self.assertEqual(self._test_model.subgraphs[0].operators[5].inputs[0], 15)
+    self.assertEqual(self._test_model.subgraphs[0].operators[5].outputs[0], 16)
+
+    # Avgpool (op_id=1) and reshape (op_id=2) are bumped by 2 due to the two
+    # dequants added after it.
+    expected_op_id_map = [0, 3, 6, 7, 8, 9]
+    self.assertEqual(
+        self._transformation_performer._original_op_id_map[0],
+        expected_op_id_map,
+    )
+    # New dequants are added at these indices.
+    expected_added_op_id_map = [1, 2, 4, 5]
+    self.assertEqual(
+        self._transformation_performer._added_op_id_map[0],
+        expected_added_op_id_map,
+    )
+
+  def test_op_insertion_at_input_and_output(self):
+    """test for _update_op_id_map."""
+    model = tfl_flatbuffer_utils.read_model(
+        os.path.join(
+            TEST_DATA_PREFIX_PATH, "tests/models/single_fc_bias.tflite"
+        )
+    )
+    self._transformation_performer._create_op_id_map(model)
+    instructions = {
+        # Fully_connected: op_id=0, input_tensor_id=0, output_tensor_id=3.
+        # Add a new quantize op to the input of the fully_connected.
+        "serving_default_input_2:0": qtyping.TensorTransformationInsts(
+            tensor_name="serving_default_input_2:0",
+            subgraph_id=0,
+            instructions=[
+                qtyping.TransformationInst(
+                    transformation=qtyping.QuantTransformation.ADD_QUANTIZE,
+                    tensor_id=0,
+                    producer=-1,
+                    consumers=[0],
+                    parameters=qtyping.UniformQuantParams(
+                        8, None, np.array([1]), np.array([0])
+                    ),
+                ),
+            ],
+        ),
+        # Add a new dequantize op to the output of the fully_connected.
+        "StatefulPartitionedCall:0": qtyping.TensorTransformationInsts(
+            tensor_name="StatefulPartitionedCall:0",
+            subgraph_id=0,
+            instructions=[
+                qtyping.TransformationInst(
+                    transformation=qtyping.QuantTransformation.ADD_DEQUANTIZE,
+                    tensor_id=3,
+                    producer=0,
+                    consumers=[-1],
+                    parameters=qtyping.UniformQuantParams(
+                        8, None, np.array([1]), np.array([0])
+                    ),
+                ),
+            ],
+        ),
+    }
+    self._transformation_performer.transform_graph(instructions, model)
+
+    # Original fc (op_id=0) should be bumped to op_id=1.
+    self.assertEqual(
+        self._transformation_performer._original_op_id_map[0],
+        [1],
+    )
+    # New quantize added at op_id=0, dequantize added at op_id=1.
+    expected_added_op_id_map = [0, 2]
+    self.assertEqual(
+        self._transformation_performer._added_op_id_map[0],
+        expected_added_op_id_map,
+    )
+    # Quantize, fully_connected, dequantize.
+    expected_builtin_op_order = [114, 9, 6]
+    for i, op in enumerate(model.subgraphs[0].operators):
+      op_code = model.operatorCodes[op.opcodeIndex].builtinCode
+      self.assertEqual(op_code, expected_builtin_op_order[i])
+
 
 if __name__ == "__main__":
   googletest.main()
--- /dev/null
+++ ai_edge_quantizer/transformations/duplicate_buffer.py
@@ -0,0 +1,46 @@
+# Copyright 2024 The AI Edge Quantizer Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Duplicate buffer transformation."""
+
+from ai_edge_quantizer import qtyping
+from ai_edge_quantizer.transformations import transformation_utils
+from ai_edge_quantizer.utils import tfl_flatbuffer_utils
+
+
+def duplicate_buffer(
+    transformation_input: transformation_utils.TransformationInput,
+) -> qtyping.TransformationInfo:
+  """Duplicates the buffer of the tensor."""
+  tensor_id = transformation_input.tensor_id
+  tensor = transformation_input.subgraph.tensors[tensor_id]
+  buffer_data = transformation_input.buffers[tensor.buffer].data
+  if buffer_data is None:
+    tensor_name = tfl_flatbuffer_utils.get_tensor_name(tensor)
+    raise ValueError(
+        'Duplicate Buffer transformation supports only constant tensors.'
+        f' Tensor {tensor_name} is not constant.'
+    )
+
+  duplicated_buffer_id = transformation_utils.get_constant_buffer(
+      data=buffer_data,
+      buffers=transformation_input.buffers,
+      force_duplicate_buffer=True,
+  )
+  tensor.buffer = duplicated_buffer_id
+
+  return qtyping.TransformationInfo(
+      op_id=0, num_ops_added=0, output_tensor_id=tensor_id
+  )
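duplicate_buffer leaves the tensor id untouched and only repoints the tensor at a copied buffer, so tensors that previously shared one buffer can later be quantized differently. A minimal driver sketch, assuming a constant weight tensor at index 1 of the first subgraph (the model path and indices are illustrative only):

    # Minimal driver for duplicate_buffer; the model path and tensor index
    # are illustrative only.
    import numpy as np
    from ai_edge_quantizer import qtyping
    from ai_edge_quantizer.transformations import duplicate_buffer
    from ai_edge_quantizer.transformations import transformation_utils
    from ai_edge_quantizer.utils import tfl_flatbuffer_utils

    model = tfl_flatbuffer_utils.read_model('/path/to/model.tflite')
    info = duplicate_buffer.duplicate_buffer(
        transformation_utils.TransformationInput(
            tensor_id=1,  # Must be a constant (weight) tensor.
            op_codes=model.operatorCodes,
            buffers=model.buffers,
            subgraph=model.subgraphs[0],
            producer=-1,
            consumers=[],
            quant_params=qtyping.UniformQuantParams(
                num_bits=8,
                quantized_dimension=None,
                scale=np.ones(1),
                zero_point=np.zeros(1),
            ),
        )
    )
    # The tensor keeps its id; only its buffer was duplicated.
    assert info.output_tensor_id == 1 and info.num_ops_added == 0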
--- /dev/null
+++ ai_edge_quantizer/transformations/duplicate_buffer_test.py
@@ -0,0 +1,106 @@
+# Copyright 2024 The AI Edge Quantizer Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os
+import numpy as np
+from tensorflow.python.platform import googletest
+from ai_edge_quantizer import qtyping
+from ai_edge_quantizer.transformations import duplicate_buffer
+from ai_edge_quantizer.transformations import transformation_utils
+from ai_edge_quantizer.utils import test_utils
+from ai_edge_quantizer.utils import tfl_flatbuffer_utils
+
+TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile('..')
+
+
+class DuplicateBufferTest(googletest.TestCase):
+
+  def setUp(self):
+    super().setUp()
+    model_path = os.path.join(
+        TEST_DATA_PREFIX_PATH, 'tests/models/weight_sharing_fcs.tflite'
+    )
+    self.model = tfl_flatbuffer_utils.read_model(model_path)
+
+  def _get_transformation_input(
+      self, subgraph_idx: int, tensor_idx: int
+  ) -> transformation_utils.TransformationInput:
+    return transformation_utils.TransformationInput(
+        tensor_id=tensor_idx,
+        buffers=self.model.buffers,
+        # Dummy params below.
+        op_codes=self.model.operatorCodes,
+        subgraph=self.model.subgraphs[subgraph_idx],
+        producer=-1,
+        consumers=[],
+        quant_params=qtyping.UniformQuantParams(
+            num_bits=8,
+            quantized_dimension=None,
+            scale=np.ones(1),
+            zero_point=np.zeros(1),
+        ),
+    )
+
+  def test_constant_buffer_is_correctly_duplicated(self):
+    # Duplicate the FC weight tensor in the second subgraph.
+    subgraph_idx = 1
+    subgraph = self.model.subgraphs[subgraph_idx]
+    weight_tensor_idx = 1
+    prev_buffer_id = subgraph.tensors[weight_tensor_idx].buffer
+    prev_num_buffers = len(self.model.buffers)
+    transformation_input = self._get_transformation_input(
+        subgraph_idx, weight_tensor_idx
+    )
+    transformation_info = duplicate_buffer.duplicate_buffer(
+        transformation_input
+    )
+    self.assertEqual(transformation_info.op_id, 0)
+    self.assertEqual(transformation_info.num_ops_added, 0)
+    self.assertEqual(transformation_info.output_tensor_id, 1)
+    # Check that a new buffer was added.
+    self.assertLen(self.model.buffers, prev_num_buffers + 1)
+    # Check that the new buffer is used by the weight tensor.
+    new_buffer_id = len(self.model.buffers) - 1
+    self.assertEqual(subgraph.tensors[weight_tensor_idx].buffer, new_buffer_id)
+    # Check that the new buffer has the same data as the original one.
+    self.assertTrue(
+        np.all(
+            np.frombuffer(
+                self.model.buffers[new_buffer_id].data,
+                dtype=np.float32,
+            )
+            == np.frombuffer(
+                self.model.buffers[prev_buffer_id].data,
+                dtype=np.float32,
+            )
+        )
+    )
+
+  def test_duplicate_buffer_raises_error_when_tensor_is_not_constant(self):
+    # Duplicate the FC input tensor in the second subgraph.
+    subgraph_idx = 1
+    weight_tensor_idx = 0
+    transformation_input = self._get_transformation_input(
+        subgraph_idx, weight_tensor_idx
+    )
+    with self.assertRaisesRegex(
+        ValueError,
+        'Duplicate Buffer transformation supports only constant tensors.',
+    ):
+      duplicate_buffer.duplicate_buffer(transformation_input)
+
+
+if __name__ == '__main__':
+  googletest.main()