ai-edge-quantizer-nightly 0.0.1.dev20250302__py3-none-any.whl → 0.5.0.dev20260103__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithm_manager.py +224 -0
- ai_edge_quantizer/algorithm_manager_api_test.py +7 -0
- ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py +2 -2
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +643 -20
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py +29 -2
- ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py +29 -35
- ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py +35 -12
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py +414 -0
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py +440 -0
- ai_edge_quantizer/algorithms/uniform_quantize/mse.py +127 -0
- ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py +195 -0
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +54 -168
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +54 -17
- ai_edge_quantizer/algorithms/uniform_quantize/octav.py +188 -0
- ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py +240 -0
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +260 -13
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +152 -5
- ai_edge_quantizer/algorithms/utils/common_utils.py +142 -54
- ai_edge_quantizer/calibrator.py +58 -94
- ai_edge_quantizer/calibrator_test.py +5 -74
- ai_edge_quantizer/default_policy.py +108 -16
- ai_edge_quantizer/model_modifier.py +132 -8
- ai_edge_quantizer/model_modifier_test.py +81 -1
- ai_edge_quantizer/model_validator.py +38 -10
- ai_edge_quantizer/model_validator_test.py +2 -1
- ai_edge_quantizer/params_generator.py +230 -47
- ai_edge_quantizer/params_generator_test.py +366 -261
- ai_edge_quantizer/qtyping.py +92 -6
- ai_edge_quantizer/quantizer.py +167 -23
- ai_edge_quantizer/quantizer_test.py +288 -26
- ai_edge_quantizer/recipe.py +156 -21
- ai_edge_quantizer/recipe_manager.py +158 -1
- ai_edge_quantizer/recipe_manager_test.py +146 -32
- ai_edge_quantizer/recipe_test.py +93 -17
- ai_edge_quantizer/transformation_instruction_generator.py +313 -46
- ai_edge_quantizer/transformation_instruction_generator_test.py +449 -27
- ai_edge_quantizer/transformation_performer.py +112 -58
- ai_edge_quantizer/transformation_performer_test.py +176 -4
- ai_edge_quantizer/transformations/duplicate_buffer.py +46 -0
- ai_edge_quantizer/transformations/duplicate_buffer_test.py +106 -0
- ai_edge_quantizer/transformations/duplicate_tensor.py +62 -0
- ai_edge_quantizer/transformations/duplicate_tensor_test.py +131 -0
- ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation.py +299 -0
- ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation_test.py +244 -0
- ai_edge_quantizer/transformations/insert_hadamard_rotation.py +186 -0
- ai_edge_quantizer/transformations/insert_hadamard_rotation_test.py +200 -0
- ai_edge_quantizer/transformations/quantize_tensor.py +24 -44
- ai_edge_quantizer/transformations/quantize_tensor_test.py +3 -2
- ai_edge_quantizer/transformations/transformation_utils.py +157 -11
- ai_edge_quantizer/transformations/transformation_utils_test.py +96 -2
- ai_edge_quantizer/utils/calibration_utils.py +263 -1
- ai_edge_quantizer/utils/calibration_utils_test.py +173 -3
- ai_edge_quantizer/utils/constrained_ops_utils.py +111 -0
- ai_edge_quantizer/utils/constrained_ops_utils_test.py +50 -0
- ai_edge_quantizer/utils/test_utils.py +191 -58
- ai_edge_quantizer/utils/tfl_flatbuffer_utils.py +96 -50
- ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py +20 -0
- ai_edge_quantizer/utils/tfl_interpreter_utils.py +138 -5
- ai_edge_quantizer/utils/tfl_interpreter_utils_test.py +29 -2
- ai_edge_quantizer/utils/validation_utils.py +114 -4
- ai_edge_quantizer/utils/validation_utils_test.py +80 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/METADATA +13 -3
- ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/RECORD +81 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/WHEEL +1 -1
- ai_edge_quantizer/transformations/emulated_subchannel.py +0 -363
- ai_edge_quantizer/transformations/emulated_subchannel_test.py +0 -212
- ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info/RECORD +0 -67
- {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/licenses}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/top_level.txt +0 -0
ai_edge_quantizer/transformation_instruction_generator.py:

```diff
@@ -23,10 +23,16 @@ from collections.abc import Iterator
 import dataclasses
 from typing import Optional
 from ai_edge_quantizer import qtyping
+from ai_edge_quantizer.algorithms.utils import common_utils
+from ai_edge_quantizer.utils import constrained_ops_utils
 from ai_edge_quantizer.utils import tfl_flatbuffer_utils
 from ai_edge_litert import schema_py_generated  # pylint: disable=g-direct-tensorflow-import
 
 
+_OpQuantConstraint = common_utils.OpQuantConstraint
+_QuantTransformation = qtyping.QuantTransformation
+
+
 # When a tensor has no producer, we'll assign -1 to the producer field
 # When a tensor is a graph output, we'll also include a -1 in the consumer list
 def check_horizontal_optimization(
```
```diff
@@ -48,6 +54,15 @@ def check_horizontal_optimization(
   Returns:
     True if the two transformations can be merged, False otherwise
   """
+  if (
+      isinstance(param1.parameters, qtyping.UniformQuantParams)
+      and param1.parameters.hadamard is not None
+  ):
+    if (
+        isinstance(param2.parameters, qtyping.UniformQuantParams)
+        and param2.parameters.hadamard is not None
+    ):
+      return True
   return (
       param1.parameters == param2.parameters
       and len(param1.transformations) > index
```
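The early exit added above lets horizontal optimization merge two consumer transformations whenever both parameter sets carry Hadamard rotation metadata, even if the parameters would not compare equal. A minimal sketch of that predicate, with a hypothetical `SimpleParams` standing in for `qtyping.UniformQuantParams` (not the library's API):

```python
import dataclasses
from typing import Optional


@dataclasses.dataclass
class SimpleParams:
  num_bits: int
  hadamard: Optional[object] = None  # rotation metadata, if present


def can_merge(p1: SimpleParams, p2: SimpleParams) -> bool:
  # Both consumers carry Hadamard metadata: treat as mergeable even if the
  # metadata objects themselves do not compare equal.
  if p1.hadamard is not None and p2.hadamard is not None:
    return True
  # Otherwise fall back to strict parameter equality.
  return p1 == p2


assert can_merge(SimpleParams(8, object()), SimpleParams(8, object()))
assert can_merge(SimpleParams(8), SimpleParams(8))
assert not can_merge(SimpleParams(8), SimpleParams(4))
```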
```diff
@@ -162,6 +177,16 @@ class TransformationInstructionsGenerator:
     else:
       self.flatbuffer_model = tfl_flatbuffer_utils.read_model(float_tflite)
     self._create_tensor_name_to_graph_info_map()
+    self._same_as_input_scale_ops = (
+        constrained_ops_utils.get_constrained_op_list(
+            _OpQuantConstraint.SAME_AS_INPUT_SCALE
+        )
+    )
+    self._same_as_output_scale_ops = (
+        constrained_ops_utils.get_constrained_op_list(
+            _OpQuantConstraint.SAME_AS_OUTPUT_SCALE
+        )
+    )
 
   @dataclasses.dataclass(frozen=True)
   class TensorGraphInfo:
```
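These cached lists feed the `_is_op_constrained` check introduced later in this diff: a tensor produced by a scale-constrained op must keep its producer's scale, so requantization fusion skips it. A hedged sketch of the idea; the op names below are illustrative examples, not the authoritative lists returned by `constrained_ops_utils.get_constrained_op_list`:

```python
# Illustrative stand-ins for the cached constraint lists.
SAME_AS_INPUT_SCALE_OPS = {"RESHAPE", "TRANSPOSE", "SLICE"}
SAME_AS_OUTPUT_SCALE_OPS = {"CONCATENATION"}


def is_op_constrained(op_name: str) -> bool:
  # An op in either set must share scale with its input/output, so tensors
  # it produces are excluded from the requantization fusion shown below.
  return (
      op_name in SAME_AS_INPUT_SCALE_OPS
      or op_name in SAME_AS_OUTPUT_SCALE_OPS
  )


assert is_op_constrained("RESHAPE")
assert not is_op_constrained("FULLY_CONNECTED")
```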
```diff
@@ -183,11 +208,13 @@ class TransformationInstructionsGenerator:
       A tuple of tensor_name and TensorGraphInfo.
     """
     for tensor_id, tensor in enumerate(subgraph.tensors):
-      consumers = [
-          op_id
-          for op_id, op in enumerate(subgraph.operators)
-          if tensor_id in op.inputs
-      ]
+      consumers = []
+      for op_id, op in enumerate(subgraph.operators):
+        # Some ops may use the same input tensor multiple times,
+        # and we should handle each time independently.
+        for op_input in op.inputs:
+          if op_input == tensor_id:
+            consumers.append(op_id)
       producer = -1
       for op_id, op in enumerate(subgraph.operators):
         if tensor_id in op.outputs:
```
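The rewritten consumer scan records an op once per use of the tensor, where the old membership test recorded it at most once. A small runnable illustration, with plain `(op_id, inputs)` tuples standing in for flatbuffer operators:

```python
# Toy illustration of the consumer-scan change above.
ops = [
    (0, [5, 5]),  # op 0 reads tensor 5 through two inputs, e.g. MUL(x, x)
    (1, [5, 7]),  # op 1 reads tensor 5 once
]
tensor_id = 5

# Old behavior: a membership test collapses repeated uses into one entry.
old_consumers = [op_id for op_id, inputs in ops if tensor_id in inputs]
assert old_consumers == [0, 1]

# New behavior: one consumer entry per use of the tensor.
new_consumers = []
for op_id, inputs in ops:
  for op_input in inputs:
    if op_input == tensor_id:
      new_consumers.append(op_id)
assert new_consumers == [0, 0, 1]
```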
```diff
@@ -454,53 +481,181 @@ class TransformationInstructionsGenerator:
       transformations.insert(0, producer_trans_rule)
     return transformations
 
+  def _remove_last_tensor_duplication(
+      self, tensor_trans_insts: qtyping.TensorTransformationInsts
+  ) -> None:
+    """Remove the last tensor duplication so the original tensor can be reused."""
+    instructions = tensor_trans_insts.instructions
+    if not instructions:
+      return
+    for i in range(len(instructions) - 1, -1, -1):
+      if (
+          instructions[i].transformation
+          == _QuantTransformation.DUPLICATE_TENSOR
+      ):
+        instructions.pop(i)
+        return
+
+  def _remove_unnecessary_buffer_duplication(
+      self, tensor_trans_insts: qtyping.TensorTransformationInsts
+  ) -> None:
+    """Remove buffer duplications that come after a tensor duplication.
+
+    When a tensor is duplicated, a new buffer is created for it. Therefore, a
+    buffer duplication transformation that comes after it is unnecessary.
+
+    Args:
+      tensor_trans_insts: Transformation instructions for a tensor.
+    """
+    instructions = tensor_trans_insts.instructions
+    if not instructions:
+      return
+
+    # Find all consumers that have a tensor duplication.
+    consumers_with_tensor_duplication = set()
+    for instr in instructions:
+      if instr.transformation == _QuantTransformation.DUPLICATE_TENSOR:
+        consumers_with_tensor_duplication.update(instr.consumers)
+    if not consumers_with_tensor_duplication:
+      return
+
+    # Remove a buffer duplication that comes with a tensor duplication.
+    for i in range(len(instructions) - 1, -1, -1):
+      instr = instructions[i]
+      if (
+          instr.transformation == _QuantTransformation.DUPLICATE_BUFFER
+          and consumers_with_tensor_duplication.issuperset(instr.consumers)
+      ):
+        instructions.pop(i)
+
+  def _is_valid_quantize_requantize_pair(
+      self,
+      instr_0: qtyping.TransformationInst,
+      instr_1: qtyping.TransformationInst,
+  ) -> bool:
+    """Checks if the two instructions form a valid quantize and requantize pair."""
+    return (
+        instr_0.transformation == _QuantTransformation.QUANTIZE_TENSOR
+        and instr_1.transformation == _QuantTransformation.ADD_QUANTIZE
+        and instr_0.consumers == instr_1.consumers
+    )
+
+  def _is_op_constrained(
+      self, subgraph_id: int, op_index: int
+  ) -> bool:
+    """Checks if the op has same as input or output scale constraints."""
+    op_name = tfl_flatbuffer_utils.get_op_name_by_index(
+        self.flatbuffer_model, subgraph_id, op_index
+    )
+    return (
+        op_name in self._same_as_input_scale_ops
+        or op_name in self._same_as_output_scale_ops
+    )
+
+  def _are_quant_params_compatible(
+      self,
+      params_0: qtyping.UniformQuantParams,
+      params_1: qtyping.UniformQuantParams,
+  ) -> bool:
+    """Checks if quant params are the same except for the scale and zero point."""
+    ignore_set = {"scale", "zero_point"}
+    for field_info in dataclasses.fields(qtyping.UniformQuantParams):
+      field_name = field_info.name
+      if field_name in ignore_set:
+        continue
+      if getattr(params_0, field_name) != getattr(params_1, field_name):
+        return False
+    return True
+
+  def _eliminate_requantization_for_nonconstrained_provider(
+      self, tensor_trans_insts: qtyping.TensorTransformationInsts
+  ) -> None:
+    """Removes requantization for tensors with a non-constrained provider.
+
+    Fuses [QUANTIZE_TENSOR, ADD_QUANTIZE] instructions when a tensor has a
+    provider op without same as input/output scale constraints. Quant params
+    from the second instruction are copied to the first one and ADD_QUANTIZE
+    is removed.
+
+    Args:
+      tensor_trans_insts: Transformation instructions for a tensor.
+    """
+    instructions = tensor_trans_insts.instructions
+    if instructions is None or len(instructions) != 2:
+      return
+
+    instr_0, instr_1 = instructions
+    params_0 = instr_0.parameters
+    params_1 = instr_1.parameters
+    producer_op_index = instr_0.producer
+    if (
+        not isinstance(params_0, qtyping.UniformQuantParams)
+        or not isinstance(params_1, qtyping.UniformQuantParams)
+        or not self._is_valid_quantize_requantize_pair(instr_0, instr_1)
+        or not self._are_quant_params_compatible(params_0, params_1)
+        # To avoid fusion when subgraph inputs connected to the main subgraph
+        # (e.g. while_body), we skip all tensors with no producer.
+        or producer_op_index == -1
+        # Can't apply fusion to tensors with a constrained producer since that
+        # will break the constraint.
+        or self._is_op_constrained(
+            tensor_trans_insts.subgraph_id, producer_op_index
+        )
+    ):
+      return
+
+    # Fuse the quantize and requantize.
+    instr_0.parameters = dataclasses.replace(
+        params_0, scale=params_1.scale, zero_point=params_1.zero_point
+    )
+    # Remove the requantize instruction.
+    instructions.pop(1)
+
   def _quant_params_to_transformation_insts(
       self,
       param: qtyping.TensorTransformationParams,
   ) -> qtyping.TensorTransformationInsts:
-    """
+    """Convert single tensor quant params to transformation instructions.
 
     Args:
-      param:
+      param: Quantization parameters of a tensor in the graph.
 
     Returns:
-
+      Transformations to be applied to the given tensor.
     """
-    #
+    # Setup the structure.
     tensor_info = self._tensor_name_to_graph_info[param.tensor_name]
     tensor_trans_insts = qtyping.TensorTransformationInsts(
         param.tensor_name, tensor_info.subgraph_id, []
     )
 
-    #
-    consumer_group = self._group_consumer_transformations(param)
-    # at this point, starting from index 1 of consumer_group, we're having sets
-    # that represents transformations that can be grouped together
-    transformations_available_for_vertical_optimization = (
-        self._produce_transformation_for_vertical_opt(consumer_group, param)
-    )
-    other_consumer_transformations = (
-        self._produce_consumer_transformations_unavailable_for_vertical_opt(
-            consumer_group, param
-        )
-    )
-
+    # Add all producer rules.
     transformations = []
-
-    producer_params = param.producer
-    if producer_params:
-      for transformation in producer_params.transformations:
+    if param.producer:
+      for transformation in param.producer.transformations:
         transformations.append(
             qtyping.TransformationInst(
                 transformation,
                 tensor_info.tensor_id,
                 tensor_info.producer,
                 tensor_info.consumers,
-                producer_params.parameters,
+                param.producer.parameters,
             )
         )
 
-    #
+    # Horizontal optimization.
+    consumer_group = self._group_consumer_transformations(param)
+    # At this point, starting from index 1 of consumer_group, we're having sets
+    # that represent transformations that can be grouped together.
+    transformations_available_for_vertical_optimization = (
+        self._produce_transformation_for_vertical_opt(consumer_group, param)
+    )
+    other_consumer_transformations = (
+        self._produce_consumer_transformations_unavailable_for_vertical_opt(
+            consumer_group, param
+        )
+    )
+    # Apply vertical optimization.
     last_producer_rule_idx = len(transformations) - 1
     if last_producer_rule_idx >= 0:
       transformations += self._apply_vertical_optimization(
```
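Taken together, these helpers implement a peephole fusion: when a tensor's only two instructions are a QUANTIZE_TENSOR followed by an ADD_QUANTIZE aimed at the same consumers, and their parameters differ only in scale and zero point, the requantize collapses into the quantize. A self-contained sketch under those assumptions; the `Params` and `Inst` dataclasses here are hypothetical stand-ins, not `qtyping`'s types:

```python
import dataclasses


@dataclasses.dataclass
class Params:
  num_bits: int
  scale: float
  zero_point: int


@dataclasses.dataclass
class Inst:
  kind: str  # "QUANTIZE_TENSOR" or "ADD_QUANTIZE"
  consumers: tuple[int, ...]
  params: Params


def fuse_requantize(insts: list[Inst]) -> None:
  if len(insts) != 2:
    return
  first, second = insts
  if (
      first.kind == "QUANTIZE_TENSOR"
      and second.kind == "ADD_QUANTIZE"
      and first.consumers == second.consumers
      and first.params.num_bits == second.params.num_bits
  ):
    # Adopt the downstream scale/zero point, then drop the requantize.
    first.params = dataclasses.replace(
        first.params,
        scale=second.params.scale,
        zero_point=second.params.zero_point,
    )
    insts.pop(1)


insts = [
    Inst("QUANTIZE_TENSOR", (1,), Params(8, 0.5, 0)),
    Inst("ADD_QUANTIZE", (1,), Params(8, 0.25, 3)),
]
fuse_requantize(insts)
assert len(insts) == 1 and insts[0].params.scale == 0.25
```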
```diff
@@ -509,30 +664,127 @@ class TransformationInstructionsGenerator:
       )
     else:
       transformations += transformations_available_for_vertical_optimization
-    # Adding other consumers rules
+    # Adding other consumers rules.
    transformations += other_consumer_transformations
     tensor_trans_insts.instructions = transformations
+
+    # Now, when all optimizations are done, we can remove the last tensor
+    # duplication instruction, so the original tensor can be reused.
+    self._remove_last_tensor_duplication(tensor_trans_insts)
+    # With the tensor duplication instructions finalized, we can remove
+    # unnecessary buffer duplications applied to the same duplicated tensors.
+    # This is not a part of a vertical optimization because vertical
+    # optimization only works between producers & consumers, and this is
+    # between consumers only. Also this can't be done during the params
+    # generation because removing the last tensor duplication has to happen
+    # first.
+    self._remove_unnecessary_buffer_duplication(tensor_trans_insts)
+
     # Check the generated transformation instructions are valid, the function
-    # will raise an error if the instructions are not valid
+    # will raise an error if the instructions are not valid.
     self._check_tensor_transformation_instructions_valid(tensor_trans_insts)
 
+    # Remove unnecessary [QUANTIZE_TENSOR, ADD_QUANTIZE] pairs for tensors with
+    # providers without same as input/output scale constraints.
+    self._eliminate_requantization_for_nonconstrained_provider(
+        tensor_trans_insts
+    )
+
     return tensor_trans_insts
 
-  def _check_tensor_transformation_instructions_valid(
-      self, instructions: qtyping.TensorTransformationInsts
-  ):
-    """Check if the tensor transformation instructions are valid.
+  def _split_instructions_by_tensor_duplication(
+      self,
+      instructions: qtyping.TensorTransformationInsts,
+  ) -> list[list[qtyping.TransformationInst]]:
+    """Split the instructions into subsets by tensor duplication.
+
+    Splits the instructions into subsets based on which tensor (original or one
+    of duplicated ones) they will be applied to.
+
+    The first subset is for the original tensor. The following subsets are for
+    the duplicated tensors. The order of instructions in each subset is
+    preserved.
+
+    Enforced constraints for each duplicated tensor's instructions subset:
+      1. The first instruction must be a `DUPLICATE_TENSOR` one.
+      2. No other `DUPLICATE_TENSOR` instructions can be present.
+
+    For the following instructions:
+    [
+        (transformation=DUPLICATE_TENSOR, consumers=[1, 2, 3]),
+        (transformation=DUPLICATE_TENSOR, consumers=[4]),
+        (transformation=T1, consumers=[1, 2]),
+        (transformation=T2, consumers=[3]),
+        (transformation=T3, consumers=[4]),
+        (transformation=T4, consumers=[5])
+    ]
+
+    `instruction_subsets` will be:
+    [
+        [(transformation=T4, consumers=[5])],
+        [
+            (transformation=DUPLICATE_TENSOR, consumers=[1, 2, 3]),
+            (transformation=T1, consumers=[1, 2]),
+            (transformation=T2, consumers=[3])
+        ],
+        [
+            (transformation=DUPLICATE_TENSOR, consumers=[4]),
+            (transformation=T3, consumers=[4])
+        ]
+    ]
 
     Args:
       instructions: Transformation instructions for a tensor.
 
+    Returns:
+      A list of subsets of transformation instructions, where the first subset
+      is for the original tensor, and the following subsets are for the
+      duplicated tensors.
+
     Raises:
-      ValueError: If the instructions are not valid.
+      ValueError: If DUPLICATE_TENSOR is found and it's not the first
+        transformation for its consumers.
+    """
+    original_tensor_subset_idx = 0
+    instruction_subsets = [[]]
+    consumer_to_subset_idx = {}
+    for instruction in instructions.instructions:
+      if instruction.transformation == _QuantTransformation.DUPLICATE_TENSOR:
+        instruction_subsets.append([instruction])
+        subset_idx = len(instruction_subsets) - 1
+        for consumer in instruction.consumers:
+          if consumer in consumer_to_subset_idx:
+            raise ValueError(
+                f"Tensor {instructions.tensor_name}: duplicate tensor should"
+                " be the first instruction for its consumers."
+            )
+          else:
+            consumer_to_subset_idx[consumer] = subset_idx
+      else:
+        first_consumer = instruction.consumers[0]
+        if first_consumer not in consumer_to_subset_idx:
+          consumer_to_subset_idx[first_consumer] = original_tensor_subset_idx
+        subset_idx = consumer_to_subset_idx[first_consumer]
+        instruction_subsets[subset_idx].append(instruction)
+
+    return instruction_subsets
+
+  def _check_subset_of_tensor_transformation_instructions_valid(
+      self,
+      instructions: Optional[list[qtyping.TransformationInst]],
+      tensor_name: str,
+  ):
+    """Check if a subset of tensor transformation instructions is valid.
+
+    Args:
+      instructions: A subset of transformation instructions for a tensor.
+      tensor_name: The name of the tensor.
+
+    Raises:
+      ValueError: If the subset of instructions is not valid.
     """
     is_tensor_unquantized = False
     is_tensor_quantized = False
-    is_operator_emulated = False
-    for instruction in instructions.instructions:
+    for instruction in instructions:
       transform_type = instruction.transformation
       if transform_type == qtyping.QuantTransformation.NO_QUANTIZE:
         is_tensor_unquantized = True
```
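The splitting logic above can be exercised independently of the quantizer. A runnable sketch that mirrors the docstring example, using hypothetical `(kind, consumers)` tuples in place of `TransformationInst`:

```python
def split_by_duplication(instructions):
  subsets = [[]]  # subset 0 belongs to the original tensor
  consumer_to_subset = {}
  for inst in instructions:
    kind, consumers = inst
    if kind == "DUPLICATE_TENSOR":
      # Each duplication opens a new subset owning its consumers.
      subsets.append([inst])
      idx = len(subsets) - 1
      for c in consumers:
        if c in consumer_to_subset:
          raise ValueError("duplicate tensor must come first for a consumer")
        consumer_to_subset[c] = idx
    else:
      # Route by the first consumer; unseen consumers use the original tensor.
      first = consumers[0]
      idx = consumer_to_subset.setdefault(first, 0)
      subsets[idx].append(inst)
  return subsets


insts = [
    ("DUPLICATE_TENSOR", [1, 2, 3]),
    ("DUPLICATE_TENSOR", [4]),
    ("T1", [1, 2]),
    ("T2", [3]),
    ("T3", [4]),
    ("T4", [5]),
]
subsets = split_by_duplication(insts)
assert subsets[0] == [("T4", [5])]
assert subsets[1][0] == ("DUPLICATE_TENSOR", [1, 2, 3])
assert subsets[2] == [("DUPLICATE_TENSOR", [4]), ("T3", [4])]
```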
```diff
@@ -541,18 +793,33 @@ class TransformationInstructionsGenerator:
           or transform_type == qtyping.QuantTransformation.ADD_DEQUANTIZE
       ):
         is_tensor_quantized = True
-      elif transform_type == qtyping.QuantTransformation.EMULATED_SUBCHANNEL:
-        is_operator_emulated = True
     if is_tensor_unquantized and is_tensor_quantized:
       raise ValueError(
-          "Tensor %s can not be both quantized and unquantized"
-          % instructions.tensor_name
+          "Tensor %s can not be both quantized and unquantized" % tensor_name
       )
-    if is_operator_emulated and len(instructions.instructions) > 1:
-      raise ValueError(
-          "Tensor %s: emulated subchannel transformation can not be"
-          " combined with other transformations"
-          % instructions.tensor_name
+
+  def _check_tensor_transformation_instructions_valid(
+      self,
+      instructions: qtyping.TensorTransformationInsts,
+  ):
+    """Check if the tensor transformation instructions are valid.
+
+    Args:
+      instructions: Transformation instructions for a tensor.
+
+    Raises:
+      ValueError: If the instructions are not valid.
+    """
+    # Split the instructions into subsets based on which tensor (original or
+    # one of duplicated ones) they will be applied to.
+    instruction_subsets = self._split_instructions_by_tensor_duplication(
+        instructions
+    )
+    # Check that each subset of instructions is valid.
+    for instruction_subset in instruction_subsets:
+      self._check_subset_of_tensor_transformation_instructions_valid(
+          instruction_subset,
+          instructions.tensor_name,
       )
 
   def quant_params_to_transformation_insts(
```
|