ai-edge-quantizer-nightly 0.0.1.dev20250302__py3-none-any.whl → 0.5.0.dev20260103__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
Files changed (69)
  1. ai_edge_quantizer/algorithm_manager.py +224 -0
  2. ai_edge_quantizer/algorithm_manager_api_test.py +7 -0
  3. ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py +2 -2
  4. ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +643 -20
  5. ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py +29 -2
  6. ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py +29 -35
  7. ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py +35 -12
  8. ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py +414 -0
  9. ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py +440 -0
  10. ai_edge_quantizer/algorithms/uniform_quantize/mse.py +127 -0
  11. ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py +195 -0
  12. ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +54 -168
  13. ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +54 -17
  14. ai_edge_quantizer/algorithms/uniform_quantize/octav.py +188 -0
  15. ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py +240 -0
  16. ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +260 -13
  17. ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +152 -5
  18. ai_edge_quantizer/algorithms/utils/common_utils.py +142 -54
  19. ai_edge_quantizer/calibrator.py +58 -94
  20. ai_edge_quantizer/calibrator_test.py +5 -74
  21. ai_edge_quantizer/default_policy.py +108 -16
  22. ai_edge_quantizer/model_modifier.py +132 -8
  23. ai_edge_quantizer/model_modifier_test.py +81 -1
  24. ai_edge_quantizer/model_validator.py +38 -10
  25. ai_edge_quantizer/model_validator_test.py +2 -1
  26. ai_edge_quantizer/params_generator.py +230 -47
  27. ai_edge_quantizer/params_generator_test.py +366 -261
  28. ai_edge_quantizer/qtyping.py +92 -6
  29. ai_edge_quantizer/quantizer.py +167 -23
  30. ai_edge_quantizer/quantizer_test.py +288 -26
  31. ai_edge_quantizer/recipe.py +156 -21
  32. ai_edge_quantizer/recipe_manager.py +158 -1
  33. ai_edge_quantizer/recipe_manager_test.py +146 -32
  34. ai_edge_quantizer/recipe_test.py +93 -17
  35. ai_edge_quantizer/transformation_instruction_generator.py +313 -46
  36. ai_edge_quantizer/transformation_instruction_generator_test.py +449 -27
  37. ai_edge_quantizer/transformation_performer.py +112 -58
  38. ai_edge_quantizer/transformation_performer_test.py +176 -4
  39. ai_edge_quantizer/transformations/duplicate_buffer.py +46 -0
  40. ai_edge_quantizer/transformations/duplicate_buffer_test.py +106 -0
  41. ai_edge_quantizer/transformations/duplicate_tensor.py +62 -0
  42. ai_edge_quantizer/transformations/duplicate_tensor_test.py +131 -0
  43. ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation.py +299 -0
  44. ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation_test.py +244 -0
  45. ai_edge_quantizer/transformations/insert_hadamard_rotation.py +186 -0
  46. ai_edge_quantizer/transformations/insert_hadamard_rotation_test.py +200 -0
  47. ai_edge_quantizer/transformations/quantize_tensor.py +24 -44
  48. ai_edge_quantizer/transformations/quantize_tensor_test.py +3 -2
  49. ai_edge_quantizer/transformations/transformation_utils.py +157 -11
  50. ai_edge_quantizer/transformations/transformation_utils_test.py +96 -2
  51. ai_edge_quantizer/utils/calibration_utils.py +263 -1
  52. ai_edge_quantizer/utils/calibration_utils_test.py +173 -3
  53. ai_edge_quantizer/utils/constrained_ops_utils.py +111 -0
  54. ai_edge_quantizer/utils/constrained_ops_utils_test.py +50 -0
  55. ai_edge_quantizer/utils/test_utils.py +191 -58
  56. ai_edge_quantizer/utils/tfl_flatbuffer_utils.py +96 -50
  57. ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py +20 -0
  58. ai_edge_quantizer/utils/tfl_interpreter_utils.py +138 -5
  59. ai_edge_quantizer/utils/tfl_interpreter_utils_test.py +29 -2
  60. ai_edge_quantizer/utils/validation_utils.py +114 -4
  61. ai_edge_quantizer/utils/validation_utils_test.py +80 -0
  62. {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/METADATA +13 -3
  63. ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/RECORD +81 -0
  64. {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/WHEEL +1 -1
  65. ai_edge_quantizer/transformations/emulated_subchannel.py +0 -363
  66. ai_edge_quantizer/transformations/emulated_subchannel_test.py +0 -212
  67. ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info/RECORD +0 -67
  68. {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/licenses}/LICENSE +0 -0
  69. {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/top_level.txt +0 -0
@@ -15,10 +15,12 @@
 
 """Generate model tensor level quantization config."""
 
+from collections.abc import Sequence
 import copy
 from typing import Any, Optional, Union
 
 from ai_edge_quantizer import algorithm_manager
+from ai_edge_quantizer import default_policy as policy
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer import recipe_manager
 from ai_edge_quantizer.utils import tfl_flatbuffer_utils
@@ -33,12 +35,12 @@ class ParamsGenerator:
   def __init__(self, float_tflite: Union[str, bytes]):
     self.flatbuffer_model = tfl_flatbuffer_utils.read_model(float_tflite)
 
-    if not tfl_flatbuffer_utils.is_float_model(self.flatbuffer_model):
-      raise ValueError(
-          'The input model for quantization parameters generation is not a'
-          ' float model. Please check the model (e.g., if it is already'
-          ' quantized).'
-      )
+    # if not tfl_flatbuffer_utils.is_float_model(self.flatbuffer_model):
+    #   raise ValueError(
+    #       'The input model for quantization parameters generation is not a'
+    #       ' float model. Please check the model (e.g., if it is already'
+    #       ' quantized).'
+    #   )
     self._check_tensor_names_are_unique()
     self.buffer_to_tensors: dict[int, list[Any]] = (
         tfl_flatbuffer_utils.buffer_to_tensors(self.flatbuffer_model)
@@ -73,8 +75,10 @@ class ParamsGenerator:
     if model_qsvs is None:
       model_qsvs = {}
 
+    skip_subgraphs = set()
     op_codes = self.flatbuffer_model.operatorCodes
-    for subgraph in self.flatbuffer_model.subgraphs:
+    for sg_ind, subgraph in enumerate(self.flatbuffer_model.subgraphs):
+
       graph_info = qtyping.GraphInfo(
           subgraph.tensors, self.flatbuffer_model.buffers
       )
@@ -103,10 +107,22 @@
         algorithm_name, op_quant_config = (
             model_recipe_manager.get_quantization_configs(op_key, op_scope)
         )
+
+        if sg_ind in skip_subgraphs or policy.is_non_quantizable_composite_op(
+            op
+        ):
+          algorithm_name = algorithm_manager.AlgorithmName.NO_QUANTIZE
+
         if algorithm_name == algorithm_manager.AlgorithmName.NO_QUANTIZE:
+          side_effect_subgraphs = (
+              tfl_flatbuffer_utils.get_op_side_effect_subgraphs(op)
+          )
+          skip_subgraphs.update(side_effect_subgraphs)
+
           op_quant_results = self._get_params_for_no_quant_op(
               subgraph_op_id, op, subgraph.tensors
           )
+
         else:
           op_info = qtyping.OpInfo(op, op_key, subgraph_op_id, op_quant_config)
           # Step2: query algorithm_manager to get/call the related function.
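The two additions above work together: any op that resolves to NO_QUANTIZE now also poisons the subgraphs it invokes (via skip_subgraphs), so the body of an unquantized control-flow or composite op is not quantized on its own. Below is a rough sketch of what such a side-effect-subgraph helper could look like; the real tfl_flatbuffer_utils.get_op_side_effect_subgraphs may well differ. The option-field names are taken from the TFLite flatbuffer schema (WhileOptions, IfOptions, StableHLOCompositeOptions), and the guard for option-less ops is an assumption:

def get_op_side_effect_subgraphs_sketch(op) -> list[int]:
  """Collects subgraph indices invoked by a control-flow/composite op."""
  opts = op.builtinOptions
  if opts is None:  # most ops carry no subgraph-referencing options
    return []
  indices = []
  for field in ('condSubgraphIndex', 'bodySubgraphIndex',
                'thenSubgraphIndex', 'elseSubgraphIndex',
                'decompositionSubgraphIndex'):
    idx = getattr(opts, field, None)
    if idx is not None and idx >= 0:
      indices.append(idx)
  return indices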
@@ -146,7 +162,7 @@ class ParamsGenerator:
       RuntimeError: If the tensors sharing the same buffer have different
         quantization settings.
     """
-    self._check_buffer_sharing()
+    self._check_and_fix_buffer_sharing()
 
   def _update_model_quant_results(
       self,
@@ -252,57 +268,224 @@
       tensor_params.append(output_tensor_params)
     return tensor_params
 
-  def _check_buffer_sharing(self) -> None:
-    """Check if tensors sharing the same buffer have the same quantization.
+  def _mark_tensors_requiring_buffer_duplication(
+      self, buffers_to_duplicate: Sequence[int]
+  ) -> None:
+    """Mark tensors that require buffer duplication.
+
+    Marking a tensor means adding a DUPLICATE_BUFFER transformation as the first
+    transformation to be applied for each consumer of the tensor. Need to do
+    that for each consumer to preserve a zero layer and not affect the
+    horizontal optimization later in the transformation instructions generator.
+
+    Marks all tensors within each of the provided buffers as requiring buffer
+    duplication, except for the last tensor. The order of tensors is assumed to
+    be the same during both the marking and transformation performer steps, as
+    determined by `self.buffer_to_tensors`. This allows the final tensor to
+    reuse the original buffer, as it is not marked for duplication.
+
+    Args:
+      buffers_to_duplicate: Indices of the buffers to duplicate.
+    """
+    for buffer_idx in buffers_to_duplicate:
+      for tensor in self.buffer_to_tensors[buffer_idx][:-1]:
+        tensor_name = tfl_flatbuffer_utils.get_tensor_name(tensor)
+        for consumer_params in self.model_quant_results[tensor_name].consumers:
+          consumer_params.transformations.insert(
+              0, _QuantTrans.DUPLICATE_BUFFER
+          )
+
+  def _mark_tensors_requiring_tensor_duplication(
+      self, tensor_names_to_duplicate
+  ) -> None:
+    """Mark tensors that require tensor duplication.
+
+    Marking a tensor means adding a DUPLICATE_TENSOR transformation as the first
+    transformation to be applied for each consumer of the tensor. Need to do
+    that for each consumer to preserve a zero layer and not affect the
+    horizontal optimization later in the transformation instructions generator.
+
+    Args:
+      tensor_names_to_duplicate: Names of tensors to duplicate.
+    """
+    for tensor_name in tensor_names_to_duplicate:
+      for consumer_params in self.model_quant_results[tensor_name].consumers:
+        consumer_params.transformations.insert(0, _QuantTrans.DUPLICATE_TENSOR)
+
+  def _check_buffer_sharing_for_tensor(self, tensor: Any) -> bool:
+    """Check buffer sharing for the tensor against itself.
+
+    Args:
+      tensor: The tensor to check.
+
+    Returns:
+      Whether the tensor has compatible quantization parameters.
+
+    Raises:
+      RuntimeError: If the tensor has incompatible quantization parameters
+        and the buffer is not constant.
+    """
+    tensor_params = self.model_quant_results.get(
+        tfl_flatbuffer_utils.get_tensor_name(tensor), None
+    )
+    if tensor_params is None:
+      return True
+
+    if _are_tensor_consumer_params_compatible(tensor_params):
+      return True
+    elif _is_constant_tensor(tensor, self.flatbuffer_model.buffers):
+      return False
+    else:
+      error_msg = (
+          f'The tensor {tensor.name} consumers do not have the same'
+          ' quantization parameters. Please modify your quantization recipe to'
+          ' make sure the two tensors have the same quantization settings.'
+      )
+      raise RuntimeError(error_msg)
+
+  def _check_buffer_sharing_for_self_compatible_tensors(
+      self, tensor1: Any, tensor2: Any
+  ) -> bool:
+    """Check a pair of self compatible tensors have the same quantization params.
+
+    Self compatible means that all tensor's consumers have the same quantization
+    parameters.
+
+    Args:
+      tensor1: The first tensor to check.
+      tensor2: The second tensor to check.
+
+    Returns:
+      Whether the tensors have compatible quantization parameters.
+
+    Raises:
+      RuntimeError: If the tensors have incompatible quantization parameters
+        and the buffer is not constant.
+    """
+    tensor1_params = self.model_quant_results.get(
+        tfl_flatbuffer_utils.get_tensor_name(tensor1), None
+    )
+    tensor2_params = self.model_quant_results.get(
+        tfl_flatbuffer_utils.get_tensor_name(tensor2), None
+    )
+
+    if tensor1_params is None or tensor2_params is None:
+      return True
+
+    if _are_self_compatible_tensors_compatible_to_each_other(
+        tensor1_params, tensor2_params
+    ):
+      return True
+    elif _is_constant_tensor(tensor1, self.flatbuffer_model.buffers):
+      return False
+    else:
+      error_msg = (
+          f'The tensors {tensor1.name} and {tensor2.name} do not have'
+          ' the same quantization parameters even though they share the'
+          ' same buffer. Please modify your quantization recipe to make'
+          ' sure the two tensors have the same quantization settings.'
+      )
+      raise RuntimeError(error_msg)
+
+  def _check_and_fix_buffer_sharing(self) -> None:
+    """Check and fix tensor/buffer sharing issues when possible.
+
+    This function checks if tensors sharing the same buffer have the same
+    quantization settings. If not, it fixes the issue where possible by marking
+    such tensors or buffers for duplication. Otherwise, it raises an error.
+
+    Possible cases that can be fixed by duplication:
+      1. A constant tensor receives different quantization parameters from its
+        consumers. In this case, the tensor is marked for duplication.
+      2. Two or more tensors share the same constant buffer and have different
+        quantization parameters. In this case, the buffer is marked for
+        duplication.
 
     Raises:
       RuntimeError: If the tensors sharing the same buffer have different
-        quantization settings.
+        quantization settings and it can't be resolved by duplicating the
+        buffer/tensor.
     """
-    for tensors in self.buffer_to_tensors.values():
-      if len(tensors) <= 1:
+    buffers_to_duplicate = []
+    tensor_names_to_duplicate = []
+    for buffer_idx, tensors in self.buffer_to_tensors.items():
+      # TODO: b/458797890 - Investigate if skipping buffer_idx == 0 is a
+      # correct fix, or if it just covers up a deeper issue. This is only
+      # required when statically quantizing models that have already been
+      # quantized dynamically.
+      if not tensors or buffer_idx == 0:
         continue
-      first_tensor = tensors[0]
-      first_tensor_params = self.model_quant_results[
-          tfl_flatbuffer_utils.get_tensor_name(first_tensor)
-      ]
-      for tensor in tensors[1:]:
-        tensor_params = self.model_quant_results[
-            tfl_flatbuffer_utils.get_tensor_name(tensor)
-        ]
-        if not _compatible_tensor_transformation_params(
-            first_tensor_params, tensor_params
-        ):
-          error_msg = (
-              f'The tensors {first_tensor.name} and {tensor.name} do not have'
-              ' the same quantization parameters even though they share the'
-              ' same buffer. Please modify your quantization recipe to make'
-              ' sure the two tensors have the same quantization settings.'
+      # Check if any of the tensors needs to be duplicated.
+      for tensor in tensors:
+        if not self._check_buffer_sharing_for_tensor(tensor):
+          tensor_names_to_duplicate.append(
+              tfl_flatbuffer_utils.get_tensor_name(tensor)
           )
-          raise RuntimeError(error_msg)
+      # Check if the buffer needs to be duplicated.
+      tensor_1 = tensors[0]
+      tensor_name_1 = tfl_flatbuffer_utils.get_tensor_name(tensor_1)
+      if tensor_name_1 in tensor_names_to_duplicate:
+        buffers_to_duplicate.append(buffer_idx)
+        continue
+      for tensor_2 in tensors[1:]:
+        tensor_name_2 = tfl_flatbuffer_utils.get_tensor_name(tensor_2)
+        if (
+            tensor_name_2 in tensor_names_to_duplicate
+            or not self._check_buffer_sharing_for_self_compatible_tensors(
+                tensor_1, tensor_2
+            )
+        ):
+          buffers_to_duplicate.append(buffer_idx)
+          break
+
+    # Fix the buffer sharing issues.
+    self._mark_tensors_requiring_buffer_duplication(buffers_to_duplicate)
+    self._mark_tensors_requiring_tensor_duplication(tensor_names_to_duplicate)
+
+
+def _are_tensor_consumer_params_compatible(
+    params: qtyping.TensorTransformationParams,
+) -> bool:
+  """Check if all tensor's consumers have the same quantization parameters."""
+  if params.consumers is None or len(params.consumers) < 2:
+    return True
+  consumer_1 = params.consumers[0]
+  for consumer in params.consumers[1:]:
+    if not _compatible_tensor_params(consumer, consumer_1):
+      return False
+  return True
 
 
-def _compatible_tensor_transformation_params(
+def _are_self_compatible_tensors_compatible_to_each_other(
     params1: qtyping.TensorTransformationParams,
     params2: qtyping.TensorTransformationParams,
 ) -> bool:
-  """Check if two tensor transformation params are compatible."""
+  """Check if two self compatible tensors are compatible to each other.
+
+  Self compatible means that all tensor's consumers have the same quantization
+  parameters.
+
+  Args:
+    params1: The first tensor transformation params.
+    params2: The second tensor transformation params.
+
+  Returns:
+    Whether the two tensors are compatible to each other.
+  """
+  # Check the producer.
   if params1.producer is None or params2.producer is None:
     if params1.producer != params2.producer:
       return False
   elif not _compatible_tensor_params(params1.producer, params2.producer):
     return False
+
+  # Check the consumers.
   if params1.consumers is None or params2.consumers is None:
     if params1.consumers != params2.consumers:
       return False
   else:
-    # Check all consumers within each params are compatible.
-    for params1_consumer in params1.consumers:
-      if not _compatible_tensor_params(params1_consumer, params1.consumers[0]):
-        return False
-    for params2_consumer in params2.consumers:
-      if not _compatible_tensor_params(params2_consumer, params2.consumers[0]):
-        return False
+    # Since all consumer params within each tensor are the same, it's enough to
+    # check only the first consumers.
     if not _compatible_tensor_params(
         params1.consumers[0], params2.consumers[0]
     ):
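To make the fix concrete, here is a toy rendering of the marking step with stand-in classes (the real code uses the qtyping dataclasses and the _QuantTrans enum; the names below are illustrative only). Prepending the duplication transformation to every consumer guarantees it runs before each consumer's own quantize transformation, so each consumer ends up operating on its own copy of the tensor or buffer:

from dataclasses import dataclass, field
from enum import Enum, auto

class QuantTrans(Enum):  # stand-in for the real transformation enum
  DUPLICATE_TENSOR = auto()
  QUANTIZE_TENSOR = auto()

@dataclass
class ConsumerParams:  # stand-in for the consumer-side params in qtyping
  transformations: list = field(default_factory=list)

# A constant tensor whose two consumers requested different scales: instead of
# raising, the generator now prepends DUPLICATE_TENSOR for every consumer.
consumers = [
    ConsumerParams([QuantTrans.QUANTIZE_TENSOR]),
    ConsumerParams([QuantTrans.QUANTIZE_TENSOR]),
]
for consumer in consumers:
  consumer.transformations.insert(0, QuantTrans.DUPLICATE_TENSOR)

# Duplication is now the first transformation applied on every consumer edge.
assert all(
    c.transformations[0] is QuantTrans.DUPLICATE_TENSOR for c in consumers
)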
@@ -330,6 +513,8 @@ def _compatible_tensor_params(
   float_source_transformations = [
       _QuantTrans.ADD_QUANTIZE,
       _QuantTrans.NO_QUANTIZE,
+      _QuantTrans.INSERT_HADAMARD_ROTATION,
+      _QuantTrans.INSERT_DECOMPOSED_HADAMARD_ROTATION,
   ]
   quantized_source_transformations = [
       _QuantTrans.QUANTIZE_TENSOR,
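The two new entries classify the Hadamard-rotation insertions as float-source transformations: like ADD_QUANTIZE, they leave the source tensor in float and insert an op after it. For intuition, rotating by an orthonormal Hadamard matrix preserves inner products while spreading outliers across elements, which shrinks the dynamic range a subsequent quantizer must cover. A minimal, self-contained illustration follows (standard Sylvester construction; this is not code from hadamard_rotation.py):

import numpy as np

def hadamard(n: int) -> np.ndarray:
  """Orthonormal Hadamard matrix via Sylvester doubling; n a power of two."""
  h = np.array([[1.0]])
  while h.shape[0] < n:
    h = np.block([[h, h], [h, -h]])
  return h / np.sqrt(h.shape[0])  # normalized so that h @ h.T == I

x = np.array([10.0, 0.1, -0.2, 0.05])     # one outlier dominates the range
rx = hadamard(4) @ x                      # rotation spreads it across elements
print(np.abs(x).max(), np.abs(rx).max())  # 10.0 vs ~5.1: tighter range to quantize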
@@ -337,14 +522,6 @@
   ]
   if _same_tensor_params_except_id(params1, params2):
     return True
-  if (
-      params1.transformations[0] != _QuantTrans.NO_QUANTIZE
-      and params2.transformations[0] != _QuantTrans.NO_QUANTIZE
-  ):
-    # NO_QUANTIZE has no parameters. So only if both params aren't NO_QUANTIZE
-    # do we expect the parameters to be the same.
-    if params1.parameters != params2.parameters:
-      return False
   # We only need to check the first transformation because transformations are
   # applied in order, and as long as the one that's immediately after the tensor
   # is the same, it's compatible.
@@ -356,6 +533,12 @@
   if (
       params1.transformations[0] in quantized_source_transformations
       and params2.transformations[0] in quantized_source_transformations
+      and params1.parameters == params2.parameters
   ):
     return True
   return False
+
+
+def _is_constant_tensor(tensor: Any, buffers: Sequence[Any]) -> bool:
+  """Check if the tensor is a constant tensor."""
+  return buffers[tensor.buffer].data is not None
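Together with the block removed in the previous hunk, this moves the parameter-equality requirement into the quantized-source branch only: consumers that keep the source tensor in float (ADD_QUANTIZE and the Hadamard insertions) may disagree on quantization parameters, because each inserts its own downstream op, while tensors quantized in place must agree exactly. A condensed restatement with stand-in types, not the real qtyping classes (the real quantized_source_transformations list has more entries than shown):

from dataclasses import dataclass
from typing import Any

@dataclass
class P:  # stand-in for a consumer's transformation params
  first_transformation: str
  parameters: Any

FLOAT_SOURCE = {
    'ADD_QUANTIZE', 'NO_QUANTIZE',
    'INSERT_HADAMARD_ROTATION', 'INSERT_DECOMPOSED_HADAMARD_ROTATION',
}
QUANT_SOURCE = {'QUANTIZE_TENSOR'}  # abridged

def compatible(p1: P, p2: P) -> bool:
  t1, t2 = p1.first_transformation, p2.first_transformation
  if t1 in FLOAT_SOURCE and t2 in FLOAT_SOURCE:
    return True  # each consumer re-quantizes the float source itself
  if t1 in QUANT_SOURCE and t2 in QUANT_SOURCE:
    return p1.parameters == p2.parameters  # the condition added above
  return False

assert compatible(P('ADD_QUANTIZE', 0.1), P('ADD_QUANTIZE', 0.2))
assert not compatible(P('QUANTIZE_TENSOR', 0.1), P('QUANTIZE_TENSOR', 0.2))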