ai-edge-quantizer-nightly 0.0.1.dev20250302__py3-none-any.whl → 0.5.0.dev20260103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. ai_edge_quantizer/algorithm_manager.py +224 -0
  2. ai_edge_quantizer/algorithm_manager_api_test.py +7 -0
  3. ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py +2 -2
  4. ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +643 -20
  5. ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py +29 -2
  6. ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py +29 -35
  7. ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py +35 -12
  8. ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py +414 -0
  9. ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py +440 -0
  10. ai_edge_quantizer/algorithms/uniform_quantize/mse.py +127 -0
  11. ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py +195 -0
  12. ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +54 -168
  13. ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +54 -17
  14. ai_edge_quantizer/algorithms/uniform_quantize/octav.py +188 -0
  15. ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py +240 -0
  16. ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +260 -13
  17. ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +152 -5
  18. ai_edge_quantizer/algorithms/utils/common_utils.py +142 -54
  19. ai_edge_quantizer/calibrator.py +58 -94
  20. ai_edge_quantizer/calibrator_test.py +5 -74
  21. ai_edge_quantizer/default_policy.py +108 -16
  22. ai_edge_quantizer/model_modifier.py +132 -8
  23. ai_edge_quantizer/model_modifier_test.py +81 -1
  24. ai_edge_quantizer/model_validator.py +38 -10
  25. ai_edge_quantizer/model_validator_test.py +2 -1
  26. ai_edge_quantizer/params_generator.py +230 -47
  27. ai_edge_quantizer/params_generator_test.py +366 -261
  28. ai_edge_quantizer/qtyping.py +92 -6
  29. ai_edge_quantizer/quantizer.py +167 -23
  30. ai_edge_quantizer/quantizer_test.py +288 -26
  31. ai_edge_quantizer/recipe.py +156 -21
  32. ai_edge_quantizer/recipe_manager.py +158 -1
  33. ai_edge_quantizer/recipe_manager_test.py +146 -32
  34. ai_edge_quantizer/recipe_test.py +93 -17
  35. ai_edge_quantizer/transformation_instruction_generator.py +313 -46
  36. ai_edge_quantizer/transformation_instruction_generator_test.py +449 -27
  37. ai_edge_quantizer/transformation_performer.py +112 -58
  38. ai_edge_quantizer/transformation_performer_test.py +176 -4
  39. ai_edge_quantizer/transformations/duplicate_buffer.py +46 -0
  40. ai_edge_quantizer/transformations/duplicate_buffer_test.py +106 -0
  41. ai_edge_quantizer/transformations/duplicate_tensor.py +62 -0
  42. ai_edge_quantizer/transformations/duplicate_tensor_test.py +131 -0
  43. ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation.py +299 -0
  44. ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation_test.py +244 -0
  45. ai_edge_quantizer/transformations/insert_hadamard_rotation.py +186 -0
  46. ai_edge_quantizer/transformations/insert_hadamard_rotation_test.py +200 -0
  47. ai_edge_quantizer/transformations/quantize_tensor.py +24 -44
  48. ai_edge_quantizer/transformations/quantize_tensor_test.py +3 -2
  49. ai_edge_quantizer/transformations/transformation_utils.py +157 -11
  50. ai_edge_quantizer/transformations/transformation_utils_test.py +96 -2
  51. ai_edge_quantizer/utils/calibration_utils.py +263 -1
  52. ai_edge_quantizer/utils/calibration_utils_test.py +173 -3
  53. ai_edge_quantizer/utils/constrained_ops_utils.py +111 -0
  54. ai_edge_quantizer/utils/constrained_ops_utils_test.py +50 -0
  55. ai_edge_quantizer/utils/test_utils.py +191 -58
  56. ai_edge_quantizer/utils/tfl_flatbuffer_utils.py +96 -50
  57. ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py +20 -0
  58. ai_edge_quantizer/utils/tfl_interpreter_utils.py +138 -5
  59. ai_edge_quantizer/utils/tfl_interpreter_utils_test.py +29 -2
  60. ai_edge_quantizer/utils/validation_utils.py +114 -4
  61. ai_edge_quantizer/utils/validation_utils_test.py +80 -0
  62. {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/METADATA +13 -3
  63. ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/RECORD +81 -0
  64. {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/WHEEL +1 -1
  65. ai_edge_quantizer/transformations/emulated_subchannel.py +0 -363
  66. ai_edge_quantizer/transformations/emulated_subchannel_test.py +0 -212
  67. ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info/RECORD +0 -67
  68. {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/licenses}/LICENSE +0 -0
  69. {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/top_level.txt +0 -0
@@ -23,7 +23,7 @@ to implement the get_tensor_quant_params_fn with the
 qtyping.GetTensorQuantParamsFuncSignature signature.
 """

-from typing import Any
+from typing import Any, Optional, Sequence
 import numpy as np
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor
@@ -36,6 +36,13 @@ _OpQuantConstraint = common_utils.OpQuantConstraint
 _ComputePrecision = qtyping.ComputePrecision


+def check_if_quantized(tensor: Any) -> bool:
+  """Checks if the tensor is quantized."""
+  return (
+      tensor.quantization is not None and tensor.quantization.scale is not None
+  )
+
+
 def check_op_quantization_config(
     op_name: _TFLOpName,
     op_quant_config: qtyping.OpQuantizationConfig,
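
For intuition, here is a tiny self-contained sketch of the `check_if_quantized` predicate added above; `FakeQuantization` and `FakeTensor` are hypothetical stand-ins for the TFLite flatbuffer objects the real code receives:

import dataclasses
from typing import Any, Optional


@dataclasses.dataclass
class FakeQuantization:  # stand-in for flatbuffer QuantizationParameters
  scale: Optional[list[float]] = None


@dataclasses.dataclass
class FakeTensor:  # stand-in for a flatbuffer Tensor
  quantization: Optional[FakeQuantization] = None


def check_if_quantized(tensor: Any) -> bool:
  """Same logic as the helper added in this hunk."""
  return (
      tensor.quantization is not None and tensor.quantization.scale is not None
  )


assert not check_if_quantized(FakeTensor())                      # no quant metadata
assert not check_if_quantized(FakeTensor(FakeQuantization()))    # metadata, no scale
assert check_if_quantized(FakeTensor(FakeQuantization([0.02])))  # has a scale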
@@ -110,6 +117,21 @@ def materialize_output(
   )


+def materialize_composite(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in the virtual output op."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+  )
+
+
 def materialize_add(
     get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
     op_info: qtyping.OpInfo,
@@ -256,7 +278,7 @@ def materialize_average_pool_2d(
   )


-def _materialize_bias_for_conv_ops(
+def _materialize_bias_for_fc_conv_ops(
     op_info: qtyping.OpInfo,
     graph_info: qtyping.GraphInfo,
     op_tensor_params: list[qtyping.TensorTransformationParams],
@@ -275,14 +297,16 @@ def _materialize_bias_for_conv_ops(
     op_weight_index: Index for the weight tensor in the op.
     op_bias_index: Index for the bias tensor in the op.
   """
-  _, _, bias_tensor, _ = tfl_flatbuffer_utils.parse_fc_bmm_conv_tensors(
-      op_info.op,
-      graph_info.subgraph_tensors,
-      op_input_index,
-      op_weight_index,
-      op_bias_index,
-  )
-  if bias_tensor is not None:
+  _, weight_tensor, bias_tensor, _ = (
+      tfl_flatbuffer_utils.parse_fc_bmm_conv_tensors(
+          op_info.op,
+          graph_info.subgraph_tensors,
+          op_input_index,
+          op_weight_index,
+          op_bias_index,
+      )
+  )
+  if bias_tensor is not None and not check_if_quantized(bias_tensor):
     bias_quant_params = None
     # Fused bias needs to be quantized for SRQ.
     # Check if SRQ.
@@ -294,13 +318,41 @@ def _materialize_bias_for_conv_ops(
           bias_tensor,
           graph_info.buffers,
       )
-      bias_quant_params = (
-          uniform_quantize_tensor.symmetric_quantize_bias_tensor(
-              bias_content,
-              op_tensor_params[op_input_index].consumers[0].parameters,
-              op_tensor_params[op_weight_index].consumers[0].parameters,
-          )
+      input_consumer_params = (
+          op_tensor_params[op_input_index].consumers[0].parameters
+      )
+      weight_consumer_params = (
+          op_tensor_params[op_weight_index].consumers[0].parameters
       )
+      if weight_consumer_params is None and check_if_quantized(weight_tensor):
+        quant_params = weight_tensor.quantization
+        if op_info.op_quant_config.weight_tensor_config is None:
+          raise ValueError(
+              "weight_tensor_config cannot be None when weight tensor is"
+              " quantized."
+          )
+        weight_consumer_params = qtyping.UniformQuantParams(
+            num_bits=op_info.op_quant_config.weight_tensor_config.num_bits,
+            scale=quant_params.scale,
+            zero_point=quant_params.zeroPoint,
+            quantized_dimension=quant_params.quantizedDimension,
+        )
+      try:
+        # Bias quantization is using fixed quantization scale:
+        # input_scale * weight_scale. To avoid hidden numerics error, we check
+        # the quantization error in bias quantization.
+        bias_quant_params = (
+            uniform_quantize_tensor.symmetric_quantize_bias_tensor(
+                bias_content,
+                input_consumer_params,
+                weight_consumer_params,
+            )
+        )
+      except ValueError as e:
+        raise ValueError(
+            f"Failed to quantize bias tensor for op {op_info.op_name} with op"
+            f" id {op_info.subgraph_op_index}."
+        ) from e
     # We only quantize bias under SRQ. Setting is_constant=True for SRQ only
     # to avoid quantize bias for DRQ and weight-only cases.
     is_constant = (
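
The comment above pins down the contract: the bias scale is not free, it is fixed to input_scale * weight_scale, and the quantization error is checked so a bad fixed scale fails loudly instead of silently degrading accuracy. A minimal numpy sketch of that scheme; `quantize_bias_fixed_scale` and its tolerance are illustrative stand-ins, not the package's `symmetric_quantize_bias_tensor` API:

import numpy as np


def quantize_bias_fixed_scale(
    bias: np.ndarray,
    input_scale: np.ndarray,
    weight_scale: np.ndarray,
    err_tolerance: float = 1e-2,  # illustrative threshold
) -> np.ndarray:
  """Symmetric int32 bias quantization with scale = input_scale * weight_scale."""
  bias_scale = input_scale * weight_scale  # fixed, not derived from bias stats
  q = np.round(bias / bias_scale).astype(np.int64)
  q = np.clip(q, np.iinfo(np.int32).min, np.iinfo(np.int32).max).astype(np.int32)
  # Check round-trip error so hidden numerics issues surface as an exception.
  err = np.max(np.abs(q * bias_scale - bias))
  if err > err_tolerance:
    raise ValueError(f"Bias quantization error too large: {err}")
  return q


bias = np.array([0.5, -1.25, 3.0], dtype=np.float32)
print(quantize_bias_fixed_scale(bias, np.float32(0.05), np.float32(0.01)))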
@@ -356,6 +408,25 @@ def materialize_slice(
   )


+def materialize_select(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.select."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_OUTPUT_SCALE,
+      inputs_to_ignore=[
+          0,
+      ],  # Condition tensor does not need to be quantized.
+  )
+
+
 def materialize_select_v2(
     get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
     op_info: qtyping.OpInfo,
@@ -375,6 +446,25 @@ def materialize_select_v2(
   )


+def materialize_dynamic_update_slice(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.dynamic_update_slice."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_OUTPUT_SCALE,
+      inputs_to_ignore=[
+          2,
+      ],  # start_indices do not need to be quantized.
+  )
+
+
 def materialize_sum(
     get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
     op_info: qtyping.OpInfo,
@@ -382,12 +472,21 @@ def materialize_sum(
     tensor_name_to_qsv: dict[str, Any],
 ) -> list[qtyping.TensorTransformationParams]:
   """Materialize tensors in tfl.sum."""
+  # For 8 bits the reference kernel calls a function without input/output
+  # constraints. For all others it calls a function that enforces input/output
+  # scale/zero point checks. See:
+  # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels/reduce.cc#L909
+  activation_config = op_info.op_quant_config.activation_tensor_config
+  if activation_config is not None and activation_config.num_bits == 8:
+    constraint = _OpQuantConstraint.NO_CONSTRAIN
+  else:
+    constraint = _OpQuantConstraint.SAME_AS_INPUT_SCALE
   return common_utils.materialize_standard_op(
       op_info,
       graph_info,
       tensor_name_to_qsv,
       get_tensor_quant_params_fn,
-      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      constraint=constraint,
       inputs_to_ignore=[1],  # Axis index does not need to be quantized.
   )

@@ -418,7 +517,13 @@ def materialize_fc_conv(
     weights, bias).
   """
   ignored_inputs = [bias_index]  # Bias tensor is quantized separately.
-  if _are_weights_too_small(op_info, graph_info, weight_index):
+  should_ignore_weight = False
+  if graph_info:
+    w_tensor = graph_info.subgraph_tensors[op_info.op.inputs[weight_index]]
+    should_ignore_weight = check_if_quantized(w_tensor)
+  if should_ignore_weight or _are_weights_too_small(
+      op_info, graph_info, weight_index
+  ):
     ignored_inputs.append(weight_index)

   op_tensor_params = common_utils.materialize_standard_op(
@@ -429,7 +534,7 @@ def materialize_fc_conv(
       inputs_to_ignore=ignored_inputs,
   )

-  _materialize_bias_for_conv_ops(
+  _materialize_bias_for_fc_conv_ops(
       op_info,
       graph_info,
       op_tensor_params,
@@ -484,7 +589,7 @@ def materialize_conv2d_transpose(
         "Materialize standard op should return at least two tensors for"
        " conv2d_transpose."
    )
-  _materialize_bias_for_conv_ops(
+  _materialize_bias_for_fc_conv_ops(
      op_info,
      graph_info,
      op_tensor_params,
@@ -635,3 +740,521 @@ def materialize_split(
       constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
       inputs_to_ignore=[0],  # Split dimension does not need to be quantized.
   )
+
+
+def materialize_pad(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.pad."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      inputs_to_ignore=[1],  # Paddings tensor does not need to be quantized.
+  )
+
+
+def materialize_padv2(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.padv2."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_OUTPUT_SCALE,
+      inputs_to_ignore=[1],  # Paddings tensor does not need to be quantized.
+  )
+
+
+def materialize_mirror_pad(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.mirror_pad.
+
+  Args:
+    get_tensor_quant_params_fn: Function to get quantization parameters for the
+      tensor.
+    op_info: Aggregated information about the op (e.g., quantization config).
+    graph_info: Graph information needed to perform quantization for the op.
+    tensor_name_to_qsv: A map of tensor name to quantization parameters.
+
+  Returns:
+    A list of `qtyping.TensorTransformationParams` for the tensors in the op.
+  """
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      inputs_to_ignore=[1],  # Paddings tensor does not need to be quantized.
+  )
+
+
+def materialize_space_to_depth(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.space_to_depth.
+
+  Args:
+    get_tensor_quant_params_fn: Function to get quantization parameters for the
+      tensor.
+    op_info: Aggregated information about the op (e.g., quantization config).
+    graph_info: Graph information needed to perform quantization for the op.
+    tensor_name_to_qsv: A map of tensor name to quantization parameters.
+
+  Returns:
+    A list of `qtyping.TensorTransformationParams` for the tensors in the op.
+  """
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+  )
+
+
+def materialize_squared_difference(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.squared_difference."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+  )
+
+
+def materialize_max_pool_2d(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.max_pool_2d."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+  )
+
+
+def materialize_resize_bilinear(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.resize_bilinear."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      inputs_to_ignore=[1],  # Resize size does not need to be quantized.
+  )
+
+
+def materialize_resize_nearest_neighbor(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.resize_nearest_neighbor."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      inputs_to_ignore=[1],  # Resize size does not need to be quantized.
+  )
+
+
+def materialize_gather_nd(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.gather_nd."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      inputs_to_ignore=[1],  # Gather indices do not need to be quantized.
+  )
+
+
+def materialize_maximum(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.maximum."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_OUTPUT_SCALE,
+  )
+
+
+def materialize_pack(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.pack."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_OUTPUT_SCALE,
+  )
+
+
+def materialize_unpack(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.unpack."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+  )
+
+
+def materialize_div(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.div."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+  )
+
+
+def materialize_broadcast_to(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.broadcast_to."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      inputs_to_ignore=[1],  # Shape tensor does not need to be quantized.
+  )
+
+
+def materialize_sqrt(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.sqrt."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+  )
+
+
+def materialize_hard_swish(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.hard_swish."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+  )
+
+
+def materialize_gather(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.gather."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      inputs_to_ignore=[1],  # Indices do not need to be quantized.
+  )
+
+
+def materialize_reduce_min(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.reduce_min."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      inputs_to_ignore=[1],  # Axis index does not need to be quantized.
+  )
+
+
+def materialize_equal(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.equal."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+  )
+
+
+def materialize_not_equal(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.not_equal."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+  )
+
+
+def materialize_relu(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.relu."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+  )
+
+
+def _get_tensor_shape_for_blockwise(
+    tensor_shape: Sequence[int], quantized_dim: int, block_size: int
+) -> list[int]:
+  """Get the tensor shape for blockwise quantization.
+
+  This function splits the quantized dimension of the tensor into
+  dim/block_size blocks of size block_size. Hence, min/max of the tensor can
+  be calculated for each block using existing functions.
+
+  Args:
+    tensor_shape: The original shape of the tensor.
+    quantized_dim: The dimension to be quantized blockwise.
+    block_size: The size of the block.
+
+  Returns:
+    The new tensor shape for calculating scale and zp for blockwise
+    quantization.
+  """
+  new_shape = []
+  for index, val in enumerate(tensor_shape):
+    if index == quantized_dim:
+      new_shape.append(int(val / block_size))
+      new_shape.append(block_size)
+    else:
+      new_shape.append(val)
+  return new_shape
+
+
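
Concretely, only the quantized dimension is split: a (64, 128) weight with quantized_dim=1 and block_size=32 becomes (64, 4, 32), so each length-32 block gets its own axis for the min/max reduction. A quick sketch of the same arithmetic (shapes are illustrative):

shape, quantized_dim, block_size = (64, 128), 1, 32
new_shape = []
for index, val in enumerate(shape):
  if index == quantized_dim:
    new_shape += [val // block_size, block_size]  # split dim into (blocks, block)
  else:
    new_shape.append(val)
assert new_shape == [64, 4, 32]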
+def _reshape_data_for_blockwise(
+    tensor_data: np.ndarray,
+    quantized_dim: int,
+    block_size: int,
+) -> tuple[np.ndarray, int]:
+  """Reshapes data for blockwise quantization.
+
+  Args:
+    tensor_data: The original tensor data.
+    quantized_dim: The dimension to be quantized blockwise.
+    block_size: The size of the block. `block_size` must be a multiple of 32,
+      and the tensor's quantized dimension must be divisible by `block_size`.
+
+  Returns:
+    A tuple containing the reshaped tensor data and the new reduce dimension.
+  """
+
+  # TODO: b/417508018 - create AEQ specific error class instead of
+  # using generic ValueError.
+  if tensor_data.shape[quantized_dim] % block_size != 0:
+    raise ValueError(
+        "Tensor quantization dimension must be divisible by block size for"
+        " blockwise quantization."
+    )
+  new_shape = _get_tensor_shape_for_blockwise(
+      tensor_data.shape, quantized_dim, block_size
+  )
+  reshaped_data = tensor_data.reshape(new_shape)
+  return reshaped_data, quantized_dim + 1
+
+
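
The returned reduce dimension is just `quantized_dim + 1`, i.e. the freshly created block axis, so per-block min/max falls out of plain numpy reductions. A sketch under the same (64, 128) assumption, not the package API:

import numpy as np

data = np.arange(64 * 128, dtype=np.float32).reshape(64, 128)
quantized_dim, block_size = 1, 32
reshaped = data.reshape(64, 128 // block_size, block_size)  # (64, 4, 32)
reduce_dim = quantized_dim + 1                              # the new block axis
block_min = np.min(reshaped, axis=reduce_dim)               # shape (64, 4)
block_max = np.max(reshaped, axis=reduce_dim)               # one pair per block
assert block_min.shape == block_max.shape == (64, 4)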
+def broadcast_scale_zp_for_blockwise(
+    tensor_content: np.ndarray,
+    quant_params: qtyping.UniformQuantParams,
+) -> qtyping.UniformQuantParams:
+  """Broadcasts scale and zp for blockwise quantization.
+
+  Args:
+    tensor_content: The original tensor data.
+    quant_params: The quantization parameters.
+      `quant_params.quantized_dimension` must be specified.
+      `quant_params.block_size` must be specified and positive.
+
+  Returns:
+    The updated quantization parameters with broadcasted scale and zp for
+    correct constant quantization.
+  """
+  if quant_params.quantized_dimension is None:
+    raise ValueError("Quantized dimension must be specified.")
+  if quant_params.block_size is None or quant_params.block_size <= 0:
+    raise ValueError("Block size must be specified and positive.")
+  quantized_dim = quant_params.quantized_dimension
+  expanded_tensor_shape = _get_tensor_shape_for_blockwise(
+      tensor_content.shape, quantized_dim, quant_params.block_size
+  )
+  expanded_scale = np.reshape(
+      np.broadcast_to(
+          np.expand_dims(quant_params.scale, quantized_dim + 1),
+          expanded_tensor_shape,
+      ),
+      tensor_content.shape,
+  )
+  expanded_zp = np.reshape(
+      np.broadcast_to(
+          np.expand_dims(quant_params.zero_point, quantized_dim + 1),
+          expanded_tensor_shape,
+      ),
+      tensor_content.shape,
+  )
+  return qtyping.UniformQuantParams(
+      scale=expanded_scale,
+      zero_point=expanded_zp,
+      num_bits=quant_params.num_bits,
+      symmetric=quant_params.symmetric,
+      quantized_dimension=quantized_dim,
+      block_size=quant_params.block_size,
+  )
+
+
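
Going the other way, the broadcast expands the per-block scale back to the full tensor shape so elementwise constant quantization works unchanged. A numpy sketch of just the broadcast step, with illustrative shapes:

import numpy as np

tensor_shape = (64, 128)
quantized_dim, block_size = 1, 32
scale = np.full((64, 4), 0.02, dtype=np.float32)  # one scale per block
expanded_shape = (64, 4, 32)                      # blockwise view of the tensor
expanded_scale = np.reshape(
    np.broadcast_to(np.expand_dims(scale, quantized_dim + 1), expanded_shape),
    tensor_shape,
)
assert expanded_scale.shape == tensor_shape  # every element now has its scale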
+def init_tensor_min_max(
+    tensor_data: Optional[np.ndarray],
+    op_info: qtyping.OpInfo,
+) -> qtyping.QSV:
+  """Initialize the min/max for a tensor.
+
+  This function initializes the min/max values for a tensor.
+
+  Args:
+    tensor_data: The tensor data.
+    op_info: Aggregated information about the op (e.g., quantization config).
+
+  Returns:
+    A dictionary containing the min/max values for the tensor, or an empty
+    dictionary if the tensor data is None.
+  """
+  weight_tensor_config = op_info.op_quant_config.weight_tensor_config
+  if tensor_data is None or weight_tensor_config is None:
+    return {}
+  else:
+    # Get reduce dimension for min/max calculation based on quantization
+    # granularity.
+    granularity = weight_tensor_config.granularity
+    if granularity == qtyping.QuantGranularity.TENSORWISE:
+      reduce_dims = None
+      keep_dims = True
+    elif granularity == qtyping.QuantGranularity.CHANNELWISE:
+      quantized_dim = common_utils.get_weight_quantized_dim(
+          op_info, tensor_data, weight_tensor_config.granularity
+      )
+      reduce_dims = common_utils.get_reduce_dims(
+          quantized_dim, tensor_data.shape
+      )
+      keep_dims = True
+    elif uniform_quantize_tensor.is_blockwise(granularity):
+      tensor_data, reduce_dims = (
+          uniform_quantize_tensor.reshape_data_for_blockwise(
+              tensor_data,
+              op_info.op_name,
+              granularity,
+          )
+      )
+      keep_dims = False
+    else:
+      raise ValueError(f"Unsupported granularity: {granularity}")
+    return {
+        "min": np.min(tensor_data, axis=reduce_dims, keepdims=keep_dims),
+        "max": np.max(tensor_data, axis=reduce_dims, keepdims=keep_dims),
+    }
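
For the channelwise branch, the reduce dimensions are every axis except the quantized one, with keepdims=True so the stats stay broadcastable against the weights. A helper-free sketch of that case (the real code delegates to `common_utils.get_reduce_dims`; shapes here are illustrative):

import numpy as np

weights = np.random.randn(8, 3, 3, 16).astype(np.float32)  # e.g. a conv kernel
quantized_dim = 0                                          # per output channel
reduce_dims = tuple(i for i in range(weights.ndim) if i != quantized_dim)
qsv = {
    "min": np.min(weights, axis=reduce_dims, keepdims=True),  # shape (8, 1, 1, 1)
    "max": np.max(weights, axis=reduce_dims, keepdims=True),
}
assert qsv["min"].shape == (8, 1, 1, 1)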