ai-edge-quantizer-nightly 0.1.0.dev20250415__py3-none-any.whl → 0.5.0.dev20260103__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithm_manager.py +158 -0
- ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py +2 -2
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +489 -53
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py +29 -2
- ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py +4 -6
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py +414 -0
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py +440 -0
- ai_edge_quantizer/algorithms/uniform_quantize/mse.py +127 -0
- ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py +195 -0
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +48 -42
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +53 -14
- ai_edge_quantizer/algorithms/uniform_quantize/octav.py +32 -18
- ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py +92 -38
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +248 -13
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +126 -6
- ai_edge_quantizer/algorithms/utils/common_utils.py +142 -53
- ai_edge_quantizer/calibrator.py +11 -60
- ai_edge_quantizer/calibrator_test.py +4 -73
- ai_edge_quantizer/default_policy.py +61 -26
- ai_edge_quantizer/model_modifier.py +97 -7
- ai_edge_quantizer/model_modifier_test.py +81 -1
- ai_edge_quantizer/model_validator.py +31 -8
- ai_edge_quantizer/params_generator.py +17 -10
- ai_edge_quantizer/params_generator_test.py +2 -7
- ai_edge_quantizer/qtyping.py +86 -6
- ai_edge_quantizer/quantizer.py +166 -21
- ai_edge_quantizer/quantizer_test.py +284 -16
- ai_edge_quantizer/recipe.py +154 -42
- ai_edge_quantizer/recipe_manager.py +158 -1
- ai_edge_quantizer/recipe_manager_test.py +146 -32
- ai_edge_quantizer/recipe_test.py +93 -17
- ai_edge_quantizer/transformation_instruction_generator.py +118 -13
- ai_edge_quantizer/transformation_instruction_generator_test.py +163 -27
- ai_edge_quantizer/transformation_performer.py +55 -25
- ai_edge_quantizer/transformation_performer_test.py +127 -5
- ai_edge_quantizer/transformations/duplicate_buffer.py +2 -1
- ai_edge_quantizer/transformations/duplicate_tensor.py +1 -0
- ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation.py +299 -0
- ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation_test.py +244 -0
- ai_edge_quantizer/transformations/insert_hadamard_rotation.py +186 -0
- ai_edge_quantizer/transformations/insert_hadamard_rotation_test.py +200 -0
- ai_edge_quantizer/transformations/quantize_tensor.py +17 -32
- ai_edge_quantizer/transformations/quantize_tensor_test.py +1 -1
- ai_edge_quantizer/transformations/transformation_utils.py +129 -6
- ai_edge_quantizer/transformations/transformation_utils_test.py +65 -3
- ai_edge_quantizer/utils/calibration_utils.py +263 -1
- ai_edge_quantizer/utils/calibration_utils_test.py +173 -3
- ai_edge_quantizer/utils/constrained_ops_utils.py +111 -0
- ai_edge_quantizer/utils/constrained_ops_utils_test.py +50 -0
- ai_edge_quantizer/utils/test_utils.py +75 -2
- ai_edge_quantizer/utils/tfl_flatbuffer_utils.py +39 -6
- ai_edge_quantizer/utils/tfl_interpreter_utils.py +87 -15
- ai_edge_quantizer/utils/tfl_interpreter_utils_test.py +29 -2
- ai_edge_quantizer/utils/validation_utils.py +114 -4
- ai_edge_quantizer/utils/validation_utils_test.py +80 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250415.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/METADATA +14 -4
- ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/RECORD +81 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250415.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/WHEEL +1 -1
- ai_edge_quantizer/transformations/emulated_subchannel.py +0 -363
- ai_edge_quantizer/transformations/emulated_subchannel_test.py +0 -212
- ai_edge_quantizer_nightly-0.1.0.dev20250415.dist-info/RECORD +0 -73
- {ai_edge_quantizer_nightly-0.1.0.dev20250415.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/licenses}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250415.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/top_level.txt +0 -0
ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py

```diff
@@ -36,6 +36,13 @@ _OpQuantConstraint = common_utils.OpQuantConstraint
 _ComputePrecision = qtyping.ComputePrecision


+def check_if_quantized(tensor: Any) -> bool:
+  """Checks if the tensor is quantized."""
+  return (
+      tensor.quantization is not None and tensor.quantization.scale is not None
+  )
+
+
 def check_op_quantization_config(
     op_name: _TFLOpName,
     op_quant_config: qtyping.OpQuantizationConfig,
```

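The new `check_if_quantized` helper gates most of the behavior changes below: tensors that already carry scales are treated as pre-quantized and left alone. A minimal, package-free sketch of the predicate (the `SimpleNamespace` mocks stand in for the flatbuffer `TensorT` objects the real helper receives):

```python
import types

def check_if_quantized(tensor) -> bool:
  """Mirrors the helper above: quantization params with scales must exist."""
  return (
      tensor.quantization is not None and tensor.quantization.scale is not None
  )

pre_quantized = types.SimpleNamespace(
    quantization=types.SimpleNamespace(scale=[0.02]))
float_tensor = types.SimpleNamespace(quantization=None)

print(check_if_quantized(pre_quantized))  # True
print(check_if_quantized(float_tensor))   # False
```
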
```diff
@@ -271,7 +278,7 @@ def materialize_average_pool_2d(
   )


-def _materialize_bias_for_conv_ops(
+def _materialize_bias_for_fc_conv_ops(
     op_info: qtyping.OpInfo,
     graph_info: qtyping.GraphInfo,
     op_tensor_params: list[qtyping.TensorTransformationParams],
```

```diff
@@ -290,14 +297,16 @@ def _materialize_bias_for_conv_ops(
     op_weight_index: Index for the weight tensor in the op.
     op_bias_index: Index for the bias tensor in the op.
   """
-  _, _, bias_tensor, _ = tfl_flatbuffer_utils.parse_fc_bmm_conv_tensors(
-      op_info.op,
-      graph_info.subgraph_tensors,
-      op_input_index,
-      op_weight_index,
-      op_bias_index,
-  )
-  if bias_tensor is not None:
+  _, weight_tensor, bias_tensor, _ = (
+      tfl_flatbuffer_utils.parse_fc_bmm_conv_tensors(
+          op_info.op,
+          graph_info.subgraph_tensors,
+          op_input_index,
+          op_weight_index,
+          op_bias_index,
+      )
+  )
+  if bias_tensor is not None and not check_if_quantized(bias_tensor):
     bias_quant_params = None
     # Fused bias needs to be quantized for SRQ.
     # Check if SRQ.
```

```diff
@@ -309,13 +318,41 @@ def _materialize_bias_for_conv_ops(
           bias_tensor,
           graph_info.buffers,
       )
-      bias_quant_params = (
-          uniform_quantize_tensor.symmetric_quantize_bias_tensor(
-              bias_content,
-              op_tensor_params[op_input_index].consumers[0].parameters,
-              op_tensor_params[op_weight_index].consumers[0].parameters,
-          )
+      input_consumer_params = (
+          op_tensor_params[op_input_index].consumers[0].parameters
+      )
+      weight_consumer_params = (
+          op_tensor_params[op_weight_index].consumers[0].parameters
       )
+      if weight_consumer_params is None and check_if_quantized(weight_tensor):
+        quant_params = weight_tensor.quantization
+        if op_info.op_quant_config.weight_tensor_config is None:
+          raise ValueError(
+              "weight_tensor_config cannot be None when weight tensor is"
+              " quantized."
+          )
+        weight_consumer_params = qtyping.UniformQuantParams(
+            num_bits=op_info.op_quant_config.weight_tensor_config.num_bits,
+            scale=quant_params.scale,
+            zero_point=quant_params.zeroPoint,
+            quantized_dimension=quant_params.quantizedDimension,
+        )
+      try:
+        # Bias quantization is using fixed quantization scale:
+        # input_scale * weight_scale. To avoid hidden numerics error, we check
+        # the quantization error in bias quantization.
+        bias_quant_params = (
+            uniform_quantize_tensor.symmetric_quantize_bias_tensor(
+                bias_content,
+                input_consumer_params,
+                weight_consumer_params,
+            )
+        )
+      except ValueError as e:
+        raise ValueError(
+            f"Failed to quantize bias tensor for op {op_info.op_name} with op"
+            f" id {op_info.subgraph_op_index}."
+        ) from e
       # We only quantize bias under SRQ. Setting is_constant=True for SRQ only
       # to avoid quantize bias for DRQ and weight-only cases.
       is_constant = (
```

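The comment inside the new `try` block is the key numeric fact: bias uses the fixed scale `input_scale * weight_scale`, so a mismatch surfaces as quantization error instead of silent numeric drift. A worked example with made-up scales (not values from the package):

```python
import numpy as np

s_input, s_weight = 0.02, 0.005       # assumed activation and weight scales
s_bias = s_input * s_weight           # fixed bias scale: 1e-4
bias = np.array([0.01, -0.02, 0.03], dtype=np.float32)

q_bias = np.round(bias / s_bias).astype(np.int32)   # [100, -200, 300]
recovered = q_bias.astype(np.float32) * s_bias
print(np.abs(recovered - bias).max())  # the error the new ValueError guards
```
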
```diff
@@ -371,6 +408,25 @@ def materialize_slice(
   )


+def materialize_select(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.select."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_OUTPUT_SCALE,
+      inputs_to_ignore=[
+          0,
+      ],  # Condition tensor does not need to be quantized.
+  )
+
+
 def materialize_select_v2(
     get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
     op_info: qtyping.OpInfo,
```

```diff
@@ -416,12 +472,21 @@ def materialize_sum(
     tensor_name_to_qsv: dict[str, Any],
 ) -> list[qtyping.TensorTransformationParams]:
   """Materialize tensors in tfl.sum."""
+  # For 8 bits the reference kernel calls a function without input/output
+  # constraints. For all others it calls a function that enforces input/output
+  # scale/zero point checks. See:
+  # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels/reduce.cc#L909
+  activation_config = op_info.op_quant_config.activation_tensor_config
+  if activation_config is not None and activation_config.num_bits == 8:
+    constraint = _OpQuantConstraint.NO_CONSTRAIN
+  else:
+    constraint = _OpQuantConstraint.SAME_AS_INPUT_SCALE
   return common_utils.materialize_standard_op(
       op_info,
       graph_info,
       tensor_name_to_qsv,
       get_tensor_quant_params_fn,
-      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      constraint=constraint,
       inputs_to_ignore=[1],  # Axis index does not need to be quantized.
   )

```

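Why the non-8-bit paths pin `SAME_AS_INPUT_SCALE`: when input and output share one scale (zero point 0 here for simplicity), the integer kernel can accumulate raw quantized values with no requantization step. A minimal sketch with assumed numbers:

```python
import numpy as np

s = 0.05                                   # shared input/output scale (assumed)
x = np.array([0.1, 0.2, 0.3], dtype=np.float32)
q = np.round(x / s).astype(np.int32)       # quantized input: [2, 4, 6]
q_sum = q.sum()                            # pure integer accumulation: 12
print(q_sum * s, x.sum())                  # both ~0.6
```
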
```diff
@@ -452,7 +517,13 @@ def materialize_fc_conv(
       weights, bias).
   """
   ignored_inputs = [bias_index]  # Bias tensor is quantized separately.
-  if _are_weights_too_small(op_info, graph_info, weight_index):
+  should_ignore_weight = False
+  if graph_info:
+    w_tensor = graph_info.subgraph_tensors[op_info.op.inputs[weight_index]]
+    should_ignore_weight = check_if_quantized(w_tensor)
+  if should_ignore_weight or _are_weights_too_small(
+      op_info, graph_info, weight_index
+  ):
     ignored_inputs.append(weight_index)

   op_tensor_params = common_utils.materialize_standard_op(
```

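A sketch of what the new gating does (mock objects, not the real flatbuffer types): a weight tensor that already carries scales joins `ignored_inputs`, so `materialize_standard_op` skips it and the bias helper above reuses its stored scales:

```python
import types

def check_if_quantized(t) -> bool:
  return t.quantization is not None and t.quantization.scale is not None

weight = types.SimpleNamespace(            # pre-quantized weight (mock)
    quantization=types.SimpleNamespace(scale=[0.004]))

bias_index, weight_index = 2, 1
ignored_inputs = [bias_index]              # bias is always handled separately
if check_if_quantized(weight):             # mirrors the new gating above
  ignored_inputs.append(weight_index)
print(ignored_inputs)                      # [2, 1]
```
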
```diff
@@ -463,7 +534,7 @@ def materialize_fc_conv(
       inputs_to_ignore=ignored_inputs,
   )

-  _materialize_bias_for_conv_ops(
+  _materialize_bias_for_fc_conv_ops(
       op_info,
       graph_info,
       op_tensor_params,
```

```diff
@@ -518,7 +589,7 @@ def materialize_conv2d_transpose(
         "Materialize standard op should return at least two tensors for"
         " conv2d_transpose."
     )
-  _materialize_bias_for_conv_ops(
+  _materialize_bias_for_fc_conv_ops(
       op_info,
       graph_info,
       op_tensor_params,
```

```diff
@@ -671,6 +742,366 @@ def materialize_split(
   )


+def materialize_pad(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.pad."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      inputs_to_ignore=[1],  # Paddings tensor does not need to be quantized.
+  )
+
+
+def materialize_padv2(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.padv2."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_OUTPUT_SCALE,
+      inputs_to_ignore=[1],  # Paddings tensor does not need to be quantized.
+  )
+
+
+def materialize_mirror_pad(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.mirror_pad.
+
+  Args:
+    get_tensor_quant_params_fn: Function to get quantization parameters for the
+      tensor.
+    op_info: Aggregated information about the op (e.g., quantization config).
+    graph_info: Graph information needed to perform quantization for the op.
+    tensor_name_to_qsv: A map of tensor name to quantization parameters.
+
+  Returns:
+    A list of `qtyping.TensorTransformationParams` for the tensors in the op.
+  """
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      inputs_to_ignore=[1],  # Paddings tensor does not need to be quantized.
+  )
+
+
+def materialize_space_to_depth(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.space_to_depth.
+
+  Args:
+    get_tensor_quant_params_fn: Function to get quantization parameters for the
+      tensor.
+    op_info: Aggregated information about the op (e.g., quantization config).
+    graph_info: Graph information needed to perform quantization for the op.
+    tensor_name_to_qsv: A map of tensor name to quantization parameters.
+
+  Returns:
+    A list of `qtyping.TensorTransformationParams` for the tensors in the op.
+  """
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+  )
+
+
+def materialize_squared_difference(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.squared_difference."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+  )
+
+
+def materialize_max_pool_2d(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.max_pool_2d."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+  )
+
+
+def materialize_resize_bilinear(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.resize_bilinear."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      inputs_to_ignore=[1],  # Resize size does not need to be quantized.
+  )
+
+
+def materialize_resize_nearest_neighbor(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.resize_nearest_neighbor."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      inputs_to_ignore=[1],  # Resize size does not need to be quantized.
+  )
+
+
+def materialize_gather_nd(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.gather_nd."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      inputs_to_ignore=[1],  # Gather indices do not need to be quantized.
+  )
+
+
+def materialize_maximum(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.maximum."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_OUTPUT_SCALE,
+  )
+
+
+def materialize_pack(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.pack."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_OUTPUT_SCALE,
+  )
+
+
+def materialize_unpack(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.unpack."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+  )
+
+
+def materialize_div(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.div."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+  )
+
+
+def materialize_broadcast_to(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.broadcast_to."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      inputs_to_ignore=[1],  # Shape tensor does not need to be quantized.
+  )
+
+
+def materialize_sqrt(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.sqrt."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+  )
+
+
+def materialize_hard_swish(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.hard_swish."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+  )
+
+
+def materialize_gather(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.gather."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      inputs_to_ignore=[1],  # Indices do not need to be quantized.
+  )
+
+
+def materialize_reduce_min(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.reduce_min."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      inputs_to_ignore=[1],  # Axis index does not need to be quantized.
+  )
+
+
+def materialize_equal(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.equal."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+  )
+
+
+def materialize_not_equal(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.not_equal."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+  )
+
+
+def materialize_relu(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.relu."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+  )
+
+
 def _get_tensor_shape_for_blockwise(
     tensor_shape: Sequence[int], quantized_dim: int, block_size: int
 ) -> list[int]:
```

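All of the new handlers above share one shape and differ only in the constraint and the ignored inputs. A hypothetical, package-free rendering of the pattern (the `materialize_standard_op` below is a stub, not the real `common_utils` function):

```python
from functools import partial

def materialize_standard_op(op, *, constraint=None, inputs_to_ignore=None):
  """Stub standing in for common_utils.materialize_standard_op."""
  return {"op": op, "constraint": constraint, "ignore": inputs_to_ignore}

# e.g. tfl.gather pins the output scale to the input scale and skips its
# integer indices, while tfl.maximum propagates the output scale to inputs.
materialize_gather = partial(
    materialize_standard_op,
    constraint="SAME_AS_INPUT_SCALE",
    inputs_to_ignore=[1],
)
materialize_maximum = partial(
    materialize_standard_op, constraint="SAME_AS_OUTPUT_SCALE")

print(materialize_gather("tfl.gather"))
```
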
```diff
@@ -700,18 +1131,29 @@ def _get_tensor_shape_for_blockwise(


 def _reshape_data_for_blockwise(
-    tensor_data: np.ndarray, quantized_dim: int, block_size: int
+    tensor_data: np.ndarray,
+    quantized_dim: int,
+    block_size: int,
 ) -> tuple[np.ndarray, int]:
   """Reshapes data for blockwise quantization.

   Args:
     tensor_data: The original tensor data.
     quantized_dim: The dimension to be quantized blockwise.
-    block_size: The size of the block.
+    block_size: The size of the block. `block_size` must be a multiple of 32.
+      The tensor quantized dimension shape must be divisible by `block_size`.

   Returns:
     A tuple containing the reshaped tensor data and the new reduce dimension.
   """
+
+  # TODO: b/417508018 - create AEQ specific error class instead of
+  # using generic ValueError.
+  if tensor_data.shape[quantized_dim] % block_size != 0:
+    raise ValueError(
+        "Tensor quantization dimension must be divisible by block size for"
+        " blockwise quantization."
+    )
   new_shape = _get_tensor_shape_for_blockwise(
       tensor_data.shape, quantized_dim, block_size
   )
```

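What the reshape does, in plain numpy (the shapes match the new unit test further down): a `(24, 128)` tensor blocked at 32 along dim 1 becomes `(24, 4, 32)`, and min/max are then reduced over the new last axis:

```python
import numpy as np

data = np.arange(24 * 128, dtype=np.float32).reshape(24, 128)
block_size, quantized_dim = 32, 1

blocked = data.reshape(24, 128 // block_size, block_size)  # (24, 4, 32)
reduce_dim = 2                                             # per-block axis
block_min = blocked.min(axis=reduce_dim)                   # shape (24, 4)
block_max = blocked.max(axis=reduce_dim)
print(blocked.shape, block_min.shape)
```
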
```diff
@@ -783,42 +1225,36 @@ def init_tensor_min_max(
     A dictionary containing the min/max values for the tensor, or an empty
     dictionary if the tensor data is None.
   """
-  if tensor_data is None:
+  weight_tensor_config = op_info.op_quant_config.weight_tensor_config
+  if tensor_data is None or weight_tensor_config is None:
     return {}
   else:
-
-
-
-
-
+    # Get reduce dimension for min/max calculation based on quantization
+    # granularity.
+    granularity = weight_tensor_config.granularity
+    if granularity == qtyping.QuantGranularity.TENSORWISE:
+      reduce_dims = None
+      keep_dims = True
+    elif granularity == qtyping.QuantGranularity.CHANNELWISE:
       quantized_dim = common_utils.get_weight_quantized_dim(
-          op_info, tensor_data
-      )
-    if (
-        weight_tensor_config is not None
-        and weight_tensor_config.granularity
-        == qtyping.QuantGranularity.BLOCKWISE
-    ):
-      quantized_dim = (
-          tfl_flatbuffer_utils.TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM[
-              op_info.op_name
-          ]
-      )
-      reshaped_data, reduce_dims = _reshape_data_for_blockwise(
-          tensor_data,
-          quantized_dim,
-          weight_tensor_config.block_size,
+          op_info, tensor_data, weight_tensor_config.granularity
       )
-      return {
-          "min": np.min(reshaped_data, axis=reduce_dims, keepdims=False),
-          "max": np.max(reshaped_data, axis=reduce_dims, keepdims=False),
-      }
-
-    else:
       reduce_dims = common_utils.get_reduce_dims(
           quantized_dim, tensor_data.shape
       )
-      return {
-          "min": np.min(tensor_data, axis=reduce_dims, keepdims=True),
-          "max": np.max(tensor_data, axis=reduce_dims, keepdims=True),
-      }
+      keep_dims = True
+    elif uniform_quantize_tensor.is_blockwise(granularity):
+      tensor_data, reduce_dims = (
+          uniform_quantize_tensor.reshape_data_for_blockwise(
+              tensor_data,
+              op_info.op_name,
+              granularity,
+          )
+      )
+      keep_dims = False
+    else:
+      raise ValueError(f"Unsupported granularity: {granularity}")
+    return {
+        "min": np.min(tensor_data, axis=reduce_dims, keepdims=keep_dims),
+        "max": np.max(tensor_data, axis=reduce_dims, keepdims=keep_dims),
+    }
```

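The rewritten `init_tensor_min_max` reduces over different axes per granularity. A numpy sketch of the three reductions (mock data; the real code derives `quantized_dim` from op metadata):

```python
import numpy as np

data = np.random.randn(8, 64).astype(np.float32)  # mock weight tensor

# TENSORWISE: one min/max for the whole tensor (reduce_dims=None, keepdims).
t_min = np.min(data, axis=None, keepdims=True)          # shape (1, 1)

# CHANNELWISE, quantized_dim=0: reduce every other dimension.
c_min = np.min(data, axis=(1,), keepdims=True)          # shape (8, 1)

# BLOCKWISE, block_size=32 on dim 1: reshape first, then reduce the block axis.
b_min = np.min(data.reshape(8, 2, 32), axis=2)          # shape (8, 2)
print(t_min.shape, c_min.shape, b_min.shape)
```
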
ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py

```diff
@@ -31,8 +31,7 @@ _TensorQuantConfig = qtyping.TensorQuantizationConfig


 class CommonQuantizeTest(parameterized.TestCase):
-  """Tests for general quantize functions.
-  """
+  """Tests for general quantize functions."""

   def setUp(self):
     super().setUp()
```

```diff
@@ -69,6 +68,34 @@ class CommonQuantizeTest(parameterized.TestCase):
         default_policy.DEFAULT_CONFIG_CHECK_POLICY,
     )

+  def test_reshape_data_for_blockwise_raises_error_when_quantized_dim_not_divisible_by_block_size(
+      self,
+  ):
+    tensor_data = np.ones((24, 128), dtype=np.float32)
+    block_size = 256
+    quantized_dim = 1
+    with self.assertRaisesWithPredicateMatch(
+        ValueError,
+        lambda err: (
+            "Tensor quantization dimension must be divisible by block"
+            " size for blockwise quantization."
+        )
+        in str(err),
+    ):
+      common_quantize._reshape_data_for_blockwise(
+          tensor_data, quantized_dim, block_size
+      )
+
+  def test_reshape_data_for_blockwise_returns_correct_values(self):
+    tensor_data = np.ones((24, 128), dtype=np.float32)
+    block_size = 32
+    quantized_dim = 1
+    new_tensor_data, reduce_dim = common_quantize._reshape_data_for_blockwise(
+        tensor_data, quantized_dim, block_size
+    )
+    self.assertEqual(new_tensor_data.shape, (24, 4, 32))
+    self.assertEqual(reduce_dim, 2)
+

 if __name__ == "__main__":
   googletest.main()
```