ai-edge-quantizer-nightly 0.4.0.dev20251008__py3-none-any.whl → 0.5.0.dev20251121__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- ai_edge_quantizer/algorithm_manager.py +5 -0
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +49 -25
- ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py +1 -1
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py +1 -2
- ai_edge_quantizer/algorithms/uniform_quantize/mse.py +5 -3
- ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py +1 -1
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +6 -11
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +18 -14
- ai_edge_quantizer/algorithms/uniform_quantize/octav.py +9 -5
- ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py +1 -2
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +40 -13
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +5 -2
- ai_edge_quantizer/algorithms/utils/common_utils.py +46 -33
- ai_edge_quantizer/calibrator.py +1 -50
- ai_edge_quantizer/calibrator_test.py +2 -67
- ai_edge_quantizer/default_policy.py +9 -18
- ai_edge_quantizer/qtyping.py +25 -3
- ai_edge_quantizer/quantizer.py +25 -2
- ai_edge_quantizer/quantizer_test.py +56 -6
- ai_edge_quantizer/recipe_manager_test.py +0 -6
- ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation.py +8 -0
- ai_edge_quantizer/utils/constrained_ops_utils_test.py +1 -1
- ai_edge_quantizer/utils/tfl_flatbuffer_utils.py +1 -0
- ai_edge_quantizer/utils/validation_utils.py +80 -5
- ai_edge_quantizer/utils/validation_utils_test.py +56 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20251121.dist-info}/METADATA +11 -2
- {ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20251121.dist-info}/RECORD +30 -30
- {ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20251121.dist-info}/WHEEL +1 -1
- {ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20251121.dist-info/licenses}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20251121.dist-info}/top_level.txt +0 -0

--- a/ai_edge_quantizer/algorithm_manager.py
+++ b/ai_edge_quantizer/algorithm_manager.py
@@ -132,6 +132,7 @@ MIN_MAX_OP_NAME_MATERIALIZE_FUNC_DICT = {
     _TFLOpName.EQUAL: common_quantize.materialize_equal,
     _TFLOpName.NOT_EQUAL: common_quantize.materialize_not_equal,
     _TFLOpName.MIRROR_PAD: common_quantize.materialize_mirror_pad,
+    _TFLOpName.SPACE_TO_DEPTH: common_quantize.materialize_space_to_depth,
 }
 for op_name, materialize_func in MIN_MAX_OP_NAME_MATERIALIZE_FUNC_DICT.items():
   register_quantized_op(
@@ -286,6 +287,7 @@ _OCTAV_OP_NAME_MATERIALIZE_FUNC_DICT = immutabledict({
     _TFLOpName.EQUAL: common_quantize.materialize_equal,
     _TFLOpName.NOT_EQUAL: common_quantize.materialize_not_equal,
     _TFLOpName.MIRROR_PAD: common_quantize.materialize_mirror_pad,
+    _TFLOpName.SPACE_TO_DEPTH: common_quantize.materialize_space_to_depth,
 })
 
 for op_name, materialize_func in _OCTAV_OP_NAME_MATERIALIZE_FUNC_DICT.items():
@@ -380,6 +382,9 @@ register_config_check_policy_func(
 _MSE_OP_NAME_MATERIALIZE_FUNC_DICT = immutabledict({
     _TFLOpName.FULLY_CONNECTED: common_quantize.materialize_fc_conv,
     _TFLOpName.EMBEDDING_LOOKUP: common_quantize.materialize_embedding_lookup,
+    _TFLOpName.CONV_2D: common_quantize.materialize_fc_conv,
+    _TFLOpName.DEPTHWISE_CONV_2D: common_quantize.materialize_fc_conv,
+    _TFLOpName.CONV_2D_TRANSPOSE: common_quantize.materialize_conv2d_transpose,
 })
 for (
     op_name,

--- a/ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py
+++ b/ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py
@@ -776,6 +776,33 @@ def materialize_mirror_pad(
   )
 
 
+def materialize_space_to_depth(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.space_to_depth.
+
+  Args:
+    get_tensor_quant_params_fn: Function to get quantization parameters for the
+      tensor.
+    op_info: Aggregated information about the op (e.g., quantization config).
+    graph_info: Graph information needed to perform quantization for the op.
+    tensor_name_to_qsv: A map of tensor name to quantization parameters.
+
+  Returns:
+    A list of `qtyping.TensorTransformationParams` for the tensors in the op.
+  """
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+  )
+
+
 def materialize_squared_difference(
     get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
     op_info: qtyping.OpInfo,
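
A note on the constraint choice above: `SAME_AS_INPUT_SCALE` fits SPACE_TO_DEPTH because the op only rearranges values between spatial and channel dimensions, so the output holds exactly the same value set as the input and can reuse its quantization parameters. A minimal NHWC sketch illustrating this (a reference implementation for illustration, not the TFLite kernel):

```python
import numpy as np

def space_to_depth(x: np.ndarray, block: int) -> np.ndarray:
  """Reference NHWC space_to_depth; moves spatial blocks into channels."""
  n, h, w, c = x.shape
  x = x.reshape(n, h // block, block, w // block, block, c)
  x = x.transpose(0, 1, 3, 2, 4, 5)
  return x.reshape(n, h // block, w // block, c * block * block)

x = np.random.uniform(-3, 3, size=(1, 4, 4, 2)).astype(np.float32)
y = space_to_depth(x, block=2)
# Pure data movement: identical value set, so min/max (and scale) carry over.
assert np.isclose(x.min(), y.min()) and np.isclose(x.max(), y.max())
```
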
@@ -1138,39 +1165,36 @@ def init_tensor_min_max(
     A dictionary containing the min/max values for the tensor, or an empty
     dictionary if the tensor data is None.
   """
-  …
+  weight_tensor_config = op_info.op_quant_config.weight_tensor_config
+  if tensor_data is None or weight_tensor_config is None:
     return {}
   else:
-  …
+    # Get reduce dimension for min/max calculation based on quantization
+    # granularity.
+    granularity = weight_tensor_config.granularity
+    if granularity == qtyping.QuantGranularity.TENSORWISE:
+      reduce_dims = None
+      keep_dims = True
+    elif granularity == qtyping.QuantGranularity.CHANNELWISE:
       quantized_dim = common_utils.get_weight_quantized_dim(
           op_info, tensor_data, weight_tensor_config.granularity
       )
-  …
-    ):
-  …
+      reduce_dims = common_utils.get_reduce_dims(
+          quantized_dim, tensor_data.shape
+      )
+      keep_dims = True
+    elif uniform_quantize_tensor.is_blockwise(granularity):
+      tensor_data, reduce_dims = (
           uniform_quantize_tensor.reshape_data_for_blockwise(
               tensor_data,
               op_info.op_name,
-  …
+              granularity,
           )
       )
-  …
-          "min": np.min(reshaped_data, axis=reduce_dims, keepdims=False),
-          "max": np.max(reshaped_data, axis=reduce_dims, keepdims=False),
-      }
-  …
+      keep_dims = False
     else:
-  …
-          "max": np.max(tensor_data, axis=reduce_dims, keepdims=True),
-      }
+      raise ValueError(f"Unsupported granularity: {granularity}")
+    return {
+        "min": np.min(tensor_data, axis=reduce_dims, keepdims=keep_dims),
+        "max": np.max(tensor_data, axis=reduce_dims, keepdims=keep_dims),
+    }
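
The rewritten `init_tensor_min_max` funnels every granularity through a single `np.min`/`np.max` reduction; each branch only decides which axes to reduce and whether to keep them. A standalone sketch of that control flow for a 2-D weight, with string granularities standing in for `qtyping.QuantGranularity` and an inlined equivalent of `common_utils.get_reduce_dims`:

```python
import numpy as np

def min_max_for_granularity(
    w: np.ndarray, granularity: str, quantized_dim: int = 0, block: int = 32
):
  """Sketch of the unified reduction; assumes a 2-D weight `w`."""
  if granularity == "TENSORWISE":
    reduce_dims, keep_dims = None, True       # one min/max for the whole tensor
  elif granularity == "CHANNELWISE":
    reduce_dims = tuple(i for i in range(w.ndim) if i != quantized_dim)
    keep_dims = True                          # one min/max per output channel
  elif granularity.startswith("BLOCKWISE"):
    rows, cols = w.shape
    w = w.reshape(rows, cols // block, block)  # new block axis, as in
    reduce_dims, keep_dims = 2, False          # reshape_data_for_blockwise
  else:
    raise ValueError(f"Unsupported granularity: {granularity}")
  return (
      np.min(w, axis=reduce_dims, keepdims=keep_dims),
      np.max(w, axis=reduce_dims, keepdims=keep_dims),
  )

w = np.random.randn(4, 64).astype(np.float32)
for g in ("TENSORWISE", "CHANNELWISE", "BLOCKWISE_32"):
  mn, mx = min_max_for_granularity(w, g)
  print(g, mn.shape, mx.shape)   # (1, 1), then (4, 1), then (4, 2)
```
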

--- a/ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py
+++ b/ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py
@@ -158,7 +158,7 @@ def get_tensor_quant_params(
       op_info, tensor_quant_config, tensor_content, tensor_qsv
   )
 
-  if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
+  if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
     raise ValueError(
         "Blockwise quantization is not supported for dequantized weight"
         " recovery."

--- a/ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py
+++ b/ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py
@@ -147,8 +147,7 @@ class HadamardRotationFullyConnectedTest(parameterized.TestCase):
         weight_tensor_config=_TensorQuantConfig(
             num_bits=8,
             symmetric=True,
-            granularity=qtyping.QuantGranularity.BLOCKWISE,
-            block_size=32,
+            granularity=qtyping.QuantGranularity.BLOCKWISE_32,
         ),
     ),
 )

--- a/ai_edge_quantizer/algorithms/uniform_quantize/mse.py
+++ b/ai_edge_quantizer/algorithms/uniform_quantize/mse.py
@@ -55,7 +55,7 @@ def get_tensor_quant_params(
     ValueError: `tensor_qsv` must contain min/max values, or `tensor_content`
       must be provided so that they can be inferred.
   """
-  if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
+  if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
     raise ValueError(
         "Blockwise quantization is not supported for MSE quantization."
     )
@@ -113,13 +113,15 @@ def get_tensor_quant_params(
       num_bits=tensor_quant_config.num_bits,
       symmetric=tensor_quant_config.symmetric,
       quantized_dimension=quantized_dim,
-      block_size=tensor_quant_config.block_size,
+      block_size=uniform_quantize_tensor.extract_block_size_from_granularity(
+          tensor_quant_config.granularity
+      ),
   )
 
   quantized_vars = uniform_quantize_tensor.uniform_quantize(
       tensor_content,
       quant_params,
-      tensor_quant_config.granularity…
+      uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity),
   )
 
   return dataclasses.replace(quant_params, quantized_data=quantized_vars)

--- a/ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py
+++ b/ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py
@@ -84,7 +84,7 @@ class MseQuantizeTest(parameterized.TestCase):
         tensor_quant_config=qtyping.TensorQuantizationConfig(
             num_bits=4,
             symmetric=True,
-            granularity=qtyping.QuantGranularity.BLOCKWISE,
+            granularity=qtyping.QuantGranularity.BLOCKWISE_32,
         ),
         tensor_content=test_data,
     )

--- a/ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py
+++ b/ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py
@@ -15,6 +15,7 @@
 
 """Performs naive min/max uniform quantization."""
 
+import dataclasses
 from typing import Any, Optional
 import numpy as np
 from ai_edge_quantizer import qtyping
@@ -91,7 +92,9 @@ def get_tensor_quant_params(
       num_bits=tensor_quant_config.num_bits,
       symmetric=tensor_quant_config.symmetric,
       quantized_dimension=quantized_dim,
-      block_size=tensor_quant_config.block_size,
+      block_size=uniform_quantize_tensor.extract_block_size_from_granularity(
+          tensor_quant_config.granularity
+      ),
   )
   if tensor_content is None:
     return quant_params
@@ -99,18 +102,10 @@ def get_tensor_quant_params(
   quantized_vars = uniform_quantize_tensor.uniform_quantize(
       tensor_content,
       quant_params,
-      tensor_quant_config.granularity…
+      uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity),
   )
   # Update with quantized values.
-  return qtyping.UniformQuantParams(
-      scale=scale,
-      zero_point=zp,
-      num_bits=tensor_quant_config.num_bits,
-      symmetric=tensor_quant_config.symmetric,
-      quantized_dimension=quantized_dim,
-      quantized_data=quantized_vars,
-      block_size=tensor_quant_config.block_size,
-  )
+  return dataclasses.replace(quant_params, quantized_data=quantized_vars)
 
 
 # TODO: b/333731147 - Use named tuple to store min/max.
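
`dataclasses.replace` (also used by `octav.py` and `mse.py` above) copies a frozen dataclass and overrides selected fields, which is why the hand-written `qtyping.UniformQuantParams(...)` constructor call could be deleted. The pattern, with a toy stand-in dataclass (field names illustrative):

```python
import dataclasses
from typing import Optional

import numpy as np

# Stand-in for qtyping.UniformQuantParams, just to show the pattern.
@dataclasses.dataclass(frozen=True)
class Params:
  num_bits: int
  scale: np.ndarray
  quantized_data: Optional[np.ndarray] = None

base = Params(num_bits=4, scale=np.array([0.5], dtype=np.float32))
# Copy everything, override only quantized_data.
quantized = dataclasses.replace(base, quantized_data=np.array([3], dtype=np.int8))
assert quantized.num_bits == 4 and quantized.quantized_data is not None
```
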

--- a/ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py
+++ b/ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py
@@ -17,6 +17,7 @@ import os
 from typing import cast
 
 from absl.testing import parameterized
+import ml_dtypes
 import numpy as np
 
 from tensorflow.python.platform import googletest
@@ -165,8 +166,7 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
     weight_tensor_config = _TensorQuantConfig(
         num_bits=4,
         symmetric=True,
-        granularity=qtyping.QuantGranularity.BLOCKWISE,
-        block_size=2,
+        granularity=qtyping.QuantGranularity.BLOCKWISE_32,
     )
     op_info = qtyping.OpInfo(
         op=fc_op,
@@ -176,28 +176,32 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
             weight_tensor_config=weight_tensor_config,
         ),
     )
-    test_data = np.…
+    test_data = np.random.uniform(low=-10, high=10, size=(4, 32)).astype(
+        np.float32
+    )
     quant_params = naive_min_max_quantize.get_tensor_quant_params(
         op_info=op_info,
         tensor_quant_config=weight_tensor_config,
         tensor_content=test_data,
     )
-    scale = quant_params.scale
     zp = quant_params.zero_point
-    …
+    self.assertEqual(zp.shape, (4, 1))
+    self.assertTrue(np.array_equal(zp, np.zeros([4, 1])))
+
+    self.assertEqual(quant_params.scale.shape, (4, 1))
+    expected_scales = np.max(np.abs(test_data), axis=1, keepdims=True) / 7.0
+    expected_scales = (
+        expected_scales.astype(ml_dtypes.bfloat16)
+        .astype(np.float16)
+        .astype(np.float32)
+    )
+    self.assertTrue(np.allclose(quant_params.scale, expected_scales, atol=1e-5))
+
     self.assertIsNotNone(quant_params.quantized_data)
     self.assertTupleEqual(
         cast(np.ndarray, quant_params.quantized_data).shape, test_data.shape
     )
-    self.assertEqual(quant_params.block_size, 2)
+    self.assertEqual(quant_params.block_size, 32)
     self.assertEqual(quant_params.quantized_dimension, 1)
 
   def test_calibrate_ignores_inf_min_max(self):
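
Where the expected values in this test come from: symmetric int4 has qmax = 2^(4-1) - 1 = 7, so each 32-wide block's scale is max|w| / 7, and blockwise scales are then rounded to a 7-bit mantissa via the bfloat16 round-trip. A worked sketch under those assumptions:

```python
import ml_dtypes
import numpy as np

w = np.random.uniform(low=-10, high=10, size=(4, 32)).astype(np.float32)
qmax = 2 ** (4 - 1) - 1                       # symmetric int4 -> qmax = 7
scales = np.max(np.abs(w), axis=1, keepdims=True) / qmax
# Blockwise scales are rounded to a 7-bit mantissa before use; this is the
# same cast chain the test applies.
scales = scales.astype(ml_dtypes.bfloat16).astype(np.float16).astype(np.float32)
q = np.clip(np.rint(w / scales), -8, 7)       # int4 range [-8, 7]
assert q.shape == w.shape and q.min() >= -8 and q.max() <= 7
```
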

--- a/ai_edge_quantizer/algorithms/uniform_quantize/octav.py
+++ b/ai_edge_quantizer/algorithms/uniform_quantize/octav.py
@@ -131,12 +131,12 @@ def get_tensor_quant_params(
   quantized_dim = common_utils.get_weight_quantized_dim(
       op_info, tensor_content, tensor_quant_config.granularity
   )
-  if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
+  if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
     reshaped_data, reduce_dims = (
         uniform_quantize_tensor.reshape_data_for_blockwise(
             tensor_content,
             op_info.op_name,
-            tensor_quant_config.block_size,
+            tensor_quant_config.granularity,
         )
     )
   else:
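
For context on the clipping constants handled in this file: OCTAV (Sakr et al., 2022) derives MSE-optimal clipping scalars through a fixed-point iteration, applied per tensor, channel, or block depending on the reduction shape above. A self-contained sketch of that iteration; the module's exact implementation may differ:

```python
import numpy as np

def octav_clipping_constant(x: np.ndarray, num_bits: int, iters: int = 30) -> float:
  """Fixed-point iteration for the MSE-optimal clipping scalar (Sakr et al., 2022)."""
  s = float(np.mean(np.abs(x))) + 1e-12
  for _ in range(iters):
    outside = np.abs(x) > s
    # s <- E[|x| * 1(|x|>s)] / ((4^-B / 3) * P(|x|<=s) + P(|x|>s)); the sample
    # counts below cancel the expectation denominators.
    num = np.sum(np.abs(x[outside]))
    den = (4.0 ** -num_bits / 3.0) * np.sum(~outside) + np.sum(outside)
    s = float(num / den)
  return s

x = np.random.randn(10_000).astype(np.float32)
print(octav_clipping_constant(x, num_bits=4))  # clips well below max|x|
```
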
@@ -154,7 +154,7 @@ def get_tensor_quant_params(
   # We created a new dimension in order to reduce properly for blockwise
   # quantization, so we need to reshape the clipping constants back to the
   # min/max shape for the next step.
-  if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
+  if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
     clipping_constants = clipping_constants.reshape(tensor_min_max["min"].shape)
 
   zp, scale = uniform_quantize_tensor.tensor_zp_scale_from_min_max(
@@ -172,13 +172,17 @@ def get_tensor_quant_params(
       num_bits=tensor_quant_config.num_bits,
       symmetric=tensor_quant_config.symmetric,
       quantized_dimension=quantized_dim,
-      block_size=tensor_quant_config.block_size,
+      block_size=uniform_quantize_tensor.extract_block_size_from_granularity(
+          tensor_quant_config.granularity
+      ),
   )
 
   quantized_vars = uniform_quantize_tensor.uniform_quantize(
       tensor_content,
       quant_params,
-      …
+      is_blockwise_quant=uniform_quantize_tensor.is_blockwise(
+          tensor_quant_config.granularity
+      ),
   )
 
   return dataclasses.replace(quant_params, quantized_data=quantized_vars)

--- a/ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py
+++ b/ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py
@@ -196,8 +196,7 @@ class OctavQuantizeTest(parameterized.TestCase):
     tensor_config = qtyping.TensorQuantizationConfig(
         num_bits=4,
         symmetric=True,
-        granularity=qtyping.QuantGranularity.BLOCKWISE,
-        block_size=32,
+        granularity=qtyping.QuantGranularity.BLOCKWISE_32,
     )
     fc_op_info = qtyping.OpInfo(
         op=self._fc_op,

--- a/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py
+++ b/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py
@@ -29,6 +29,11 @@ class IntType:
   signed: bool
 
 
+def is_blockwise(granularity: qtyping.QuantGranularity) -> bool:
+  """Checks if the quantization granularity is blockwise."""
+  return "BLOCKWISE" in str(granularity)
+
+
 def get_quantized_range(qtype: IntType) -> tuple[float, float]:
   """Calculates range of the quantized type."""
   if qtype.signed:
@@ -40,6 +45,22 @@ def get_quantized_range(qtype: IntType) -> tuple[float, float]:
   return float(qmin), float(qmax)
 
 
+def extract_block_size_from_granularity(
+    granularity: qtyping.QuantGranularity,
+) -> int:
+  """Get the block size for blockwise quantization."""
+  if granularity == qtyping.QuantGranularity.BLOCKWISE_32:
+    return 32
+  elif granularity == qtyping.QuantGranularity.BLOCKWISE_64:
+    return 64
+  elif granularity == qtyping.QuantGranularity.BLOCKWISE_128:
+    return 128
+  elif granularity == qtyping.QuantGranularity.BLOCKWISE_256:
+    return 256
+  else:
+    return 0
+
+
 def _round_and_clip(
     tensor: np.ndarray, qtype: IntType, narrow: bool
 ) -> np.ndarray:
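
Both new helpers key off the enum member itself, so the block size that used to travel in a separate `block_size` config field is now encoded in the granularity name. A self-contained sketch with a stand-in enum (the real type is `qtyping.QuantGranularity`; the name-parsing variant is an illustrative equivalent of the shipped if/elif chain):

```python
import enum

class QuantGranularity(enum.Enum):  # stand-in for qtyping.QuantGranularity
  TENSORWISE = "TENSORWISE"
  CHANNELWISE = "CHANNELWISE"
  BLOCKWISE_32 = "BLOCKWISE_32"
  BLOCKWISE_64 = "BLOCKWISE_64"

def is_blockwise(granularity: QuantGranularity) -> bool:
  # str(QuantGranularity.BLOCKWISE_32) == "QuantGranularity.BLOCKWISE_32".
  return "BLOCKWISE" in str(granularity)

def extract_block_size(granularity: QuantGranularity) -> int:
  # Parse the size out of the member name; 0 for non-blockwise granularities.
  return int(granularity.name.rsplit("_", 1)[1]) if is_blockwise(granularity) else 0

assert is_blockwise(QuantGranularity.BLOCKWISE_64)
assert extract_block_size(QuantGranularity.BLOCKWISE_64) == 64
assert extract_block_size(QuantGranularity.CHANNELWISE) == 0
```
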
@@ -157,14 +178,16 @@ def _get_tensor_shape_for_blockwise(
 
 
 def reshape_data_for_blockwise(
-    tensor_data: np.ndarray, op_name: qtyping.TFLOperationName, block_size: int
+    tensor_data: np.ndarray,
+    op_name: qtyping.TFLOperationName,
+    granularity: qtyping.QuantGranularity,
 ) -> tuple[np.ndarray, int]:
   """Reshapes data for blockwise quantization.
 
   Args:
     tensor_data: The original tensor data.
     op_name: The name of the TFL op.
-    block_size: The size of the block.
+    granularity: The quantization granularity for the tensor.
 
   Returns:
     A tuple containing the reshaped tensor data and the new reduce dimension.
@@ -172,11 +195,11 @@ def reshape_data_for_blockwise(
   quantized_dim = tfl_flatbuffer_utils.TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM[
       op_name
   ]
+  block_size = extract_block_size_from_granularity(granularity)
   new_shape = _get_tensor_shape_for_blockwise(
       tensor_data.shape, quantized_dim, block_size
   )
-  reshaped_data = tensor_data.reshape(new_shape)
-  return reshaped_data, quantized_dim + 1
+  return tensor_data.reshape(new_shape), quantized_dim + 1
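
What the reshape accomplishes: splitting the quantized dimension into `(num_blocks, block_size)` inserts a block axis at `quantized_dim + 1`, so a single reduction over that axis yields one statistic per block. A numpy sketch assuming a fully-connected weight with the blockwise quantized dimension at index 1 (the real mapping comes from `TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM`):

```python
import numpy as np

def reshape_for_blockwise(w: np.ndarray, quantized_dim: int, block: int):
  """Splits `quantized_dim` into (num_blocks, block); reduce over dim + 1."""
  shape = list(w.shape)
  shape[quantized_dim : quantized_dim + 1] = [shape[quantized_dim] // block, block]
  return w.reshape(shape), quantized_dim + 1

w = np.random.randn(4, 64).astype(np.float32)   # e.g. a fully-connected weight
blocked, reduce_dim = reshape_for_blockwise(w, quantized_dim=1, block=32)
block_max = np.max(np.abs(blocked), axis=reduce_dim)  # one value per 32-wide block
print(blocked.shape, block_max.shape)           # (4, 2, 32) (4, 2)
```
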
@@ -233,21 +256,21 @@ def _broadcast_scale_zp_for_blockwise(
 def uniform_quantize(
     tensor_data: np.ndarray,
     quantization_params: qtyping.UniformQuantParams,
-    …
+    is_blockwise_quant: bool = False,
 ):
   """Uniform quantize a tensor.
 
   Args:
     tensor_data: The tensor to be quantized.
     quantization_params: The quantization parameters.
-    …
+    is_blockwise_quant: Whether the tensor is blockwise quantized.
 
   Returns:
     The quantized tensor.
   """
   # The reshaping for blockwise quantization is unique hence we do this here
   # to avoid unexpected broadcast behavior downstream.
-  if …
+  if is_blockwise_quant:
     quantization_params = _broadcast_scale_zp_for_blockwise(
         tensor_data, quantization_params
     )
@@ -381,10 +404,13 @@ def symmetric_quantize_bias_tensor(
   quantized_vars = uniform_quantize(bias_content, bias_quant_params)
   if check_error:
     dequantized_bias = uniform_dequantize(quantized_vars, bias_quant_params)
-    …
-    …
+    max_quant_error = np.max(np.abs(dequantized_bias - bias_content))
+    error_tolerance = np.maximum(1e-6, np.max(effective_output_scale))
+    if max_quant_error > error_tolerance:
       raise ValueError(
-          "Quantization error is too large for bias tensor quantization."
+          "Quantization error is too large for bias tensor quantization. Max"
+          f" quantization error is {max_quant_error}, which exceed"
+          f" the threshold {error_tolerance}"
       )
 
   # Save the int32 quantized bias as int64 if the input tensor is quantized to
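
The new check compares the worst-case dequantization error against `max(1e-6, max(effective_output_scale))` and reports both numbers. A numeric sketch of the failure mode the message describes, with an illustrative scale (in the real pipeline the bias scale is the input scale times the weight scale):

```python
import numpy as np

bias = np.array([3e7])
bias_scale = np.array([0.01])            # stands in for input_scale * weight_scale
q = np.clip(np.rint(bias / bias_scale), -(2**31), 2**31 - 1)  # 3e9 -> int32 max
dequantized = q * bias_scale             # ~2.15e7 after clipping
max_quant_error = np.max(np.abs(dequantized - bias))          # ~8.5e6
error_tolerance = np.maximum(1e-6, np.max(bias_scale))
assert max_quant_error > error_tolerance  # the real code raises ValueError here
```
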
@@ -432,17 +458,18 @@ def tensor_zp_scale_from_min_max(
   Returns:
     The zero point and scale of the tensor.
   """
+
   # TODO: b/332574603 - support unsigned data type.
   qtype = IntType(
       num_bits,
       signed=True,
   )
   qmin, qmax = get_quantized_range(qtype)
-  min_bound = 1e-…
+  min_bound = 1e-9  # Avoid zero scale.
   pos_clipping_values = None if clipping_values is None else clipping_values
   neg_clipping_values = None if clipping_values is None else -clipping_values
 
-  if granularity == qtyping.QuantGranularity.BLOCKWISE:
+  if is_blockwise(granularity):
     # Blockwise quantization uses float16 scale,
     # with 7 bit mantissa, so the maximum scale value is 65280 and maximum
     # representable range is [-65280 * (2 ** num_bits),
@@ -490,7 +517,7 @@ def tensor_zp_scale_from_min_max(
   zp = qmin - bound_min / scale
   zp = np.rint(zp)
 
-  if granularity == qtyping.QuantGranularity.BLOCKWISE:
+  if is_blockwise(granularity):
     # Round the scale values to 7 bit mantissa.
     scale = (
         scale.astype(ml_dtypes.bfloat16).astype(np.float16).astype(np.float32)
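
The bfloat16 -> float16 -> float32 cast chain above is how blockwise scales get limited to a 7-bit mantissa (65280, mentioned in the nearby comment, is the largest such value that also fits in float16). Applying it directly:

```python
import ml_dtypes
import numpy as np

scales = np.array([0.123456789, 65280.0], dtype=np.float32)
rounded = scales.astype(ml_dtypes.bfloat16).astype(np.float16).astype(np.float32)
# Only 7 explicit mantissa bits survive: 0.12345679 -> ~0.1235; 65280 is
# representable in both formats and passes through unchanged.
print(scales, rounded)
```
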

--- a/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py
+++ b/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py
@@ -222,7 +222,7 @@ class TensorUtilsTest(parameterized.TestCase):
             zero_point=np.array([-6]),
             symmetric=True,
         ),
-        …
+        is_blockwise_quant=True,
     )
 
   @parameterized.parameters(
@@ -431,7 +431,10 @@ class TensorUtilsTest(parameterized.TestCase):
     )
     # This will result in quantized bias of 3e9, which is larger than int32 max.
     bias_tensor_data = np.array([3e7])
-    with self.assertRaises(ValueError):
+    with self.assertRaisesRegex(
+        ValueError,
+        "Quantization error is too large for bias tensor quantization.",
+    ):
       uniform_quantize_tensor.symmetric_quantize_bias_tensor(
           bias_tensor_data,
           input_quant_config,

--- a/ai_edge_quantizer/algorithms/utils/common_utils.py
+++ b/ai_edge_quantizer/algorithms/utils/common_utils.py
@@ -51,8 +51,9 @@ def check_subchannel_config(
   """Checks the op quantization config for subchannel quantization."""
   if (
       op_quant_config.weight_tensor_config is not None
-      and op_quant_config.weight_tensor_config.granularity
-      == qtyping.QuantGranularity.BLOCKWISE
+      and uniform_quantize_tensor.is_blockwise(
+          op_quant_config.weight_tensor_config.granularity
+      )
   ):
     if op_name not in _SUPPORTED_SUBCHANNEL_OPS:
       raise ValueError(f"Unsupported op for blockwise quantization: {op_name}.")
@@ -66,10 +67,6 @@ def check_subchannel_config(
         "Blockwise quantization does not support for asymmetric weight"
         " quantization."
     )
-    if op_quant_config.weight_tensor_config.block_size <= 0:
-      raise ValueError(
-          "Blockwise quantization must have a non-zero block size."
-      )
 
 
 def check_if_valid_op_config(
@@ -369,11 +366,28 @@ def _materialize_standard_op_with_same_as_input_scale(
 
   # Change output qsv to be the same as input qsv. This is safe since TFL
   # subgraph is acyclic.
-  input_tensor_qsv = tensor_name_to_qsv[input_tensor_params.tensor_name]
-  …
-  …
-  …
+  input_tensor_qsv = tensor_name_to_qsv.get(
+      input_tensor_params.tensor_name, None
+  )
+  if input_tensor_qsv is None:
+    input_tensor_data = tfl_flatbuffer_utils.get_tensor_data(
+        input_tensors[0], graph_info.buffers
     )
+    # If the input tensor is a constant tensor without qsv, compute qsv from
+    # its quant params.
+    if input_tensor_data is None:
+      # If the only input to an op that needs to match input to
+      # output has no qsv and is not a constant tensor, then this is an error.
+      raise ValueError(
+          "Input tensor qsv is None for tensor"
+          f" {input_tensor_params.tensor_name}."
+      )
+    min_val, max_val = _get_min_max_from_quant_params(input_quant_params)
+    input_tensor_qsv = {"min": min_val, "max": max_val}
+  for output_tensor in output_tensors:
+    tensor_name_to_qsv[
+        tfl_flatbuffer_utils.get_tensor_name(output_tensor)
+    ] = input_tensor_qsv
 
   return op_tensor_params
@@ -697,6 +711,26 @@ def _add_non_match_tensors_to_ignored_lists(
   return inputs_to_ignore, outputs_to_ignore
 
 
+def _get_min_max_from_quant_params(
+    quant_params: qtyping.UniformQuantParams,
+) -> tuple[np.ndarray, np.ndarray]:
+  """Recalculate min/max from tensor quantization params."""
+  q_min, q_max = uniform_quantize_tensor.get_quantized_range(
+      _IntType(quant_params.num_bits, True)
+  )
+  float_min = uniform_quantize_tensor.uniform_dequantize(
+      np.array(q_min), quant_params
+  )
+  float_max = uniform_quantize_tensor.uniform_dequantize(
+      np.array(q_max), quant_params
+  )
+  # We use qmax values to compute scale for symmetric quantization (see
+  # uniform_quantize_tensor.tensor_zp_scale_from_min_max).
+  if quant_params.symmetric:
+    float_min = -float_max
+  return float_min, float_max
+
+
 def materialize_standard_op(
     op_info: qtyping.OpInfo,
     graph_info: qtyping.GraphInfo,
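
The relocated `_get_min_max_from_quant_params` now reads `num_bits` and `symmetric` from the params object instead of taking them as separate arguments. Its arithmetic, hand-rolled for an int8 symmetric tensor (values illustrative):

```python
import numpy as np

num_bits, symmetric = 8, True
scale, zero_point = np.float32(0.05), 0
q_min, q_max = -(2 ** (num_bits - 1)), 2 ** (num_bits - 1) - 1  # -128, 127
float_min = (q_min - zero_point) * scale   # dequantize qmin -> -6.4
float_max = (q_max - zero_point) * scale   # dequantize qmax ->  6.35
if symmetric:
  # Scales are derived from qmax (see tensor_zp_scale_from_min_max), so the
  # symmetric range is mirrored around qmax's dequantized value.
  float_min = -float_max                   # -6.35
print(float_min, float_max)
```
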
@@ -863,8 +897,6 @@ def materialize_op_with_output_activation_constraint(
   output_tensor_params.producer = op_tensor_params
   # Update the tensor_name_to_qsv map using the output activation constraints.
   min_val, max_val = _get_min_max_from_quant_params(
-      activation_num_bits,
-      activation_tensor_config.symmetric,
       fixed_quant_params,
   )
   tensor_name_to_qsv[output_tensor_params.tensor_name]["min"] = min_val
@@ -993,7 +1025,7 @@ def get_weight_quantized_dim(
     quantized_dim = tfl_flatbuffer_utils.TFL_OP_TO_WEIGHT_QUANTIZED_DIM.get(
         op_info.op_name, None
     )
-  elif granularity == qtyping.QuantGranularity.BLOCKWISE:
+  elif uniform_quantize_tensor.is_blockwise(granularity):
     quantized_dim = (
         tfl_flatbuffer_utils.TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM[
             op_info.op_name
@@ -1027,23 +1059,4 @@ def get_bmm_weight_quantized_dim(
     return rank - 1
 
 
-def _get_min_max_from_quant_params(
-    num_bits: int,
-    symmetric: bool,
-    tensor_params: qtyping.UniformQuantParams,
-) -> tuple[float, float]:
-  """Recalculate min/max from tensor quantization params."""
-  q_min, q_max = uniform_quantize_tensor.get_quantized_range(
-      _IntType(num_bits, True)
-  )
-  float_min = uniform_quantize_tensor.uniform_dequantize(
-      np.array(q_min), tensor_params
-  )
-  float_max = uniform_quantize_tensor.uniform_dequantize(
-      np.array(q_max), tensor_params
-  )
-  # We use qmax values to compute scale for symmetric quantization (see
-  # uniform_quantize_tensor.tensor_zp_scale_from_min_max).
-  if symmetric:
-    float_min = -float_max
-  return (float_min, float_max)
+