ai-edge-quantizer-nightly 0.0.1.dev20250302__py3-none-any.whl → 0.5.0.dev20260103__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- ai_edge_quantizer/algorithm_manager.py +224 -0
- ai_edge_quantizer/algorithm_manager_api_test.py +7 -0
- ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py +2 -2
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +643 -20
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py +29 -2
- ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py +29 -35
- ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py +35 -12
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py +414 -0
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py +440 -0
- ai_edge_quantizer/algorithms/uniform_quantize/mse.py +127 -0
- ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py +195 -0
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +54 -168
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +54 -17
- ai_edge_quantizer/algorithms/uniform_quantize/octav.py +188 -0
- ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py +240 -0
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +260 -13
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +152 -5
- ai_edge_quantizer/algorithms/utils/common_utils.py +142 -54
- ai_edge_quantizer/calibrator.py +58 -94
- ai_edge_quantizer/calibrator_test.py +5 -74
- ai_edge_quantizer/default_policy.py +108 -16
- ai_edge_quantizer/model_modifier.py +132 -8
- ai_edge_quantizer/model_modifier_test.py +81 -1
- ai_edge_quantizer/model_validator.py +38 -10
- ai_edge_quantizer/model_validator_test.py +2 -1
- ai_edge_quantizer/params_generator.py +230 -47
- ai_edge_quantizer/params_generator_test.py +366 -261
- ai_edge_quantizer/qtyping.py +92 -6
- ai_edge_quantizer/quantizer.py +167 -23
- ai_edge_quantizer/quantizer_test.py +288 -26
- ai_edge_quantizer/recipe.py +156 -21
- ai_edge_quantizer/recipe_manager.py +158 -1
- ai_edge_quantizer/recipe_manager_test.py +146 -32
- ai_edge_quantizer/recipe_test.py +93 -17
- ai_edge_quantizer/transformation_instruction_generator.py +313 -46
- ai_edge_quantizer/transformation_instruction_generator_test.py +449 -27
- ai_edge_quantizer/transformation_performer.py +112 -58
- ai_edge_quantizer/transformation_performer_test.py +176 -4
- ai_edge_quantizer/transformations/duplicate_buffer.py +46 -0
- ai_edge_quantizer/transformations/duplicate_buffer_test.py +106 -0
- ai_edge_quantizer/transformations/duplicate_tensor.py +62 -0
- ai_edge_quantizer/transformations/duplicate_tensor_test.py +131 -0
- ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation.py +299 -0
- ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation_test.py +244 -0
- ai_edge_quantizer/transformations/insert_hadamard_rotation.py +186 -0
- ai_edge_quantizer/transformations/insert_hadamard_rotation_test.py +200 -0
- ai_edge_quantizer/transformations/quantize_tensor.py +24 -44
- ai_edge_quantizer/transformations/quantize_tensor_test.py +3 -2
- ai_edge_quantizer/transformations/transformation_utils.py +157 -11
- ai_edge_quantizer/transformations/transformation_utils_test.py +96 -2
- ai_edge_quantizer/utils/calibration_utils.py +263 -1
- ai_edge_quantizer/utils/calibration_utils_test.py +173 -3
- ai_edge_quantizer/utils/constrained_ops_utils.py +111 -0
- ai_edge_quantizer/utils/constrained_ops_utils_test.py +50 -0
- ai_edge_quantizer/utils/test_utils.py +191 -58
- ai_edge_quantizer/utils/tfl_flatbuffer_utils.py +96 -50
- ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py +20 -0
- ai_edge_quantizer/utils/tfl_interpreter_utils.py +138 -5
- ai_edge_quantizer/utils/tfl_interpreter_utils_test.py +29 -2
- ai_edge_quantizer/utils/validation_utils.py +114 -4
- ai_edge_quantizer/utils/validation_utils_test.py +80 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/METADATA +13 -3
- ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/RECORD +81 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/WHEEL +1 -1
- ai_edge_quantizer/transformations/emulated_subchannel.py +0 -363
- ai_edge_quantizer/transformations/emulated_subchannel_test.py +0 -212
- ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info/RECORD +0 -67
- {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/licenses}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20250302.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/top_level.txt +0 -0
ai_edge_quantizer/algorithms/utils/common_utils.py
CHANGED
@@ -41,6 +41,7 @@ _DRQ_OR_WEIGHT_ONLY_OPS = frozenset([
 
 _SUPPORTED_SUBCHANNEL_OPS = frozenset([
     _TFLOpName.FULLY_CONNECTED,
+    _TFLOpName.EMBEDDING_LOOKUP,
 ])
 
 
@@ -50,8 +51,9 @@ def check_subchannel_config(
   """Checks the op quantization config for subchannel quantization."""
   if (
       op_quant_config.weight_tensor_config is not None
-      and op_quant_config.weight_tensor_config.granularity
-      == qtyping.QuantGranularity.BLOCKWISE
+      and uniform_quantize_tensor.is_blockwise(
+          op_quant_config.weight_tensor_config.granularity
+      )
   ):
     if op_name not in _SUPPORTED_SUBCHANNEL_OPS:
       raise ValueError(f"Unsupported op for blockwise quantization: {op_name}.")
@@ -65,10 +67,6 @@ def check_subchannel_config(
         "Blockwise quantization does not support for asymmetric weight"
         " quantization."
     )
-    if op_quant_config.weight_tensor_config.block_size <= 0:
-      raise ValueError(
-          "Blockwise quantization must have a non-zero block size."
-      )
 
 
 def check_if_valid_op_config(
@@ -86,7 +84,6 @@ def check_if_valid_op_config(
   Raises:
     ValueError: If the op quantization config is not valid.
   """
-
   check_passed = False
   error_msg = ""
   # Check if find op_config in policy config_check_policy.
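Note: the checks above gate blockwise (subchannel) quantization, in which each weight channel is split into fixed-size blocks that each carry their own scale. A minimal numpy sketch of the idea, for orientation only (the helper name and shapes are illustrative, not the quantizer's implementation):

```python
import numpy as np

def quantize_blockwise(w: np.ndarray, block_size: int, num_bits: int = 4):
  """Symmetric blockwise quantization along the last axis (illustrative)."""
  qmax = 2 ** (num_bits - 1) - 1  # e.g. 7 for int4
  rows, cols = w.shape
  assert cols % block_size == 0, "block size must divide the reduction dim"
  blocks = w.reshape(rows, cols // block_size, block_size)
  # One scale per block, instead of one per tensor or per channel.
  scales = np.abs(blocks).max(axis=-1, keepdims=True) / qmax
  scales = np.maximum(scales, 1e-9)  # guard against all-zero blocks
  q = np.clip(np.round(blocks / scales), -qmax - 1, qmax)
  return q.reshape(rows, cols).astype(np.int8), scales.squeeze(-1)

w = np.random.randn(4, 8).astype(np.float32)
q, scales = quantize_blockwise(w, block_size=4)  # scales.shape == (4, 2)
```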
@@ -260,6 +257,60 @@ def _get_single_tensor_params(
   )
 
 
+def _materialize_tensors_with_quantized_data_update(
+    op_tensor_params: list[qtyping.TensorTransformationParams],
+    tensors: Sequence[Any],
+    quant_params: Optional[qtyping.UniformQuantParams],
+    is_inbounding_tensor: bool,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+) -> None:
+  """Materialize a list of tensors with `quantized_data` updated when needed.
+
+  Args:
+    op_tensor_params: Tensor transformation parameters for the op. Will be
+      modified to include new tensor parameters.
+    tensors: Tensors to be materialized.
+    quant_params: The quantization parameters to be used for materialization.
+    is_inbounding_tensor: Whether the tensor is an inbounding tensor for the op.
+    op_info: Aggregated information about the op (e.g., quantization config).
+    graph_info: Graph information needed to perform quantization for the op.
+    tensor_name_to_qsv: A map of tensor name to quantization parameters.
+    get_tensor_quant_params_fn: Function to get quantization parameters for the
+      tensor.
+  """
+  if quant_params is not None and quant_params.quantized_data is not None:
+    quant_params = dataclasses.replace(quant_params, quantized_data=None)
+
+  for tensor in tensors:
+    tensor_data = tfl_flatbuffer_utils.get_tensor_data(
+        tensor, graph_info.buffers
+    )
+    if quant_params is None or tensor_data is None:
+      tensor_quant_params = quant_params
+    else:
+      # Constant tensors require updating `quantized_data`.
+      quantized_data = uniform_quantize_tensor.uniform_quantize(
+          tensor_data, quant_params
+      )
+      tensor_quant_params = dataclasses.replace(
+          quant_params,
+          quantized_data=quantized_data,
+      )
+    _materialize_op_tensors(
+        op_tensor_params,
+        [tensor],
+        is_inbounding_tensor=is_inbounding_tensor,
+        op_info=op_info,
+        graph_info=graph_info,
+        tensor_name_to_qsv=tensor_name_to_qsv,
+        get_tensor_quant_params_fn=get_tensor_quant_params_fn,
+        quant_params=tensor_quant_params,
+    )
+
+
 def _materialize_standard_op_with_same_as_input_scale(
     input_tensors: Sequence[Any],
     output_tensors: Sequence[Any],
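The key detail in the new helper is that `quant_params` borrowed from a neighboring tensor may still carry that tensor's `quantized_data`, so the data is cleared first and recomputed only for constant tensors (those with backing buffer data). A self-contained sketch of the same `dataclasses.replace` pattern, using a stand-in dataclass rather than `qtyping.UniformQuantParams`:

```python
import dataclasses
from typing import Optional
import numpy as np

@dataclasses.dataclass(frozen=True)
class QuantParams:  # stand-in for qtyping.UniformQuantParams
  scale: np.ndarray
  zero_point: np.ndarray
  quantized_data: Optional[np.ndarray] = None

def with_fresh_quantized_data(
    params: QuantParams, tensor_data: Optional[np.ndarray]
) -> QuantParams:
  # Drop quantized_data inherited from whichever tensor produced `params`.
  params = dataclasses.replace(params, quantized_data=None)
  if tensor_data is None:  # non-constant tensor: nothing to requantize
    return params
  q = np.clip(
      np.round(tensor_data / params.scale) + params.zero_point, -128, 127
  )
  return dataclasses.replace(params, quantized_data=q.astype(np.int8))
```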
@@ -295,23 +346,48 @@ def _materialize_standard_op_with_same_as_input_scale(
   )
   op_tensor_params.append(input_tensor_params)
   # Use input quantization params for all output tensors.
-  _materialize_op_tensors(
+  input_quant_params = input_tensor_params.consumers[0].parameters
+  if not isinstance(input_quant_params, qtyping.UniformQuantParams):
+    raise ValueError(
+        "_materialize_standard_op_with_same_as_input_scale only supports"
+        f" UniformQuantParams. For tensor {input_tensor_params.tensor_name},"
+        f" got {type(input_quant_params)}"
+    )
+  _materialize_tensors_with_quantized_data_update(
       op_tensor_params,
       output_tensors,
+      input_quant_params,
       is_inbounding_tensor=False,
       op_info=op_info,
       graph_info=graph_info,
       tensor_name_to_qsv=tensor_name_to_qsv,
       get_tensor_quant_params_fn=get_tensor_quant_params_fn,
-      quant_params=input_tensor_params.consumers[0].parameters,
   )
+
   # Change output qsv to be the same as input qsv. This is safe since TFL
   # subgraph is acyclic.
-  input_tensor_qsv = tensor_name_to_qsv
-
-
-
+  input_tensor_qsv = tensor_name_to_qsv.get(
+      input_tensor_params.tensor_name, None
+  )
+  if input_tensor_qsv is None:
+    input_tensor_data = tfl_flatbuffer_utils.get_tensor_data(
+        input_tensors[0], graph_info.buffers
     )
+    # If the input tensor is a constant tensor without qsv, compute qsv from
+    # its quant params.
+    if input_tensor_data is None:
+      # If the only input to an op that needs to match input to
+      # output has no qsv and is not a constant tensor, then this is an error.
+      raise ValueError(
+          "Input tensor qsv is None for tensor"
+          f" {input_tensor_params.tensor_name}."
+      )
+    min_val, max_val = _get_min_max_from_quant_params(input_quant_params)
+    input_tensor_qsv = {"min": min_val, "max": max_val}
+  for output_tensor in output_tensors:
+    tensor_name_to_qsv[
+        tfl_flatbuffer_utils.get_tensor_name(output_tensor)
+    ] = input_tensor_qsv
 
   return op_tensor_params
 
@@ -351,19 +427,26 @@ def _materialize_standard_op_with_same_as_output_scale(
   )
   # Use output quantization params for all input tensors.
   if output_tensor_params.producer is None:
-    quant_params = None
+    output_quant_params = None
   else:
-    quant_params = output_tensor_params.producer.parameters
-  _materialize_op_tensors(
+    output_quant_params = output_tensor_params.producer.parameters
+    if not isinstance(output_quant_params, qtyping.UniformQuantParams):
+      raise ValueError(
+          "_materialize_standard_op_with_same_as_output_scale only supports"
+          f" UniformQuantParams. For tensor {output_tensor_params.tensor_name},"
+          f" got {type(output_quant_params)}"
+      )
+  _materialize_tensors_with_quantized_data_update(
       op_tensor_params,
       input_tensors,
+      output_quant_params,
       is_inbounding_tensor=True,
       op_info=op_info,
       graph_info=graph_info,
       tensor_name_to_qsv=tensor_name_to_qsv,
       get_tensor_quant_params_fn=get_tensor_quant_params_fn,
-      quant_params=quant_params,
   )
+
   op_tensor_params.append(output_tensor_params)
 
   return op_tensor_params
@@ -628,6 +711,26 @@ def _add_non_match_tensors_to_ignored_lists(
   return inputs_to_ignore, outputs_to_ignore
 
 
+def _get_min_max_from_quant_params(
+    quant_params: qtyping.UniformQuantParams,
+) -> tuple[np.ndarray, np.ndarray]:
+  """Recalculate min/max from tensor quantization params."""
+  q_min, q_max = uniform_quantize_tensor.get_quantized_range(
+      _IntType(quant_params.num_bits, True)
+  )
+  float_min = uniform_quantize_tensor.uniform_dequantize(
+      np.array(q_min), quant_params
+  )
+  float_max = uniform_quantize_tensor.uniform_dequantize(
+      np.array(q_max), quant_params
+  )
+  # We use qmax values to compute scale for symmetric quantization (see
+  # uniform_quantize_tensor.tensor_zp_scale_from_min_max).
+  if quant_params.symmetric:
+    float_min = -float_max
+  return float_min, float_max
+
+
 def materialize_standard_op(
     op_info: qtyping.OpInfo,
     graph_info: qtyping.GraphInfo,
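A worked example of the recovery above, with assumed values: for 8-bit signed quantization `get_quantized_range` yields [-128, 127], so with scale 0.5 and zero point 0 dequantization gives float_min = -64.0 and float_max = 63.5; since symmetric scales are derived from qmax, the function then mirrors the max:

```python
scale, zero_point = 0.5, 0  # assumed example values
q_min, q_max = -128, 127  # signed 8-bit quantized range
float_min = (q_min - zero_point) * scale  # -64.0
float_max = (q_max - zero_point) * scale  # 63.5
symmetric = True
if symmetric:
  float_min = -float_max  # -63.5, consistent with a qmax-derived scale
```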
@@ -794,8 +897,6 @@ def materialize_op_with_output_activation_constraint(
   output_tensor_params.producer = op_tensor_params
   # Update the tensor_name_to_qsv map using the output activation constraints.
   min_val, max_val = _get_min_max_from_quant_params(
-      activation_num_bits,
-      activation_tensor_config.symmetric,
       fixed_quant_params,
   )
   tensor_name_to_qsv[output_tensor_params.tensor_name]["min"] = min_val
@@ -842,13 +943,6 @@ def get_tensor_transformations(
       transformations = [_QuantTransformation.QUANTIZE_TENSOR]
     else:
       transformations = [_QuantTransformation.NO_QUANTIZE]
-  elif (
-      op_quant_config.weight_tensor_config is not None
-      and op_quant_config.weight_tensor_config.granularity
-      == qtyping.QuantGranularity.BLOCKWISE
-      and is_constant
-  ):
-    transformations = [_QuantTransformation.EMULATED_SUBCHANNEL]
   # Check if WEIGHT_ONLY.
   elif (
       op_quant_config.compute_precision == qtyping.ComputePrecision.FLOAT
@@ -906,23 +1000,36 @@ def get_tensor_transformation_params(
   )
 
 
-def get_weight_quantized_dim(
+def get_weight_quantized_dim(
+    op_info: qtyping.OpInfo,
+    tensor_data: np.ndarray,
+    granularity: qtyping.QuantGranularity,
+):
   """Get the quantized dimension for the weight tensor.
 
   Args:
     op_info: Aggregated information about the op (e.g., quantization config).
     tensor_data: The weight tensor data.
+    granularity: The granularity of the weight tensor.
 
   Returns:
     The quantized dimension for the weight tensor.
   """
-
-
-
-
-
-
-
+  quantized_dim = None
+  if granularity == qtyping.QuantGranularity.CHANNELWISE:
+    if op_info.op_name == _TFLOpName.BATCH_MATMUL:
+      quantized_dim = get_bmm_weight_quantized_dim(
+          tensor_data, adj_y=op_info.op.builtinOptions.adjY
+      )
+    else:
+      quantized_dim = tfl_flatbuffer_utils.TFL_OP_TO_WEIGHT_QUANTIZED_DIM.get(
+          op_info.op_name, None
+      )
+  elif uniform_quantize_tensor.is_blockwise(granularity):
+    quantized_dim = (
+        tfl_flatbuffer_utils.TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM[
+            op_info.op_name
+        ]
     )
   return quantized_dim
 
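For context, the quantized dimension is the weight axis that carries one scale per slice under channelwise quantization; TFLite FULLY_CONNECTED weights of shape (out_features, in_features) use dimension 0, one scale per output channel, while BATCH_MATMUL depends on the layout (hence the adj_y handling above). A minimal sketch with an illustrative helper, not the library's API:

```python
import numpy as np

def channelwise_scales(
    w: np.ndarray, quantized_dim: int, num_bits: int = 8
) -> np.ndarray:
  """One symmetric scale per slice along `quantized_dim` (illustrative)."""
  qmax = 2 ** (num_bits - 1) - 1
  reduce_axes = tuple(ax for ax in range(w.ndim) if ax != quantized_dim)
  return np.abs(w).max(axis=reduce_axes) / qmax

w = np.random.randn(16, 64).astype(np.float32)  # (out_features, in_features)
scales = channelwise_scales(w, quantized_dim=0)  # shape (16,)
```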
@@ -952,23 +1059,4 @@ def get_bmm_weight_quantized_dim(
   return rank - 1
 
 
-def _get_min_max_from_quant_params(
-    num_bits: int,
-    symmetric: bool,
-    tensor_params: qtyping.UniformQuantParams,
-) -> tuple[float, float]:
-  """Recalculate min/max from tensor quantization params."""
-  q_min, q_max = uniform_quantize_tensor.get_quantized_range(
-      _IntType(num_bits, True)
-  )
-  float_min = uniform_quantize_tensor.uniform_dequantize(
-      np.array(q_min), tensor_params
-  )
-  float_max = uniform_quantize_tensor.uniform_dequantize(
-      np.array(q_max), tensor_params
-  )
-  # We use qmax values to compute scale for symmetric quantization (see
-  # uniform_quantize_tensor.tensor_zp_scale_from_min_max).
-  if symmetric:
-    float_min = -float_max
-  return (float_min, float_max)
+
ai_edge_quantizer/calibrator.py
CHANGED
@@ -23,6 +23,7 @@ from absl import logging
 import numpy as np
 
 from ai_edge_quantizer import algorithm_manager
+from ai_edge_quantizer import default_policy as policy
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer import recipe_manager
 from ai_edge_quantizer.utils import calibration_utils
@@ -45,11 +46,6 @@ class Calibrator:
   ):
     self._flatbuffer_model = tfl_flatbuffer_utils.read_model(float_tflite)
 
-    if not tfl_flatbuffer_utils.is_float_model(self._flatbuffer_model):
-      raise ValueError(
-          "The input model for calibration is not a float model. Please check"
-          " the model (e.g., if it is already quantized)."
-      )
     self._tfl_interpreter = tfl_interpreter_utils.create_tfl_interpreter(
         float_tflite, use_xnnpack=True, num_threads=num_threads
     )
@@ -97,9 +93,7 @@ class Calibrator:
       qsv_update_func: The function to update the QSVs.
     """
     op_codes = self._flatbuffer_model.operatorCodes
-    if not self._model_qsvs:
-      self._initialize_model_qsvs(model_recipe_manager)
-    else:
+    if self._model_qsvs:
       logging.warning(
           "Calibrator contains non-empty model qsvs, and the current"
           " calibration process will start on top of this state (i.e., update"
@@ -124,50 +118,67 @@ class Calibrator:
       )
       if cache_output:
         self._cached_output.append(signature_output)
-      self._tensor_content_map.update(
-          tfl_interpreter_utils.get_tensor_name_to_content_map(
-              self._tfl_interpreter, subgraph_idx
-          )
-      )
 
       # Step2: go through each op in subgraph to update quantization
       # statistic values.
-
-
-
-
-
-
-
-      )
-      for op in subgraph.operators:
-        if isinstance(op, qtyping.IOOperator):
-          op_key = op.op_key
-        else:
-          op_code = op_codes[op.opcodeIndex].builtinCode
-          if op_code not in tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME:
-            continue
-          op_key = tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME[op_code]
-        # Step2.1: query the quantization_recipe to get op quantization
-        # settings.
-        op_scope = self._get_op_scope(op, subgraph.tensors)
-        algorithm_name, _ = model_recipe_manager.get_quantization_configs(
-            op_key, op_scope
-        )
-        if algorithm_name == algorithm_manager.AlgorithmName.NO_QUANTIZE:
-          continue
-        # Step2.2: query algorithm_manager to get/call the related calibration
-        # function.
-        calibrate_func = algorithm_manager.get_quantization_func(
-            algorithm_name, op_key, qtyping.QuantizeMode.CALIBRATE
+      subgraphs_inds = [subgraph_idx]
+      while subgraphs_inds:
+        subgraph_ind = subgraphs_inds.pop()
+        self._tensor_content_map.update(
+            tfl_interpreter_utils.get_tensor_name_to_content_map(
+                self._tfl_interpreter, subgraph_ind
+            )
         )
-
-
-
-        op_updated_tensor_name = self._update_qsvs(
-            op_qsvs, updated_tensor_names, qsv_update_func
+        subgraph = self._flatbuffer_model.subgraphs[subgraph_ind]
+        graph_info = qtyping.GraphInfo(
+            subgraph.tensors, self._flatbuffer_model.buffers
         )
-
+        # Add input/output operators if they are not in the subgraph.
+        if not any(
+            isinstance(op, qtyping.IOOperator) for op in subgraph.operators
+        ):
+          subgraph.operators += (
+              tfl_flatbuffer_utils.get_subgraph_input_output_operators(
+                  subgraph
+              )
+          )
+        for op in subgraph.operators:
+          if isinstance(op, qtyping.IOOperator):
+            op_key = op.op_key
+          else:
+            op_code = op_codes[op.opcodeIndex].builtinCode
+            if op_code not in tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME:
+              continue
+            op_key = tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME[op_code]
+          # Step2.1: query the quantization_recipe to get op quantization
+          # settings.
+          op_scope = self._get_op_scope(op, subgraph.tensors)
+          algorithm_name, _ = model_recipe_manager.get_quantization_configs(
+              op_key, op_scope
+          )
+          if algorithm_name == algorithm_manager.AlgorithmName.NO_QUANTIZE:
+            continue
+          if policy.is_non_quantizable_composite_op(op):
+            continue
+
+          # Step2.2: query algorithm_manager to get/call the related
+          # calibration function.
+          calibrate_func = algorithm_manager.get_quantization_func(
+              algorithm_name, op_key, qtyping.QuantizeMode.CALIBRATE
+          )
+          op_qsvs = calibrate_func(op, graph_info, self._tensor_content_map)
+          # Step3: Update tensor qsvs with the new values. Ignore the tensor
+          # names that are already updated in this round of calibration.
+          op_updated_tensor_name = self._update_qsvs(
+              op_qsvs, updated_tensor_names, qsv_update_func
+          )
+          updated_tensor_names.update(op_updated_tensor_name)
+
+          # Step4: Invoke any subgraphs invoked as a side effect of the op.
+          subgraphs_inds.extend(
+              tfl_flatbuffer_utils.get_op_side_effect_subgraphs(op)
+          )
+
       # Reset interpreter after one round of calibration.
       self._tfl_interpreter.reset_all_variables()
 
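The rewritten loop replaces the old single-subgraph pass with a worklist, so subgraphs invoked as a side effect of an op (e.g. control-flow bodies) are calibrated as well. A stripped-down sketch of the traversal, with illustrative names:

```python
def calibrate_reachable_subgraphs(
    entry_idx, subgraphs, process_op, get_side_effect_subgraphs
):
  """Worklist traversal over subgraphs reached from the entry subgraph."""
  worklist = [entry_idx]
  while worklist:
    idx = worklist.pop()
    for op in subgraphs[idx].operators:
      process_op(op)  # e.g. collect QSVs for the op's tensors
      # Ops such as WHILE/IF reference other subgraphs; schedule those too.
      worklist.extend(get_side_effect_subgraphs(op))
```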
@@ -245,50 +256,3 @@ class Calibrator:
       output_tensor = subgraph_tensors[output_tensor_idx]
       scope += tfl_flatbuffer_utils.get_tensor_name(output_tensor)
     return scope
-
-  # TODO: b/354224138 - Remove code duplication between calibrate and
-  # _initialize_model_qsvs.
-  def _initialize_model_qsvs(
-      self, model_recipe_manager: recipe_manager.RecipeManager
-  ) -> None:
-    """Initialize the model qsvs.
-
-    Args:
-      model_recipe_manager: A RecipeManager object that contains the
-        quantization recipe.
-    """
-    op_codes = self._flatbuffer_model.operatorCodes
-    for subgraph in self._flatbuffer_model.subgraphs:
-      graph_info = qtyping.GraphInfo(
-          subgraph.tensors, self._flatbuffer_model.buffers
-      )
-      for subgraph_op_id, op in enumerate(subgraph.operators):
-        op_code = op_codes[op.opcodeIndex].builtinCode
-        if op_code not in tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME:
-          continue
-        op_key = tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME[op_code]
-        # Step1: query the quantization_recipe to get op quantization
-        # settings.
-        op_scope = self._get_op_scope(op, subgraph.tensors)
-        algorithm_name, op_quant_config = (
-            model_recipe_manager.get_quantization_configs(op_key, op_scope)
-        )
-        if algorithm_name == algorithm_manager.AlgorithmName.NO_QUANTIZE:
-          continue
-        # Step2: query algorithm_manager to get/call the related qsv init
-        # function.
-        qsv_init_func = algorithm_manager.get_init_qsv_func(
-            algorithm_name, op_key
-        )
-        op_info = qtyping.OpInfo(op, op_key, subgraph_op_id, op_quant_config)
-        # Ignore the input tensors where any dimension of the shape is 0.
-        inputs_to_ignore = [
-            opr_idx
-            for opr_idx, tensor_idx in enumerate(op.inputs)
-            if not np.all(graph_info.subgraph_tensors[tensor_idx].shape)
-        ]
-        op_qsvs = qsv_init_func(op_info, graph_info, inputs_to_ignore)
-        # Step3: initialize tensor qsvs.
-        for tensor_name, qsv in op_qsvs.items():
-          if tensor_name not in self._model_qsvs:
-            self._model_qsvs[tensor_name] = qsv
ai_edge_quantizer/calibrator_test.py
CHANGED
@@ -103,58 +103,6 @@ class CalibratorTest(googletest.TestCase):
     model_tensor_qsvs = self._calibrator.get_model_qsvs()
     self.assertEmpty(model_tensor_qsvs)
 
-  def test_calibrator_initialize_qsv(self):
-    _add_default_int8xint8_integer_recipe(self._recipe_manager)
-    # Overwrite the single op to fc
-    self._recipe_manager.add_quantization_config(
-        regex=".*Stateful.*",
-        operation_name=qtyping.TFLOperationName.FULLY_CONNECTED,
-        algorithm_key=_AlgorithmName.MIN_MAX_UNIFORM_QUANT,
-        op_config=qtyping.OpQuantizationConfig(
-            weight_tensor_config=_TENSOR_QUANT_CONFIG(
-                num_bits=4,
-                granularity=qtyping.QuantGranularity.CHANNELWISE,
-            ),
-            compute_precision=_ComputePrecision.INTEGER,
-        ),
-    )
-    self._calibrator._initialize_model_qsvs(self._recipe_manager)
-    model_tensor_qsvs = self._calibrator.get_model_qsvs()
-
-    self.assertLen(model_tensor_qsvs, 4)
-    self.assertIn("serving_default_input_1:0", model_tensor_qsvs)  # input
-    input_qsv = model_tensor_qsvs["serving_default_input_1:0"]
-    self.assertEmpty(input_qsv)
-
-    self.assertIn("sequential/dense/MatMul", model_tensor_qsvs)  # weight
-    weight_tensor_qsv = model_tensor_qsvs["sequential/dense/MatMul"]
-    mins_maxs_shape = (16, 1)
-    self.assertTupleEqual(weight_tensor_qsv["min"].shape, mins_maxs_shape)
-    self.assertAlmostEqual(weight_tensor_qsv["min"][0][0], -0.40436327)
-    self.assertTupleEqual(weight_tensor_qsv["max"].shape, mins_maxs_shape)
-    self.assertAlmostEqual(weight_tensor_qsv["max"][0][0], 0.46138108)
-
-    self.assertIn(
-        "sequential/dense/BiasAdd/ReadVariableOp", model_tensor_qsvs
-    )  # bias
-    bias_tensor_qsv = model_tensor_qsvs[
-        "sequential/dense/BiasAdd/ReadVariableOp"
-    ]
-    mins_maxs_shape = (16,)
-    self.assertTupleEqual(bias_tensor_qsv["min"].shape, mins_maxs_shape)
-    self.assertAlmostEqual(bias_tensor_qsv["min"][0], -0.26978338)
-    self.assertTupleEqual(bias_tensor_qsv["max"].shape, mins_maxs_shape)
-    # Here bias min/max will be the same as each element is a scalar
-    # Bias will be quantized with input_scale * weight_scale.
-    self.assertSequenceEqual(
-        list(bias_tensor_qsv["max"].flatten()),
-        list(bias_tensor_qsv["min"].flatten()),
-    )
-
-    self.assertIn("StatefulPartitionedCall:0", model_tensor_qsvs)  # output
-    output_qsv = model_tensor_qsvs["StatefulPartitionedCall:0"]
-    self.assertEmpty(output_qsv)
-
   def test_calibrate_single_fc_success(self):
     _add_default_int8xint8_integer_recipe(self._recipe_manager)
     self._calibrator.calibrate(
@@ -162,7 +110,7 @@ class CalibratorTest(googletest.TestCase):
     )
     model_tensor_qsvs = self._calibrator.get_model_qsvs()
 
-    self.assertLen(model_tensor_qsvs, 4)
+    self.assertLen(model_tensor_qsvs, 2)
     self.assertIn("serving_default_input_1:0", model_tensor_qsvs)  # input
     input_qsv = model_tensor_qsvs["serving_default_input_1:0"]
     self.assertSequenceAlmostEqual(
@@ -171,19 +119,6 @@ class CalibratorTest(googletest.TestCase):
     self.assertSequenceAlmostEqual(
         input_qsv["max"].flatten(), [TEST_MAX_VAL], delta=1e-5
     )
-
-    self.assertIn("sequential/dense/MatMul", model_tensor_qsvs)  # weight
-    weight_qsv = model_tensor_qsvs["sequential/dense/MatMul"]
-    self.assertSequenceAlmostEqual(weight_qsv["min"].flatten(), [-0.49114203])
-    self.assertSequenceAlmostEqual(weight_qsv["max"].flatten(), [0.4903704])
-
-    self.assertIn(
-        "sequential/dense/BiasAdd/ReadVariableOp", model_tensor_qsvs
-    )  # bias
-    bias_qsv = model_tensor_qsvs["sequential/dense/BiasAdd/ReadVariableOp"]
-    self.assertSequenceAlmostEqual(bias_qsv["min"].flatten(), [-0.38401994])
-    self.assertSequenceAlmostEqual(bias_qsv["max"].flatten(), [0.31727126])
-
     self.assertIn("StatefulPartitionedCall:0", model_tensor_qsvs)  # output
     output_qsv = model_tensor_qsvs["StatefulPartitionedCall:0"]
     # Relu, only check the min
@@ -234,7 +169,7 @@ class CalibratorTest(googletest.TestCase):
     )
     test_calibrator = calibrator.Calibrator(test_model_path)
     _add_default_int8xint8_integer_recipe(self._recipe_manager)
-    calib_data =
+    calib_data = tfl_interpreter_utils.create_random_normal_input_data(
         test_model_path, num_samples=4
     )
     test_calibrator.calibrate(calib_data, self._recipe_manager)
@@ -249,15 +184,11 @@ class CalibratorAlreadyQuantizedModelTest(googletest.TestCase):
     )
     _ = calibrator.Calibrator(test_model_path)
 
-  def
+  def test_check_is_quantized_model_succeeds_when_model_is_quantized(self):
     test_model_path = os.path.join(
         TEST_DATA_PREFIX_PATH, "tests/models/mnist_quantized.tflite"
     )
-    with self.assertRaisesRegex(
-        ValueError,
-        "The input model for calibration is not a float model.",
-    ):
-      _ = calibrator.Calibrator(test_model_path)
+    _ = calibrator.Calibrator(test_model_path)
 
 
 class CalibratorToyGemma2Test(googletest.TestCase):
@@ -302,7 +233,7 @@ class CalibratorToyGemma2Test(googletest.TestCase):
         self._toy_gemma2_calibration_dataset,
         model_recipe_manager=recipe_mngr,
     )
-    self.assertLen(calib.get_model_qsvs(),
+    self.assertLen(calib.get_model_qsvs(), 202)
 
 
 if __name__ == "__main__":