ai-edge-quantizer-nightly 0.1.0.dev20250415__py3-none-any.whl → 0.5.0.dev20260103__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithm_manager.py +158 -0
- ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py +2 -2
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +489 -53
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py +29 -2
- ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py +4 -6
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py +414 -0
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py +440 -0
- ai_edge_quantizer/algorithms/uniform_quantize/mse.py +127 -0
- ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py +195 -0
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +48 -42
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +53 -14
- ai_edge_quantizer/algorithms/uniform_quantize/octav.py +32 -18
- ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py +92 -38
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +248 -13
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +126 -6
- ai_edge_quantizer/algorithms/utils/common_utils.py +142 -53
- ai_edge_quantizer/calibrator.py +11 -60
- ai_edge_quantizer/calibrator_test.py +4 -73
- ai_edge_quantizer/default_policy.py +61 -26
- ai_edge_quantizer/model_modifier.py +97 -7
- ai_edge_quantizer/model_modifier_test.py +81 -1
- ai_edge_quantizer/model_validator.py +31 -8
- ai_edge_quantizer/params_generator.py +17 -10
- ai_edge_quantizer/params_generator_test.py +2 -7
- ai_edge_quantizer/qtyping.py +86 -6
- ai_edge_quantizer/quantizer.py +166 -21
- ai_edge_quantizer/quantizer_test.py +284 -16
- ai_edge_quantizer/recipe.py +154 -42
- ai_edge_quantizer/recipe_manager.py +158 -1
- ai_edge_quantizer/recipe_manager_test.py +146 -32
- ai_edge_quantizer/recipe_test.py +93 -17
- ai_edge_quantizer/transformation_instruction_generator.py +118 -13
- ai_edge_quantizer/transformation_instruction_generator_test.py +163 -27
- ai_edge_quantizer/transformation_performer.py +55 -25
- ai_edge_quantizer/transformation_performer_test.py +127 -5
- ai_edge_quantizer/transformations/duplicate_buffer.py +2 -1
- ai_edge_quantizer/transformations/duplicate_tensor.py +1 -0
- ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation.py +299 -0
- ai_edge_quantizer/transformations/insert_decomposed_hadamard_rotation_test.py +244 -0
- ai_edge_quantizer/transformations/insert_hadamard_rotation.py +186 -0
- ai_edge_quantizer/transformations/insert_hadamard_rotation_test.py +200 -0
- ai_edge_quantizer/transformations/quantize_tensor.py +17 -32
- ai_edge_quantizer/transformations/quantize_tensor_test.py +1 -1
- ai_edge_quantizer/transformations/transformation_utils.py +129 -6
- ai_edge_quantizer/transformations/transformation_utils_test.py +65 -3
- ai_edge_quantizer/utils/calibration_utils.py +263 -1
- ai_edge_quantizer/utils/calibration_utils_test.py +173 -3
- ai_edge_quantizer/utils/constrained_ops_utils.py +111 -0
- ai_edge_quantizer/utils/constrained_ops_utils_test.py +50 -0
- ai_edge_quantizer/utils/test_utils.py +75 -2
- ai_edge_quantizer/utils/tfl_flatbuffer_utils.py +39 -6
- ai_edge_quantizer/utils/tfl_interpreter_utils.py +87 -15
- ai_edge_quantizer/utils/tfl_interpreter_utils_test.py +29 -2
- ai_edge_quantizer/utils/validation_utils.py +114 -4
- ai_edge_quantizer/utils/validation_utils_test.py +80 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250415.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/METADATA +14 -4
- ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/RECORD +81 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250415.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/WHEEL +1 -1
- ai_edge_quantizer/transformations/emulated_subchannel.py +0 -363
- ai_edge_quantizer/transformations/emulated_subchannel_test.py +0 -212
- ai_edge_quantizer_nightly-0.1.0.dev20250415.dist-info/RECORD +0 -73
- {ai_edge_quantizer_nightly-0.1.0.dev20250415.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info/licenses}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250415.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20260103.dist-info}/top_level.txt +0 -0
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py
CHANGED
@@ -15,8 +15,11 @@
 
 """Tests for tensor_utils."""
 
+import dataclasses
+
 from absl.testing import parameterized
 import numpy as np
+
 from tensorflow.python.platform import googletest
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor
@@ -123,6 +126,14 @@ class TensorUtilsTest(parameterized.TestCase):
           False,
           [-24, 10, 19, 127],
       ),
+      (
+          [-16.0, 1.3, 2.4, 16.0],
+          [0.12598425],
+          [0],
+          8,
+          True,
+          [-127, 10, 19, 127],  # int8 symmetric is narrow range, -127 to 127
+      ),
       (
           [-3.0, 1.3, 2.4, 16.0],
           [1.2666667],
@@ -137,7 +148,7 @@ class TensorUtilsTest(parameterized.TestCase):
           [-6],
           4,
           True,
-          [-8, -5, -4, 7],
+          [-8, -5, -4, 7],  # int4 symmetric is not narrow range, -8 to 7
       ),
   )
   def test_uniform_quantize(
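The two new comments above pin down the symmetric integer ranges: int8 symmetric quantization is narrow range (-127 to 127), while int4 symmetric keeps the full range (-8 to 7). A minimal NumPy sketch (illustrative only, not code from the package) reproduces the expected int8 values with the usual rule q = round(x / scale) + zero_point:

# Illustrative only: reproduces the int8 expected values from the test case
# above, assuming narrow-range symmetric quantization with zero_point = 0.
import numpy as np

tensor = np.array([-16.0, 1.3, 2.4, 16.0])
scale = 0.12598425  # roughly 16.0 / 127 for the [-16, 16] range
quantized = np.clip(np.round(tensor / scale), -127, 127).astype(np.int8)
print(quantized)  # [-127   10   19  127]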
@@ -160,7 +171,9 @@ class TensorUtilsTest(parameterized.TestCase):
   def test_uniform_quantize_wrong_shape(self):
     tensor = [-3.0, 1.3, 2.4, 16.0]
 
-    error_message =
+    error_message = (
+        "Ranks of scales (3) and zps (2) must be the same as the tensor rank"
+    )
     with self.assertRaisesWithPredicateMatch(
         ValueError, lambda err: error_message in str(err)
     ):
@@ -190,6 +203,28 @@ class TensorUtilsTest(parameterized.TestCase):
         ),
     )
 
+  def test_uniform_quantize_quant_dim_not_divisible_by_block_size_raise(self):
+    tensor = np.random.rand(34, 2)
+    error_message = (
+        "Quantized dimension 34 in tensor shape (34, 2) is not divisible by"
+        " block size 32."
+    )
+    with self.assertRaisesWithPredicateMatch(
+        ValueError, lambda err: error_message in str(err)
+    ):
+      uniform_quantize_tensor.uniform_quantize(
+          np.array(tensor),
+          qtyping.UniformQuantParams(
+              quantized_dimension=0,
+              block_size=32,
+              num_bits=4,
+              scale=np.array([1.2666667]),
+              zero_point=np.array([-6]),
+              symmetric=True,
+          ),
+          is_blockwise_quant=True,
+      )
+
   @parameterized.parameters(
       (
           8,
@@ -233,7 +268,9 @@ class TensorUtilsTest(parameterized.TestCase):
   def test_uniform_dequantize_wrong_shape(self):
     tensor = [-3.0, 1.3, 2.4, 16.0]
 
-    error_message =
+    error_message = (
+        "Ranks of scales (3) and zps (2) must be the same as the tensor rank"
+    )
     with self.assertRaisesWithPredicateMatch(
         ValueError, lambda err: error_message in str(err)
     ):
@@ -263,8 +300,35 @@ class TensorUtilsTest(parameterized.TestCase):
         ),
     )
 
+  def test_uniform_dequantize_blockwise(self):
+    quantized_tensor = np.array([[-8, -5, -4, 7], [-4, 7, -8, -5]])
+    expected_output_tensor = np.array([
+        [-10.1333336, -6.3333335, -5.0666668, 8.8666669],
+        [-5.0666668, 8.8666669, -10.1333336, -6.3333335],
+    ])
+    quant_params = qtyping.UniformQuantParams(
+        # b/443830202:
+        quantized_dimension=0,
+        num_bits=4,
+        scale=np.array([[[1.2666667, 1.2666667], [1.2666667, 1.2666667]]]),
+        zero_point=np.array([[0]]),
+        symmetric=True,
+        block_size=2,
+    )
+
+    dequantized_tensor = uniform_quantize_tensor.uniform_dequantize(
+        np.array(quantized_tensor), quant_params
+    )
+
+    self.assertSequenceAlmostEqual(
+        expected_output_tensor.flatten(), dequantized_tensor.flatten(), places=4
+    )
+
   @parameterized.parameters(
-      (8, 8, True, True),
+      (8, 8, True, True),
+      (8, 4, False, True),
+      (16, 8, True, False),
+      (16, 8, True, True),
   )
   def test_quantize_bias_tensor(
       self,
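The expected values in the new blockwise test follow the same dequantization rule as the per-tensor case, just with one scale per block of block_size elements. A rough NumPy sketch, illustrative rather than package code; since every block scale equals 1.2666667 here, the block layout does not change the result:

# Illustrative only: dequant(q) = scale * (q - zero_point), with each block
# scale repeated across its block of block_size = 2 elements.
import numpy as np

quantized = np.array([[-8, -5, -4, 7], [-4, 7, -8, -5]], dtype=np.float64)
block_scales = np.full((2, 2), 1.2666667)      # one scale per block, zero_point = 0
expanded = np.repeat(block_scales, 2, axis=1)  # shape (2, 4)
print(expanded * quantized)  # matches expected_output_tensor to 4 decimal places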
@@ -322,6 +386,26 @@ class TensorUtilsTest(parameterized.TestCase):
     self.assertSequenceAlmostEqual(
         list(dequantized_bias.flatten()), list(bias_tensor_data), places=5
     )
+
+    if activation_num_bits == 16:
+      # Check if it is safe to cast int64 bias to int32. We save the int32
+      # quantized bias as int64 if the input tensor is quantized to 16 bits.
+      # This is to assume the matmul is using int64 accumulator (safe from
+      # overflow). For accelerators with int32 accumulator, it is safe to cast
+      # int64 back to int32.
+      quantized_bias = bias_quant_config.quantized_data
+      self.assertIsNotNone(quantized_bias)
+      self.assertEqual(quantized_bias.dtype, np.int64)
+      self.assertSequenceEqual(
+          list(quantized_bias.flatten()),
+          list(quantized_bias.astype(np.int32).flatten()),
+      )
+
+      bias_quant_config = dataclasses.replace(
+          bias_quant_config,
+          num_bits=32,
+      )
+
     expected_quantized_data = uniform_quantize_tensor.uniform_quantize(
         bias_tensor_data, bias_quant_config
     )
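The dtype and equality assertions added above amount to an overflow check: casting int64 to int32 preserves a value only if it fits in 32 bits, so comparing the cast result with the original detects values that would overflow an int32 accumulator. A small illustrative check with made-up values:

# Illustrative only: a value inside the int32 range survives the round trip,
# an overflowing one does not.
import numpy as np

bias = np.array([2_000_000_000, 3_000_000_000], dtype=np.int64)
print(bias.astype(np.int32).astype(np.int64) == bias)  # [ True False]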
@@ -330,13 +414,44 @@ class TensorUtilsTest(parameterized.TestCase):
         list(bias_quant_config.quantized_data.flatten()),  # pytype: disable=attribute-error
     )
 
+  def test_quantize_bias_tensor_raises_error_for_large_quantization_error(self):
+    input_quant_config = qtyping.UniformQuantParams(
+        scale=np.array([0.1]),
+        zero_point=np.array([10]),
+        num_bits=8,
+        symmetric=False,
+        quantized_dimension=None,
+    )
+    weight_quant_config = qtyping.UniformQuantParams(
+        scale=np.array([0.1]),
+        zero_point=np.array([-1]),
+        num_bits=8,
+        symmetric=True,
+        quantized_dimension=None,
+    )
+    # This will result in quantized bias of 3e9, which is larger than int32 max.
+    bias_tensor_data = np.array([3e7])
+    with self.assertRaisesRegex(
+        ValueError,
+        "Quantization error is too large for bias tensor quantization.",
+    ):
+      uniform_quantize_tensor.symmetric_quantize_bias_tensor(
+          bias_tensor_data,
+          input_quant_config,
+          weight_quant_config,
+      )
+
   @parameterized.parameters((8, True), (16, False))
   def test_tensor_zp_scale_from_min_max(self, num_bits, symmetric):
     min_val = np.min(self._test_data, keepdims=True)
     max_val = np.max(self._test_data, keepdims=True)
 
     zp, scale = uniform_quantize_tensor.tensor_zp_scale_from_min_max(
-        min_val,
+        min_val,
+        max_val,
+        num_bits,
+        symmetric,
+        qtyping.QuantGranularity.TENSORWISE,
     )
     self.assertEqual(zp.shape, scale.shape)
     max_q = 2**num_bits / 2 - 1
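The new overflow test relies on the standard rule for symmetric bias quantization, bias_scale = input_scale * weight_scale with a zero point of 0. A quick sketch of the arithmetic behind the test values (illustrative only, not package code):

# Illustrative only: with input_scale = weight_scale = 0.1 the bias scale is
# 0.01, so a bias of 3e7 quantizes to about 3e9, which exceeds the int32 max.
import numpy as np

bias_scale = 0.1 * 0.1                          # input_scale * weight_scale
quantized_bias = round(3e7 / bias_scale)        # 3_000_000_000
print(quantized_bias > np.iinfo(np.int32).max)  # True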
@@ -364,7 +479,12 @@ class TensorUtilsTest(parameterized.TestCase):
     max_val = np.array([[5.0]])
     clipping_values = np.array([4.0])
     zp, scale = uniform_quantize_tensor.tensor_zp_scale_from_min_max(
-        min_val,
+        min_val,
+        max_val,
+        num_bits,
+        symmetric,
+        qtyping.QuantGranularity.TENSORWISE,
+        clipping_values,
     )
     expected_scale = clipping_values / quantized_bound
 
ai_edge_quantizer/algorithms/utils/common_utils.py
CHANGED
@@ -41,6 +41,7 @@ _DRQ_OR_WEIGHT_ONLY_OPS = frozenset([
 
 _SUPPORTED_SUBCHANNEL_OPS = frozenset([
     _TFLOpName.FULLY_CONNECTED,
+    _TFLOpName.EMBEDDING_LOOKUP,
 ])
 
 
@@ -50,8 +51,9 @@ def check_subchannel_config(
   """Checks the op quantization config for subchannel quantization."""
   if (
       op_quant_config.weight_tensor_config is not None
-      and
-
+      and uniform_quantize_tensor.is_blockwise(
+          op_quant_config.weight_tensor_config.granularity
+      )
   ):
     if op_name not in _SUPPORTED_SUBCHANNEL_OPS:
       raise ValueError(f"Unsupported op for blockwise quantization: {op_name}.")
@@ -65,10 +67,6 @@ def check_subchannel_config(
         "Blockwise quantization does not support for asymmetric weight"
         " quantization."
     )
-    if op_quant_config.weight_tensor_config.block_size <= 0:
-      raise ValueError(
-          "Blockwise quantization must have a non-zero block size."
-      )
 
 
 def check_if_valid_op_config(
@@ -259,6 +257,60 @@ def _get_single_tensor_params(
   )
 
 
+def _materialize_tensors_with_quantized_data_update(
+    op_tensor_params: list[qtyping.TensorTransformationParams],
+    tensors: Sequence[Any],
+    quant_params: Optional[qtyping.UniformQuantParams],
+    is_inbounding_tensor: bool,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+) -> None:
+  """Materialize a list of tensors with `quantized_data` updated when needed.
+
+  Args:
+    op_tensor_params: Tensor transformation parameters for the op. Will be
+      modified to include new tensor parameters.
+    tensors: Tensors to be materialized.
+    quant_params: The quantization parameters to be used for materialization.
+    is_inbounding_tensor: Whether the tensor is an inbounding tensor for the op.
+    op_info: Aggregated information about the op (e.g., quantization config).
+    graph_info: Graph information needed to perform quantization for the op.
+    tensor_name_to_qsv: A map of tensor name to quantization parameters.
+    get_tensor_quant_params_fn: Function to get quantization parameters for the
+      tensor.
+  """
+  if quant_params is not None and quant_params.quantized_data is not None:
+    quant_params = dataclasses.replace(quant_params, quantized_data=None)
+
+  for tensor in tensors:
+    tensor_data = tfl_flatbuffer_utils.get_tensor_data(
+        tensor, graph_info.buffers
+    )
+    if quant_params is None or tensor_data is None:
+      tensor_quant_params = quant_params
+    else:
+      # Constant tensors require updating `quantized_data`.
+      quantized_data = uniform_quantize_tensor.uniform_quantize(
+          tensor_data, quant_params
+      )
+      tensor_quant_params = dataclasses.replace(
+          quant_params,
+          quantized_data=quantized_data,
+      )
+    _materialize_op_tensors(
+        op_tensor_params,
+        [tensor],
+        is_inbounding_tensor=is_inbounding_tensor,
+        op_info=op_info,
+        graph_info=graph_info,
+        tensor_name_to_qsv=tensor_name_to_qsv,
+        get_tensor_quant_params_fn=get_tensor_quant_params_fn,
+        quant_params=tensor_quant_params,
+    )
+
+
 def _materialize_standard_op_with_same_as_input_scale(
     input_tensors: Sequence[Any],
     output_tensors: Sequence[Any],
@@ -294,23 +346,48 @@ def _materialize_standard_op_with_same_as_input_scale(
   )
   op_tensor_params.append(input_tensor_params)
   # Use input quantization params for all output tensors.
-
+  input_quant_params = input_tensor_params.consumers[0].parameters
+  if not isinstance(input_quant_params, qtyping.UniformQuantParams):
+    raise ValueError(
+        "_materialize_standard_op_with_same_as_input_scale only supports"
+        f" UniformQuantParams. For tensor {input_tensor_params.tensor_name},"
+        f" got {type(input_quant_params)}"
+    )
+  _materialize_tensors_with_quantized_data_update(
       op_tensor_params,
       output_tensors,
+      input_quant_params,
       is_inbounding_tensor=False,
       op_info=op_info,
      graph_info=graph_info,
       tensor_name_to_qsv=tensor_name_to_qsv,
       get_tensor_quant_params_fn=get_tensor_quant_params_fn,
-      quant_params=input_tensor_params.consumers[0].parameters,
   )
+
   # Change output qsv to be the same as input qsv. This is safe since TFL
   # subgraph is acyclic.
-  input_tensor_qsv = tensor_name_to_qsv
-
-
-
+  input_tensor_qsv = tensor_name_to_qsv.get(
+      input_tensor_params.tensor_name, None
+  )
+  if input_tensor_qsv is None:
+    input_tensor_data = tfl_flatbuffer_utils.get_tensor_data(
+        input_tensors[0], graph_info.buffers
     )
+    # If the input tensor is a constant tensor without qsv, compute qsv from
+    # its quant params.
+    if input_tensor_data is None:
+      # If the only input to an op that needs to match input to
+      # output has no qsv and is not a constant tensor, then this is an error.
+      raise ValueError(
+          "Input tensor qsv is None for tensor"
+          f" {input_tensor_params.tensor_name}."
+      )
+    min_val, max_val = _get_min_max_from_quant_params(input_quant_params)
+    input_tensor_qsv = {"min": min_val, "max": max_val}
+  for output_tensor in output_tensors:
+    tensor_name_to_qsv[
+        tfl_flatbuffer_utils.get_tensor_name(output_tensor)
+    ] = input_tensor_qsv
 
   return op_tensor_params
 
@@ -350,19 +427,26 @@ def _materialize_standard_op_with_same_as_output_scale(
   )
   # Use output quantization params for all input tensors.
   if output_tensor_params.producer is None:
-
+    output_quant_params = None
   else:
-
-
+    output_quant_params = output_tensor_params.producer.parameters
+    if not isinstance(output_quant_params, qtyping.UniformQuantParams):
+      raise ValueError(
+          "_materialize_standard_op_with_same_as_output_scale only supports"
+          f" UniformQuantParams. For tensor {output_tensor_params.tensor_name},"
+          f" got {type(output_quant_params)}"
+      )
+  _materialize_tensors_with_quantized_data_update(
       op_tensor_params,
       input_tensors,
+      output_quant_params,
      is_inbounding_tensor=True,
       op_info=op_info,
       graph_info=graph_info,
       tensor_name_to_qsv=tensor_name_to_qsv,
       get_tensor_quant_params_fn=get_tensor_quant_params_fn,
-      quant_params=quant_params,
   )
+
   op_tensor_params.append(output_tensor_params)
 
   return op_tensor_params
@@ -627,6 +711,26 @@ def _add_non_match_tensors_to_ignored_lists(
   return inputs_to_ignore, outputs_to_ignore
 
 
+def _get_min_max_from_quant_params(
+    quant_params: qtyping.UniformQuantParams,
+) -> tuple[np.ndarray, np.ndarray]:
+  """Recalculate min/max from tensor quantization params."""
+  q_min, q_max = uniform_quantize_tensor.get_quantized_range(
+      _IntType(quant_params.num_bits, True)
+  )
+  float_min = uniform_quantize_tensor.uniform_dequantize(
+      np.array(q_min), quant_params
+  )
+  float_max = uniform_quantize_tensor.uniform_dequantize(
+      np.array(q_max), quant_params
+  )
+  # We use qmax values to compute scale for symmetric quantization (see
+  # uniform_quantize_tensor.tensor_zp_scale_from_min_max).
+  if quant_params.symmetric:
+    float_min = -float_max
+  return float_min, float_max
+
+
 def materialize_standard_op(
     op_info: qtyping.OpInfo,
     graph_info: qtyping.GraphInfo,
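The relocated `_get_min_max_from_quant_params` helper recovers a float range by dequantizing the endpoints of the integer range and, for symmetric params, mirroring the max. A rough NumPy sketch of the same idea with hypothetical 8-bit parameters (not values from the package), assuming dequant(q) = scale * (q - zero_point):

# Illustrative only: hypothetical asymmetric 8-bit params.
import numpy as np

num_bits, scale, zero_point = 8, np.array([0.1]), np.array([10])
q_min, q_max = -(2 ** (num_bits - 1)), 2 ** (num_bits - 1) - 1  # -128, 127
float_min = scale * (q_min - zero_point)  # [-13.8]
float_max = scale * (q_max - zero_point)  # [11.7]
# For symmetric params the helper instead returns (-float_max, float_max).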
@@ -793,8 +897,6 @@ def materialize_op_with_output_activation_constraint(
   output_tensor_params.producer = op_tensor_params
   # Update the tensor_name_to_qsv map using the output activation constraints.
   min_val, max_val = _get_min_max_from_quant_params(
-      activation_num_bits,
-      activation_tensor_config.symmetric,
       fixed_quant_params,
   )
   tensor_name_to_qsv[output_tensor_params.tensor_name]["min"] = min_val
@@ -841,13 +943,6 @@ def get_tensor_transformations(
       transformations = [_QuantTransformation.QUANTIZE_TENSOR]
     else:
       transformations = [_QuantTransformation.NO_QUANTIZE]
-  elif (
-      op_quant_config.weight_tensor_config is not None
-      and op_quant_config.weight_tensor_config.granularity
-      == qtyping.QuantGranularity.BLOCKWISE
-      and is_constant
-  ):
-    transformations = [_QuantTransformation.EMULATED_SUBCHANNEL]
   # Check if WEIGHT_ONLY.
   elif (
       op_quant_config.compute_precision == qtyping.ComputePrecision.FLOAT
@@ -905,23 +1000,36 @@ def get_tensor_transformation_params(
   )
 
 
-def get_weight_quantized_dim(
+def get_weight_quantized_dim(
+    op_info: qtyping.OpInfo,
+    tensor_data: np.ndarray,
+    granularity: qtyping.QuantGranularity,
+):
   """Get the quantized dimension for the weight tensor.
 
   Args:
     op_info: Aggregated information about the op (e.g., quantization config).
     tensor_data: The weight tensor data.
+    granularity: The granularity of the weight tensor.
 
   Returns:
     The quantized dimension for the weight tensor.
   """
-
-
-
-
-
-
-
+  quantized_dim = None
+  if granularity == qtyping.QuantGranularity.CHANNELWISE:
+    if op_info.op_name == _TFLOpName.BATCH_MATMUL:
+      quantized_dim = get_bmm_weight_quantized_dim(
+          tensor_data, adj_y=op_info.op.builtinOptions.adjY
+      )
+    else:
+      quantized_dim = tfl_flatbuffer_utils.TFL_OP_TO_WEIGHT_QUANTIZED_DIM.get(
+          op_info.op_name, None
+      )
+  elif uniform_quantize_tensor.is_blockwise(granularity):
+    quantized_dim = (
+        tfl_flatbuffer_utils.TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM[
+            op_info.op_name
+        ]
     )
   return quantized_dim
 
@@ -951,23 +1059,4 @@ def get_bmm_weight_quantized_dim(
   return rank - 1
 
 
-def _get_min_max_from_quant_params(
-    num_bits: int,
-    symmetric: bool,
-    tensor_params: qtyping.UniformQuantParams,
-) -> tuple[float, float]:
-  """Recalculate min/max from tensor quantization params."""
-  q_min, q_max = uniform_quantize_tensor.get_quantized_range(
-      _IntType(num_bits, True)
-  )
-  float_min = uniform_quantize_tensor.uniform_dequantize(
-      np.array(q_min), tensor_params
-  )
-  float_max = uniform_quantize_tensor.uniform_dequantize(
-      np.array(q_max), tensor_params
-  )
-  # We use qmax values to compute scale for symmetric quantization (see
-  # uniform_quantize_tensor.tensor_zp_scale_from_min_max).
-  if symmetric:
-    float_min = -float_max
-  return (float_min, float_max)
+
ai_edge_quantizer/calibrator.py
CHANGED
@@ -46,11 +46,6 @@ class Calibrator:
   ):
     self._flatbuffer_model = tfl_flatbuffer_utils.read_model(float_tflite)
 
-    if not tfl_flatbuffer_utils.is_float_model(self._flatbuffer_model):
-      raise ValueError(
-          "The input model for calibration is not a float model. Please check"
-          " the model (e.g., if it is already quantized)."
-      )
     self._tfl_interpreter = tfl_interpreter_utils.create_tfl_interpreter(
         float_tflite, use_xnnpack=True, num_threads=num_threads
     )
@@ -98,9 +93,7 @@ class Calibrator:
       qsv_update_func: The function to update the QSVs.
     """
     op_codes = self._flatbuffer_model.operatorCodes
-    if
-      self._initialize_model_qsvs(model_recipe_manager)
-    else:
+    if self._model_qsvs:
       logging.warning(
           "Calibrator contains non-empty model qsvs, and the current"
           " calibration process will start on top of this state (i.e., update"
@@ -140,10 +133,15 @@ class Calibrator:
       graph_info = qtyping.GraphInfo(
           subgraph.tensors, self._flatbuffer_model.buffers
       )
-      # Add input/output operators
-
-
-      )
+      # Add input/output operators if they are not in the subgraph.
+      if not any(
+          isinstance(op, qtyping.IOOperator) for op in subgraph.operators
+      ):
+        subgraph.operators += (
+            tfl_flatbuffer_utils.get_subgraph_input_output_operators(
+                subgraph
+            )
+        )
       for op in subgraph.operators:
         if isinstance(op, qtyping.IOOperator):
           op_key = op.op_key
@@ -160,7 +158,7 @@ class Calibrator:
         )
         if algorithm_name == algorithm_manager.AlgorithmName.NO_QUANTIZE:
           continue
-        if policy.
+        if policy.is_non_quantizable_composite_op(op):
          continue
 
         # Step2.2: query algorithm_manager to get/call the related
@@ -258,50 +256,3 @@ class Calibrator:
       output_tensor = subgraph_tensors[output_tensor_idx]
       scope += tfl_flatbuffer_utils.get_tensor_name(output_tensor)
     return scope
-
-  # TODO: b/354224138 - Remove code duplication between calibrate and
-  # _initialize_model_qsvs.
-  def _initialize_model_qsvs(
-      self, model_recipe_manager: recipe_manager.RecipeManager
-  ) -> None:
-    """Initialize the model qsvs.
-
-    Args:
-      model_recipe_manager: A RecipeManager object that contains the
-        quantization recipe.
-    """
-    op_codes = self._flatbuffer_model.operatorCodes
-    for subgraph in self._flatbuffer_model.subgraphs:
-      graph_info = qtyping.GraphInfo(
-          subgraph.tensors, self._flatbuffer_model.buffers
-      )
-      for subgraph_op_id, op in enumerate(subgraph.operators):
-        op_code = op_codes[op.opcodeIndex].builtinCode
-        if op_code not in tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME:
-          continue
-        op_key = tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME[op_code]
-        # Step1: query the quantization_recipe to get op quantization
-        # settings.
-        op_scope = self._get_op_scope(op, subgraph.tensors)
-        algorithm_name, op_quant_config = (
-            model_recipe_manager.get_quantization_configs(op_key, op_scope)
-        )
-        if algorithm_name == algorithm_manager.AlgorithmName.NO_QUANTIZE:
-          continue
-        # Step2: query algorithm_manager to get/call the related qsv init
-        # function.
-        qsv_init_func = algorithm_manager.get_init_qsv_func(
-            algorithm_name, op_key
-        )
-        op_info = qtyping.OpInfo(op, op_key, subgraph_op_id, op_quant_config)
-        # Ignore the input tensors where any dimension of the shape is 0.
-        inputs_to_ignore = [
-            opr_idx
-            for opr_idx, tensor_idx in enumerate(op.inputs)
-            if not np.all(graph_info.subgraph_tensors[tensor_idx].shape)
-        ]
-        op_qsvs = qsv_init_func(op_info, graph_info, inputs_to_ignore)
-        # Step3: initialize tensor qsvs.
-        for tensor_name, qsv in op_qsvs.items():
-          if tensor_name not in self._model_qsvs:
-            self._model_qsvs[tensor_name] = qsv