PyPI - ai-edge-quantizer-nightly - Versions diffs - 0.0.1.dev20250219__py3-none-any.whl → 0.0.1.dev20250221__py3-none-any.whl - Mend

ai-edge-quantizer-nightly 0.0.1.dev20250219__py3-none-any.whl → 0.0.1.dev20250221__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py ADDED Viewed

@@ -0,0 +1,131 @@
+# Copyright 2024 The AI Edge Quantizer Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Recovers quantized weights from dequantized weights (often from QAT)."""
+from typing import Optional
+import numpy as np
+from ai_edge_quantizer import qtyping
+from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor
+ALGORITHM_KEY = "dequantized_weight_recovery"
+_TFLOpName = qtyping.TFLOperationName
+_QuantTransformation = qtyping.QuantTransformation
+_IntType = uniform_quantize_tensor.IntType
+def _validate_recovered_scale(
+    dequant_vals: np.ndarray, scale: np.ndarray, tol: float = 1e-4
+):
+  """Validates if the recovered quantized values match the dequantized values.
+  The dequantized values are re-quantized with `scale` (round to nearest, no
+  clamping) and dequantized again; the round trip must reproduce the input
+  within `tol`.
+  Args:
+      dequant_vals: The dequantized weight values.
+      scale: The scale values, broadcastable against `dequant_vals`.
+      tol: The tolerance for the difference between the recovered and original
+        values.
+  Raises:
+      RuntimeError: If the maximum difference between the recovered and
+        original values exceeds the tolerance.
+  """
+  if dequant_vals.size == 0:
+    # Nothing to validate. Reducing a zero-size array with max() would raise
+    # a ValueError, so an empty weight tensor is treated as trivially valid.
+    return
+  quant_vals = np.round(dequant_vals / scale)  # no need to clamp.
+  recovered_vals = quant_vals * scale
+  max_diff = np.abs(recovered_vals - dequant_vals).max()
+  if max_diff > tol:
+    raise RuntimeError(
+        "Failed to recover the original quantized values from dequantized"
+        f" values. Max diff between recovered and original values: {max_diff}"
+    )
+def _get_scale(arr: np.ndarray, min_scale: float) -> float:
+  """Estimates the quantization scale of a 1D array of dequantized values.
+  The scale is taken as the smallest gap between distinct values (with zero
+  always included, as symmetric quantization maps zero to zero), but never
+  less than `min_scale`.
+  """
+  distinct_vals = np.unique(np.append(arr, 0))
+  if distinct_vals.size <= 1:
+    # Only zero is present: no gap to measure, fall back to the lower bound.
+    return min_scale
+  smallest_gap = np.diff(distinct_vals).min()
+  # Cast to float to ensure return type consistency.
+  return float(np.maximum(smallest_gap, min_scale))
+def get_zp_scale_from_2d_dequantized_symmetric_weights(
+    dequant_vals: np.ndarray,
+    quantized_dimension: Optional[int] = None,
+    min_scale: float = 1e-9,
+) -> tuple[np.ndarray, np.ndarray]:
+  """Recovers zero point and scale from 2D dequantized, symmetric weights.
+  Supports per-tensor recovery (`quantized_dimension=None`) as well as
+  per-channel recovery along axis 0 or 1. The recovered scales are validated
+  by re-quantizing the weights before being returned.
+  Args:
+      dequant_vals: The 2D dequantized weight values (numpy array).
+      quantized_dimension: The axis along which quantization was performed
+        (0 or 1), or None for per-tensor quantization.
+      min_scale: Lower bound applied to every recovered scale.
+  Returns:
+      A tuple `(zero_points, scales)`:
+          - zero_points: int32 zeros with the same shape as `scales`.
+          - scales: Shape (1, 1) for per-tensor, (n, 1) for axis 0 and (1, n)
+            for axis 1, so that it broadcasts against the weights.
+  Raises:
+      ValueError: If `dequant_vals` is not 2D, or if
+          `quantized_dimension` is not 0, 1, or None.
+      RuntimeError: If the recovered scales do not reproduce the weights.
+  """
+  if dequant_vals.ndim != 2:
+    raise ValueError(
+        f"Only 2D weights are supported. Got {dequant_vals.ndim} dimensions."
+    )
+  if quantized_dimension not in (0, 1, None):
+    raise ValueError(
+        f"quantized_dimension must be 0, 1, or None. Got {quantized_dimension}"
+    )
+  # Symmetric quantization: only magnitudes matter for the scale.
+  magnitudes = np.abs(dequant_vals)
+  if quantized_dimension is None:
+    # Per-tensor: a single scale, kept 2D for broadcasting.
+    scales = np.array([[_get_scale(magnitudes.flatten(), min_scale)]])
+  else:
+    # Per-channel: iterating rows of the (possibly transposed) matrix yields
+    # one slice per channel along the quantized dimension.
+    channels = magnitudes if quantized_dimension == 0 else magnitudes.T
+    per_channel = np.array(
+        [_get_scale(channel, min_scale) for channel in channels]
+    )
+    # Reshape for correct broadcasting.
+    broadcast_shape = (-1, 1) if quantized_dimension == 0 else (1, -1)
+    scales = per_channel.reshape(broadcast_shape)
+  zero_points = np.zeros_like(scales, dtype=np.int32)
+  _validate_recovered_scale(magnitudes, scales)
+  return zero_points, scales

ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py ADDED Viewed

@@ -0,0 +1,124 @@
+# Copyright 2024 The AI Edge Quantizer Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from absl.testing import parameterized
+import numpy as np
+from tensorflow.python.platform import googletest
+from ai_edge_quantizer import qtyping
+from ai_edge_quantizer.algorithms.uniform_quantize import dequantized_weight_recovery
+from ai_edge_quantizer.utils import test_utils
+_TFLOpName = qtyping.TFLOperationName
+_TensorQuantConfig = qtyping.TensorQuantizationConfig
+class DequantizedWeightRecoveryTest(parameterized.TestCase):
+  """Tests scale/zero-point recovery from 2D dequantized symmetric weights."""
+  def setUp(self):
+    super().setUp()
+    # Integer "quantized" weights (3 rows x 4 columns) that the tests scale
+    # into dequantized values.
+    self._dummy_quantized_weights = np.array([
+        [1, -2, 3, 4],
+        [6, 7, -8, 5],
+        [-1, 8, -7, -4],
+    ])
+  @parameterized.named_parameters(
+      dict(
+          testcase_name="per-tensor-recovery",
+          quantized_dimension=None,
+          scale=np.array([0.1875]).reshape(1, 1),
+      ),
+      dict(
+          testcase_name="channel0-recovery",
+          quantized_dimension=0,
+          scale=np.array([0.1875, 1e-4, 12.3]).reshape(3, 1),
+      ),
+      dict(
+          testcase_name="channel1-recovery",
+          quantized_dimension=1,
+          scale=np.array([0.003, 1.234, 12.65, 2.24e-4]).reshape(1, 4),
+      ),
+  )
+  def test_tensor_zp_scale_from_2d_dequantized_symmetric_weights_success(
+      self, quantized_dimension, scale
+  ):
+    """Recovered scale matches the scale used to build the dequantized data."""
+    # Build dequantized weights by broadcasting the known scale.
+    dequant_vals = scale * self._dummy_quantized_weights
+    zp, recovered_scale = (
+        dequantized_weight_recovery.get_zp_scale_from_2d_dequantized_symmetric_weights(
+            dequant_vals, quantized_dimension
+        )
+    )
+    self.assertEqual(recovered_scale.shape, scale.shape)
+    self.assertSequenceAlmostEqual(recovered_scale.flatten(), scale.flatten())
+    # Zero point should be zero for symmetric quantization.
+    self.assertEqual(np.sum(zp), 0)
+    self.assertEqual(zp.shape, scale.shape)
+  def test_tensor_zp_scale_from_2d_dequantized_symmetric_weights_raises_error_for_non_2d_weights(
+      self,
+  ):
+    """A 3D weight tensor is rejected with a ValueError."""
+    weights_3d = self._dummy_quantized_weights.reshape(1, 3, 4)
+    weights_3d = weights_3d * 1.02
+    with self.assertRaisesRegex(
+        ValueError, "Only 2D weights are supported. Got 3 dimensions."
+    ):
+      dequantized_weight_recovery.get_zp_scale_from_2d_dequantized_symmetric_weights(
+          weights_3d, quantized_dimension=None
+      )
+  @parameterized.named_parameters(
+      dict(testcase_name="negative_dimension", quantized_dimension=-1),
+      dict(testcase_name="too_large_dimension", quantized_dimension=2),
+  )
+  def test_tensor_zp_scale_from_2d_dequantized_symmetric_weights_raises_error_for_invalid_quantized_dimension(
+      self, quantized_dimension
+  ):
+    """A quantized dimension other than 0, 1 or None raises a ValueError."""
+    dequant_vals = self._dummy_quantized_weights * 1.02
+    with self.assertRaisesRegex(
+        ValueError, "quantized_dimension must be 0, 1, or None. Got"
+    ):
+      dequantized_weight_recovery.get_zp_scale_from_2d_dequantized_symmetric_weights(
+          dequant_vals, quantized_dimension
+      )
+  @parameterized.named_parameters(
+      dict(
+          testcase_name="recovery_on_wrong_dimension",
+          quantized_dimension=1,  # should be 0.
+          scale=np.array([0.1875, 1e-4, 12.3]).reshape(3, 1),
+      ),
+      dict(
+          testcase_name="tensor_recovery_for_channel_quantization",
+          quantized_dimension=None,  # should be 1 (scale is shaped (1, 4)).
+          scale=np.array([0.003, 1.234, 12.65, 2.24e-4]).reshape(1, 4),
+      ),
+  )
+  def test_tensor_zp_scale_from_2d_dequantized_symmetric_weights_raises_error_big_recovery_error(
+      self, quantized_dimension, scale
+  ):
+    """Recovering with a mismatched granularity fails the validation step."""
+    # The data is scaled per channel, but recovery is requested with a
+    # different `quantized_dimension`, so the round trip cannot match.
+    dequant_vals = scale * self._dummy_quantized_weights
+    with self.assertRaisesRegex(
+        RuntimeError,
+        "Failed to recover the original quantized values from dequantized"
+        " values. Max diff between recovered and original values: ",
+    ):
+      dequantized_weight_recovery.get_zp_scale_from_2d_dequantized_symmetric_weights(
+          dequant_vals, quantized_dimension
+      )
+if __name__ == "__main__":
+  googletest.main()

ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py CHANGED Viewed

@@ -15,6 +15,7 @@
 """Performs naive min/max uniform quantization."""
+from collections.abc import Sequence
 from typing import Any, Optional
 import numpy as np
 from ai_edge_quantizer import qtyping
@@ -36,55 +37,133 @@ def _init_tensor_min_max(
   if tensor_data is None:
     return {}
   else:
+    weight_tensor_config = op_info.op_quant_config.weight_tensor_config
     quantized_dim = None
+    if weight_tensor_config is not None and (
+        weight_tensor_config.granularity == qtyping.QuantGranularity.CHANNELWISE
+        or weight_tensor_config.granularity
+        == qtyping.QuantGranularity.BLOCKWISE
+    ):
+      quantized_dim = common_utils.get_weight_quantized_dim(
+          op_info, tensor_data
+      )
     if (
-        op_info.op_quant_config.weight_tensor_config is not None
-        and op_info.op_quant_config.weight_tensor_config.granularity
+        weight_tensor_config is not None
+        and weight_tensor_config.granularity
         == qtyping.QuantGranularity.BLOCKWISE
     ):
-      # TODO(b/346612503): emulate subchannel only supports fully connected,
-      # will skip special handling. Once we have a spec, we can change this.
-      block_size = op_info.op_quant_config.weight_tensor_config.block_size
-      # assuming tensor is 2D, which is correct for FULLY_CONNECTED
-      transposed_tensor_data = np.transpose(tensor_data, (1, 0))
-      if transposed_tensor_data.shape[0] % block_size:
-        raise ValueError(
-            f"Block size {block_size} does not divide channel dimension"
-            f" {transposed_tensor_data.shape[0]}."
-        )
-      reshaped_tensor_data = np.reshape(
-          transposed_tensor_data,
-          (
-              1,
-              int(transposed_tensor_data.shape[0] / block_size),
-              block_size,
-              transposed_tensor_data.shape[1],
-          ),
+      reshaped_data, reduce_dims = _reshape_data_for_blockwise(
+          tensor_data,
+          quantized_dim,
+          weight_tensor_config.block_size,
       )
       return {
-          "min": np.min(reshaped_tensor_data, axis=(0, 1, 2), keepdims=True),
-          "max": np.max(reshaped_tensor_data, axis=(0, 1, 2), keepdims=True),
+          "min": np.min(reshaped_data, axis=reduce_dims, keepdims=False),
+          "max": np.max(reshaped_data, axis=reduce_dims, keepdims=False),
       }
-    if (
-        op_info.op_quant_config.weight_tensor_config is not None
-        and op_info.op_quant_config.weight_tensor_config.granularity
-        == qtyping.QuantGranularity.CHANNELWISE
-    ):
-      if op_info.op_name == _TFLOpName.BATCH_MATMUL:
-        quantized_dim = common_utils.get_bmm_weight_quantized_dim(
-            tensor_data, adj_y=op_info.op.builtinOptions.adjY
-        )
-      else:
-        quantized_dim = tfl_flatbuffer_utils.TFL_OP_TO_WEIGHT_QUANTIZED_DIM.get(
-            op_info.op_name, None
-        )
-    reduce_dims = common_utils.get_reduce_dims(
-        quantized_dim, list(tensor_data.shape)
-    )
-    return {
-        "min": np.min(tensor_data, axis=reduce_dims, keepdims=True),
-        "max": np.max(tensor_data, axis=reduce_dims, keepdims=True),
-    }
+    else:
+      reduce_dims = common_utils.get_reduce_dims(
+          quantized_dim, tensor_data.shape
+      )
+      return {
+          "min": np.min(tensor_data, axis=reduce_dims, keepdims=True),
+          "max": np.max(tensor_data, axis=reduce_dims, keepdims=True),
+      }
+def _get_tensor_shape_for_blockwise(
+    tensor_shape: Sequence[int], quantized_dim: int, block_size: int
+) -> list[int]:
+  """Gets the tensor shape for blockwise quantization.
+  This function splits the quantized dimension of size `dim` into two
+  consecutive dimensions: `dim // block_size` (the number of blocks) followed
+  by `block_size`. Hence, min/max of the tensor can be calculated for each
+  block using existing functions.
+  Args:
+    tensor_shape: The original shape of the tensor.
+    quantized_dim: The dimension to be quantized blockwise.
+    block_size: The size of the block.
+  Returns:
+    The new tensor shape for calculating scale and zp for blockwise
+    quantization.
+  Raises:
+    ValueError: If `block_size` is not positive or does not evenly divide the
+      quantized dimension.
+  """
+  new_shape = []
+  for index, val in enumerate(tensor_shape):
+    if index != quantized_dim:
+      new_shape.append(val)
+      continue
+    # Fail early with a clear message instead of silently truncating the
+    # block count and hitting an opaque reshape error later.
+    if block_size <= 0:
+      raise ValueError(f"Block size must be positive. Got {block_size}.")
+    if val % block_size:
+      raise ValueError(
+          f"Block size {block_size} does not divide quantized dimension"
+          f" {val}."
+      )
+    # Integer floor division avoids the float round trip of int(val / size).
+    new_shape.append(int(val // block_size))
+    new_shape.append(block_size)
+  return new_shape
+def _reshape_data_for_blockwise(
+    tensor_data: np.ndarray, quantized_dim: int, block_size: int
+) -> tuple[np.ndarray, int]:
+  """Splits the quantized dimension of `tensor_data` into blocks.
+  Args:
+    tensor_data: The original tensor data.
+    quantized_dim: The dimension to be quantized blockwise.
+    block_size: The size of the block.
+  Returns:
+    A tuple of the reshaped tensor data and the axis to reduce over, which is
+    `quantized_dim + 1` (the axis right after the quantized one).
+  """
+  blockwise_shape = _get_tensor_shape_for_blockwise(
+      tensor_data.shape, quantized_dim, block_size
+  )
+  blockwise_data = tensor_data.reshape(blockwise_shape)
+  reduce_axis = quantized_dim + 1
+  return blockwise_data, reduce_axis
+def _broadcast_scale_zp_for_blockwise(
+    tensor_content: np.ndarray,
+    quant_params: qtyping.UniformQuantParams,
+) -> qtyping.UniformQuantParams:
+  """Expands blockwise scale and zp to the full shape of the tensor.
+  Args:
+    tensor_content: The original tensor data.
+    quant_params: The quantization parameters.
+  Returns:
+    New quantization parameters whose scale and zero point have the same
+    shape as `tensor_content`, for correct constant quantization.
+  Raises:
+    ValueError: If the quantized dimension is missing, or the block size is
+      missing or not positive.
+  """
+  if quant_params.quantized_dimension is None:
+    raise ValueError("Quantized dimension must be specified.")
+  if quant_params.block_size is None or quant_params.block_size <= 0:
+    raise ValueError("Block size must be specified and positive.")
+  quantized_dim = quant_params.quantized_dimension
+  blockwise_shape = _get_tensor_shape_for_blockwise(
+      tensor_content.shape, quantized_dim, quant_params.block_size
+  )
+  def _expand_to_tensor_shape(param: np.ndarray) -> np.ndarray:
+    # Insert an axis after the quantized dimension, broadcast over the
+    # blockwise shape, then collapse back to the original tensor shape.
+    with_block_axis = np.expand_dims(param, quantized_dim + 1)
+    broadcasted = np.broadcast_to(with_block_axis, blockwise_shape)
+    return np.reshape(broadcasted, tensor_content.shape)
+  expanded_scale = _expand_to_tensor_shape(quant_params.scale)
+  expanded_zp = _expand_to_tensor_shape(quant_params.zero_point)
+  return qtyping.UniformQuantParams(
+      scale=expanded_scale,
+      zero_point=expanded_zp,
+      num_bits=quant_params.num_bits,
+      symmetric=quant_params.symmetric,
+      quantized_dimension=quantized_dim,
+      block_size=quant_params.block_size,
+  )
 def get_tensor_quant_params(
@@ -138,34 +217,34 @@ def get_tensor_quant_params(
       tensor_quant_config.symmetric,
   )
   quantized_dim = None
-  if tensor_quant_config.granularity == qtyping.QuantGranularity.CHANNELWISE:
-    if op_info.op_name == _TFLOpName.BATCH_MATMUL:
-      quantized_dim = common_utils.get_bmm_weight_quantized_dim(
-          tensor_content, adj_y=op_info.op.builtinOptions.adjY
-      )
-    else:
-      quantized_dim = tfl_flatbuffer_utils.TFL_OP_TO_WEIGHT_QUANTIZED_DIM[
-          op_info.op_name
-      ]
+  if (
+      tensor_quant_config.granularity == qtyping.QuantGranularity.CHANNELWISE
+      or tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE
+  ):
+    quantized_dim = common_utils.get_weight_quantized_dim(
+        op_info, tensor_content
+    )
   quant_params = qtyping.UniformQuantParams(
       scale=scale,
       zero_point=zp,
       num_bits=tensor_quant_config.num_bits,
       symmetric=tensor_quant_config.symmetric,
       quantized_dimension=quantized_dim,
+      block_size=tensor_quant_config.block_size,
   )
   if tensor_content is None:
     return quant_params
+  # The reshaping for blockwise quantization is unique hence we do this here
+  # to avoid unexpected broadcast behavior downstream.
   if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
-    quantized_vars = (
-        uniform_quantize_tensor.uniform_quantize_for_emulated_subchannel(
-            tensor_content, quant_params, tensor_quant_config.block_size
-        )
-    )
-  else:
-    quantized_vars = uniform_quantize_tensor.uniform_quantize(
+    quant_params = _broadcast_scale_zp_for_blockwise(
         tensor_content, quant_params
     )
+  quantized_vars = uniform_quantize_tensor.uniform_quantize(
+      tensor_content, quant_params
+  )
   # Update with quantized values.
   return qtyping.UniformQuantParams(
       scale=scale,
@@ -174,6 +253,7 @@ def get_tensor_quant_params(
       symmetric=tensor_quant_config.symmetric,
       quantized_dimension=quantized_dim,
       quantized_data=quantized_vars,
+      block_size=tensor_quant_config.block_size,
   )
@@ -189,8 +269,8 @@ def init_qsvs(
   Args:
     op_info: Aggregated information about the op (e.g., quantization config).
     graph_info: Graph information needed to perform quantization for the op.
-    inputs_to_ignore: Input tensor indices to ignore.
-    outputs_to_ignore: Output tensor indices to ignore.
+    inputs_to_ignore: Operand indices to ignore.
+    outputs_to_ignore: Result indices to ignore.
   Returns:
     QSVs.
@@ -199,8 +279,8 @@ def init_qsvs(
   inputs_to_ignore = inputs_to_ignore or []
   outputs_to_ignore = outputs_to_ignore or []
-  for i, tensor_idx in enumerate(op_info.op.inputs):
-    if tensor_idx != -1 and i not in inputs_to_ignore:
+  for opr_idx, tensor_idx in enumerate(op_info.op.inputs):
+    if tensor_idx != -1 and opr_idx not in inputs_to_ignore:
       tensor = graph_info.subgraph_tensors[tensor_idx]
       tensor_name = tfl_flatbuffer_utils.get_tensor_name(tensor)
       tensor_data = tfl_flatbuffer_utils.get_tensor_data(
@@ -210,8 +290,8 @@ def init_qsvs(
           tensor_data,
           op_info,
       )
-  for i, tensor_idx in enumerate(op_info.op.outputs):
-    if tensor_idx != -1 and i not in outputs_to_ignore:
+  for res_idx, tensor_idx in enumerate(op_info.op.outputs):
+    if tensor_idx != -1 and res_idx not in outputs_to_ignore:
       tensor = graph_info.subgraph_tensors[tensor_idx]
       tensor_name = tfl_flatbuffer_utils.get_tensor_name(tensor)
       tensor_data = tfl_flatbuffer_utils.get_tensor_data(

ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py CHANGED Viewed

@@ -14,6 +14,7 @@
 # ==============================================================================
 import os
+from typing import cast
 from absl.testing import parameterized
 import numpy as np
@@ -21,6 +22,7 @@ import numpy as np
 from tensorflow.python.platform import googletest
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer.algorithms.uniform_quantize import naive_min_max_quantize
+from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor
 from ai_edge_quantizer.utils import test_utils
 from ai_edge_quantizer.utils import tfl_flatbuffer_utils
@@ -157,6 +159,49 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
     self.assertNotIn("arith.constant1", op_qsvs)
     self.assertNotIn("arith.constant2", op_qsvs)
+  def test_get_tensor_quant_params_for_blockwise_weight(self):
+    """Checks quant params produced for a blockwise-quantized FC weight."""
+    subgraph0 = self._test_model.subgraphs[0]
+    subgraph_op_index = 3
+    fc_op = subgraph0.operators[subgraph_op_index]
+    # 4-bit symmetric blockwise config with blocks of two elements.
+    weight_tensor_config = _TensorQuantConfig(
+        num_bits=4,
+        symmetric=True,
+        granularity=qtyping.QuantGranularity.BLOCKWISE,
+        block_size=2,
+    )
+    op_info = qtyping.OpInfo(
+        op=fc_op,
+        op_name=_TFLOpName.FULLY_CONNECTED,
+        subgraph_op_index=subgraph_op_index,
+        op_quant_config=qtyping.OpQuantizationConfig(
+            weight_tensor_config=weight_tensor_config,
+        ),
+    )
+    # Hand-written (4, 2) weight used instead of the model's real tensor.
+    test_data = np.array([[-7, 7], [4, -4], [4, -4], [7, 7]])
+    quant_params = naive_min_max_quantize.get_tensor_quant_params(
+        op_info=op_info,
+        tensor_quant_config=weight_tensor_config,
+        tensor_content=test_data,
+    )
+    scale = quant_params.scale
+    zp = quant_params.zero_point
+    # Reference scale/zp computed from hand-written (2, 2) min/max arrays.
+    expected_zp, expected_scale = (
+        uniform_quantize_tensor.tensor_zp_scale_from_min_max(
+            min_value=np.array([[-7, 4], [-4, -4]]),
+            max_value=np.array([[4, 7], [7, 7]]),
+            num_bits=4,
+            symmetric=True,
+        )
+    )
+    self.assertTrue(np.array_equal(zp, expected_zp))
+    self.assertTrue(np.array_equal(scale, expected_scale))
+    self.assertIsNotNone(quant_params.quantized_data)
+    # Quantized data must keep the original (un-blocked) tensor shape.
+    self.assertTupleEqual(
+        cast(np.ndarray, quant_params.quantized_data).shape, test_data.shape
+    )
+    self.assertEqual(quant_params.block_size, 2)
+    self.assertEqual(quant_params.quantized_dimension, 0)
 if __name__ == "__main__":
   googletest.main()

ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py CHANGED Viewed

@@ -119,55 +119,6 @@ def fix_quantization_params_rank(
   )
-def uniform_quantize_for_emulated_subchannel(
-    tensor_data: np.ndarray,
-    quantization_params: qtyping.UniformQuantParams,
-    block_size: int,
-) -> np.ndarray:
-  """Uniform quantize a tensor for emulated subchannel.
-  emulation involves reshaping the tensor and quantizing value on a different
-  axes. Hence, we use a different quantization function.
-  Args:
-    tensor_data: The tensor to be quantized.
-    quantization_params: The quantization parameters.
-    block_size: The block size of the emulated subchannel.
-  Returns:
-    The quantized tensor.
-  """
-  scales, zero_points = (
-      quantization_params.scale,
-      quantization_params.zero_point,
-  )
-  transposed_and_reshaped_tensor = np.reshape(
-      np.transpose(tensor_data, (1, 0)),
-      (
-          1,
-          int(tensor_data.shape[1] / block_size),
-          block_size,
-          tensor_data.shape[0],
-      ),
-  )
-  inverse_scales = 1.0 / scales
-  qtype = IntType(quantization_params.num_bits, signed=True)
-  # Symmetric means narrow range (e.g., -127 to 127)
-  narrow_range = quantization_params.symmetric
-  required_dtype = np.signedinteger if qtype.signed else np.unsignedinteger
-  if not np.issubdtype(zero_points.dtype, required_dtype):
-    raise ValueError(
-        f"zero_points need to be {required_dtype}."
-        f" But the actual type is {zero_points.dtype}."
-    )
-  ret = (
-      np.multiply(transposed_and_reshaped_tensor, inverse_scales) + zero_points
-  )
-  ret = _round_and_clip(ret, qtype, narrow_range)
-  ret = assign_quantized_type(ret, qtype)
-  return ret
 def uniform_quantize(
     tensor_data: np.ndarray,
     quantization_params: qtyping.UniformQuantParams,
@@ -369,3 +320,14 @@ def _is_valid_quantization_params(
         f" ({zero_point_rank}) must be the same as the tensor rank"
         f" ({tensor_rank})."
     )
+  if (
+      quantization_params.block_size != 0
+      and tensor_data.shape[quantization_params.quantized_dimension]
+      % quantization_params.block_size
+      != 0
+  ):
+    raise ValueError(
+        "Tensor dimension must be divisible by block size. Got dimension:"
+        f" {tensor_data.shape[quantization_params.quantized_dimension]} and"
+        f" block size: {quantization_params.block_size}"
+    )

ai_edge_quantizer/algorithms/utils/common_utils.py CHANGED Viewed

@@ -906,9 +906,30 @@ def get_tensor_transformation_params(
   )
+def get_weight_quantized_dim(op_info: qtyping.OpInfo, tensor_data: np.ndarray):
+  """Looks up the quantized dimension of an op's weight tensor.
+  Args:
+    op_info: Aggregated information about the op (e.g., quantization config).
+    tensor_data: The weight tensor data.
+  Returns:
+    The quantized dimension for the weight tensor. For ops other than
+    BATCH_MATMUL this is None when the op has no entry in
+    `TFL_OP_TO_WEIGHT_QUANTIZED_DIM`.
+  """
+  if op_info.op_name != _TFLOpName.BATCH_MATMUL:
+    # Regular ops use the static per-op lookup table.
+    return tfl_flatbuffer_utils.TFL_OP_TO_WEIGHT_QUANTIZED_DIM.get(
+        op_info.op_name, None
+    )
+  # For batch matmul the dimension is derived from the weight data and the
+  # op's adjY option.
+  return get_bmm_weight_quantized_dim(
+      tensor_data, adj_y=op_info.op.builtinOptions.adjY
+  )
 def get_reduce_dims(
     quantized_dim: Optional[int],
-    tensor_shape: list[int],
+    tensor_shape: Sequence[int],
 ) -> Optional[tuple[int, ...]]:
   """Get the reduce dims of a tensor for the given quantized dimension."""
   if quantized_dim is None:

ai_edge_quantizer/calibrator.py CHANGED Viewed

@@ -283,9 +283,9 @@ class Calibrator:
         op_info = qtyping.OpInfo(op, op_key, subgraph_op_id, op_quant_config)
         # Ignore the input tensors where any dimension of the shape is 0.
         inputs_to_ignore = [
-            idx
-            for idx in op.inputs
-            if not np.all(graph_info.subgraph_tensors[idx].shape)
+            opr_idx
+            for opr_idx, tensor_idx in enumerate(op.inputs)
+            if not np.all(graph_info.subgraph_tensors[tensor_idx].shape)
         ]
         op_qsvs = qsv_init_func(op_info, graph_info, inputs_to_ignore)
         # Step3: initialize tensor qsvs.

{ai_edge_quantizer_nightly-0.0.1.dev20250219.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-quantizer-nightly
-Version: 0.0.1.dev20250219
+Version: 0.0.1.dev20250221
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI

{ai_edge_quantizer_nightly-0.0.1.dev20250219.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ ai_edge_quantizer/__init__.py,sha256=4pFSkukSwahYyzwqia0yPRyz8TnFQfGRthVJhYpMWas
 ai_edge_quantizer/algorithm_manager.py,sha256=dOW_ex_KShoFaib7auISgsx5wRtxpaxhWrwA2nWgito,6003
 ai_edge_quantizer/algorithm_manager_api.py,sha256=u903TG0s1uIDhJqfeJne3CFl8A93phZrwgV2-hwdcXU,9247
 ai_edge_quantizer/algorithm_manager_api_test.py,sha256=tL_ozYFTsOPX8qGcti0KTz37nVsCxf0SSG5C45SyT-g,7319
-ai_edge_quantizer/calibrator.py,sha256=IqNMnKpZy24Fl_-94AIsp77KzqypMeyHkkxitMBxl58,11325
+ai_edge_quantizer/calibrator.py,sha256=2J-bX0k09A7vZSRnO3eP49YO2uBMUQh6-sk3JRz9fGQ,11363
 ai_edge_quantizer/calibrator_test.py,sha256=hQk61YUvw1X02CDVAddm2n6Dnyk9GWoDgSpO6nuSJiY,11889
 ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
 ai_edge_quantizer/default_policy.py,sha256=TQ9yY8jtrSpMsTBsTyKW6TY-voGH_psvwGZoFglAbiA,9079
@@ -30,12 +30,14 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=s64
 ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
 ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=wPZevOuowJczG9t4Gynzv7tIeH6zhOnaKPsfr2K_fsk,21259
 ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=qMmKbWqxrCoVKbLKHn9WuCrGKPfHkEyU0Nmhokh8Qeo,2597
-ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=g69t3oTl1fdphsWLIxVNjuIIIA3lLQ56Vt8Qgo7SocU,9946
-ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=3UV1I-to_u6NE_yKoXOVUOQgil-tMY6VQ_L273lMfqQ,5949
-ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=B-s1KMfb9tqvaDhHJV-M2zRR078z5Mwv-P9h77S3Mis,12229
+ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=LfwZgZwkPZVZBS6VEwaskLNw3BoeymIjxAVw3ZkjjsI,4597
+ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=6FPYx4M2-W-SSV6iqQdggd5q5cnciqFI7Ci3Wo5Wyog,4566
+ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=fBqSidFVKZmdO-xIFfwZPdIN1eLJjOik8mUZxZj2ljk,12149
+ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=Hok09dloSyBfD0oDM5VABdSZjM9JWSQhm_hDHNbFujA,7640
+ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=Q_vx7YN7KMpjubsngxRdJ4bfdSIV-gmXjtVuxIkZuX4,11078
 ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=WZ4_bvbG999nOtCIqn7mrMnpRdoJOdiyzxhsL_QiPHA,11395
 ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
-ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=Z2ziBeADwov8rRN4pRX6Qr2L_agu8RRAbOKw0_yLG7E,33936
+ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=nlLpUY1LTO9ZC3X0FjQ0EArCZekGUnv2-IF0AUu5zNM,34582
 ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
 ai_edge_quantizer/transformations/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
 ai_edge_quantizer/transformations/dequant_insert.py,sha256=sL1LHFVzBDSd9jgrzlHz38LWU0bwmVX7iBkaNcui0ts,3566
@@ -58,8 +60,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=SM8H4i7Jq_nfdsJpImopHndN
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=Op3JxtOqlrjzmYF18jnnstL1k9xiY9kKJ8S2vklKGkc,11327
 ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
 ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
-ai_edge_quantizer_nightly-0.0.1.dev20250219.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ai_edge_quantizer_nightly-0.0.1.dev20250219.dist-info/METADATA,sha256=I42-1mg57Fu0eGeUYlGuWKyXvQuQ7l_PzzcOCcc7CAk,1484
-ai_edge_quantizer_nightly-0.0.1.dev20250219.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_quantizer_nightly-0.0.1.dev20250219.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
-ai_edge_quantizer_nightly-0.0.1.dev20250219.dist-info/RECORD,,
+ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info/METADATA,sha256=3so5mv89zJCWrCAs77PXQqIq3sGmyue7jkZsmIyO_mQ,1484
+ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info/RECORD,,

{ai_edge_quantizer_nightly-0.0.1.dev20250219.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info}/LICENSE RENAMED Viewed

File without changes

{ai_edge_quantizer_nightly-0.0.1.dev20250219.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info}/WHEEL RENAMED Viewed

File without changes

{ai_edge_quantizer_nightly-0.0.1.dev20250219.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info}/top_level.txt RENAMED Viewed

File without changes

ai-edge-quantizer-nightly 0.0.1.dev20250219__py3-none-any.whl → 0.0.1.dev20250221__py3-none-any.whl

ai-edge-quantizer-nightly 0.0.1.dev20250219__py3-none-any.whl → 0.0.1.dev20250221__py3-none-any.whl