PyPI - ai-edge-quantizer-nightly - Versions diffs - 0.0.1.dev20250221__py3-none-any.whl → 0.0.1.dev20250222__py3-none-any.whl - Mend

ai-edge-quantizer-nightly 0.0.1.dev20250221__py3-none-any.whl → 0.0.1.dev20250222__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py CHANGED Viewed

@@ -15,11 +15,13 @@
 """Recovers quantized weights from dequantized weights (often from QAT)."""
-from typing import Optional
+import dataclasses
+from typing import Any, Optional
 import numpy as np
 from ai_edge_quantizer import qtyping
+from ai_edge_quantizer.algorithms.uniform_quantize import naive_min_max_quantize
 from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor
+from ai_edge_quantizer.algorithms.utils import common_utils
 ALGORITHM_KEY = "dequantized_weight_recovery"
 _TFLOpName = qtyping.TFLOperationName
@@ -27,24 +29,26 @@ _QuantTransformation = qtyping.QuantTransformation
 _IntType = uniform_quantize_tensor.IntType
-def _validate_recovered_scale(
-    dequant_vals: np.ndarray, scale: np.ndarray, tol: float = 1e-4
+def _validate_recovered_weights(
+    original_vals: np.ndarray,
+    quant_vals: np.ndarray,
+    scale: np.ndarray,
+    tol: float = 1e-4,
 ):
-  """Validates if the recovered quantized values match the dequantized values.
+  """Validates if recovered weights (from the quantized values) are close enough to the original ones.
   Args:
-      dequant_vals: The dequantized weight values.
-      scale: The scale values.
-      tol: The tolerance for the difference between the recovered and original
-        values.
+    original_vals: Original values before quantization.
+    quant_vals: Quantized values.
+    scale: Scale used for quantization.
+    tol: Tolerance for the difference between original and recovered values.
   Raises:
-      RuntimeError: If the maximum difference between the recovered and
-        original values exceeds the tolerance.
+    RuntimeError: If the maximum difference between original and recovered
+    values exceeds the tolerance.
   """
-  quant_vals = np.round(dequant_vals / scale)  # no need to clamp.
   recovered_vals = quant_vals * scale
-  diff = np.abs(recovered_vals - dequant_vals).flatten()
+  diff = np.abs(recovered_vals - original_vals).flatten()
   max_diff = diff.max()
   if max_diff > tol:
     raise RuntimeError(
@@ -127,5 +131,120 @@ def get_zp_scale_from_2d_dequantized_symmetric_weights(
     )
   zero_points = np.zeros_like(scales, dtype=np.int32)
-  _validate_recovered_scale(dequant_vals, scales)
   return zero_points, scales
+def get_tensor_quant_params(
+    op_info: qtyping.OpInfo,
+    tensor_quant_config: qtyping.TensorQuantizationConfig,
+    tensor_content: Optional[np.ndarray] = None,
+    tensor_qsv: Optional[dict[str, Any]] = None,
+) -> qtyping.UniformQuantParams:
+  """Get the quantization parameters for a tensor.
+  Args:
+    op_info: Aggregated information about the op (e.g., quantization config).
+    tensor_quant_config: The quantization config for the tensor.
+    tensor_content: The content of the tensor.
+    tensor_qsv: A dictionary containing the min/max of the tensor.
+  Returns:
+    The quantization parameters for the tensor.
+  Raises:
+    ValueError: If the quantization granularity is blockwise, or if the tensor
+    is not a 2D symmetric weight tensor.
+  """
+  # Fallback to naive_min_max_quantize.py for non-weight tensors.
+  if tensor_content is None:
+    return naive_min_max_quantize.get_tensor_quant_params(
+        op_info, tensor_quant_config, tensor_content, tensor_qsv
+    )
+  if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
+    raise ValueError(
+        "Blockwise quantization is not supported for dequantized weight"
+        " recovery."
+    )
+  if tensor_content.ndim != 2 or not tensor_quant_config.symmetric:
+    raise ValueError(
+        "Only 2D symmetric weights are supported for dequantized weight"
+        " recovery."
+    )
+  quantized_dim = None
+  if tensor_quant_config.granularity == qtyping.QuantGranularity.CHANNELWISE:
+    quantized_dim = common_utils.get_weight_quantized_dim(
+        op_info, tensor_content
+    )
+  zp, scale = get_zp_scale_from_2d_dequantized_symmetric_weights(
+      dequant_vals=tensor_content,
+      quantized_dimension=quantized_dim,
+  )
+  quant_params = qtyping.UniformQuantParams(
+      scale=scale,
+      zero_point=zp,
+      num_bits=tensor_quant_config.num_bits,
+      symmetric=tensor_quant_config.symmetric,
+      quantized_dimension=quantized_dim,
+  )
+  quantized_vars = uniform_quantize_tensor.uniform_quantize(
+      tensor_content, quant_params
+  )
+  _validate_recovered_weights(tensor_content, quantized_vars, scale)
+  return dataclasses.replace(quant_params, quantized_data=quantized_vars)
+def calibrate(
+    tfl_op: Any,
+    graph_info: qtyping.GraphInfo,
+    tensor_content_map: dict[str, np.ndarray],
+    inputs_to_ignore: Optional[list[int]] = None,
+    outputs_to_ignore: Optional[list[int]] = None,
+) -> dict[str, qtyping.QSV]:
+  """Collect quantization statistics variable (QSV, e.g., min/max) for the op.
+  Args:
+    tfl_op: The tfl operation.
+    graph_info: Graph information needed to perform quantization for the op.
+    tensor_content_map: A map of tensor name to tensor content.
+    inputs_to_ignore: Input tensor indices to ignore.
+    outputs_to_ignore: Output tensor indices to ignore.
+  Returns:
+    A dictionary with key as tensor name and value as the collected QSV.
+  """
+  # Reuse the min/max calibration algorithm from naive_min_max_quantize.py since
+  # only weights need to be handled differently.
+  return naive_min_max_quantize.min_max_calibrate(
+      tfl_op,
+      graph_info,
+      tensor_content_map,
+      inputs_to_ignore,
+      outputs_to_ignore,
+  )
+def init_qsvs(
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    inputs_to_ignore: Optional[list[int]] = None,
+    outputs_to_ignore: Optional[list[int]] = None,
+) -> qtyping.QSV:
+  """Initialize the QSVs.
+  Args:
+    op_info: Aggregated information about the op (e.g., quantization config).
+    graph_info: Graph information needed to perform quantization for the op.
+    inputs_to_ignore: Input tensor indices to ignore.
+    outputs_to_ignore: Output tensor indices to ignore.
+  Returns:
+    QSVs.
+  """
+  # Reuse the min/max calibration algorithm from naive_min_max_quantize.py since
+  # only weights need to be handled differently.
+  return naive_min_max_quantize.init_qsvs(
+      op_info, graph_info, inputs_to_ignore, outputs_to_ignore
+  )

ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py CHANGED Viewed

@@ -19,7 +19,6 @@ import numpy as np
 from tensorflow.python.platform import googletest
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer.algorithms.uniform_quantize import dequantized_weight_recovery
-from ai_edge_quantizer.utils import test_utils
 _TFLOpName = qtyping.TFLOperationName
 _TensorQuantConfig = qtyping.TensorQuantizationConfig
@@ -31,9 +30,15 @@ class DequantizedWeightRecoveryTest(parameterized.TestCase):
     super().setUp()
     self._dummy_quantized_weights = np.array([
         [1, -2, 3, 4],
-        [6, 7, -8, 5],
-        [-1, 8, -7, -4],
+        [6, 7, -6, 5],
+        [2, -6, -7, -4],
     ])
+    self._dummy_op_info = qtyping.OpInfo(
+        op=None,
+        op_name=_TFLOpName.FULLY_CONNECTED,
+        subgraph_op_index=0,
+        op_quant_config=qtyping.OpQuantizationConfig(),
+    )
   @parameterized.named_parameters(
       dict(
@@ -96,18 +101,104 @@ class DequantizedWeightRecoveryTest(parameterized.TestCase):
   @parameterized.named_parameters(
       dict(
-          testcase_name="recovery_on_wrong_dimension",
-          quantized_dimension=1,  # should be 0.
+          testcase_name="tensor-recovery-tensor-quant",
+          tensor_quant_config=qtyping.TensorQuantizationConfig(
+              num_bits=4,
+              granularity=qtyping.QuantGranularity.TENSORWISE,
+          ),
+          scale=np.array([0.1875]).reshape(1, 1),
+      ),
+      dict(
+          testcase_name="channel-recovery-channel-quant",
+          tensor_quant_config=qtyping.TensorQuantizationConfig(
+              num_bits=4,
+              granularity=qtyping.QuantGranularity.CHANNELWISE,
+          ),
           scale=np.array([0.1875, 1e-4, 12.3]).reshape(3, 1),
       ),
       dict(
-          testcase_name="tensor_recovery_for_channel_quantization",
-          quantized_dimension=None,  # should be 0.
+          testcase_name="channel-recovery-excessive-bits",
+          tensor_quant_config=qtyping.TensorQuantizationConfig(
+              num_bits=8,  # int4 is enough for the sample weights.
+              granularity=qtyping.QuantGranularity.CHANNELWISE,
+          ),
+          scale=np.array([0.1875, 1e-4, 12.3]).reshape(3, 1),
+      ),
+  )
+  def test_get_tensor_quant_params_success_with_dequantized_weights(
+      self, tensor_quant_config, scale
+  ):
+    dequant_vals = scale * self._dummy_quantized_weights
+    tensor_quant_params = dequantized_weight_recovery.get_tensor_quant_params(
+        self._dummy_op_info, tensor_quant_config, dequant_vals
+    )
+    if tensor_quant_config.granularity is qtyping.QuantGranularity.TENSORWISE:
+      self.assertIsNone(tensor_quant_params.quantized_dimension)
+    else:
+      self.assertEqual(tensor_quant_params.quantized_dimension, 0)
+    recovered_scale = tensor_quant_params.scale
+    self.assertEqual(recovered_scale.shape, scale.shape)
+    self.assertSequenceAlmostEqual(recovered_scale.flatten(), scale.flatten())
+    # Zero point should be zero for symmetric quantization.
+    recovered_zp = tensor_quant_params.zero_point
+    self.assertEqual(np.sum(recovered_zp), 0)
+    self.assertEqual(recovered_zp.shape, scale.shape)
+  def test_get_tensor_quant_params_success_with_qsv(self):
+    # Fall back to naive_min_max_quantize.py for non-weight tensors.
+    tensor_quant_params = dequantized_weight_recovery.get_tensor_quant_params(
+        self._dummy_op_info,
+        tensor_quant_config=qtyping.TensorQuantizationConfig(
+            num_bits=8,
+            granularity=qtyping.QuantGranularity.TENSORWISE,
+        ),
+        tensor_qsv={
+            "min": np.array([-1]),
+            "max": np.array([1]),
+        },
+    )
+    self.assertIsNone(tensor_quant_params.quantized_dimension)
+    recovered_scale = tensor_quant_params.scale
+    self.assertEqual(recovered_scale.shape, (1,))
+    self.assertSequenceAlmostEqual(recovered_scale.flatten(), [1 / 127])
+    # Zero point should be zero for symmetric quantization.
+    recovered_zp = tensor_quant_params.zero_point
+    self.assertEqual(np.sum(recovered_zp), 0)
+    self.assertEqual(recovered_zp.shape, (1,))
+  @parameterized.named_parameters(
+      dict(
+          testcase_name="recovery_on_wrong_dimension",
+          tensor_quant_config=qtyping.TensorQuantizationConfig(
+              num_bits=4,
+              granularity=qtyping.QuantGranularity.CHANNELWISE,
+          ),
           scale=np.array([0.003, 1.234, 12.65, 2.24e-4]).reshape(1, 4),
       ),
+      dict(
+          testcase_name="tensor_recovery_for_channel_quantization",
+          tensor_quant_config=qtyping.TensorQuantizationConfig(
+              num_bits=4,
+              granularity=qtyping.QuantGranularity.TENSORWISE,
+          ),
+          scale=np.array([0.1875, 1e-2, 12.3]).reshape(3, 1),
+      ),
+      dict(
+          testcase_name="insufficient_bits",
+          tensor_quant_config=qtyping.TensorQuantizationConfig(
+              num_bits=2,
+              granularity=qtyping.QuantGranularity.CHANNELWISE,
+          ),
+          scale=np.array([0.1875, 1e-2, 12.3]).reshape(3, 1),
+      ),
   )
-  def test_tensor_zp_scale_from_2d_dequantized_symmetric_weights_raises_error_big_recovery_error(
-      self, quantized_dimension, scale
+  def test_get_tensor_quant_params_raises_error_big_recovery_error(
+      self, tensor_quant_config, scale
   ):
     dequant_vals = scale * self._dummy_quantized_weights
     with self.assertRaisesRegex(
@@ -115,8 +206,8 @@ class DequantizedWeightRecoveryTest(parameterized.TestCase):
         "Failed to recover the original quantized values from dequantized"
         " values. Max diff between recovered and original values: ",
     ):
-      dequantized_weight_recovery.get_zp_scale_from_2d_dequantized_symmetric_weights(
-          dequant_vals, quantized_dimension
+      dequantized_weight_recovery.get_tensor_quant_params(
+          self._dummy_op_info, tensor_quant_config, dequant_vals
       )

{ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250222.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-quantizer-nightly
-Version: 0.0.1.dev20250221
+Version: 0.0.1.dev20250222
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI

{ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250222.dist-info}/RECORD RENAMED Viewed

@@ -30,8 +30,8 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=s64
 ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
 ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=wPZevOuowJczG9t4Gynzv7tIeH6zhOnaKPsfr2K_fsk,21259
 ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=qMmKbWqxrCoVKbLKHn9WuCrGKPfHkEyU0Nmhokh8Qeo,2597
-ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=LfwZgZwkPZVZBS6VEwaskLNw3BoeymIjxAVw3ZkjjsI,4597
-ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=6FPYx4M2-W-SSV6iqQdggd5q5cnciqFI7Ci3Wo5Wyog,4566
+ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=OTXjEZ3Ctq3ffYzisX-6HwgK_DuA7uos_aap5PiIUPE,8686
+ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=y7BK11fkF63Ex_Jzg3fbIdy0D_Ca6HuvChVZR7Uwggc,8073
 ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=fBqSidFVKZmdO-xIFfwZPdIN1eLJjOik8mUZxZj2ljk,12149
 ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=Hok09dloSyBfD0oDM5VABdSZjM9JWSQhm_hDHNbFujA,7640
 ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=Q_vx7YN7KMpjubsngxRdJ4bfdSIV-gmXjtVuxIkZuX4,11078
@@ -60,8 +60,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=SM8H4i7Jq_nfdsJpImopHndN
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=Op3JxtOqlrjzmYF18jnnstL1k9xiY9kKJ8S2vklKGkc,11327
 ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
 ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
-ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info/METADATA,sha256=3so5mv89zJCWrCAs77PXQqIq3sGmyue7jkZsmIyO_mQ,1484
-ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
-ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info/RECORD,,
+ai_edge_quantizer_nightly-0.0.1.dev20250222.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.0.1.dev20250222.dist-info/METADATA,sha256=e9r1p0vAQtBGj4RIEtBbjmiyDyUVUmdNYNU8LqfDVGk,1484
+ai_edge_quantizer_nightly-0.0.1.dev20250222.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_quantizer_nightly-0.0.1.dev20250222.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.0.1.dev20250222.dist-info/RECORD,,

{ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250222.dist-info}/LICENSE RENAMED Viewed

File without changes

{ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250222.dist-info}/WHEEL RENAMED Viewed

File without changes

{ai_edge_quantizer_nightly-0.0.1.dev20250221.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250222.dist-info}/top_level.txt RENAMED Viewed

File without changes

ai-edge-quantizer-nightly 0.0.1.dev20250221__py3-none-any.whl → 0.0.1.dev20250222__py3-none-any.whl

ai-edge-quantizer-nightly 0.0.1.dev20250221__py3-none-any.whl → 0.0.1.dev20250222__py3-none-any.whl