PyPI - ai-edge-quantizer-nightly - Versions diffs - 0.0.1.dev20250302__py3-none-any.whl → 0.5.0.dev20260103__py3-none-any.whl - Mend

ai-edge-quantizer-nightly 0.0.1.dev20250302__py3-none-any.whl → 0.5.0.dev20260103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

ai_edge_quantizer/transformations/quantize_tensor.py CHANGED Viewed

@@ -16,6 +16,7 @@
 """quantize a given tensor."""
 from typing import Optional, cast
+import ml_dtypes
 import numpy as np
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer.transformations import transformation_utils
@@ -67,29 +68,6 @@ def nonlinear_quant_params_to_tflite_type(
     raise ValueError(f"Unsupported nonlinear params: {bitwidth}")
-def _pack_data(bitwidth: int, flattened_data: np.ndarray) -> np.ndarray:
-  """Pack the data to the corresponding bit width.
-  Currently only support 4 bits. If no packing is needed, the original data is
-  returned.
-  Args:
-    bitwidth: Bit width from NonLinearQuantParams.
-    flattened_data: The data to be packed.
-  Returns:
-    Packed data.
-  """
-  if bitwidth == 4:
-    even_data = flattened_data[::2] & 0x0F
-    odd_data = np.left_shift(flattened_data[1::2], 4).astype(np.uint8)
-    if odd_data.shape[0] == even_data.shape[0] - 1:
-      odd_data = np.pad(odd_data, (0, 1), constant_values=0)
-    return np.bitwise_or(even_data, odd_data)
-  else:
-    return flattened_data
 def _perform_channelwise_quantization(
     transformation_input: transformation_utils.TransformationInput,
 ) -> schema_py_generated.QuantizationParametersT():
@@ -142,26 +120,25 @@ def _perform_blockwise_quantization(
   )
   tensor = transformation_input.subgraph.tensors[transformation_input.tensor_id]
   blockwise_details = schema_py_generated.BlockwiseQuantizationT()
+  # Downcast and round the scale to fp16 with 7 bit mantissa.
   scale_tensor_id = transformation_utils.add_new_constant_tensor(
-      tensor.name + b"_scale",
-      transformation_input.quant_params.scale,
+      tensor.name + b"_scales",
+      transformation_input.quant_params.scale.astype(ml_dtypes.bfloat16).astype(
+          np.float16
+      ),
       schema_py_generated.TensorType.FLOAT16,
       transformation_input.subgraph,
       transformation_input.buffers,
   )
-  blockwise_details.scale = scale_tensor_id
+  blockwise_details.scales = scale_tensor_id
+  # Blockwise quantization does not support zero point yet, so this points to
+  # a -1 buffer index.
+  # TODO: b/404909258 - Add optional zero point to blockwise quantization.
+  blockwise_details.zeroPoints = -1
   blockwise_details.blockSize = transformation_input.quant_params.block_size
-  # blockwise quantization allows optional zero point.
-  if transformation_input.quant_params.zero_point is not None:
-    zero_point_tensor_id = transformation_utils.add_new_constant_tensor(
-        tensor.name + b"_zero_point",
-        transformation_input.quant_params.zero_point,
-        schema_py_generated.TensorType.INT32,
-        transformation_input.subgraph,
-        transformation_input.buffers,
-    )
-    blockwise_details.zeroPoint = zero_point_tensor_id
   flatbuffer_quantization.details = blockwise_details
+  # TODO: b/443830202 - Hardcoding to 0 for now.
+  flatbuffer_quantization.quantizedDimension = 0
   return flatbuffer_quantization
@@ -185,14 +162,17 @@ def quantize_tensor(
   # is not provided.
   if tensor.buffer:
     if transformation_input.quant_params.quantized_data is not None:
-      transformation_input.buffers[tensor.buffer].data = _pack_data(
-          transformation_input.quant_params.num_bits,
-          np.frombuffer(
-              cast(
-                  np.ndarray, transformation_input.quant_params.quantized_data
-              ).tobytes(),
-              dtype=np.uint8,
-          ).flatten(),
+      transformation_input.buffers[tensor.buffer].data = (
+          transformation_utils.pack_data(
+              transformation_input.quant_params.num_bits,
+              np.frombuffer(
+                  cast(
+                      np.ndarray,
+                      transformation_input.quant_params.quantized_data,
+                  ).tobytes(),
+                  dtype=np.uint8,
+              ).flatten(),
+          )
       )
   if isinstance(transformation_input.quant_params, qtyping.UniformQuantParams):

ai_edge_quantizer/transformations/quantize_tensor_test.py CHANGED Viewed

@@ -168,8 +168,9 @@ class QuantizeTensorTest(parameterized.TestCase):
     )
     self.assertEqual(quant_param.details.blockSize, 32)
     # Check if the scale and zero point tensors are inserted correctly.
-    self.assertEqual(quant_param.details.scale, 9)
-    self.assertEqual(quant_param.details.zeroPoint, 10)
+    self.assertEqual(quant_param.details.scales, 9)
+    # So far we don't have zero point in blockwise quantization.
+    self.assertEqual(quant_param.details.zeroPoints, -1)
   def test_int4_constant_packed_correctly(self):
     subgraph = self._model.subgraphs[0]

ai_edge_quantizer/transformations/transformation_utils.py CHANGED Viewed

@@ -15,8 +15,9 @@
 """Utility functions for graph transformations."""
+import copy
 import dataclasses
-from typing import Union
+from typing import Optional, Union
 import numpy as np
@@ -51,30 +52,94 @@ class TransformationInput:
 def add_op_code(
     op_code: schema_py_generated.OperatorCodeT,
     model_op_codes: list[schema_py_generated.OperatorCodeT],
+    custom_op_name: Optional[str] = None,
 ) -> int:
   """Add an op code into a model if it's not present.
   Args:
     op_code: The op code to be added.
     model_op_codes: The op codes of the model.
+    custom_op_name: The custom string of the op code. If None, the op code will
+      be added as a builtin op code.
   Returns:
     The index of the op code in the model.
   """
+  if (
+      op_code == schema_py_generated.BuiltinOperator.CUSTOM
+      and custom_op_name is None
+  ):
+    raise ValueError('Custom string is required for custom op code.')
   for i, model_op_code in enumerate(model_op_codes):
+    # If the model already has the op code, just return the index.
     if model_op_code.builtinCode == op_code:
-      return i
+      if custom_op_name is not None:
+        if model_op_code.customCode == custom_op_name:
+          return i
+      else:
+        # Built-in op
+        return i
   model_op_codes.append(schema_py_generated.OperatorCodeT())
   model_op_codes[-1].builtinCode = op_code
+  if custom_op_name is not None:
+    model_op_codes[-1].customCode = custom_op_name
   return len(model_op_codes) - 1
+def get_constant_buffer(
+    data: np.ndarray,
+    buffers: list[schema_py_generated.BufferT],
+    force_duplicate_buffer: bool = False,
+) -> int:
+  """Get the index of the constant buffer that contains the given data.
+  creating new buffer if provided data is not found in buffers list.
+  Args:
+    data: The data of the new tensor.
+    buffers: The buffers of the model.
+    force_duplicate_buffer: Whether to add a new buffer even if the same buffer
+      already exists.
+  Returns:
+    The index of the new buffer in the model.
+  """
+  if isinstance(data, np.ndarray):
+    # in the case where the data is passed from quantization_params.
+    new_data = np.frombuffer(data.tobytes(), dtype=np.uint8).flatten()
+  elif isinstance(data, bytes):
+    # in the case where the data is coming from duplicating buffers, we need to
+    # make a copy of the data to avoid having two buffers pointing to the same
+    # data.
+    new_data = copy.deepcopy(data)
+  else:
+    raise ValueError('data passed in must be either np.ndarray or bytes.')
+  # TODO: b/417811116 - we should make this more efficient.
+  if not force_duplicate_buffer:
+    for index, buffer in enumerate(buffers):
+      if np.array_equal(buffer.data, new_data):
+        return index
+  new_buffer = schema_py_generated.BufferT()
+  new_buffer.data = new_data
+  new_buffer.offset = 0
+  new_buffer.size = 0
+  new_buffer_id = len(buffers)
+  buffers.append(new_buffer)
+  return new_buffer_id
 def add_new_constant_tensor(
     tensor_name: str,
     data: np.ndarray,
     tensor_type: schema_py_generated.TensorType,
     subgraph: schema_py_generated.SubGraphT,
     buffers: list[schema_py_generated.BufferT],
+    tensor_shape: Optional[list[int]] = None,
+    force_duplicate_buffer: bool = False,
 ) -> int:
   """Add a new constant tensor to the model.
@@ -84,20 +149,21 @@ def add_new_constant_tensor(
     tensor_type: The type of the new tensor.
     subgraph: The subgraph where the new tensor is added.
     buffers: The buffers of the model.
+    tensor_shape: The shape of the new tensor. If not provided, the shape of the
+      data will be used.
+    force_duplicate_buffer: Whether to add a new buffer even if the same buffer
+      already exists.
   Returns:
     The index of the new tensor in the subgraph.
   """
-  tensor_buffer = schema_py_generated.BufferT()
-  tensor_buffer.data = np.frombuffer(data.tobytes(), dtype=np.uint8).flatten()
-  tensor_buffer.offset = 0
-  tensor_buffer.size = 0
-  tensor_buffer_id = len(buffers)
-  buffers.append(tensor_buffer)
+  new_buffer_id = get_constant_buffer(data, buffers, force_duplicate_buffer)
   new_tensor = schema_py_generated.TensorT()
-  new_tensor.shape = data.shape
-  new_tensor.buffer = tensor_buffer_id
+  if tensor_shape is None:
+    tensor_shape = data.shape
+  new_tensor.shape = tensor_shape
+  new_tensor.buffer = new_buffer_id
   new_tensor.type = tensor_type
   new_tensor.name = tensor_name
   new_tensor_id = len(subgraph.tensors)
@@ -123,10 +189,90 @@ def add_new_activation_tensor(
     The index of the new tensor in the subgraph.
   """
   new_tensor = schema_py_generated.TensorT()
-  new_tensor.shape = shape
+  # If there's a dynamic shape, we need to read from the shapeSignature field
+  # instead of shape. Shape should contain just 1 for the dynamic dimension but
+  # shapeSignature should contain the true shape.
+  if -1 in shape:
+    new_tensor.shapeSignature = shape
+    new_tensor.shape = [1 if i == -1 else i for i in shape]
+  else:
+    new_tensor.shape = shape
   new_tensor.type = tensor_type
   new_tensor.name = tensor_name
   new_tensor.buffer = 0
   new_tensor_id = len(subgraph.tensors)
   subgraph.tensors.append(new_tensor)
   return new_tensor_id
+def raise_deprecated_error(_: TransformationInput):
+  raise NotImplementedError(
+      'This transformation is deprecated. Please contact AI Edge Quantizer team'
+      ' if you see this error.'
+  )
+def pack_data(bitwidth: int, flattened_data: np.ndarray) -> np.ndarray:
+  """Pack the data to the corresponding bit width.
+  Currently only support 4 bits. If no packing is needed, the original data is
+  returned.
+  Args:
+    bitwidth: Bit width from NonLinearQuantParams.
+    flattened_data: The data to be packed.
+  Returns:
+    Packed data.
+  """
+  if bitwidth == 4:
+    even_data = flattened_data[::2] & 0x0F
+    odd_data = np.left_shift(flattened_data[1::2], 4).astype(np.uint8)
+    if odd_data.shape[0] == even_data.shape[0] - 1:
+      odd_data = np.pad(odd_data, (0, 1), constant_values=0)
+    return np.bitwise_or(even_data, odd_data)
+  else:
+    return flattened_data
+def get_producer_schema_op_id(
+    transformation: TransformationInput,
+) -> int:
+  """Checks if the tensor's producer matches the given op.
+  Args:
+    transformation: The transformation input to check the producer of.
+  Returns:
+    The schema op id of the producer op. E.g.
+    schema_py_generated.BuiltinOperator.FULLY_CONNECTED.
+  """
+  if transformation.producer == -1:
+    return False
+  else:
+    return (
+        transformation.op_codes[
+            transformation.subgraph.operators[
+                transformation.producer
+            ].opcodeIndex
+        ].builtinCode
+    )
+def get_schema_op_id(
+    transformation: TransformationInput, op_id: int
+) -> bool:
+  """Returns the schema op id of the given op.
+  Args:
+    transformation: The transformation input to check the consumers of.
+    op_id: The op id in the list of operators to check for.
+  Returns:
+    The schema op id of the given op.
+  """
+  return (
+      transformation.op_codes[
+          transformation.subgraph.operators[op_id].opcodeIndex
+      ].builtinCode
+  )

ai_edge_quantizer/transformations/transformation_utils_test.py CHANGED Viewed

@@ -41,19 +41,94 @@ class TransformationUtilsTest(parameterized.TestCase):
           testcase_name="add_new_op_code",
           op_code=schema_py_generated.BuiltinOperator.LOGISTIC,
           expected=1,
+          custom_op_name=None,
       ),
       dict(
           testcase_name="add_existing_op_code",
           op_code=schema_py_generated.BuiltinOperator.FULLY_CONNECTED,
           expected=0,
+          custom_op_name=None,
+      ),
+      dict(
+          testcase_name="add_new_custom_op_code",
+          op_code=schema_py_generated.BuiltinOperator.CUSTOM,
+          expected=1,
+          custom_op_name="random_new_custom_op",
       ),
   )
-  def test_add_op_code(self, op_code, expected):
+  def test_add_op_code(self, op_code, expected, custom_op_name):
     """Tests if the op code is added to the model."""
     got = transformation_utils.add_op_code(
-        op_code=op_code, model_op_codes=self.model.operatorCodes
+        op_code=op_code,
+        model_op_codes=self.model.operatorCodes,
+        custom_op_name=custom_op_name,
     )
     self.assertEqual(expected, got)
+    if custom_op_name is not None:
+      self.assertEqual(self.model.operatorCodes[got].customCode, custom_op_name)
+  def test_add_custom_op_code_without_op_string_raises_error(self):
+    with self.assertRaisesRegex(ValueError, "Custom string is required"):
+      transformation_utils.add_op_code(
+          op_code=schema_py_generated.BuiltinOperator.CUSTOM,
+          model_op_codes=self.model.operatorCodes,
+          custom_op_name=None,
+      )
+  def test_add_two_custom_op_codes(self):
+    custom_op_name = "random_new_custom_op"
+    added_index = transformation_utils.add_op_code(
+        op_code=schema_py_generated.BuiltinOperator.CUSTOM,
+        model_op_codes=self.model.operatorCodes,
+        custom_op_name=custom_op_name,
+    )
+    self.assertEqual(1, added_index)
+    self.assertEqual(
+        self.model.operatorCodes[added_index].customCode, custom_op_name
+    )
+    custom_op_name_2 = "random_new_custom_op_2"
+    added_index = transformation_utils.add_op_code(
+        op_code=schema_py_generated.BuiltinOperator.CUSTOM,
+        model_op_codes=self.model.operatorCodes,
+        custom_op_name=custom_op_name_2,
+    )
+    self.assertEqual(2, added_index)
+    self.assertEqual(
+        self.model.operatorCodes[added_index].customCode, custom_op_name_2
+    )
+  @parameterized.named_parameters(
+      dict(
+          testcase_name="float32",
+          data=np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32),
+      ),
+      dict(
+          testcase_name="int8",
+          data=np.array([[1, 2], [3, 4]], dtype=np.int8),
+      ),
+  )
+  def test_add_new_constant_buffer(self, data):
+    """Tests if the constant buffer is added to the model."""
+    prev_num_buffers = len(self.model.buffers) - 1
+    new_buffer_idx = transformation_utils.get_constant_buffer(
+        data=data,
+        buffers=self.model.buffers,
+    )
+    self.assertEqual(new_buffer_idx, prev_num_buffers + 1)
+    expected_buffer_data = (
+        np.frombuffer(
+            data.tobytes(),
+            dtype=np.uint8,
+        )
+        .flatten()
+        .tolist()
+    )
+    self.assertEqual(
+        self.model.buffers[new_buffer_idx].data.tolist(),
+        expected_buffer_data,
+    )
   @parameterized.named_parameters(
       dict(
@@ -157,6 +232,25 @@ class TransformationUtilsTest(parameterized.TestCase):
         self.model.subgraphs[0].tensors[-1].shape,
     )
+  def test_add_new_activation_tensor_with_dynamic_shape(self):
+    """Tests adding an activation tensor with dynamic shape."""
+    subgraph = self.model.subgraphs[0]
+    new_id = transformation_utils.add_new_activation_tensor(
+        tensor_name="test_tensor",
+        shape=[1, -1, -1, 1],
+        tensor_type=schema_py_generated.TensorType.FLOAT32,
+        subgraph=subgraph,
+    )
+    # Originally had 4 tensors, new tensor is added at index 4.
+    self.assertEqual(new_id, 4)
+    self.assertLen(subgraph.tensors, 5)
+    self.assertEqual(subgraph.tensors[-1].name, "test_tensor")
+    self.assertEqual(
+        subgraph.tensors[-1].type, schema_py_generated.TensorType.FLOAT32
+    )
+    self.assertEqual(subgraph.tensors[-1].shape, [1, 1, 1, 1])
+    self.assertEqual(subgraph.tensors[-1].shapeSignature, [1, -1, -1, 1])
 if __name__ == "__main__":
   googletest.main()

ai-edge-quantizer-nightly 0.0.1.dev20250302__py3-none-any.whl → 0.5.0.dev20260103__py3-none-any.whl

ai-edge-quantizer-nightly 0.0.1.dev20250302__py3-none-any.whl → 0.5.0.dev20260103__py3-none-any.whl