PyPI - ai-edge-quantizer-nightly - Versions diffs - 0.0.1.dev20250204__py3-none-any.whl → 0.0.1.dev20250205__py3-none-any.whl - Mend

ai-edge-quantizer-nightly 0.0.1.dev20250204__py3-none-any.whl → 0.0.1.dev20250205__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

ai_edge_quantizer/calibrator.py CHANGED Viewed

@@ -281,7 +281,13 @@ class Calibrator:
             algorithm_name, op_key
         )
         op_info = qtyping.OpInfo(op, op_key, subgraph_op_id, op_quant_config)
-        op_qsvs = qsv_init_func(op_info, graph_info)
+        # Ignore the input tensors where any dimension of the shape is 0.
+        inputs_to_ignore = [
+            idx
+            for idx in op.inputs
+            if not np.all(graph_info.subgraph_tensors[idx].shape)
+        ]
+        op_qsvs = qsv_init_func(op_info, graph_info, inputs_to_ignore)
         # Step3: initialize tensor qsvs.
         for tensor_name, qsv in op_qsvs.items():
           if tensor_name not in self._model_qsvs:

ai_edge_quantizer/calibrator_test.py CHANGED Viewed

@@ -228,6 +228,18 @@ class CalibratorTest(googletest.TestCase):
     )
     self.assertLen(test_calibrator.get_cached_output(), 10)
+  def test_calibrate_reshape_with_empty_shape_success(self):
+    test_model_path = os.path.join(
+        TEST_DATA_PREFIX_PATH, "tests/models/reshape_with_empty_shape.tflite"
+    )
+    test_calibrator = calibrator.Calibrator(test_model_path)
+    _add_default_int8xint8_integer_recipe(self._recipe_manager)
+    calib_data = test_utils.create_random_normal_input_data(
+        test_model_path, num_samples=4
+    )
+    test_calibrator.calibrate(calib_data, self._recipe_manager)
+    self.assertNotEmpty(test_calibrator.get_model_qsvs())
 class CalibratorAlreadyQuantizedModelTest(googletest.TestCase):

ai_edge_quantizer/qtyping.py CHANGED Viewed

@@ -120,6 +120,8 @@ class UniformQuantParams:
     zero_point: The zero point of the quantization.
     symmetric: Whether the quantization is symmetric (force zero_point to be 0).
     quantized_data: The quantized data.
+    block_size: The block size for blockwise quantization, block_size=0 meaning
+      no blockwise quantization.
   """
   num_bits: int
@@ -128,6 +130,7 @@ class UniformQuantParams:
   zero_point: np.ndarray
   symmetric: bool = True
   quantized_data: Optional[np.ndarray] = None
+  block_size: int = 0
   @classmethod
   def from_tfl_tensor_details(cls, tensor_detail) -> 'UniformQuantParams':
@@ -170,6 +173,7 @@ class UniformQuantParams:
         and np.array_equal(self.zero_point, other.zero_point)
         and self.symmetric == other.symmetric
         and _compare_array_or_none(self.quantized_data, other.quantized_data)
+        and self.block_size == other.block_size
     )

ai_edge_quantizer/transformations/quantize_tensor.py CHANGED Viewed

@@ -90,24 +90,99 @@ def _pack_data(bitwidth: int, flattened_data: np.ndarray) -> np.ndarray:
     return flattened_data
+def _perform_channelwise_quantization(
+    transformation_input: transformation_utils.TransformationInput,
+) -> schema_py_generated.QuantizationParametersT():
+  """Perform channelwise quantization and fill the quantization parameters.
+  Args:
+    transformation_input: Input structure that contains all information needed
+      for the transformation.
+  Returns:
+    The quantization parameters.
+  """
+  assert isinstance(
+      transformation_input.quant_params, qtyping.UniformQuantParams
+  )
+  flatbuffer_quantization = schema_py_generated.QuantizationParametersT()
+  flatbuffer_quantization.scale = list(
+      transformation_input.quant_params.scale.flatten().astype(np.float32)
+  )  # Flatbuffer requires scale as list[float].
+  if transformation_input.quant_params.zero_point is not None:
+    flatbuffer_quantization.zeroPoint = list(
+        transformation_input.quant_params.zero_point.flatten().astype(np.int64)
+    )  # Flatbuffer requires zeroPoint as list[int64]
+  if transformation_input.quant_params.quantized_dimension is not None:
+    flatbuffer_quantization.quantizedDimension = (
+        transformation_input.quant_params.quantized_dimension
+    )
+  return flatbuffer_quantization
+def _perform_blockwise_quantization(
+    transformation_input: transformation_utils.TransformationInput,
+) -> schema_py_generated.QuantizationParametersT():
+  """Perform blockwise quantization and fill the quantization parameters.
+  Args:
+    transformation_input: Input structure that contains all information needed
+      for the transformation.
+  Returns:
+    The quantization parameters.
+  """
+  assert isinstance(
+      transformation_input.quant_params, qtyping.UniformQuantParams
+  )
+  flatbuffer_quantization = schema_py_generated.QuantizationParametersT()
+  flatbuffer_quantization.detailsType = (
+      schema_py_generated.QuantizationDetails.BlockwiseQuantization
+  )
+  tensor = transformation_input.subgraph.tensors[transformation_input.tensor_id]
+  blockwise_details = schema_py_generated.BlockwiseQuantizationT()
+  scale_tensor_id = transformation_utils.add_new_constant_tensor(
+      tensor.name + b"_scale",
+      transformation_input.quant_params.scale,
+      schema_py_generated.TensorType.FLOAT16,
+      transformation_input.subgraph,
+      transformation_input.buffers,
+  )
+  blockwise_details.scale = scale_tensor_id
+  blockwise_details.blockSize = transformation_input.quant_params.block_size
+  # blockwise quantization allows optional zero point.
+  if transformation_input.quant_params.zero_point is not None:
+    zero_point_tensor_id = transformation_utils.add_new_constant_tensor(
+        tensor.name + b"_zero_point",
+        transformation_input.quant_params.zero_point,
+        schema_py_generated.TensorType.INT32,
+        transformation_input.subgraph,
+        transformation_input.buffers,
+    )
+    blockwise_details.zeroPoint = zero_point_tensor_id
+  flatbuffer_quantization.details = blockwise_details
+  return flatbuffer_quantization
 def quantize_tensor(
     transformation_input: transformation_utils.TransformationInput,
 ) -> qtyping.TransformationInfo:
   """Quantize the tensor at the tensor_id in the given subgraph.
   Args:
-    transformation_input: input structure that contains all information needed
+    transformation_input: Input structure that contains all information needed
       for the transformation.
   Returns:
     TransformationInfo:
-      op_id: the producer index for tensor
-      num_ops_added: the total number of ops inserted by this operation, which
-        is 0
+      op_id: The producer index for tensor.
+      num_ops_added: The total number of ops inserted by this operation, which
+        is 0.
   """
   tensor = transformation_input.subgraph.tensors[transformation_input.tensor_id]
-  # TODO: b/336385820 - suppport quantize buffer directly when quantized_data
-  # is not provided
+  # TODO: b/336385820 - Suppport quantize buffer directly when quantized_data
+  # is not provided.
   if tensor.buffer:
     if transformation_input.quant_params.quantized_data is not None:
       transformation_input.buffers[tensor.buffer].data = _pack_data(
@@ -121,29 +196,18 @@ def quantize_tensor(
       )
   if isinstance(transformation_input.quant_params, qtyping.UniformQuantParams):
-    flatbuffer_quantization = schema_py_generated.QuantizationParametersT()
-    flatbuffer_quantization.scale = list(
-        transformation_input.quant_params.scale.flatten().astype(np.float32)
-    )  # flatbuffer requires scale as list[float]
-    flatbuffer_quantization.zeroPoint = list(
-        transformation_input.quant_params.zero_point.flatten().astype(np.int64)
-    )  # flatbuffer requires zeroPoint as list[int64]
-    if transformation_input.quant_params.quantized_dimension is not None:
-      flatbuffer_quantization.quantizedDimension = (
-          transformation_input.quant_params.quantized_dimension
+    if transformation_input.quant_params.block_size == 0:
+      flatbuffer_quantization = _perform_channelwise_quantization(
+          transformation_input
+      )
+    else:
+      flatbuffer_quantization = _perform_blockwise_quantization(
+          transformation_input
       )
     tensor.quantization = flatbuffer_quantization
     tensor.type = quant_params_to_tflite_type(
         transformation_input.quant_params.num_bits
     )
-  if isinstance(
-      transformation_input.quant_params, qtyping.NonLinearQuantParams
-  ):
-    tensor.type = nonlinear_quant_params_to_tflite_type(
-        transformation_input.quant_params.num_bits
-    )
   if isinstance(
       transformation_input.quant_params, qtyping.NonLinearQuantParams
   ):

ai_edge_quantizer/transformations/quantize_tensor_test.py CHANGED Viewed

@@ -42,7 +42,7 @@ class QuantizeTensorTest(parameterized.TestCase):
     """test quantizing a constant tensor."""
     subgraph = self._model.subgraphs[0]
     model = self._model
-    data = np.ones([1, 112, 112, 3], dtype=np.int8)
+    data = np.ones([1, 112, 112, 32], dtype=np.int8)
     ret = quantize_tensor.quantize_tensor(
         transformation_utils.TransformationInput(
             7,
@@ -135,6 +135,42 @@ class QuantizeTensorTest(parameterized.TestCase):
         subgraph.tensors[4].type, schema_py_generated.TensorType.FLOAT16
     )
+  def test_blockwise_quantization_with_zero_point(self):
+    """Test blockwise quantization with explicit zero point."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    tensor_wh = 112
+    test_tensor_id = 7
+    data = np.ones([1, tensor_wh, tensor_wh, 32]).astype(np.int8)
+    quantize_tensor.quantize_tensor(
+        transformation_utils.TransformationInput(
+            tensor_id=test_tensor_id,
+            op_codes=model.operatorCodes,
+            buffers=model.buffers,
+            subgraph=subgraph,
+            producer=1,
+            consumers=[3],
+            quant_params=qtyping.UniformQuantParams(
+                num_bits=8,
+                quantized_dimension=None,
+                scale=np.ones([1, tensor_wh, tensor_wh, 1]).astype(np.float16),
+                zero_point=np.zeros([1, tensor_wh, tensor_wh, 1]),
+                symmetric=True,
+                quantized_data=data,
+                block_size=32,
+            ),
+        )
+    )
+    quant_param = subgraph.tensors[test_tensor_id].quantization
+    self.assertEqual(
+        quant_param.detailsType,
+        schema_py_generated.QuantizationDetails.BlockwiseQuantization,
+    )
+    self.assertEqual(quant_param.details.blockSize, 32)
+    # Check if the scale and zero point tensors are inserted correctly.
+    self.assertEqual(quant_param.details.scale, 9)
+    self.assertEqual(quant_param.details.zeroPoint, 10)
   def test_int4_constant_packed_correctly(self):
     subgraph = self._model.subgraphs[0]
     model = self._model

ai_edge_quantizer/utils/tfl_interpreter_utils.py CHANGED Viewed

@@ -188,9 +188,14 @@ def get_tensor_name_to_content_map(
   """
   tensors = {}
   for tensor_detail in tflite_interpreter.get_tensor_details(subgraph_index):
-    # Don't return temporary, unnamed tensors
+    # Don't return temporary, unnamed tensors.
     if not tensor_detail["name"]:
       continue
+    # Don't return tensors where any dimension of the shape is 0.
+    if not np.all(tensor_detail["shape"]):
+      continue
     tensors[tensor_detail["name"]] = get_tensor_data(
         tflite_interpreter, tensor_detail, subgraph_index, dequantize
     )

{ai_edge_quantizer_nightly-0.0.1.dev20250204.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250205.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-quantizer-nightly
-Version: 0.0.1.dev20250204
+Version: 0.0.1.dev20250205
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI

{ai_edge_quantizer_nightly-0.0.1.dev20250204.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250205.dist-info}/RECORD RENAMED Viewed

@@ -2,8 +2,8 @@ ai_edge_quantizer/__init__.py,sha256=4pFSkukSwahYyzwqia0yPRyz8TnFQfGRthVJhYpMWas
 ai_edge_quantizer/algorithm_manager.py,sha256=9nd4Txfl2z-14rFHmL7vqSfnkAQeagCRKyCIQ7ru0_Y,5981
 ai_edge_quantizer/algorithm_manager_api.py,sha256=u903TG0s1uIDhJqfeJne3CFl8A93phZrwgV2-hwdcXU,9247
 ai_edge_quantizer/algorithm_manager_api_test.py,sha256=tL_ozYFTsOPX8qGcti0KTz37nVsCxf0SSG5C45SyT-g,7319
-ai_edge_quantizer/calibrator.py,sha256=0zAWrSpl_08u6BNLVgG_TQeNcT16wJ-oLeQgznziGoo,11079
-ai_edge_quantizer/calibrator_test.py,sha256=5DGvKWRRjjU3L5wZoN56AyOVljmxOitwhuBUp6GL_bU,11354
+ai_edge_quantizer/calibrator.py,sha256=IqNMnKpZy24Fl_-94AIsp77KzqypMeyHkkxitMBxl58,11325
+ai_edge_quantizer/calibrator_test.py,sha256=hQk61YUvw1X02CDVAddm2n6Dnyk9GWoDgSpO6nuSJiY,11889
 ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
 ai_edge_quantizer/default_policy.py,sha256=TQ9yY8jtrSpMsTBsTyKW6TY-voGH_psvwGZoFglAbiA,9079
 ai_edge_quantizer/model_modifier.py,sha256=Z8EYtrz4zhCFpzd1zVwl2AetVE3BGBf5OvB2DbVQuds,5850
@@ -12,7 +12,7 @@ ai_edge_quantizer/model_validator.py,sha256=oZk0b1qGczaEm5erJFm4SbwadDnl7DFhC0bX
 ai_edge_quantizer/model_validator_test.py,sha256=ctvVmMHvnmFbkG4o8Jaa6kXXRrGHzhYpNylgLSmOboA,12951
 ai_edge_quantizer/params_generator.py,sha256=FvBub5yM2q98k7wNLgEyRerf8sVIETvGbrFcXFPUPdA,13523
 ai_edge_quantizer/params_generator_test.py,sha256=d9JwR-yxNJgg1SW-m8sFFPkIRdhgsDwMpVKsBQFL0gg,37658
-ai_edge_quantizer/qtyping.py,sha256=bue_WfK05QTkQcoyVVWeIxh8LRVGhHMWruXk3cgpFpw,14577
+ai_edge_quantizer/qtyping.py,sha256=eZNwNNjXf67OjIhTDGpmJe-4HuaohS6BYB8v7Tnq34A,14760
 ai_edge_quantizer/quantizer.py,sha256=Gny7WLuRibiIuDtcRn_g8RCD-zAm_fuDG7WmGq5dRx8,13238
 ai_edge_quantizer/quantizer_test.py,sha256=38oTMJwMmxwPDeqT3eaVbazjtuIUIzMQ3mJNKh_eNQY,20493
 ai_edge_quantizer/recipe.py,sha256=r5tJiUs-ihZFzeK_jP2sUIUgTqZsL5SWvbUokuIUPDo,2251
@@ -42,8 +42,8 @@ ai_edge_quantizer/transformations/emulated_subchannel.py,sha256=HVaRxoC8PCAvy3xe
 ai_edge_quantizer/transformations/emulated_subchannel_test.py,sha256=gZP6u9NdPXl7s19qB_Un8evou9ZZV6I9Gy0E1rdobHM,7722
 ai_edge_quantizer/transformations/quant_insert.py,sha256=jn6HsJaV-sqBiFPY-Aqbd64t8zgcYVkEkZI375x_FWY,3958
 ai_edge_quantizer/transformations/quant_insert_test.py,sha256=X9ptPDvJCFkR5tejKnD1SlHFGPazQTW-wNNMV9MEAuw,10107
-ai_edge_quantizer/transformations/quantize_tensor.py,sha256=KsJbvhoyBu3D1G5R4nkl54w0TbdYPyit6JfABwlvtbw,5437
-ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=xfbVNdMbvfJXQcl0vPtmyqKhifVxNZlhu_Xq7RLL2NI,7638
+ai_edge_quantizer/transformations/quantize_tensor.py,sha256=6CyUFR7fGmzbS-mSuDlSSCJJGxY9X_WnCmEuKqL4LzQ,7864
+ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=QnJmQ_-XN5X0oR57FoY9bWGTp7migf11psbdO9R2pLg,9050
 ai_edge_quantizer/transformations/transformation_utils.py,sha256=BaKy5LYWgqli62XGo3AGRDNtHjwpBNp5VF5XgFbfVmg,4298
 ai_edge_quantizer/transformations/transformation_utils_test.py,sha256=ks81nNvruOC88Tjdk3_qwku0V8p54p3gOqfObzNhWMM,5371
 ai_edge_quantizer/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
@@ -52,12 +52,12 @@ ai_edge_quantizer/utils/calibration_utils_test.py,sha256=Z-AcdTieesWFKyKBb08ZXm4
 ai_edge_quantizer/utils/test_utils.py,sha256=95BDAdjE4Zvd6JZ90fG8FE3wKWE-Lu0ZIE3hQ1B6adI,3616
 ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=F6_AkCSv35FAhJX2qel8VTARhGOVwaeo7_mqRZygrpA,10126
 ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py,sha256=AbyDxoM62k4ojD8gPdkWo--xe5hlX3t0kobQSA80kuk,7740
-ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=yta7b_VmhVZmntwHK27vqVnie3XRejN459P0uJHbpb8,10431
+ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=SM8H4i7Jq_nfdsJpImopHndNCJznlLr-6ptUbp5bVWA,10558
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=Op3JxtOqlrjzmYF18jnnstL1k9xiY9kKJ8S2vklKGkc,11327
 ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
 ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
-ai_edge_quantizer_nightly-0.0.1.dev20250204.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ai_edge_quantizer_nightly-0.0.1.dev20250204.dist-info/METADATA,sha256=8OfFupnLdT4RmcSu0nr8uDfNWQayk1KTb1hdMKqAEBc,1484
-ai_edge_quantizer_nightly-0.0.1.dev20250204.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_quantizer_nightly-0.0.1.dev20250204.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
-ai_edge_quantizer_nightly-0.0.1.dev20250204.dist-info/RECORD,,
+ai_edge_quantizer_nightly-0.0.1.dev20250205.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.0.1.dev20250205.dist-info/METADATA,sha256=fy1r2mGtmXIX4XYgkpQePrnnTgMiuHw7c393xq_5OWI,1484
+ai_edge_quantizer_nightly-0.0.1.dev20250205.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_quantizer_nightly-0.0.1.dev20250205.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.0.1.dev20250205.dist-info/RECORD,,

{ai_edge_quantizer_nightly-0.0.1.dev20250204.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250205.dist-info}/LICENSE RENAMED Viewed

File without changes

{ai_edge_quantizer_nightly-0.0.1.dev20250204.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250205.dist-info}/WHEEL RENAMED Viewed

File without changes

{ai_edge_quantizer_nightly-0.0.1.dev20250204.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250205.dist-info}/top_level.txt RENAMED Viewed

File without changes

ai-edge-quantizer-nightly 0.0.1.dev20250204__py3-none-any.whl → 0.0.1.dev20250205__py3-none-any.whl

ai-edge-quantizer-nightly 0.0.1.dev20250204__py3-none-any.whl → 0.0.1.dev20250205__py3-none-any.whl