PyPI - ai-edge-quantizer-nightly - Versions diffs - 0.0.1.dev20241119__py3-none-any.whl → 0.0.1.dev20241125__py3-none-any.whl - Mend

ai-edge-quantizer-nightly 0.0.1.dev20241119__py3-none-any.whl → 0.0.1.dev20241125__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

ai_edge_quantizer/algorithm_manager.py CHANGED Viewed

@@ -90,6 +90,8 @@ for op_name, materialize_func in zip(
         _TFLOpName.SPLIT,
         _TFLOpName.LOGISTIC,  # Sigmoid
         _TFLOpName.SLICE,
+        _TFLOpName.SUM,
+        _TFLOpName.SELECT_V2,
     ),
     (
         naive_min_max_quantize.materialize_input,
@@ -116,6 +118,8 @@ for op_name, materialize_func in zip(
         naive_min_max_quantize.materialize_split,
         naive_min_max_quantize.materialize_softmax_and_logistic,
         naive_min_max_quantize.materialize_slice,
+        naive_min_max_quantize.materialize_sum,
+        naive_min_max_quantize.materialize_select_v2,
     ),
 ):
   register_quantized_op(

ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py CHANGED Viewed

@@ -325,6 +325,38 @@ def materialize_slice(
   )
+def materialize_select_v2(
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.select_v2."""
+  return utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      constraint=_OpQuantConstraint.SAME_AS_OUTPUT_SCALE,
+      inputs_to_ignore=[
+          0,
+      ],  # Condition tensor does not need to be quantized.
+  )
+def materialize_sum(
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.sum."""
+  return utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+      inputs_to_ignore=[1],  # Axis index does not need to be quantized.
+  )
 def materialize_fc_conv(
     op_info: qtyping.OpInfo,
     graph_info: qtyping.GraphInfo,

ai_edge_quantizer/calibrator_test.py CHANGED Viewed

@@ -290,7 +290,7 @@ class CalibratorToyGemma2Test(googletest.TestCase):
         self._toy_gemma2_calibration_dataset,
         model_recipe_manager=recipe_mngr,
     )
-    self.assertLen(calib.get_model_qsvs(), 274)
+    self.assertLen(calib.get_model_qsvs(), 282)
 if __name__ == "__main__":

ai_edge_quantizer/default_policy.py CHANGED Viewed

@@ -165,7 +165,9 @@ DEFAULT_JSON_POLICY = """
       "INPUT",
       "OUTPUT",
       "SLICE",
-      "EMBEDDING_LOOKUP"
+      "EMBEDDING_LOOKUP",
+      "SUM",
+      "SELECT_V2"
     ],
     "static_wi8_ai8": [
       "ADD",
@@ -191,7 +193,9 @@ DEFAULT_JSON_POLICY = """
       "INPUT",
       "OUTPUT",
       "SLICE",
-      "EMBEDDING_LOOKUP"
+      "EMBEDDING_LOOKUP",
+      "SUM",
+      "SELECT_V2"
     ],
     "static_wi4_ai8": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT"],
     "static_wi4_ai16": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT"],

ai_edge_quantizer/qtyping.py CHANGED Viewed

@@ -58,6 +58,8 @@ class TFLOperationName(str, enum.Enum):
   SPLIT = 'SPLIT'
   LOGISTIC = 'LOGISTIC'
   SLICE = 'SLICE'
+  SUM = 'SUM'
+  SELECT_V2 = 'SELECT_V2'
 class QuantizeMode(enum.Enum):

ai_edge_quantizer/transformations/quantize_tensor.py CHANGED Viewed

@@ -22,19 +22,19 @@ from ai_edge_quantizer.transformations import transformation_utils
 from ai_edge_litert import schema_py_generated  # pylint: disable=g-direct-tensorflow-import
-# TODO: b/335014051 - support distinguishing INT, FLOAT & UINT, BFLOAT
+# TODO: b/335014051 - Support distinguishing INT, FLOAT & UINT, BFLOAT.
 def quant_params_to_tflite_type(
     bitwidth: int,
 ) -> Optional[schema_py_generated.TensorType]:
-  """Given specifications from quant param return the corresponding tflite dtype.
+  """Given specifications from quant param return the corresponding TFLite dtype.
   Args:
-    bitwidth: bitwidth from UniformQuantParams
+    bitwidth: Bit width from UniformQuantParams.
   Returns:
-    the corresponding tflite tensortype
+    The corresponding TFLite tensor type.
   """
-  if bitwidth <= 4:
+  if bitwidth == 4:
     return schema_py_generated.TensorType.INT4
   elif bitwidth <= 8:
     return schema_py_generated.TensorType.INT8
@@ -68,19 +68,19 @@ def nonlinear_quant_params_to_tflite_type(
 def _pack_data(bitwidth: int, flattened_data: np.ndarray) -> np.ndarray:
-  """Pack the data to the corresponding bitwidth.
+  """Pack the data to the corresponding bit width.
-  If no packing is needed, the original data is returned. Any bitwidth equal or
-  less than 4 bits will be packed to 4 bits.
+  Currently only support 4 bits. If no packing is needed, the original data is
+  returned.
   Args:
-    bitwidth: Bitwidth from NonLinearQuantParams.
+    bitwidth: Bit width from NonLinearQuantParams.
     flattened_data: The data to be packed.
   Returns:
     Packed data.
   """
-  if bitwidth <= 4:
+  if bitwidth == 4:
     even_data = flattened_data[::2] & 0x0F
     odd_data = np.left_shift(flattened_data[1::2], 4).astype(np.uint8)
     if odd_data.shape[0] == even_data.shape[0] - 1:

ai_edge_quantizer/transformations/quantize_tensor_test.py CHANGED Viewed

@@ -18,6 +18,7 @@
 import os
 import numpy as np
 from tensorflow.python.platform import googletest
+from absl.testing import parameterized
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer.transformations import quantize_tensor
 from ai_edge_quantizer.transformations import transformation_utils
@@ -28,7 +29,7 @@ from ai_edge_litert import schema_py_generated  # pylint: disable=g-direct-tenso
 TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile("..")
-class QuantizeTensorTest(googletest.TestCase):
+class QuantizeTensorTest(parameterized.TestCase):
   def setUp(self):
     super().setUp()
@@ -179,40 +180,44 @@ class QuantizeTensorTest(googletest.TestCase):
     np.testing.assert_array_equal(quant_param.zeroPoint, [1])
     self.assertEqual(quant_param.quantizedDimension, 0)
-  def test_int5_constant_not_packed(self):
+  @parameterized.named_parameters(
+      dict(
+          testcase_name="int5",
+          num_bits=5,
+      ),
+      dict(
+          testcase_name="int2",
+          num_bits=2,
+      ),
+  )
+  def test_int_constant_not_packed(self, num_bits):
     subgraph = self._model.subgraphs[0]
     model = self._model
-    data = np.array(
-        [
-            0x0,
-            0x1,
-            0x2,
-            0x3,
-            0x4,
-            0x5,
-            0x6,
-            0x7,
-        ],
-        dtype=np.int8,
-    )
+    tensor_id = 7
+    data = np.array([0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7], dtype=np.int8)
     expected = np.array([0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7])
     ret = quantize_tensor.quantize_tensor(
         transformation_utils.TransformationInput(
-            tensor_id=7,
+            tensor_id=tensor_id,
             op_codes=model.operatorCodes,
             buffers=model.buffers,
             subgraph=subgraph,
             producer=-1,
             consumers=[4],
             quant_params=qtyping.UniformQuantParams(
-                5, None, np.ones(1), np.ones(1), True, data
+                num_bits=num_bits,
+                quantized_dimension=None,
+                scale=np.ones(1),
+                zero_point=np.ones(1),
+                symmetric=True,
+                quantized_data=data,
             ),
         )
     )
     self.assertEqual(ret.op_id, 0)
     self.assertEqual(ret.num_ops_added, 0)
     np.testing.assert_array_equal(model.buffers[8].data, expected)
-    quant_param = subgraph.tensors[7].quantization
+    quant_param = subgraph.tensors[tensor_id].quantization
     np.testing.assert_array_equal(quant_param.scale, [1])
     np.testing.assert_array_equal(quant_param.zeroPoint, [1])
     self.assertEqual(quant_param.quantizedDimension, 0)

ai_edge_quantizer/utils/tfl_flatbuffer_utils.py CHANGED Viewed

@@ -60,6 +60,8 @@ TFL_OP_NAME_TO_CODE = immutabledict.immutabledict({
     _TFLOpName.SPLIT: schema_py_generated.BuiltinOperator.SPLIT,
     _TFLOpName.LOGISTIC: schema_py_generated.BuiltinOperator.LOGISTIC,
     _TFLOpName.SLICE: schema_py_generated.BuiltinOperator.SLICE,
+    _TFLOpName.SUM: schema_py_generated.BuiltinOperator.SUM,
+    _TFLOpName.SELECT_V2: schema_py_generated.BuiltinOperator.SELECT_V2,
 })
 TFL_OP_CODE_TO_NAME = immutabledict.immutabledict(

{ai_edge_quantizer_nightly-0.0.1.dev20241119.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20241125.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-quantizer-nightly
-Version: 0.0.1.dev20241119
+Version: 0.0.1.dev20241125
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI

{ai_edge_quantizer_nightly-0.0.1.dev20241119.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20241125.dist-info}/RECORD RENAMED Viewed

@@ -1,18 +1,18 @@
 ai_edge_quantizer/__init__.py,sha256=4pFSkukSwahYyzwqia0yPRyz8TnFQfGRthVJhYpMWas,793
-ai_edge_quantizer/algorithm_manager.py,sha256=PiRvUH6CY4EzPBLJhMyYR5u7_pMYvg0eOGTupdfyxTA,5825
+ai_edge_quantizer/algorithm_manager.py,sha256=9nd4Txfl2z-14rFHmL7vqSfnkAQeagCRKyCIQ7ru0_Y,5981
 ai_edge_quantizer/algorithm_manager_api.py,sha256=u903TG0s1uIDhJqfeJne3CFl8A93phZrwgV2-hwdcXU,9247
 ai_edge_quantizer/algorithm_manager_api_test.py,sha256=tL_ozYFTsOPX8qGcti0KTz37nVsCxf0SSG5C45SyT-g,7319
 ai_edge_quantizer/calibrator.py,sha256=BSu0DPzVhAgFFA0JsHZtawPFlr0YPirRxItuA9SerNg,11007
-ai_edge_quantizer/calibrator_test.py,sha256=U7aiy9COsuDMV0xT5le1PxON8eP8lUeiF_mzkPL9k9c,11354
+ai_edge_quantizer/calibrator_test.py,sha256=5DGvKWRRjjU3L5wZoN56AyOVljmxOitwhuBUp6GL_bU,11354
 ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
-ai_edge_quantizer/default_policy.py,sha256=MYURzrGnXaGOjckAFc5cIKcW3bF8bDrHQYRUpkL2-oI,9015
+ai_edge_quantizer/default_policy.py,sha256=TQ9yY8jtrSpMsTBsTyKW6TY-voGH_psvwGZoFglAbiA,9079
 ai_edge_quantizer/model_modifier.py,sha256=Z8EYtrz4zhCFpzd1zVwl2AetVE3BGBf5OvB2DbVQuds,5850
 ai_edge_quantizer/model_modifier_test.py,sha256=cJd04SLOG-fQZZNZPcisoBLx3cLtWEwGqUBbLb-pif4,4751
 ai_edge_quantizer/model_validator.py,sha256=QvlG1TewSBo9FMwzDYPFGqR4mOa_Xhn21wi2OFAvbCI,12593
 ai_edge_quantizer/model_validator_test.py,sha256=ctvVmMHvnmFbkG4o8Jaa6kXXRrGHzhYpNylgLSmOboA,12951
 ai_edge_quantizer/params_generator.py,sha256=FvBub5yM2q98k7wNLgEyRerf8sVIETvGbrFcXFPUPdA,13523
 ai_edge_quantizer/params_generator_test.py,sha256=d9JwR-yxNJgg1SW-m8sFFPkIRdhgsDwMpVKsBQFL0gg,37658
-ai_edge_quantizer/qtyping.py,sha256=L2-G6k1cZh3uxAkjJ2zmqF5Q0TyZB7vm0i7LGGeBFdk,14537
+ai_edge_quantizer/qtyping.py,sha256=bue_WfK05QTkQcoyVVWeIxh8LRVGhHMWruXk3cgpFpw,14577
 ai_edge_quantizer/quantizer.py,sha256=OYfSo06JcoursXbJBRfHQbR2-Pa4sHnZB2n9od9OzEY,13039
 ai_edge_quantizer/quantizer_test.py,sha256=38oTMJwMmxwPDeqT3eaVbazjtuIUIzMQ3mJNKh_eNQY,20493
 ai_edge_quantizer/recipe.py,sha256=r5tJiUs-ihZFzeK_jP2sUIUgTqZsL5SWvbUokuIUPDo,2251
@@ -28,7 +28,7 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCP
 ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
 ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=s64eDDH9bmRWy6Bl1peHnhGewLnFJjvnhYOdjo1zYOA,22625
 ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
-ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=Oc_B0KSNoPj1SPkXW5drxN4zZMDoVAC5UeuLsHilgb4,20133
+ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=oU4EhsDhWoipR0M28yILGdRcs2duWQBU5RpDyGiAGHk,21065
 ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=MfN2JFogBVsPIJ_Fdqf3ot4vn-WLIjcnyL2oBnmrwiI,6739
 ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=B-s1KMfb9tqvaDhHJV-M2zRR078z5Mwv-P9h77S3Mis,12229
 ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=WZ4_bvbG999nOtCIqn7mrMnpRdoJOdiyzxhsL_QiPHA,11395
@@ -42,22 +42,22 @@ ai_edge_quantizer/transformations/emulated_subchannel.py,sha256=HVaRxoC8PCAvy3xe
 ai_edge_quantizer/transformations/emulated_subchannel_test.py,sha256=gZP6u9NdPXl7s19qB_Un8evou9ZZV6I9Gy0E1rdobHM,7722
 ai_edge_quantizer/transformations/quant_insert.py,sha256=jn6HsJaV-sqBiFPY-Aqbd64t8zgcYVkEkZI375x_FWY,3958
 ai_edge_quantizer/transformations/quant_insert_test.py,sha256=X9ptPDvJCFkR5tejKnD1SlHFGPazQTW-wNNMV9MEAuw,10107
-ai_edge_quantizer/transformations/quantize_tensor.py,sha256=6lLJHA0G7tf9nrydnSPnWj1rYRN17dH-x5aCkEy3YDQ,5464
-ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=QAyV3IrvCc9puIWdDz-iONNCuKob7ZejgTbSvT5K3YA,7335
+ai_edge_quantizer/transformations/quantize_tensor.py,sha256=KsJbvhoyBu3D1G5R4nkl54w0TbdYPyit6JfABwlvtbw,5437
+ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=xfbVNdMbvfJXQcl0vPtmyqKhifVxNZlhu_Xq7RLL2NI,7638
 ai_edge_quantizer/transformations/transformation_utils.py,sha256=BaKy5LYWgqli62XGo3AGRDNtHjwpBNp5VF5XgFbfVmg,4298
 ai_edge_quantizer/transformations/transformation_utils_test.py,sha256=ks81nNvruOC88Tjdk3_qwku0V8p54p3gOqfObzNhWMM,5371
 ai_edge_quantizer/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
 ai_edge_quantizer/utils/calibration_utils.py,sha256=1Fj9MIO6aLZIRgyd4axvZN4S_O64nB_-Miu1WP664js,2536
 ai_edge_quantizer/utils/calibration_utils_test.py,sha256=Z-AcdTieesWFKyKBb08ZXm4Mgu6cvJ4bg2-MJ7hLD10,2856
 ai_edge_quantizer/utils/test_utils.py,sha256=95BDAdjE4Zvd6JZ90fG8FE3wKWE-Lu0ZIE3hQ1B6adI,3616
-ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=2aSyXNeS1V0gCbw72GVJplj-0qiLbKVJLAymc3j-t2k,9992
+ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=F6_AkCSv35FAhJX2qel8VTARhGOVwaeo7_mqRZygrpA,10126
 ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py,sha256=AbyDxoM62k4ojD8gPdkWo--xe5hlX3t0kobQSA80kuk,7740
 ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=GzrsaL3fkOXN5iPRJv7lqhNISY6lnrBVTotWDHzI5m8,10344
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=Op3JxtOqlrjzmYF18jnnstL1k9xiY9kKJ8S2vklKGkc,11327
 ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
 ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
-ai_edge_quantizer_nightly-0.0.1.dev20241119.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ai_edge_quantizer_nightly-0.0.1.dev20241119.dist-info/METADATA,sha256=YA9hk0ihn6J3h_tew_SjSEOlLN-GOI1C-xDEPSp1e5s,1484
-ai_edge_quantizer_nightly-0.0.1.dev20241119.dist-info/WHEEL,sha256=bFJAMchF8aTQGUgMZzHJyDDMPTO3ToJ7x23SLJa1SVo,92
-ai_edge_quantizer_nightly-0.0.1.dev20241119.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
-ai_edge_quantizer_nightly-0.0.1.dev20241119.dist-info/RECORD,,
+ai_edge_quantizer_nightly-0.0.1.dev20241125.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.0.1.dev20241125.dist-info/METADATA,sha256=24moeOFxxWCBOchvK6RDViIhwKbRdvcLL68HOZrbbrQ,1484
+ai_edge_quantizer_nightly-0.0.1.dev20241125.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_quantizer_nightly-0.0.1.dev20241125.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.0.1.dev20241125.dist-info/RECORD,,

{ai_edge_quantizer_nightly-0.0.1.dev20241119.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20241125.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.45.0)
+Generator: bdist_wheel (0.45.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

{ai_edge_quantizer_nightly-0.0.1.dev20241119.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20241125.dist-info}/LICENSE RENAMED Viewed

File without changes

{ai_edge_quantizer_nightly-0.0.1.dev20241119.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20241125.dist-info}/top_level.txt RENAMED Viewed

File without changes

ai-edge-quantizer-nightly 0.0.1.dev20241119__py3-none-any.whl → 0.0.1.dev20241125__py3-none-any.whl

ai-edge-quantizer-nightly 0.0.1.dev20241119__py3-none-any.whl → 0.0.1.dev20241125__py3-none-any.whl