PyPI - ai-edge-quantizer-nightly - Versions diffs - 0.0.1.dev20250115__py3-none-any.whl - Mend

ai-edge-quantizer-nightly 0.0.1.dev20250115__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

ai_edge_quantizer/__init__.py +19 -0
ai_edge_quantizer/algorithm_manager.py +167 -0
ai_edge_quantizer/algorithm_manager_api.py +271 -0
ai_edge_quantizer/algorithm_manager_api_test.py +210 -0
ai_edge_quantizer/algorithms/__init__.py +15 -0
ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py +15 -0
ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py +273 -0
ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py +664 -0
ai_edge_quantizer/algorithms/uniform_quantize/__init__.py +15 -0
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +666 -0
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +184 -0
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +371 -0
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +357 -0
ai_edge_quantizer/algorithms/utils/__init__.py +15 -0
ai_edge_quantizer/algorithms/utils/min_max_quantize_utils.py +1067 -0
ai_edge_quantizer/algorithms/utils/min_max_quantize_utils_test.py +512 -0
ai_edge_quantizer/calibrator.py +288 -0
ai_edge_quantizer/calibrator_test.py +297 -0
ai_edge_quantizer/conftest.py +22 -0
ai_edge_quantizer/default_policy.py +310 -0
ai_edge_quantizer/model_modifier.py +176 -0
ai_edge_quantizer/model_modifier_test.py +130 -0
ai_edge_quantizer/model_validator.py +357 -0
ai_edge_quantizer/model_validator_test.py +354 -0
ai_edge_quantizer/params_generator.py +361 -0
ai_edge_quantizer/params_generator_test.py +1041 -0
ai_edge_quantizer/qtyping.py +483 -0
ai_edge_quantizer/quantizer.py +372 -0
ai_edge_quantizer/quantizer_test.py +532 -0
ai_edge_quantizer/recipe.py +67 -0
ai_edge_quantizer/recipe_manager.py +245 -0
ai_edge_quantizer/recipe_manager_test.py +815 -0
ai_edge_quantizer/recipe_test.py +97 -0
ai_edge_quantizer/transformation_instruction_generator.py +584 -0
ai_edge_quantizer/transformation_instruction_generator_test.py +1082 -0
ai_edge_quantizer/transformation_performer.py +278 -0
ai_edge_quantizer/transformation_performer_test.py +344 -0
ai_edge_quantizer/transformations/__init__.py +15 -0
ai_edge_quantizer/transformations/dequant_insert.py +87 -0
ai_edge_quantizer/transformations/dequant_insert_test.py +304 -0
ai_edge_quantizer/transformations/emulated_subchannel.py +363 -0
ai_edge_quantizer/transformations/emulated_subchannel_test.py +212 -0
ai_edge_quantizer/transformations/quant_insert.py +100 -0
ai_edge_quantizer/transformations/quant_insert_test.py +284 -0
ai_edge_quantizer/transformations/quantize_tensor.py +156 -0
ai_edge_quantizer/transformations/quantize_tensor_test.py +227 -0
ai_edge_quantizer/transformations/transformation_utils.py +132 -0
ai_edge_quantizer/transformations/transformation_utils_test.py +162 -0
ai_edge_quantizer/utils/__init__.py +15 -0
ai_edge_quantizer/utils/calibration_utils.py +86 -0
ai_edge_quantizer/utils/calibration_utils_test.py +77 -0
ai_edge_quantizer/utils/test_utils.py +107 -0
ai_edge_quantizer/utils/tfl_flatbuffer_utils.py +317 -0
ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py +200 -0
ai_edge_quantizer/utils/tfl_interpreter_utils.py +312 -0
ai_edge_quantizer/utils/tfl_interpreter_utils_test.py +332 -0
ai_edge_quantizer/utils/validation_utils.py +125 -0
ai_edge_quantizer/utils/validation_utils_test.py +87 -0
ai_edge_quantizer_nightly-0.0.1.dev20250115.dist-info/LICENSE +201 -0
ai_edge_quantizer_nightly-0.0.1.dev20250115.dist-info/METADATA +32 -0
ai_edge_quantizer_nightly-0.0.1.dev20250115.dist-info/RECORD +63 -0
ai_edge_quantizer_nightly-0.0.1.dev20250115.dist-info/WHEEL +5 -0
ai_edge_quantizer_nightly-0.0.1.dev20250115.dist-info/top_level.txt +1 -0

ai_edge_quantizer/transformations/dequant_insert.py ADDED Viewed

@@ -0,0 +1,87 @@
+# Copyright 2024 The AI Edge Quantizer Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Apply dequantization transformations to the given op/tensor.
+Inserts a dequantize node after the given tensor to enable float execution of
+the tensor's consumers.
+"""
+from ai_edge_quantizer import qtyping
+from ai_edge_quantizer.transformations import quantize_tensor
+from ai_edge_quantizer.transformations import transformation_utils
+from ai_edge_litert import schema_py_generated  # pylint: disable=g-direct-tensorflow-import
+def insert_dequant(
+    transformation_input: transformation_utils.TransformationInput,
+) -> qtyping.TransformationInfo:
+  """Insert dequant op after the given tensor in the subgraph.
+  Args:
+    transformation_input: input structure that contains all information needed
+      for the transformation.
+  Returns:
+    TransformationInfo:
+      op_id: the index where the dequant op is added
+      num_ops_added: the total number of ops inserted by this operation, which
+        is 1
+  """
+  dequant_op_code_idx = transformation_utils.add_op_code(
+      schema_py_generated.BuiltinOperator.DEQUANTIZE,
+      transformation_input.op_codes,
+  )
+  # create output tensor for the dequant op
+  tensor = transformation_input.subgraph.tensors[transformation_input.tensor_id]
+  new_tensor_id = transformation_utils.add_new_activation_tensor(
+      tensor.name + b'_dequant',
+      tensor.shape,
+      schema_py_generated.TensorType.FLOAT32,
+      transformation_input.subgraph,
+  )
+  # create dequantize_op
+  dequant_op = schema_py_generated.OperatorT()
+  dequant_op.opcodeIndex = dequant_op_code_idx
+  dequant_op.outputs = [new_tensor_id]
+  dequant_op.inputs = [transformation_input.tensor_id]
+  # quantize the source tensor
+  quantize_tensor.quantize_tensor(transformation_input)
+  # update the original consumers of the op to take the dequant op,
+  # and find the first consumer of the new tensor
+  first_consumer_id = min(transformation_input.consumers)
+  for consumer_id in transformation_input.consumers:
+    op = transformation_input.subgraph.operators[consumer_id]
+    for input_idx in range(len(op.inputs)):
+      if op.inputs[input_idx] == transformation_input.tensor_id:
+        op.inputs[input_idx] = new_tensor_id
+  # if the output is also an output to the graph, we need to update that as well
+  for output_idx, output in enumerate(transformation_input.subgraph.outputs):
+    if output == transformation_input.tensor_id:
+      transformation_input.subgraph.outputs[output_idx] = new_tensor_id
+  # add dequant into the subgraph op list,
+  # must insert the op right before it's first consumer
+  # in the case of output goes to graph output, we need to ensure the dequant
+  # op is inserted after the producer
+  op_id = max(transformation_input.producer + 1, first_consumer_id)
+  transformation_input.subgraph.operators.insert(op_id, dequant_op)
+  return qtyping.TransformationInfo(
+      op_id=op_id, num_ops_added=1, output_tensor_id=new_tensor_id
+  )

ai_edge_quantizer/transformations/dequant_insert_test.py ADDED Viewed

@@ -0,0 +1,304 @@
+# Copyright 2024 The AI Edge Quantizer Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the dequant insertion transformation used by the quantizer."""
+import os
+import numpy as np
+from tensorflow.python.platform import googletest
+from ai_edge_quantizer import qtyping
+from ai_edge_quantizer.transformations import dequant_insert
+from ai_edge_quantizer.transformations import transformation_utils
+from ai_edge_quantizer.utils import test_utils
+from ai_edge_quantizer.utils import tfl_flatbuffer_utils
+from ai_edge_litert import schema_py_generated  # pylint: disable=g-direct-tensorflow-import
+# Base directory for test data, resolved through test_utils.get_path_to_datafile.
+TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile("..")
+class DequantInsertTest(googletest.TestCase):
+  """Tests for dequant_insert.insert_dequant on the insert_dequant_test model."""
+  # NOTE(review): TransformationInput is built positionally in every test below.
+  # The arguments appear to be (tensor id, op codes, buffers, subgraph,
+  # producer op id, consumer op ids, quant params), with -1 used for "no
+  # producer" / "graph output" -- confirm against
+  # transformation_utils.TransformationInput.
+  def setUp(self):
+    """Reads the test model that every test case transforms."""
+    super().setUp()
+    self._orig_test_model_path = os.path.join(
+        TEST_DATA_PREFIX_PATH, "tests/models/insert_dequant_test.tflite"
+    )
+    self._model = tfl_flatbuffer_utils.read_model(self._orig_test_model_path)
+  def test_dequant_insert_constant(self):
+    """Test dequant insert lib on a constant tensor."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    dequant_opcode = schema_py_generated.BuiltinOperator.DEQUANTIZE
+    # insert dequant on the constant before the add node
+    dequant_insert.insert_dequant(
+        transformation_utils.TransformationInput(
+            7,
+            model.operatorCodes,
+            model.buffers,
+            subgraph,
+            -1,
+            [4],
+            qtyping.UniformQuantParams(8, None, np.array([1]), np.array([0])),
+        )
+    )
+    # check dequant op code is added to the model
+    self.assertEqual(
+        model.operatorCodes[len(model.operatorCodes) - 1].builtinCode,
+        dequant_opcode,
+    )
+    # check the new tensor is correctly created (expected at index 9)
+    self.assertIn(b"_dequant", subgraph.tensors[9].name)
+    self.assertEqual(
+        subgraph.tensors[9].type, schema_py_generated.TensorType.FLOAT32
+    )
+    # check the original source tensor is now quantized
+    self.assertEqual(
+        subgraph.tensors[7].type, schema_py_generated.TensorType.INT8
+    )
+    # check that the consumer has the correct inputs; it moved from index 4 to 5
+    # because the dequant op was inserted in front of it
+    self.assertEqual(subgraph.operators[5].inputs[0], 6)
+    self.assertEqual(subgraph.operators[5].inputs[1], 9)
+    # check that the inserted node has the correct input/output
+    self.assertEqual(subgraph.operators[4].outputs[0], 9)
+    self.assertEqual(subgraph.operators[4].inputs[0], 7)
+    # check that the inserted node is the dequant node
+    self.assertEqual(
+        subgraph.operators[4].opcodeIndex, len(model.operatorCodes) - 1
+    )
+  def test_dequant_insert_activation(self):
+    """Test dequant insert lib on activation tensors."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    dequant_opcode = schema_py_generated.BuiltinOperator.DEQUANTIZE
+    # insert dequant on the output of a conv node
+    dequant_insert.insert_dequant(
+        transformation_utils.TransformationInput(
+            4,
+            model.operatorCodes,
+            model.buffers,
+            subgraph,
+            1,
+            [3],
+            qtyping.UniformQuantParams(8, None, np.array([1]), np.array([0])),
+        )
+    )
+    # check dequant op code is added to the model
+    self.assertEqual(
+        model.operatorCodes[len(model.operatorCodes) - 1].builtinCode,
+        dequant_opcode,
+    )
+    # check the new tensor is correctly created (expected at index 9)
+    self.assertIn(b"_dequant", subgraph.tensors[9].name)
+    self.assertEqual(
+        subgraph.tensors[9].type, schema_py_generated.TensorType.FLOAT32
+    )
+    # check original source tensor is updated
+    self.assertEqual(
+        subgraph.tensors[4].type, schema_py_generated.TensorType.INT8
+    )
+    # check that the consumer has the correct inputs; it moved from index 3 to 4
+    # because the dequant op was inserted in front of it
+    self.assertEqual(subgraph.operators[4].inputs[0], 9)
+    self.assertEqual(subgraph.operators[4].inputs[1], 5)
+    # check that the inserted node has the correct input/output
+    self.assertEqual(subgraph.operators[3].outputs[0], 9)
+    self.assertEqual(subgraph.operators[3].inputs[0], 4)
+    # check that the inserted node is the dequant node
+    self.assertEqual(
+        subgraph.operators[3].opcodeIndex, len(model.operatorCodes) - 1
+    )
+  def test_dequant_insert_constant_multiple_consumers(self):
+    """Test dequant insert lib on a constant tensor with multiple consumers."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    dequant_opcode = schema_py_generated.BuiltinOperator.DEQUANTIZE
+    # insert dequant on the input of a conv node
+    post_trans_info = dequant_insert.insert_dequant(
+        transformation_utils.TransformationInput(
+            2,
+            model.operatorCodes,
+            model.buffers,
+            subgraph,
+            -1,
+            [1, 2],
+            qtyping.UniformQuantParams(8, None, np.array([1]), np.array([0])),
+        )
+    )
+    # the dequant op is placed right before the first consumer (op 1)
+    self.assertEqual(post_trans_info.op_id, 1)
+    self.assertEqual(post_trans_info.num_ops_added, 1)
+    # check dequant op code is added to the model
+    self.assertEqual(
+        model.operatorCodes[len(model.operatorCodes) - 1].builtinCode,
+        dequant_opcode,
+    )
+    # check the new tensor is correctly created (expected at index 9)
+    self.assertIn(b"_dequant", subgraph.tensors[9].name)
+    self.assertEqual(
+        subgraph.tensors[9].type, schema_py_generated.TensorType.FLOAT32
+    )
+    # check original source tensor has the correct type
+    self.assertEqual(
+        subgraph.tensors[2].type, schema_py_generated.TensorType.INT8
+    )
+    # check that the inserted node has the correct input/output
+    self.assertEqual(subgraph.operators[1].outputs[0], 9)
+    self.assertEqual(subgraph.operators[1].inputs[0], 2)
+    # check that the inserted node is the dequant node
+    self.assertEqual(
+        subgraph.operators[1].opcodeIndex, len(model.operatorCodes) - 1
+    )
+    # check that both consumers (shifted to indices 2 and 3) read the new tensor
+    self.assertEqual(subgraph.operators[2].inputs[1], 9)
+    self.assertEqual(subgraph.operators[3].inputs[1], 9)
+  def test_dequant_insert_activation_multiple_consumers(self):
+    """Test dequant insert lib on an activation tensor with multiple consumers."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    dequant_opcode = schema_py_generated.BuiltinOperator.DEQUANTIZE
+    # insert dequant on the output of a conv node
+    dequant_insert.insert_dequant(
+        transformation_utils.TransformationInput(
+            1,
+            model.operatorCodes,
+            model.buffers,
+            subgraph,
+            0,
+            [1, 2],
+            qtyping.UniformQuantParams(8, None, np.array([1]), np.array([0])),
+        )
+    )
+    # check dequant op code is added to the model
+    self.assertEqual(
+        model.operatorCodes[len(model.operatorCodes) - 1].builtinCode,
+        dequant_opcode,
+    )
+    # check the new tensor is correctly created (expected at index 9)
+    self.assertIn(b"_dequant", subgraph.tensors[9].name)
+    self.assertEqual(
+        subgraph.tensors[9].type, schema_py_generated.TensorType.FLOAT32
+    )
+    # check original source tensor is updated
+    self.assertEqual(
+        subgraph.tensors[1].type, schema_py_generated.TensorType.INT8
+    )
+    # check that the inserted node has the correct input/output
+    self.assertEqual(subgraph.operators[1].outputs[0], 9)
+    self.assertEqual(subgraph.operators[1].inputs[0], 1)
+    # check that the inserted node is the dequant node
+    self.assertEqual(
+        subgraph.operators[1].opcodeIndex, len(model.operatorCodes) - 1
+    )
+    # check that both consumers (shifted to indices 2 and 3) read the new tensor
+    self.assertEqual(subgraph.operators[2].inputs[0], 9)
+    self.assertEqual(subgraph.operators[3].inputs[0], 9)
+  def test_dequant_insert_activation_multiple_consumers_select(self):
+    """Test dequant insert lib on tensors with multiple consumers but only insert for one of them."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    dequant_opcode = schema_py_generated.BuiltinOperator.DEQUANTIZE
+    # insert dequant on the output of a conv node, for consumer op 1 only
+    dequant_insert.insert_dequant(
+        transformation_utils.TransformationInput(
+            1,
+            model.operatorCodes,
+            model.buffers,
+            subgraph,
+            0,
+            [1],
+            qtyping.UniformQuantParams(8, None, np.array([1]), np.array([0])),
+        )
+    )
+    # check dequant op code is added to the model
+    self.assertEqual(
+        model.operatorCodes[len(model.operatorCodes) - 1].builtinCode,
+        dequant_opcode,
+    )
+    # check the new tensor is correctly created (expected at index 9)
+    self.assertIn(b"_dequant", subgraph.tensors[9].name)
+    self.assertEqual(
+        subgraph.tensors[9].type, schema_py_generated.TensorType.FLOAT32
+    )
+    # check original source tensor is updated
+    self.assertEqual(
+        subgraph.tensors[1].type, schema_py_generated.TensorType.INT8
+    )
+    # check that the inserted node has the correct input/output
+    self.assertEqual(subgraph.operators[1].outputs[0], 9)
+    self.assertEqual(subgraph.operators[1].inputs[0], 1)
+    # check that the inserted node is the dequant node
+    self.assertEqual(
+        subgraph.operators[1].opcodeIndex, len(model.operatorCodes) - 1
+    )
+    # check that only the selected consumer reads the new tensor; the other
+    # consumer still reads the original tensor
+    self.assertEqual(subgraph.operators[2].inputs[0], 9)
+    self.assertEqual(subgraph.operators[3].inputs[0], 1)
+  def test_dequant_insert_on_graph_output(self):
+    """Test dequant insert lib on graph output."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    dequant_opcode = schema_py_generated.BuiltinOperator.DEQUANTIZE
+    # insert dequant on the graph output
+    dequant_insert.insert_dequant(
+        transformation_utils.TransformationInput(
+            8,
+            model.operatorCodes,
+            model.buffers,
+            subgraph,
+            4,
+            [-1],
+            qtyping.UniformQuantParams(8, None, np.array([1]), np.array([0])),
+        )
+    )
+    # check dequant op code is added to the model
+    self.assertEqual(
+        model.operatorCodes[len(model.operatorCodes) - 1].builtinCode,
+        dequant_opcode,
+    )
+    # check that the inserted node is the dequant node, placed right after the
+    # producer (op 4)
+    self.assertEqual(
+        subgraph.operators[5].opcodeIndex, len(model.operatorCodes) - 1
+    )
+    # check if the graph output is updated
+    self.assertEqual(subgraph.outputs[0], 9)
+# Run the test cases through googletest when this file is executed as a script.
+if __name__ == "__main__":
+  googletest.main()