PyPI - ai-edge-quantizer-nightly - Versions diffs - 0.0.1.dev20250115__py3-none-any.whl - Mend

ai-edge-quantizer-nightly 0.0.1.dev20250115__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

ai_edge_quantizer/__init__.py +19 -0
ai_edge_quantizer/algorithm_manager.py +167 -0
ai_edge_quantizer/algorithm_manager_api.py +271 -0
ai_edge_quantizer/algorithm_manager_api_test.py +210 -0
ai_edge_quantizer/algorithms/__init__.py +15 -0
ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py +15 -0
ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py +273 -0
ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py +664 -0
ai_edge_quantizer/algorithms/uniform_quantize/__init__.py +15 -0
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +666 -0
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +184 -0
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +371 -0
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +357 -0
ai_edge_quantizer/algorithms/utils/__init__.py +15 -0
ai_edge_quantizer/algorithms/utils/min_max_quantize_utils.py +1067 -0
ai_edge_quantizer/algorithms/utils/min_max_quantize_utils_test.py +512 -0
ai_edge_quantizer/calibrator.py +288 -0
ai_edge_quantizer/calibrator_test.py +297 -0
ai_edge_quantizer/conftest.py +22 -0
ai_edge_quantizer/default_policy.py +310 -0
ai_edge_quantizer/model_modifier.py +176 -0
ai_edge_quantizer/model_modifier_test.py +130 -0
ai_edge_quantizer/model_validator.py +357 -0
ai_edge_quantizer/model_validator_test.py +354 -0
ai_edge_quantizer/params_generator.py +361 -0
ai_edge_quantizer/params_generator_test.py +1041 -0
ai_edge_quantizer/qtyping.py +483 -0
ai_edge_quantizer/quantizer.py +372 -0
ai_edge_quantizer/quantizer_test.py +532 -0
ai_edge_quantizer/recipe.py +67 -0
ai_edge_quantizer/recipe_manager.py +245 -0
ai_edge_quantizer/recipe_manager_test.py +815 -0
ai_edge_quantizer/recipe_test.py +97 -0
ai_edge_quantizer/transformation_instruction_generator.py +584 -0
ai_edge_quantizer/transformation_instruction_generator_test.py +1082 -0
ai_edge_quantizer/transformation_performer.py +278 -0
ai_edge_quantizer/transformation_performer_test.py +344 -0
ai_edge_quantizer/transformations/__init__.py +15 -0
ai_edge_quantizer/transformations/dequant_insert.py +87 -0
ai_edge_quantizer/transformations/dequant_insert_test.py +304 -0
ai_edge_quantizer/transformations/emulated_subchannel.py +363 -0
ai_edge_quantizer/transformations/emulated_subchannel_test.py +212 -0
ai_edge_quantizer/transformations/quant_insert.py +100 -0
ai_edge_quantizer/transformations/quant_insert_test.py +284 -0
ai_edge_quantizer/transformations/quantize_tensor.py +156 -0
ai_edge_quantizer/transformations/quantize_tensor_test.py +227 -0
ai_edge_quantizer/transformations/transformation_utils.py +132 -0
ai_edge_quantizer/transformations/transformation_utils_test.py +162 -0
ai_edge_quantizer/utils/__init__.py +15 -0
ai_edge_quantizer/utils/calibration_utils.py +86 -0
ai_edge_quantizer/utils/calibration_utils_test.py +77 -0
ai_edge_quantizer/utils/test_utils.py +107 -0
ai_edge_quantizer/utils/tfl_flatbuffer_utils.py +317 -0
ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py +200 -0
ai_edge_quantizer/utils/tfl_interpreter_utils.py +312 -0
ai_edge_quantizer/utils/tfl_interpreter_utils_test.py +332 -0
ai_edge_quantizer/utils/validation_utils.py +125 -0
ai_edge_quantizer/utils/validation_utils_test.py +87 -0
ai_edge_quantizer_nightly-0.0.1.dev20250115.dist-info/LICENSE +201 -0
ai_edge_quantizer_nightly-0.0.1.dev20250115.dist-info/METADATA +32 -0
ai_edge_quantizer_nightly-0.0.1.dev20250115.dist-info/RECORD +63 -0
ai_edge_quantizer_nightly-0.0.1.dev20250115.dist-info/WHEEL +5 -0
ai_edge_quantizer_nightly-0.0.1.dev20250115.dist-info/top_level.txt +1 -0

ai_edge_quantizer/transformations/quant_insert_test.py ADDED Viewed

@@ -0,0 +1,284 @@
+# Copyright 2024 The AI Edge Quantizer Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Test for various transformations used by quantization toolkit."""
+import os
+import numpy as np
+from tensorflow.python.platform import googletest
+from ai_edge_quantizer import qtyping
+from ai_edge_quantizer.transformations import quant_insert
+from ai_edge_quantizer.transformations import transformation_utils
+from ai_edge_quantizer.utils import test_utils
+from ai_edge_quantizer.utils import tfl_flatbuffer_utils
+from ai_edge_litert import schema_py_generated  # pylint: disable=g-direct-tensorflow-import
+TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile("..")
+class QuantInsertTest(googletest.TestCase):
+  """Tests for quant_insert.insert_quant on the insert_dequant_test model."""
+  def setUp(self):
+    """Reads the test model from disk before every test."""
+    super().setUp()
+    self._orig_test_model_path = os.path.join(
+        TEST_DATA_PREFIX_PATH, "tests/models/insert_dequant_test.tflite"
+    )
+    self._model = tfl_flatbuffer_utils.read_model(self._orig_test_model_path)
+  def test_quant_insert_constant(self):
+    """Test quant insert lib on a constant tensor."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    quant_opcode = schema_py_generated.BuiltinOperator.QUANTIZE
+    # insert quant on the constant before the add node
+    # NOTE(review): the positional arguments appear to be (tensor_id, op_codes,
+    # buffers, subgraph, producer, consumers, quant_params) - confirm against
+    # transformation_utils.TransformationInput.
+    quant_insert.insert_quant(
+        transformation_utils.TransformationInput(
+            7,
+            model.operatorCodes,
+            model.buffers,
+            subgraph,
+            -1,
+            [4],
+            qtyping.UniformQuantParams(8, None, np.array([1]), np.array([0])),
+        )
+    )
+    # check quant op code is added to the model
+    self.assertEqual(
+        model.operatorCodes[0].builtinCode,
+        quant_opcode,
+    )
+    # check new tensor is correctly created
+    self.assertIn(b"_quantized", subgraph.tensors[9].name)
+    self.assertEqual(
+        subgraph.tensors[9].type, schema_py_generated.TensorType.INT8
+    )
+    # check original source tensor keeps its type
+    self.assertEqual(
+        subgraph.tensors[7].type, schema_py_generated.TensorType.UINT8
+    )
+    # checking if consumer has the correct input
+    self.assertEqual(subgraph.operators[5].inputs[0], 6)
+    self.assertEqual(subgraph.operators[5].inputs[1], 9)
+    # checking the inserted node has the correct input/output
+    self.assertEqual(subgraph.operators[4].outputs[0], 9)
+    self.assertEqual(subgraph.operators[4].inputs[0], 7)
+    # checking inserted node is the quant node
+    self.assertEqual(subgraph.operators[4].opcodeIndex, 0)
+  def test_quant_insert_activation(self):
+    """Test quant insert lib on activation tensors."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    quant_opcode = schema_py_generated.BuiltinOperator.QUANTIZE
+    # insert quant on the output of a conv node
+    quant_insert.insert_quant(
+        transformation_utils.TransformationInput(
+            4,
+            model.operatorCodes,
+            model.buffers,
+            subgraph,
+            1,
+            [3],
+            qtyping.UniformQuantParams(8, None, np.array([1]), np.array([0])),
+        )
+    )
+    # check quant op code is added to the model
+    self.assertEqual(
+        model.operatorCodes[0].builtinCode,
+        quant_opcode,
+    )
+    # check new tensor is correctly created
+    self.assertIn(b"_quantized", subgraph.tensors[9].name)
+    self.assertEqual(
+        subgraph.tensors[9].type, schema_py_generated.TensorType.INT8
+    )
+    # check original source tensor has the expected type
+    self.assertEqual(
+        subgraph.tensors[4].type, schema_py_generated.TensorType.UINT8
+    )
+    # checking if consumer has the correct input
+    self.assertEqual(subgraph.operators[4].inputs[0], 9)
+    self.assertEqual(subgraph.operators[4].inputs[1], 5)
+    # checking the inserted node has the correct input/output
+    self.assertEqual(subgraph.operators[3].outputs[0], 9)
+    self.assertEqual(subgraph.operators[3].inputs[0], 4)
+    # checking inserted node is the quant node
+    self.assertEqual(subgraph.operators[3].opcodeIndex, 0)
+  def test_quant_insert_constant_multiple_consumers(self):
+    """Test quant insert lib on tensors with multiple consumers."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    quant_opcode = schema_py_generated.BuiltinOperator.QUANTIZE
+    # insert quant on the input of a conv node
+    post_trans_info = quant_insert.insert_quant(
+        transformation_utils.TransformationInput(
+            2,
+            model.operatorCodes,
+            model.buffers,
+            subgraph,
+            -1,
+            [1, 2],
+            qtyping.UniformQuantParams(8, None, np.array([1]), np.array([0])),
+        )
+    )
+    # a single quant op is expected to be reported, at operator index 1
+    self.assertEqual(post_trans_info.op_id, 1)
+    self.assertEqual(post_trans_info.num_ops_added, 1)
+    # check quant op code is added to the model
+    self.assertEqual(
+        model.operatorCodes[0].builtinCode,
+        quant_opcode,
+    )
+    # check new tensor is correctly created
+    self.assertIn(b"_quantized", subgraph.tensors[9].name)
+    self.assertEqual(
+        subgraph.tensors[9].type, schema_py_generated.TensorType.INT8
+    )
+    # check original source tensor has the correct type
+    self.assertEqual(
+        subgraph.tensors[2].type, schema_py_generated.TensorType.UINT8
+    )
+    # checking the inserted node has the correct input/output
+    self.assertEqual(subgraph.operators[1].outputs[0], 9)
+    self.assertEqual(subgraph.operators[1].inputs[0], 2)
+    # checking inserted node is the quant node
+    self.assertEqual(subgraph.operators[1].opcodeIndex, 0)
+    # checking if both consumers have the correct input
+    self.assertEqual(subgraph.operators[2].inputs[1], 9)
+    self.assertEqual(subgraph.operators[3].inputs[1], 9)
+  def test_quant_insert_activation_multiple_consumers(self):
+    """Test quant insert lib on tensors with multiple consumers."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    quant_opcode = schema_py_generated.BuiltinOperator.QUANTIZE
+    # insert quant on the output of a conv node
+    quant_insert.insert_quant(
+        transformation_utils.TransformationInput(
+            1,
+            model.operatorCodes,
+            model.buffers,
+            subgraph,
+            0,
+            [1, 2],
+            qtyping.UniformQuantParams(8, None, np.array([1]), np.array([0])),
+        )
+    )
+    # check quant op code is added to the model
+    self.assertEqual(
+        model.operatorCodes[0].builtinCode,
+        quant_opcode,
+    )
+    # check new tensor is correctly created
+    self.assertIn(b"_quantized", subgraph.tensors[9].name)
+    self.assertEqual(
+        subgraph.tensors[9].type, schema_py_generated.TensorType.INT8
+    )
+    # check original source tensor has the expected type
+    self.assertEqual(
+        subgraph.tensors[1].type, schema_py_generated.TensorType.UINT8
+    )
+    # checking the inserted node has the correct input/output
+    self.assertEqual(subgraph.operators[1].outputs[0], 9)
+    self.assertEqual(subgraph.operators[1].inputs[0], 1)
+    # checking inserted node is the quant node
+    self.assertEqual(subgraph.operators[1].opcodeIndex, 0)
+    # checking if both consumers have the correct input
+    self.assertEqual(subgraph.operators[2].inputs[0], 9)
+    self.assertEqual(subgraph.operators[3].inputs[0], 9)
+  def test_quant_insert_activation_multiple_consumers_select(self):
+    """Test quant insert lib on tensors with multiple consumers but only insert for one of them."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    quant_opcode = schema_py_generated.BuiltinOperator.QUANTIZE
+    # insert quant on the output of a conv node
+    quant_insert.insert_quant(
+        transformation_utils.TransformationInput(
+            1,
+            model.operatorCodes,
+            model.buffers,
+            subgraph,
+            0,
+            [1],
+            qtyping.UniformQuantParams(8, None, np.array([1]), np.array([0])),
+        )
+    )
+    # check quant op code is added to the model
+    self.assertEqual(
+        model.operatorCodes[0].builtinCode,
+        quant_opcode,
+    )
+    # check new tensor is correctly created
+    self.assertIn(b"_quantized", subgraph.tensors[9].name)
+    self.assertEqual(
+        subgraph.tensors[9].type, schema_py_generated.TensorType.INT8
+    )
+    # check original source tensor has the expected type
+    self.assertEqual(
+        subgraph.tensors[1].type, schema_py_generated.TensorType.UINT8
+    )
+    # checking inserted node is the quant node
+    self.assertEqual(subgraph.operators[1].opcodeIndex, 0)
+    # checking only the selected consumer is rewired; the other still reads
+    # the original tensor
+    self.assertEqual(subgraph.operators[2].inputs[0], 9)
+    self.assertEqual(subgraph.operators[3].inputs[0], 1)
+    # checking the inserted node has the correct input/output
+    self.assertEqual(subgraph.operators[1].outputs[0], 9)
+    self.assertEqual(subgraph.operators[1].inputs[0], 1)
+  def test_dequant_insert_on_graph_output(self):
+    """Test quant insert lib on graph output."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    # insert quant on the graph output (the call below is insert_quant, even
+    # though the test name says dequant)
+    quant_insert.insert_quant(
+        transformation_utils.TransformationInput(
+            8,
+            model.operatorCodes,
+            model.buffers,
+            subgraph,
+            4,
+            [-1],
+            qtyping.UniformQuantParams(8, None, np.array([1]), np.array([0])),
+        )
+    )
+    # checking inserted node is the quant node
+    self.assertEqual(subgraph.operators[5].opcodeIndex, 0)
+    # check if the graph output is updated
+    self.assertEqual(subgraph.outputs[0], 9)
+if __name__ == "__main__":
+  googletest.main()

ai_edge_quantizer/transformations/quantize_tensor.py ADDED Viewed

@@ -0,0 +1,156 @@
+# Copyright 2024 The AI Edge Quantizer Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""quantize a given tensor."""
+from typing import Optional, cast
+import numpy as np
+from ai_edge_quantizer import qtyping
+from ai_edge_quantizer.transformations import transformation_utils
+from ai_edge_litert import schema_py_generated  # pylint: disable=g-direct-tensorflow-import
+# TODO: b/335014051 - Support distinguishing INT, FLOAT & UINT, BFLOAT.
+def quant_params_to_tflite_type(
+    bitwidth: int,
+) -> Optional[schema_py_generated.TensorType]:
+  """Map a uniform-quantization bit width to a TFLite integer tensor type.
+  Exactly 4 bits selects INT4. Every other width selects the first of INT8,
+  INT16, INT32 and INT64 whose size is at least the requested width.
+  Args:
+    bitwidth: Bit width from UniformQuantParams.
+  Returns:
+    The corresponding TFLite tensor type.
+  Raises:
+    ValueError: If bitwidth is larger than 64.
+  """
+  tensor_type = schema_py_generated.TensorType
+  # 4-bit is matched exactly; other widths up to 8 fall through to INT8.
+  if bitwidth == 4:
+    return tensor_type.INT4
+  # Ordered narrowest-first so the first ceiling that fits wins.
+  for ceiling, type_name in (
+      (8, "INT8"),
+      (16, "INT16"),
+      (32, "INT32"),
+      (64, "INT64"),
+  ):
+    if bitwidth <= ceiling:
+      return getattr(tensor_type, type_name)
+  raise ValueError(f"Unsupported quant params: {bitwidth}")
+def nonlinear_quant_params_to_tflite_type(
+    bitwidth: int,
+) -> Optional[schema_py_generated.TensorType]:
+  """Map a non-linear quantization bit width to a TFLite float tensor type.
+  Args:
+    bitwidth: Bit width from NonLinearQuantParams.
+  Returns:
+    FLOAT16 for 16 bits, FLOAT32 for 32 bits.
+  Raises:
+    ValueError: If bitwidth is neither 16 nor 32.
+  """
+  tensor_type = schema_py_generated.TensorType
+  # Only the two float widths are supported; anything else is rejected.
+  if bitwidth == 32:
+    return tensor_type.FLOAT32
+  if bitwidth == 16:
+    return tensor_type.FLOAT16
+  raise ValueError(f"Unsupported nonlinear params: {bitwidth}")
+def _pack_data(bitwidth: int, flattened_data: np.ndarray) -> np.ndarray:
+  """Pack the data to the corresponding bit width.
+  Currently only support 4 bits. If no packing is needed, the original data is
+  returned.
+  For 4 bits, consecutive pairs of elements are merged into one byte: the
+  even-indexed element fills the low nibble and the odd-indexed element the
+  high nibble. An odd-length input gets a zero high nibble in its last byte.
+  Args:
+    bitwidth: Bit width of the quantized values (num_bits of the quant params).
+    flattened_data: The 1-D data to be packed.
+  Returns:
+    Packed uint8 data for 4 bits; otherwise flattened_data unchanged.
+  """
+  if bitwidth != 4:
+    return flattened_data
+  # Cast both nibbles to uint8 so the result is one byte per element pair
+  # whatever the input dtype is; casting only the high nibble would let
+  # bitwise_or promote a signed input to a wider integer type.
+  low_nibbles = np.bitwise_and(flattened_data[::2], 0x0F).astype(np.uint8)
+  high_nibbles = np.left_shift(flattened_data[1::2], 4).astype(np.uint8)
+  # Odd element count: the last low nibble has no partner, pair it with 0.
+  if high_nibbles.shape[0] == low_nibbles.shape[0] - 1:
+    high_nibbles = np.pad(high_nibbles, (0, 1), constant_values=0)
+  return np.bitwise_or(low_nibbles, high_nibbles)
+def quantize_tensor(
+    transformation_input: transformation_utils.TransformationInput,
+) -> qtyping.TransformationInfo:
+  """Quantize the tensor at the tensor_id in the given subgraph.
+  The tensor is modified in place: its type is rewritten and, for uniform
+  quantization, its quantization parameters are set. When the tensor has a
+  buffer and quantized data is supplied, the buffer contents are replaced
+  with that data (packed when the bit width requires it).
+  Args:
+    transformation_input: input structure that contains all information needed
+      for the transformation.
+  Returns:
+    TransformationInfo:
+      op_id: always 0
+      num_ops_added: the total number of ops inserted by this operation, which
+        is 0
+      output_tensor_id: the id of the tensor that was quantized
+  """
+  quant_params = transformation_input.quant_params
+  tensor = transformation_input.subgraph.tensors[transformation_input.tensor_id]
+  # TODO: b/336385820 - support quantize buffer directly when quantized_data
+  # is not provided
+  # Skipped when the tensor's buffer index is falsy (e.g. 0) or when no
+  # quantized data was supplied.
+  if tensor.buffer and quant_params.quantized_data is not None:
+    # Reinterpret the quantized values as raw bytes before packing.
+    raw_bytes = np.frombuffer(
+        cast(np.ndarray, quant_params.quantized_data).tobytes(),
+        dtype=np.uint8,
+    ).flatten()
+    transformation_input.buffers[tensor.buffer].data = _pack_data(
+        quant_params.num_bits, raw_bytes
+    )
+  if isinstance(quant_params, qtyping.UniformQuantParams):
+    flatbuffer_quantization = schema_py_generated.QuantizationParametersT()
+    flatbuffer_quantization.scale = list(
+        quant_params.scale.flatten().astype(np.float32)
+    )  # flatbuffer requires scale as list[float]
+    flatbuffer_quantization.zeroPoint = list(
+        quant_params.zero_point.flatten().astype(np.int64)
+    )  # flatbuffer requires zeroPoint as list[int64]
+    # Leave quantizedDimension at its default when none is given.
+    if quant_params.quantized_dimension is not None:
+      flatbuffer_quantization.quantizedDimension = (
+          quant_params.quantized_dimension
+      )
+    tensor.quantization = flatbuffer_quantization
+    tensor.type = quant_params_to_tflite_type(quant_params.num_bits)
+  # Non-linear quantization only changes the tensor type; no quantization
+  # parameters are attached.
+  if isinstance(quant_params, qtyping.NonLinearQuantParams):
+    tensor.type = nonlinear_quant_params_to_tflite_type(quant_params.num_bits)
+  return qtyping.TransformationInfo(
+      0, num_ops_added=0, output_tensor_id=transformation_input.tensor_id
+  )

ai_edge_quantizer/transformations/quantize_tensor_test.py ADDED Viewed

@@ -0,0 +1,227 @@
+# Copyright 2024 The AI Edge Quantizer Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""test for quantize tensor."""
+import os
+import numpy as np
+from tensorflow.python.platform import googletest
+from absl.testing import parameterized
+from ai_edge_quantizer import qtyping
+from ai_edge_quantizer.transformations import quantize_tensor
+from ai_edge_quantizer.transformations import transformation_utils
+from ai_edge_quantizer.utils import test_utils
+from ai_edge_quantizer.utils import tfl_flatbuffer_utils
+from ai_edge_litert import schema_py_generated  # pylint: disable=g-direct-tensorflow-import
+TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile("..")
+class QuantizeTensorTest(parameterized.TestCase):
+  """Tests for quantize_tensor.quantize_tensor on the insert_dequant_test model."""
+  def setUp(self):
+    """Reads the test model from disk before every test."""
+    super().setUp()
+    self._orig_test_model_path = os.path.join(
+        TEST_DATA_PREFIX_PATH, "tests/models/insert_dequant_test.tflite"
+    )
+    self._model = tfl_flatbuffer_utils.read_model(self._orig_test_model_path)
+  def test_quantize_constant_tensor(self):
+    """test quantizing a constant tensor."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    data = np.ones([1, 112, 112, 3], dtype=np.int8)
+    ret = quantize_tensor.quantize_tensor(
+        transformation_utils.TransformationInput(
+            7,
+            model.operatorCodes,
+            model.buffers,
+            subgraph,
+            -1,
+            [4],
+            qtyping.UniformQuantParams(
+                8, None, np.ones(1), np.ones(1), True, data
+            ),
+        )
+    )
+    self.assertEqual(ret.op_id, 0)
+    self.assertEqual(ret.num_ops_added, 0)
+    # 8-bit data is stored unpacked, so the buffer equals the flattened input
+    # NOTE(review): presumably buffer 8 backs tensor 7 in the fixture - confirm
+    self.assertListEqual(
+        np.array(model.buffers[8].data).tolist(), data.flatten().tolist()
+    )
+    quant_param = subgraph.tensors[7].quantization
+    self.assertListEqual(np.array(quant_param.scale).tolist(), [1])
+    self.assertEqual(np.array(quant_param.zeroPoint).tolist(), [1])
+    self.assertEqual(quant_param.quantizedDimension, 0)
+  def test_quantize_activation_tensor(self):
+    """test quantizing an activation tensor."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    ret = quantize_tensor.quantize_tensor(
+        transformation_utils.TransformationInput(
+            4,
+            model.operatorCodes,
+            model.buffers,
+            subgraph,
+            1,
+            [3],
+            qtyping.UniformQuantParams(
+                8, None, np.array([22]), np.array([127])
+            ),
+        )
+    )
+    self.assertEqual(ret.op_id, 0)
+    self.assertEqual(ret.num_ops_added, 0)
+    quant_param = subgraph.tensors[4].quantization
+    self.assertListEqual(np.array(quant_param.scale).tolist(), [22])
+    self.assertListEqual(np.array(quant_param.zeroPoint).tolist(), [127])
+    self.assertEqual(quant_param.quantizedDimension, 0)
+  def test_quantize_tensor_with_per_channel_quantization(self):
+    """test quantizing a tensor with per-channel quantization parameters."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    ret = quantize_tensor.quantize_tensor(
+        transformation_utils.TransformationInput(
+            4,
+            model.operatorCodes,
+            model.buffers,
+            subgraph,
+            1,
+            [3],
+            qtyping.UniformQuantParams(8, 3, np.ones([22]), np.zeros([22])),
+        )
+    )
+    self.assertEqual(ret.op_id, 0)
+    self.assertEqual(ret.num_ops_added, 0)
+    quant_param = subgraph.tensors[4].quantization
+    # all 22 scales and zero points must be written through
+    self.assertListEqual(
+        np.array(quant_param.scale).tolist(), np.ones([22]).tolist()
+    )
+    self.assertListEqual(
+        np.array(quant_param.zeroPoint).tolist(), np.zeros([22]).tolist()
+    )
+    # the quantized dimension passed in (3) must be written to the tensor
+    self.assertEqual(quant_param.quantizedDimension, 3)
+  def test_quantize_tensor_with_nonlinear_quantization(self):
+    """test quantizing an activation tensor with non-linear quantization."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    quantize_tensor.quantize_tensor(
+        transformation_utils.TransformationInput(
+            4,
+            model.operatorCodes,
+            model.buffers,
+            subgraph,
+            1,
+            [3],
+            qtyping.NonLinearQuantParams(16, None),
+        )
+    )
+    # 16-bit non-linear params must turn the tensor into FLOAT16
+    self.assertEqual(
+        subgraph.tensors[4].type, schema_py_generated.TensorType.FLOAT16
+    )
+  def test_int4_constant_packed_correctly(self):
+    """test 4-bit constant data is packed two values per byte."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    # 15 values: an odd count, so the last byte only has its low nibble set
+    data = np.array(
+        [
+            0x0,
+            0x1,
+            0x2,
+            0x3,
+            0x4,
+            0x5,
+            0x6,
+            0x7,
+            0x8,
+            0x9,
+            0xA,
+            0xB,
+            0xC,
+            0xD,
+            0xE,
+        ],
+        dtype=np.int8,
+    )
+    # even-indexed values land in the low nibble, odd-indexed in the high one
+    expected = np.array([0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0x0E])
+    ret = quantize_tensor.quantize_tensor(
+        transformation_utils.TransformationInput(
+            tensor_id=7,
+            op_codes=model.operatorCodes,
+            buffers=model.buffers,
+            subgraph=subgraph,
+            producer=-1,
+            consumers=[4],
+            quant_params=qtyping.UniformQuantParams(
+                4, None, np.ones(1), np.ones(1), True, data
+            ),
+        )
+    )
+    self.assertEqual(ret.op_id, 0)
+    self.assertEqual(ret.num_ops_added, 0)
+    np.testing.assert_array_equal(model.buffers[8].data, expected)
+    quant_param = subgraph.tensors[7].quantization
+    np.testing.assert_array_equal(quant_param.scale, [1])
+    np.testing.assert_array_equal(quant_param.zeroPoint, [1])
+    self.assertEqual(quant_param.quantizedDimension, 0)
+  @parameterized.named_parameters(
+      dict(
+          testcase_name="int5",
+          num_bits=5,
+      ),
+      dict(
+          testcase_name="int2",
+          num_bits=2,
+      ),
+  )
+  def test_int_constant_not_packed(self, num_bits):
+    """test constant data with a bit width other than 4 is stored unpacked."""
+    subgraph = self._model.subgraphs[0]
+    model = self._model
+    tensor_id = 7
+    data = np.array([0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7], dtype=np.int8)
+    # the buffer is expected to hold the values unchanged, one per byte
+    expected = np.array([0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7])
+    ret = quantize_tensor.quantize_tensor(
+        transformation_utils.TransformationInput(
+            tensor_id=tensor_id,
+            op_codes=model.operatorCodes,
+            buffers=model.buffers,
+            subgraph=subgraph,
+            producer=-1,
+            consumers=[4],
+            quant_params=qtyping.UniformQuantParams(
+                num_bits=num_bits,
+                quantized_dimension=None,
+                scale=np.ones(1),
+                zero_point=np.ones(1),
+                symmetric=True,
+                quantized_data=data,
+            ),
+        )
+    )
+    self.assertEqual(ret.op_id, 0)
+    self.assertEqual(ret.num_ops_added, 0)
+    np.testing.assert_array_equal(model.buffers[8].data, expected)
+    quant_param = subgraph.tensors[tensor_id].quantization
+    np.testing.assert_array_equal(quant_param.scale, [1])
+    np.testing.assert_array_equal(quant_param.zeroPoint, [1])
+    self.assertEqual(quant_param.quantizedDimension, 0)
+if __name__ == "__main__":
+  googletest.main()