ai-edge-quantizer-nightly 0.4.0.dev20250925__py3-none-any.whl → 0.4.0.dev20250927__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithm_manager.py +36 -0
- ai_edge_quantizer/algorithms/uniform_quantize/mse.py +125 -0
- ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py +195 -0
- ai_edge_quantizer/recipe_manager.py +12 -3
- ai_edge_quantizer/recipe_manager_test.py +69 -2
- {ai_edge_quantizer_nightly-0.4.0.dev20250925.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250927.dist-info}/METADATA +1 -1
- {ai_edge_quantizer_nightly-0.4.0.dev20250925.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250927.dist-info}/RECORD +10 -8
- {ai_edge_quantizer_nightly-0.4.0.dev20250925.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250927.dist-info}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20250925.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250927.dist-info}/WHEEL +0 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20250925.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250927.dist-info}/top_level.txt +0 -0
|
@@ -25,9 +25,11 @@ from ai_edge_quantizer.algorithms.nonlinear_quantize import float_casting
|
|
|
25
25
|
from ai_edge_quantizer.algorithms.uniform_quantize import common_quantize
|
|
26
26
|
from ai_edge_quantizer.algorithms.uniform_quantize import dequantized_weight_recovery
|
|
27
27
|
from ai_edge_quantizer.algorithms.uniform_quantize import hadamard_rotation
|
|
28
|
+
from ai_edge_quantizer.algorithms.uniform_quantize import mse
|
|
28
29
|
from ai_edge_quantizer.algorithms.uniform_quantize import naive_min_max_quantize
|
|
29
30
|
from ai_edge_quantizer.algorithms.uniform_quantize import octav
|
|
30
31
|
|
|
32
|
+
|
|
31
33
|
# TODO: b/399775701 - Clean up this file.
|
|
32
34
|
|
|
33
35
|
_TFLOpName = qtyping.TFLOperationName
|
|
@@ -60,6 +62,7 @@ class AlgorithmName(str, enum.Enum):
|
|
|
60
62
|
DEQUANTIZED_WEIGHT_RECOVERY = dequantized_weight_recovery.ALGORITHM_KEY
|
|
61
63
|
OCTAV = octav.ALGORITHM_KEY
|
|
62
64
|
HADAMARD_ROTATION = hadamard_rotation.ALGORITHM_KEY
|
|
65
|
+
MSE = mse.ALGORITHM_KEY
|
|
63
66
|
|
|
64
67
|
|
|
65
68
|
### MIN/MAX_UNIFORM_QUANT ###
|
|
@@ -322,3 +325,36 @@ for (
|
|
|
322
325
|
calibration_func=naive_min_max_quantize.min_max_calibrate,
|
|
323
326
|
materialize_func=materialize_func,
|
|
324
327
|
)
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
# ### MSE ###
# Op quantization configs for MSE are validated by the checker shared across
# the uniform-quantize algorithms.
register_op_quant_config_validation_func(
    AlgorithmName.MSE, common_quantize.check_op_quantization_config
)

# MSE uses the default config check policy.
register_config_check_policy_func(
    AlgorithmName.MSE, default_policy.DEFAULT_CONFIG_CHECK_POLICY
)

# Ops quantizable with MSE, keyed to the generic materialize function that
# each one specializes.
_MSE_OP_NAME_MATERIALIZE_FUNC_DICT = immutabledict({
    _TFLOpName.FULLY_CONNECTED: common_quantize.materialize_fc_conv,
    _TFLOpName.EMBEDDING_LOOKUP: common_quantize.materialize_embedding_lookup,
})
for op_name, materialize_func in _MSE_OP_NAME_MATERIALIZE_FUNC_DICT.items():
  # QSV initialization and calibration are borrowed from naive min/max; only
  # the materialize step is specialized, by binding the MSE parameter solver
  # as the first positional argument of the generic materialize function.
  register_quantized_op(
      AlgorithmName.MSE,
      op_name,
      naive_min_max_quantize.init_qsvs,
      calibration_func=naive_min_max_quantize.min_max_calibrate,
      materialize_func=functools.partial(
          materialize_func, mse.get_tensor_quant_params
      ),
  )
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# Copyright 2024 The AI Edge Quantizer Authors.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
|
|
16
|
+
"""Implements the MSE quantization."""
|
|
17
|
+
|
|
18
|
+
import dataclasses
|
|
19
|
+
from typing import Any, Optional
|
|
20
|
+
import numpy as np
|
|
21
|
+
from ai_edge_quantizer import qtyping
|
|
22
|
+
from ai_edge_quantizer.algorithms.uniform_quantize import common_quantize
|
|
23
|
+
from ai_edge_quantizer.algorithms.uniform_quantize import naive_min_max_quantize
|
|
24
|
+
from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor
|
|
25
|
+
from ai_edge_quantizer.algorithms.utils import common_utils
|
|
26
|
+
|
|
27
|
+
ALGORITHM_KEY = "MSE"

# Coefficients from offline numeric analysis. Keyed by weight bit width; the
# value is the multiplier applied to the root-mean-square of the weights to
# obtain the quantization scale.
_MSE_QUANT_MULS = {
    8: 0.05408,
    4: 0.37755,
}


def get_tensor_quant_params(
    op_info: qtyping.OpInfo,
    tensor_quant_config: qtyping.TensorQuantizationConfig,
    tensor_content: Optional[np.ndarray] = None,
    tensor_qsv: Optional[dict[str, Any]] = None,
) -> qtyping.UniformQuantParams:
  """Returns the quantization parameters for a tensor.

  For weight tensors the scale is `multiplier * RMS(weights)`, where the RMS is
  taken over every dimension except the quantized one and the multiplier is
  looked up in `_MSE_QUANT_MULS` by bit width. The zero point is always zero.

  Args:
    op_info: Aggregated information about the op (e.g., quantization config).
    tensor_quant_config: The quantization config for the tensor.
    tensor_content: The content of the tensor. When None, it means the tensor is
      not a weight tensor (e.g. static quantization) so we fallback to using
      naive_min_max_quantize.
    tensor_qsv: A dictionary containing the min/max of the tensor.

  Returns:
    The quantization parameters of the tensor. When `tensor_content` is
    provided, the result also carries the quantized tensor data.

  Raises:
    ValueError: If the blockwise quantization is requested.
    ValueError: If the asymmetric quantization is requested.
    ValueError: `tensor_qsv` must contain min/max values, or `tensor_content`
      must be provided so that they can be inferred.
    ValueError: If the requested number of bits has no MSE coefficient.
  """
  if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
    raise ValueError(
        "Blockwise quantization is not supported for MSE quantization."
    )

  # Fallback to naive_min_max_quantize.py for non-weight tensors.
  if tensor_content is None:
    return naive_min_max_quantize.get_tensor_quant_params(
        op_info, tensor_quant_config, tensor_content, tensor_qsv
    )

  if not tensor_quant_config.symmetric:
    raise ValueError(
        f"Unsupported symmetry: {tensor_quant_config.symmetric}. MSE"
        " supports symmetric quantization only for now."
    )

  if not tensor_qsv:
    # We need min/max to calculate quantization parameters, which
    # should be collected during the calibration process. However,
    # weight-only and DRQ do not require calibration, thus it is
    # possible that this information is missing here. In that case we
    # collect min/max on the spot.
    tensor_min_max = common_quantize.init_tensor_min_max(
        tensor_content,
        op_info,
    )
  else:
    tensor_min_max = tensor_qsv

  # The min/max values themselves are not read by the RMS-based scale below;
  # this check only enforces the documented precondition.
  if "min" not in tensor_min_max or "max" not in tensor_min_max:
    raise ValueError(
        "min and max must be provided to produce tensor quantization"
        " parameters. Check if the correct calibration results are passed into"
        " the ParamsGenerator."
    )

  quantized_dim = common_utils.get_weight_quantized_dim(
      op_info, tensor_content, tensor_quant_config.granularity
  )
  reduce_dims = common_utils.get_reduce_dims(
      quantized_dim, tensor_content.shape
  )

  # Only bit widths with a precomputed coefficient are supported. Report an
  # unsupported width as a ValueError, like the other unsupported-config
  # cases, instead of leaking a bare KeyError from the table lookup.
  multiplier = _MSE_QUANT_MULS.get(tensor_quant_config.num_bits)
  if multiplier is None:
    raise ValueError(
        f"Unsupported number of bits: {tensor_quant_config.num_bits}. MSE"
        f" quantization supports {sorted(_MSE_QUANT_MULS)} bits only."
    )

  scale = multiplier * np.sqrt(
      np.mean(tensor_content**2, axis=reduce_dims, keepdims=True)
  )
  # A tensor (or channel) that is entirely zero has an RMS of zero and hence a
  # zero scale. Any positive scale maps such values to quantized zero, so use
  # 1 there. NOTE(review): presumably uniform_quantize divides by the scale,
  # which would otherwise yield NaNs - confirm against its implementation.
  scale = np.where(scale == 0, np.ones_like(scale), scale)
  zp = np.zeros_like(scale, dtype=np.int32)

  quant_params = qtyping.UniformQuantParams(
      scale=scale,
      zero_point=zp,
      num_bits=tensor_quant_config.num_bits,
      symmetric=tensor_quant_config.symmetric,
      quantized_dimension=quantized_dim,
      block_size=tensor_quant_config.block_size,
  )

  quantized_vars = uniform_quantize_tensor.uniform_quantize(
      tensor_content,
      quant_params,
      tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE,
  )

  return dataclasses.replace(quant_params, quantized_data=quantized_vars)
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
# Copyright 2024 The AI Edge Quantizer Authors.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
from typing import cast
|
|
18
|
+
|
|
19
|
+
from absl.testing import parameterized
|
|
20
|
+
import numpy as np
|
|
21
|
+
|
|
22
|
+
from tensorflow.python.platform import googletest
|
|
23
|
+
from ai_edge_quantizer import qtyping
|
|
24
|
+
from ai_edge_quantizer.algorithms.uniform_quantize import mse
|
|
25
|
+
from ai_edge_quantizer.utils import test_utils
|
|
26
|
+
from ai_edge_quantizer.utils import tfl_flatbuffer_utils
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class MseQuantizeTest(parameterized.TestCase):
  """Exercises `mse.get_tensor_quant_params` on a fully-connected op."""

  def setUp(self):
    super().setUp()
    np.random.seed(666)
    model_dir = test_utils.get_path_to_datafile("../../tests/models")
    self._test_model_path = os.path.join(model_dir, "conv_fc_mnist.tflite")
    self._test_model = tfl_flatbuffer_utils.read_model(self._test_model_path)
    # The test model has one subgraph for now.
    first_subgraph = self._test_model.subgraphs[0]
    self._graph_info = qtyping.GraphInfo(
        subgraph_tensors=first_subgraph.tensors,
        buffers=self._test_model.buffers,
    )
    self._tensor_name_to_qsv = {}
    self._subgraph_op_index = 3
    self._fc_op = first_subgraph.operators[self._subgraph_op_index]
    self._fc_op_info = self._new_fc_op_info(weight_tensor_config=None)

  def _new_fc_op_info(self, weight_tensor_config):
    """Wraps the fully-connected op under test with the given weight config."""
    return qtyping.OpInfo(
        op=self._fc_op,
        op_name=qtyping.TFLOperationName.FULLY_CONNECTED,
        subgraph_op_index=self._subgraph_op_index,
        op_quant_config=qtyping.OpQuantizationConfig(
            weight_tensor_config=weight_tensor_config,
        ),
    )

  def _small_weights(self):
    """Returns the 4x2 integer weights used by the error-path tests."""
    return np.array([[-7, 7], [4, -4], [4, -4], [7, 7]])

  def _outlier_weights(self):
    """Returns 3x6 weights containing two large-magnitude outliers."""
    return np.array([
        [-1e5, 25, -50, 75, -100, 125],
        [25, -30, 50, -75, 1e5, -125],
        [50, -60, 70, -80, 90, -100],
    ])

  def test_get_tensor_quant_params_raises_error_with_unsupported_symmetry(self):
    asymmetric_config = qtyping.TensorQuantizationConfig(
        num_bits=4,
        symmetric=False,
        granularity=qtyping.QuantGranularity.CHANNELWISE,
    )
    weights = self._small_weights()
    with self.assertRaisesWithPredicateMatch(
        ValueError, lambda err: "Unsupported symmetry" in str(err)
    ):
      mse.get_tensor_quant_params(
          op_info=self._fc_op_info,
          tensor_quant_config=asymmetric_config,
          tensor_content=weights,
      )

  def test_get_tensor_quant_params_raises_error_with_unsupported_granularity(
      self,
  ):
    blockwise_config = qtyping.TensorQuantizationConfig(
        num_bits=4,
        symmetric=True,
        granularity=qtyping.QuantGranularity.BLOCKWISE,
    )
    weights = self._small_weights()
    with self.assertRaisesWithPredicateMatch(
        ValueError,
        lambda err: "Blockwise quantization is not supported" in str(err),
    ):
      mse.get_tensor_quant_params(
          op_info=self._fc_op_info,
          tensor_quant_config=blockwise_config,
          tensor_content=weights,
      )

  def test_get_tensor_quant_params_succeeds_with_qsv(self):
    # No tensor content is passed, so the call takes the fallback path to
    # naive_min_max_quantize.py for non-weight tensors.
    activation_config = qtyping.TensorQuantizationConfig(
        num_bits=8,
        granularity=qtyping.QuantGranularity.TENSORWISE,
    )
    min_max = {"min": np.array([-1]), "max": np.array([1])}
    tensor_quant_params = mse.get_tensor_quant_params(
        op_info=self._fc_op_info,
        tensor_quant_config=activation_config,
        tensor_qsv=min_max,
    )

    self.assertIsNone(tensor_quant_params.quantized_dimension)
    scale = tensor_quant_params.scale
    self.assertEqual(scale.shape, (1,))
    self.assertSequenceAlmostEqual(scale.flatten(), [1 / 127])

    # Zero point should be zero for symmetric quantization.
    zero_point = tensor_quant_params.zero_point
    self.assertEqual(np.sum(zero_point), 0)
    self.assertEqual(zero_point.shape, (1,))

  def test_get_tensor_quant_params_succeeds_with_tensorwise_granularity(self):
    weights = self._outlier_weights()
    weight_config = qtyping.TensorQuantizationConfig(
        num_bits=4,
        symmetric=True,
        granularity=qtyping.QuantGranularity.TENSORWISE,
    )
    quant_params = mse.get_tensor_quant_params(
        op_info=self._new_fc_op_info(weight_config),
        tensor_quant_config=weight_config,
        tensor_content=weights,
    )

    with self.subTest(name="CheckQuantParamsShapes"):
      self.assertEqual(quant_params.zero_point.shape, (1, 1))
      self.assertEqual(quant_params.scale.shape, (1, 1))
      self.assertIsNone(quant_params.quantized_dimension)
      self.assertIsNotNone(quant_params.quantized_data)
      quantized = cast(np.ndarray, quant_params.quantized_data)
      self.assertTupleEqual(quantized.shape, weights.shape)

    with self.subTest(name="CheckQuantParamsValues"):
      self.assertTrue(np.all(quant_params.zero_point == 0))

  def test_get_tensor_quant_params_succeeds_with_channelwise_granularity(self):
    # Only shapes, zero points and the quantized dimension are checked here;
    # the scale values are not verified numerically.
    weights = self._outlier_weights()
    weight_config = qtyping.TensorQuantizationConfig(
        num_bits=4,
        symmetric=True,
        granularity=qtyping.QuantGranularity.CHANNELWISE,
    )
    quant_params = mse.get_tensor_quant_params(
        op_info=self._new_fc_op_info(weight_config),
        tensor_quant_config=weight_config,
        tensor_content=weights,
    )

    with self.subTest(name="CheckQuantParamsShapes"):
      per_row_shape = (weights.shape[0], 1)
      self.assertEqual(quant_params.zero_point.shape, per_row_shape)
      self.assertEqual(quant_params.scale.shape, per_row_shape)
      self.assertIsNotNone(quant_params.quantized_data)
      quantized = cast(np.ndarray, quant_params.quantized_data)
      self.assertTupleEqual(quantized.shape, weights.shape)

    with self.subTest(name="CheckQuantParamsValues"):
      self.assertTrue(np.all(quant_params.zero_point == 0))
      self.assertEqual(quant_params.quantized_dimension, 0)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
if __name__ == "__main__":
|
|
195
|
+
googletest.main()
|
|
@@ -82,7 +82,6 @@ class RecipeManager:
|
|
|
82
82
|
str, list[OpQuantizationRecipe]
|
|
83
83
|
] = collections.OrderedDict()
|
|
84
84
|
|
|
85
|
-
# TODO: b/335254997 - Check if an op quantization config is supported.
|
|
86
85
|
def add_quantization_config(
|
|
87
86
|
self,
|
|
88
87
|
regex: str,
|
|
@@ -272,7 +271,8 @@ class RecipeManager:
|
|
|
272
271
|
"""
|
|
273
272
|
weight_config = qtyping.TensorQuantizationConfig(
|
|
274
273
|
num_bits=num_bits,
|
|
275
|
-
symmetric=True, # LiteRT kernels only support symmetric quantized
|
|
274
|
+
symmetric=True, # LiteRT kernels only support symmetric quantized
|
|
275
|
+
# weights.
|
|
276
276
|
granularity=granularity,
|
|
277
277
|
)
|
|
278
278
|
self.add_quantization_config(
|
|
@@ -316,10 +316,18 @@ class RecipeManager:
|
|
|
316
316
|
granularity: Granularity of quantization.
|
|
317
317
|
algorithm_key: Algorithm key to be applied.
|
|
318
318
|
"""
|
|
319
|
+
# Default to integer quantization but allow float quantization for
|
|
320
|
+
# FLOAT_CASTING algorithm. This is to support weight-only quantization with
|
|
321
|
+
# fp16 weights.
|
|
322
|
+
weight_dtype = qtyping.TensorDataType.INT
|
|
323
|
+
if algorithm_key == AlgorithmName.FLOAT_CASTING:
|
|
324
|
+
weight_dtype = qtyping.TensorDataType.FLOAT
|
|
325
|
+
|
|
319
326
|
weight_config = qtyping.TensorQuantizationConfig(
|
|
320
327
|
num_bits=num_bits,
|
|
321
328
|
symmetric=True, # TFL kernels only support symmetric quantized weights.
|
|
322
329
|
granularity=granularity,
|
|
330
|
+
dtype=weight_dtype,
|
|
323
331
|
)
|
|
324
332
|
self.add_quantization_config(
|
|
325
333
|
regex,
|
|
@@ -365,7 +373,8 @@ class RecipeManager:
|
|
|
365
373
|
raise ValueError(
|
|
366
374
|
'Activation quantization is only supported for 16 or 8 bits.'
|
|
367
375
|
)
|
|
368
|
-
# INT16 is symmetric and INT8 is asymmetric due to LiteRT kernel
|
|
376
|
+
# INT16 is symmetric and INT8 is asymmetric due to LiteRT kernel
|
|
377
|
+
# limitations.
|
|
369
378
|
activation_symmetric = activation_num_bits == 16
|
|
370
379
|
activation_config = qtyping.TensorQuantizationConfig(
|
|
371
380
|
num_bits=activation_num_bits, symmetric=activation_symmetric
|
|
@@ -315,11 +315,12 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
|
|
|
315
315
|
_QuantGranularity.CHANNELWISE,
|
|
316
316
|
)
|
|
317
317
|
|
|
318
|
-
|
|
318
|
+
@parameterized.parameters(4, 8)
|
|
319
|
+
def test_add_weight_only_config_int(self, num_bits):
|
|
319
320
|
self._recipe_manager.add_weight_only_config(
|
|
320
321
|
regex='.*/Dense/.*',
|
|
321
322
|
operation_name=_TFLOpName.FULLY_CONNECTED,
|
|
322
|
-
num_bits=
|
|
323
|
+
num_bits=num_bits,
|
|
323
324
|
)
|
|
324
325
|
alg_key, op_config = self._recipe_manager.get_quantization_configs(
|
|
325
326
|
_TFLOpName.FULLY_CONNECTED, 'model/Dense/op'
|
|
@@ -330,6 +331,72 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
|
|
|
330
331
|
self.assertIsNone(op_config.activation_tensor_config)
|
|
331
332
|
weight_tensor_config = op_config.weight_tensor_config
|
|
332
333
|
self.assertIsNotNone(weight_tensor_config)
|
|
334
|
+
self.assertEqual(weight_tensor_config.num_bits, num_bits)
|
|
335
|
+
self.assertTrue(weight_tensor_config.symmetric)
|
|
336
|
+
self.assertEqual(
|
|
337
|
+
weight_tensor_config.granularity,
|
|
338
|
+
_QuantGranularity.CHANNELWISE,
|
|
339
|
+
)
|
|
340
|
+
self.assertEqual(weight_tensor_config.dtype, _TensorDataType.INT)
|
|
341
|
+
|
|
342
|
+
def test_add_weight_only_config_fp16(self):
  """Weight-only FLOAT_CASTING at 16 bits yields a FLOAT weight config."""
  manager = self._recipe_manager
  manager.add_weight_only_config(
      regex='.*/Dense2/.*',
      operation_name=_TFLOpName.FULLY_CONNECTED,
      num_bits=16,
      algorithm_key=_AlgorithmName.FLOAT_CASTING,
  )
  alg_key, op_config = manager.get_quantization_configs(
      _TFLOpName.FULLY_CONNECTED, 'model/Dense2/op'
  )

  # Op-level settings.
  self.assertEqual(alg_key, _AlgorithmName.FLOAT_CASTING)
  self.assertEqual(op_config.compute_precision, _ComputePrecision.FLOAT)
  self.assertTrue(op_config.explicit_dequantize)
  self.assertIsNone(op_config.activation_tensor_config)

  # Weight tensor settings.
  weights = op_config.weight_tensor_config
  self.assertIsNotNone(weights)
  self.assertEqual(weights.num_bits, 16)
  self.assertTrue(weights.symmetric)
  self.assertEqual(weights.granularity, _QuantGranularity.CHANNELWISE)
  self.assertEqual(weights.dtype, _TensorDataType.FLOAT)
|
|
365
|
+
|
|
366
|
+
def test_add_weight_only_config_fp8_raise_error(self):
  """FLOAT_CASTING with 8 bits is rejected with a ValueError."""
  error_message = (
      'float casting quantization config requires number of bits to be set'
      ' as 16'
  )

  def mentions_expected_message(err):
    return error_message in str(err)

  with self.assertRaisesWithPredicateMatch(
      ValueError, mentions_expected_message
  ):
    self._recipe_manager.add_weight_only_config(
        regex='.*/Dense2/.*',
        operation_name=_TFLOpName.FULLY_CONNECTED,
        num_bits=8,
        algorithm_key=_AlgorithmName.FLOAT_CASTING,
    )
|
|
380
|
+
|
|
381
|
+
def test_add_static_config(self):
|
|
382
|
+
self._recipe_manager.add_static_config(
|
|
383
|
+
regex='.*/Dense/.*',
|
|
384
|
+
operation_name=_TFLOpName.FULLY_CONNECTED,
|
|
385
|
+
activation_num_bits=8,
|
|
386
|
+
weight_num_bits=4,
|
|
387
|
+
)
|
|
388
|
+
alg_key, op_config = self._recipe_manager.get_quantization_configs(
|
|
389
|
+
_TFLOpName.FULLY_CONNECTED, 'model/Dense/op'
|
|
390
|
+
)
|
|
391
|
+
self.assertEqual(alg_key, _AlgorithmName.MIN_MAX_UNIFORM_QUANT)
|
|
392
|
+
self.assertEqual(op_config.compute_precision, _ComputePrecision.INTEGER)
|
|
393
|
+
self.assertFalse(op_config.explicit_dequantize)
|
|
394
|
+
activation_tensor_config = op_config.activation_tensor_config
|
|
395
|
+
self.assertIsNotNone(activation_tensor_config)
|
|
396
|
+
self.assertEqual(activation_tensor_config.num_bits, 8)
|
|
397
|
+
self.assertFalse(activation_tensor_config.symmetric)
|
|
398
|
+
weight_tensor_config = op_config.weight_tensor_config
|
|
399
|
+
self.assertIsNotNone(weight_tensor_config)
|
|
333
400
|
self.assertEqual(weight_tensor_config.num_bits, 4)
|
|
334
401
|
self.assertTrue(weight_tensor_config.symmetric)
|
|
335
402
|
self.assertEqual(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ai-edge-quantizer-nightly
|
|
3
|
-
Version: 0.4.0.dev20250925
|
|
3
|
+
Version: 0.4.0.dev20250927
|
|
4
4
|
Summary: A quantizer for advanced developers to quantize converted AI Edge models.
|
|
5
5
|
Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
|
|
6
6
|
Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
ai_edge_quantizer/__init__.py,sha256=4pFSkukSwahYyzwqia0yPRyz8TnFQfGRthVJhYpMWas,793
|
|
2
|
-
ai_edge_quantizer/algorithm_manager.py,sha256=
|
|
2
|
+
ai_edge_quantizer/algorithm_manager.py,sha256=XkLMG_wQqf_X6swp6YBIhJpIbIdRcOt2LJ_6oTZ3GzU,14956
|
|
3
3
|
ai_edge_quantizer/algorithm_manager_api.py,sha256=u903TG0s1uIDhJqfeJne3CFl8A93phZrwgV2-hwdcXU,9247
|
|
4
4
|
ai_edge_quantizer/algorithm_manager_api_test.py,sha256=w6bSONvXkX6bzXAGc0-7b6gNDt9oz9ieq97KP8Sg_JU,7666
|
|
5
5
|
ai_edge_quantizer/calibrator.py,sha256=Sms7_AIHPH9G5xFaz5Ef3a5gPhxuIWQI8d2LUM8C96I,12071
|
|
@@ -16,8 +16,8 @@ ai_edge_quantizer/qtyping.py,sha256=tfrPip-uzJuF_PASgUExx5Oy9gghWUbQaApR0XaBpNw,
|
|
|
16
16
|
ai_edge_quantizer/quantizer.py,sha256=ckAEOnnBxuCKZuvlzdChevCKPuE-IeDPHCNtFTWr250,17857
|
|
17
17
|
ai_edge_quantizer/quantizer_test.py,sha256=m6f4ayyaF3yQb9i4V0aFAbmGw0OKZ2Zam1RoTPh-u24,22917
|
|
18
18
|
ai_edge_quantizer/recipe.py,sha256=MEkfQ2Sg3KAE9LAORHWcbjYNPg06EUbwc1d-VspQA2U,6461
|
|
19
|
-
ai_edge_quantizer/recipe_manager.py,sha256=
|
|
20
|
-
ai_edge_quantizer/recipe_manager_test.py,sha256=
|
|
19
|
+
ai_edge_quantizer/recipe_manager.py,sha256=6l2uq8KL23KLu9OQDmPGkxrFiwHrdDB9xnn-ni8WdEM,15036
|
|
20
|
+
ai_edge_quantizer/recipe_manager_test.py,sha256=qjgGUF-wggXnSXqZ5khmqrDMIQI5CShk52IVWTahq6s,36817
|
|
21
21
|
ai_edge_quantizer/recipe_test.py,sha256=QisyaTol8JRZFcGOGyee7QRCvqj5VbF4guKWdIoMUOE,6213
|
|
22
22
|
ai_edge_quantizer/transformation_instruction_generator.py,sha256=O0U2aZcB8aXQgOV8r9g1rGNzDUiuI5Ta53XnxZbVffE,31576
|
|
23
23
|
ai_edge_quantizer/transformation_instruction_generator_test.py,sha256=KW5-WoTTo9IqLEVnWxVC8ut8eWLi_91xfKgGqVQ9QDk,54635
|
|
@@ -34,6 +34,8 @@ ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha
|
|
|
34
34
|
ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=sT5eX5TLZEHTtPfnSkCPDlS0sQxlTFWbCsbvOuj--yY,8889
|
|
35
35
|
ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py,sha256=otKRiZn_C0QH0891pxLsIPIBT1mLDwbKYYP7bI-MXAA,12279
|
|
36
36
|
ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py,sha256=_SpP12aDLujv_7tWf_mCt89WknNXTSGE-JpZWO1bYSE,13238
|
|
37
|
+
ai_edge_quantizer/algorithms/uniform_quantize/mse.py,sha256=qiIyzogATGVxjYwxzH0cZvgwPSPBJv_3y8NSumHZXTk,4561
|
|
38
|
+
ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py,sha256=-_P4jQJ7gVo0FNSapP3sIGcnhwfjQHW1AKLfoiAlS_s,7142
|
|
37
39
|
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1sB2j1vlvvWDKyjcGvA_JLCpN2KbCmMslGCBUc4--V4,8461
|
|
38
40
|
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
|
|
39
41
|
ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
|
|
@@ -70,8 +72,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
|
|
|
70
72
|
ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
|
|
71
73
|
ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
|
|
72
74
|
ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
|
|
73
|
-
ai_edge_quantizer_nightly-0.4.0.
|
|
74
|
-
ai_edge_quantizer_nightly-0.4.0.
|
|
75
|
-
ai_edge_quantizer_nightly-0.4.0.
|
|
76
|
-
ai_edge_quantizer_nightly-0.4.0.
|
|
77
|
-
ai_edge_quantizer_nightly-0.4.0.
|
|
75
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250927.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
76
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250927.dist-info/METADATA,sha256=Va5a1yrSq5LuoVneSYo__gfnJj7POIJZif9kdRs6Nck,1508
|
|
77
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250927.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
|
78
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250927.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
|
79
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250927.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|