ai-edge-quantizer-nightly 0.3.0.dev20250806__py3-none-any.whl → 0.3.0.dev20250807__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +17 -2
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +35 -0
- ai_edge_quantizer/quantizer.py +28 -14
- {ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info}/METADATA +1 -1
- {ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info}/RECORD +8 -8
- {ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info}/WHEEL +0 -0
- {ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info}/top_level.txt +0 -0
@@ -166,6 +166,7 @@ def min_max_calibrate(
|
|
166
166
|
tensor_content_map: dict[str, np.ndarray],
|
167
167
|
inputs_to_ignore: Optional[list[int]] = None,
|
168
168
|
outputs_to_ignore: Optional[list[int]] = None,
|
169
|
+
valid_range: tuple[float, float] = (-3e38, 3e38),
|
169
170
|
) -> dict[str, qtyping.QSV]:
|
170
171
|
"""Collect quantization statistics variable (QSV, e.g., min/max) for the op.
|
171
172
|
|
@@ -175,11 +176,18 @@ def min_max_calibrate(
|
|
175
176
|
tensor_content_map: A map of tensor name to tensor content.
|
176
177
|
inputs_to_ignore: Input tensor indices to ignore.
|
177
178
|
outputs_to_ignore: Output tensor indices to ignore.
|
179
|
+
valid_range: The valid range for tensor content, excluding the boundaries.
|
180
|
+
Tensor values outside this range are ignored during calibration. Defaults
|
181
|
+
to an approximate bfloat16 range. This range is chosen to address issues
|
182
|
+
with `padv2` where a bfloat16 -inf padding constant can cause problems.
|
183
|
+
Values exceeding this range can lead to quantization issues and are
|
184
|
+
therefore excluded from min/max calibration.
|
178
185
|
|
179
186
|
Returns:
|
180
187
|
A dictionary with key as tensor name and value as the collected QSV.
|
181
188
|
"""
|
182
189
|
op_qsvs = {}
|
190
|
+
min_val, max_val = valid_range
|
183
191
|
|
184
192
|
def _collect_activation_tensor_min_max(tensor_idx):
|
185
193
|
tensor = graph_info.subgraph_tensors[tensor_idx]
|
@@ -191,9 +199,16 @@ def min_max_calibrate(
|
|
191
199
|
return
|
192
200
|
tensor_name = tfl_flatbuffer_utils.get_tensor_name(tensor)
|
193
201
|
tensor_content = tensor_content_map[tensor_name]
|
202
|
+
qsv_shape = (1,) * tensor_content.ndim
|
203
|
+
filter_mask = (tensor_content > min_val) & (tensor_content < max_val)
|
204
|
+
if np.any(filter_mask):
|
205
|
+
tensor_content = tensor_content[filter_mask]
|
206
|
+
# Reshape is needed to ensure the scalar min/max have the same number of
|
207
|
+
# dimensions as the input tensor array, for compatibility with subsequent
|
208
|
+
# operations.
|
194
209
|
op_qsvs[tensor_name] = {
|
195
|
-
"min": np.min(tensor_content, axis=None, keepdims=True),
|
196
|
-
"max": np.max(tensor_content, axis=None, keepdims=True),
|
210
|
+
"min": np.min(tensor_content, axis=None).reshape(qsv_shape),
|
211
|
+
"max": np.max(tensor_content, axis=None).reshape(qsv_shape),
|
197
212
|
}
|
198
213
|
|
199
214
|
inputs_to_ignore = inputs_to_ignore or []
|
@@ -200,6 +200,41 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
|
|
200
200
|
self.assertEqual(quant_params.block_size, 2)
|
201
201
|
self.assertEqual(quant_params.quantized_dimension, 1)
|
202
202
|
|
203
|
+
def test_calibrate_ignores_inf_min_max(self):
|
204
|
+
"""Tests that calibration ignores infinity values."""
|
205
|
+
# Sample input/output data for the fc op.
|
206
|
+
input_tensor_name = "sequential/flatten/Reshape"
|
207
|
+
output_tensor_name = (
|
208
|
+
"sequential/dense/MatMul;sequential/dense/Relu;sequential/dense/BiasAdd"
|
209
|
+
)
|
210
|
+
bloat16_inf = 3.39e38
|
211
|
+
tensor_content_map = {
|
212
|
+
input_tensor_name: np.array(
|
213
|
+
[[-np.inf, 1.0, 5.0, np.inf, bloat16_inf]], dtype=np.float32
|
214
|
+
),
|
215
|
+
output_tensor_name: np.array(
|
216
|
+
[[6.0, 7.0, -bloat16_inf, 9.0, np.inf]], dtype=np.float32
|
217
|
+
),
|
218
|
+
}
|
219
|
+
# Read from Model Explorer.
|
220
|
+
subgraph0 = self._test_model.subgraphs[0]
|
221
|
+
fc_op = subgraph0.operators[3]
|
222
|
+
op_qsvs = naive_min_max_quantize.min_max_calibrate(
|
223
|
+
fc_op,
|
224
|
+
self._graph_info,
|
225
|
+
tensor_content_map,
|
226
|
+
inputs_to_ignore=[1, 2], # Ignore weight and bias.
|
227
|
+
outputs_to_ignore=[],
|
228
|
+
)
|
229
|
+
|
230
|
+
self.assertIn(input_tensor_name, op_qsvs)
|
231
|
+
self.assertEqual(op_qsvs[input_tensor_name]["min"], 1.0)
|
232
|
+
self.assertEqual(op_qsvs[input_tensor_name]["max"], 5.0)
|
233
|
+
|
234
|
+
self.assertIn(output_tensor_name, op_qsvs)
|
235
|
+
self.assertEqual(op_qsvs[output_tensor_name]["min"], 6.0)
|
236
|
+
self.assertEqual(op_qsvs[output_tensor_name]["max"], 9.0)
|
237
|
+
|
203
238
|
|
204
239
|
if __name__ == "__main__":
|
205
240
|
googletest.main()
|
ai_edge_quantizer/quantizer.py
CHANGED
@@ -18,8 +18,10 @@
|
|
18
18
|
from collections.abc import Iterable
|
19
19
|
import dataclasses
|
20
20
|
import json
|
21
|
+
import logging
|
21
22
|
import os
|
22
23
|
from typing import Any, Optional, Union
|
24
|
+
|
23
25
|
from ai_edge_quantizer import algorithm_manager
|
24
26
|
from ai_edge_quantizer import calibrator
|
25
27
|
from ai_edge_quantizer import default_policy
|
@@ -57,49 +59,61 @@ class QuantizationResult:
|
|
57
59
|
recipe: _QuantRecipe
|
58
60
|
quantized_model: Optional[bytearray]
|
59
61
|
|
60
|
-
def save(self, save_folder: str, model_name: str) -> None:
|
62
|
+
def save(
|
63
|
+
self, save_folder: str, model_name: str, overwrite: bool = False
|
64
|
+
) -> None:
|
61
65
|
"""Saves the quantized model and the quantization recipe.
|
62
66
|
|
63
67
|
Args:
|
64
68
|
save_folder: Path to the folder to save the quantized model and the
|
65
69
|
quantization recipe.
|
66
70
|
model_name: Name of the model.
|
71
|
+
overwrite: Whether to overwrite the model if it already exists.
|
67
72
|
|
68
73
|
Raises:
|
69
74
|
RuntimeError: If no quantized model is available.
|
70
|
-
FileExistsError: If the model already exists in the folder.
|
71
75
|
"""
|
72
|
-
if
|
73
|
-
|
74
|
-
|
75
|
-
)
|
76
|
+
if not gfile.Exists(save_folder):
|
77
|
+
gfile.MakeDirs(save_folder)
|
78
|
+
|
76
79
|
model_save_path = os.path.join(save_folder, f'{model_name}.tflite')
|
77
|
-
|
78
|
-
raise FileExistsError(
|
79
|
-
f'The model {model_save_path} already exists in the folder.'
|
80
|
-
)
|
81
|
-
with gfile.GFile(model_save_path, 'wb') as output_file_handle:
|
82
|
-
output_file_handle.write(self.quantized_model)
|
80
|
+
self.export_model(model_save_path, overwrite)
|
83
81
|
|
84
|
-
recipe = json.dumps(self.recipe)
|
85
82
|
recipe_save_path = os.path.join(save_folder, model_name + '_recipe.json')
|
83
|
+
recipe = json.dumps(self.recipe)
|
86
84
|
with gfile.GFile(recipe_save_path, 'w') as output_file_handle:
|
87
85
|
output_file_handle.write(recipe)
|
88
86
|
|
89
|
-
def export_model(self, filepath: str) -> None:
|
87
|
+
def export_model(self, filepath: str, overwrite: bool = False) -> None:
|
90
88
|
"""Exports the quantized model to a .tflite flatbuffer.
|
91
89
|
|
92
90
|
Args:
|
93
91
|
filepath: Path (including file name) that the exported model should be
|
94
92
|
serialized to.
|
93
|
+
overwrite: Whether to overwrite the model if it already exists.
|
95
94
|
|
96
95
|
Raises:
|
97
96
|
RuntimeError: If no quantized model is available.
|
97
|
+
ValueError: If the model already exists in the folder and overwrite is
|
98
|
+
False.
|
98
99
|
"""
|
99
100
|
if self.quantized_model is None:
|
100
101
|
raise RuntimeError(
|
101
102
|
'No quantized model to save. Make sure .quantize() is called.'
|
102
103
|
)
|
104
|
+
if gfile.Exists(filepath):
|
105
|
+
if overwrite:
|
106
|
+
logging.warning(
|
107
|
+
'The model %s already exists in the folder. Overwriting the model'
|
108
|
+
' since overwrite=True.',
|
109
|
+
filepath,
|
110
|
+
)
|
111
|
+
else:
|
112
|
+
raise ValueError(
|
113
|
+
f'The model {filepath} already exists in the folder. Please'
|
114
|
+
' consider change the model name or specify overwrite=True to'
|
115
|
+
' overwrite the model if needed.'
|
116
|
+
)
|
103
117
|
with gfile.GFile(filepath, 'wb') as output_file_handle:
|
104
118
|
output_file_handle.write(self.quantized_model)
|
105
119
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ai-edge-quantizer-nightly
|
3
|
-
Version: 0.3.0.dev20250806
|
3
|
+
Version: 0.3.0.dev20250807
|
4
4
|
Summary: A quantizer for advanced developers to quantize converted AI Edge models.
|
5
5
|
Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
|
6
6
|
Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
|
@@ -13,7 +13,7 @@ ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgI
|
|
13
13
|
ai_edge_quantizer/params_generator.py,sha256=hcgMHJlERZERUyIAEi6AHJcLJ8gsKIBAEojzFFz-tqk,20098
|
14
14
|
ai_edge_quantizer/params_generator_test.py,sha256=RDYoRZDJfEZRtjlTAU2kZ_4t3JHOqEHxfJX9V4ETAhg,40597
|
15
15
|
ai_edge_quantizer/qtyping.py,sha256=t7S5wTjg6VFmKYzeM_qymu36tT18rnkmY-4YOXMQxzc,16722
|
16
|
-
ai_edge_quantizer/quantizer.py,sha256=
|
16
|
+
ai_edge_quantizer/quantizer.py,sha256=WeKwhh8cYZ07DUwvS0S1EdNzEZSfPODlynqIBvJ-Br4,13624
|
17
17
|
ai_edge_quantizer/quantizer_test.py,sha256=K_HBA56JkFI3HL8VLWCqGEfC0ISh5ldMKoNyBdGRAJg,20368
|
18
18
|
ai_edge_quantizer/recipe.py,sha256=FR0uJceumZrnle2VRSOQZ1uXup4S1cTYKRH-N53mWRo,2919
|
19
19
|
ai_edge_quantizer/recipe_manager.py,sha256=qcGUD7e7BISKdsY9WH2rdaRR3acmzSA5qMezGNbzlpo,8931
|
@@ -34,8 +34,8 @@ ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha
|
|
34
34
|
ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=sT5eX5TLZEHTtPfnSkCPDlS0sQxlTFWbCsbvOuj--yY,8889
|
35
35
|
ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py,sha256=U3h5scCHSOdqHA-pb1C3pNgwumT4ydGbtkCSM0ORhrs,12740
|
36
36
|
ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py,sha256=5VUxlaKP1jz4HV-LcKxXMMtmb6eWamq0A6qWJd63cR4,10179
|
37
|
-
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=
|
38
|
-
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=
|
37
|
+
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1sB2j1vlvvWDKyjcGvA_JLCpN2KbCmMslGCBUc4--V4,8461
|
38
|
+
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
|
39
39
|
ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
|
40
40
|
ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
|
41
41
|
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=0d5XdNbjR2pVsAc-gWX3ik_pAIL-bZ-zemEz_jS2d0c,16531
|
@@ -68,8 +68,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
|
|
68
68
|
ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
|
69
69
|
ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
|
70
70
|
ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
|
71
|
-
ai_edge_quantizer_nightly-0.3.0.
|
72
|
-
ai_edge_quantizer_nightly-0.3.0.
|
73
|
-
ai_edge_quantizer_nightly-0.3.0.
|
74
|
-
ai_edge_quantizer_nightly-0.3.0.
|
75
|
-
ai_edge_quantizer_nightly-0.3.0.
|
71
|
+
ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
72
|
+
ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info/METADATA,sha256=zDKLqFrxvrjKJM46l8OaqwP7YriogP6dqsvY_8Y3O-I,1528
|
73
|
+
ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
74
|
+
ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
75
|
+
ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info/RECORD,,
|
File without changes
|
File without changes
|