ai-edge-quantizer-nightly 0.3.0.dev20250806__py3-none-any.whl → 0.3.0.dev20250807__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -166,6 +166,7 @@ def min_max_calibrate(
166
166
  tensor_content_map: dict[str, np.ndarray],
167
167
  inputs_to_ignore: Optional[list[int]] = None,
168
168
  outputs_to_ignore: Optional[list[int]] = None,
169
+ valid_range: tuple[float, float] = (-3e38, 3e38),
169
170
  ) -> dict[str, qtyping.QSV]:
170
171
  """Collect quantization statistics variable (QSV, e.g., min/max) for the op.
171
172
 
@@ -175,11 +176,18 @@ def min_max_calibrate(
175
176
  tensor_content_map: A map of tensor name to tensor content.
176
177
  inputs_to_ignore: Input tensor indices to ignore.
177
178
  outputs_to_ignore: Output tensor indices to ignore.
179
+ valid_range: The valid range for tensor content, excluding the boundaries.
180
+ Tensor values outside this range are ignored during calibration. Defaults
181
+ to an approximate bfloat16 range. This range is chosen to address issues
182
+ with `padv2` where a bfloat16 -inf padding constant can cause problems.
183
+ Values exceeding this range can lead to quantization issues and are
184
+ therefore excluded from min/max calibration.
178
185
 
179
186
  Returns:
180
187
  A dictionary with key as tensor name and value as the collected QSV.
181
188
  """
182
189
  op_qsvs = {}
190
+ min_val, max_val = valid_range
183
191
 
184
192
  def _collect_activation_tensor_min_max(tensor_idx):
185
193
  tensor = graph_info.subgraph_tensors[tensor_idx]
@@ -191,9 +199,16 @@ def min_max_calibrate(
191
199
  return
192
200
  tensor_name = tfl_flatbuffer_utils.get_tensor_name(tensor)
193
201
  tensor_content = tensor_content_map[tensor_name]
202
+ qsv_shape = (1,) * tensor_content.ndim
203
+ filter_mask = (tensor_content > min_val) & (tensor_content < max_val)
204
+ if np.any(filter_mask):
205
+ tensor_content = tensor_content[filter_mask]
206
+ # Reshape is needed to ensure the scalar min/max have the same number of
207
+ # dimensions as the input tensor array, for compatibility with subsequent
208
+ # operations.
194
209
  op_qsvs[tensor_name] = {
195
- "min": np.min(tensor_content, axis=None, keepdims=True),
196
- "max": np.max(tensor_content, axis=None, keepdims=True),
210
+ "min": np.min(tensor_content, axis=None).reshape(qsv_shape),
211
+ "max": np.max(tensor_content, axis=None).reshape(qsv_shape),
197
212
  }
198
213
 
199
214
  inputs_to_ignore = inputs_to_ignore or []
@@ -200,6 +200,41 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
200
200
  self.assertEqual(quant_params.block_size, 2)
201
201
  self.assertEqual(quant_params.quantized_dimension, 1)
202
202
 
203
+ def test_calibrate_ignores_inf_min_max(self):
204
+ """Tests that calibration ignores infinity values."""
205
+ # Sample input/output data for the fc op.
206
+ input_tensor_name = "sequential/flatten/Reshape"
207
+ output_tensor_name = (
208
+ "sequential/dense/MatMul;sequential/dense/Relu;sequential/dense/BiasAdd"
209
+ )
210
+ bloat16_inf = 3.39e38
211
+ tensor_content_map = {
212
+ input_tensor_name: np.array(
213
+ [[-np.inf, 1.0, 5.0, np.inf, bloat16_inf]], dtype=np.float32
214
+ ),
215
+ output_tensor_name: np.array(
216
+ [[6.0, 7.0, -bloat16_inf, 9.0, np.inf]], dtype=np.float32
217
+ ),
218
+ }
219
+ # Read from Model Explorer.
220
+ subgraph0 = self._test_model.subgraphs[0]
221
+ fc_op = subgraph0.operators[3]
222
+ op_qsvs = naive_min_max_quantize.min_max_calibrate(
223
+ fc_op,
224
+ self._graph_info,
225
+ tensor_content_map,
226
+ inputs_to_ignore=[1, 2], # Ignore weight and bias.
227
+ outputs_to_ignore=[],
228
+ )
229
+
230
+ self.assertIn(input_tensor_name, op_qsvs)
231
+ self.assertEqual(op_qsvs[input_tensor_name]["min"], 1.0)
232
+ self.assertEqual(op_qsvs[input_tensor_name]["max"], 5.0)
233
+
234
+ self.assertIn(output_tensor_name, op_qsvs)
235
+ self.assertEqual(op_qsvs[output_tensor_name]["min"], 6.0)
236
+ self.assertEqual(op_qsvs[output_tensor_name]["max"], 9.0)
237
+
203
238
 
204
239
  if __name__ == "__main__":
205
240
  googletest.main()
@@ -18,8 +18,10 @@
18
18
  from collections.abc import Iterable
19
19
  import dataclasses
20
20
  import json
21
+ import logging
21
22
  import os
22
23
  from typing import Any, Optional, Union
24
+
23
25
  from ai_edge_quantizer import algorithm_manager
24
26
  from ai_edge_quantizer import calibrator
25
27
  from ai_edge_quantizer import default_policy
@@ -57,49 +59,61 @@ class QuantizationResult:
57
59
  recipe: _QuantRecipe
58
60
  quantized_model: Optional[bytearray]
59
61
 
60
- def save(self, save_folder: str, model_name: str) -> None:
62
+ def save(
63
+ self, save_folder: str, model_name: str, overwrite: bool = False
64
+ ) -> None:
61
65
  """Saves the quantized model and the quantization recipe.
62
66
 
63
67
  Args:
64
68
  save_folder: Path to the folder to save the quantized model and the
65
69
  quantization recipe.
66
70
  model_name: Name of the model.
71
+ overwrite: Whether to overwrite the model if it already exists.
67
72
 
68
73
  Raises:
69
74
  RuntimeError: If no quantized model is available.
70
- FileExistsError: If the model already exists in the folder.
71
75
  """
72
- if self.quantized_model is None:
73
- raise RuntimeError(
74
- 'No quantized model to save. Make sure .quantize() is called.'
75
- )
76
+ if not gfile.Exists(save_folder):
77
+ gfile.MakeDirs(save_folder)
78
+
76
79
  model_save_path = os.path.join(save_folder, f'{model_name}.tflite')
77
- if gfile.Exists(model_save_path):
78
- raise FileExistsError(
79
- f'The model {model_save_path} already exists in the folder.'
80
- )
81
- with gfile.GFile(model_save_path, 'wb') as output_file_handle:
82
- output_file_handle.write(self.quantized_model)
80
+ self.export_model(model_save_path, overwrite)
83
81
 
84
- recipe = json.dumps(self.recipe)
85
82
  recipe_save_path = os.path.join(save_folder, model_name + '_recipe.json')
83
+ recipe = json.dumps(self.recipe)
86
84
  with gfile.GFile(recipe_save_path, 'w') as output_file_handle:
87
85
  output_file_handle.write(recipe)
88
86
 
89
- def export_model(self, filepath: str) -> None:
87
+ def export_model(self, filepath: str, overwrite: bool = False) -> None:
90
88
  """Exports the quantized model to a .tflite flatbuffer.
91
89
 
92
90
  Args:
93
91
  filepath: Path (including file name) that the exported model should be
94
92
  serialized to.
93
+ overwrite: Whether to overwrite the model if it already exists.
95
94
 
96
95
  Raises:
97
96
  RuntimeError: If no quantized model is available.
97
+ ValueError: If the model already exists in the folder and overwrite is
98
+ False.
98
99
  """
99
100
  if self.quantized_model is None:
100
101
  raise RuntimeError(
101
102
  'No quantized model to save. Make sure .quantize() is called.'
102
103
  )
104
+ if gfile.Exists(filepath):
105
+ if overwrite:
106
+ logging.warning(
107
+ 'The model %s already exists in the folder. Overwriting the model'
108
+ ' since overwrite=True.',
109
+ filepath,
110
+ )
111
+ else:
112
+ raise ValueError(
113
+ f'The model {filepath} already exists in the folder. Please'
114
+ ' consider change the model name or specify overwrite=True to'
115
+ ' overwrite the model if needed.'
116
+ )
103
117
  with gfile.GFile(filepath, 'wb') as output_file_handle:
104
118
  output_file_handle.write(self.quantized_model)
105
119
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ai-edge-quantizer-nightly
3
- Version: 0.3.0.dev20250806
3
+ Version: 0.3.0.dev20250807
4
4
  Summary: A quantizer for advanced developers to quantize converted AI Edge models.
5
5
  Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
6
6
  Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -13,7 +13,7 @@ ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgI
13
13
  ai_edge_quantizer/params_generator.py,sha256=hcgMHJlERZERUyIAEi6AHJcLJ8gsKIBAEojzFFz-tqk,20098
14
14
  ai_edge_quantizer/params_generator_test.py,sha256=RDYoRZDJfEZRtjlTAU2kZ_4t3JHOqEHxfJX9V4ETAhg,40597
15
15
  ai_edge_quantizer/qtyping.py,sha256=t7S5wTjg6VFmKYzeM_qymu36tT18rnkmY-4YOXMQxzc,16722
16
- ai_edge_quantizer/quantizer.py,sha256=g3DMqFMrMpt9jQttCE0WcdNbMtk0JZnmN5MmCHrNdyM,13202
16
+ ai_edge_quantizer/quantizer.py,sha256=WeKwhh8cYZ07DUwvS0S1EdNzEZSfPODlynqIBvJ-Br4,13624
17
17
  ai_edge_quantizer/quantizer_test.py,sha256=K_HBA56JkFI3HL8VLWCqGEfC0ISh5ldMKoNyBdGRAJg,20368
18
18
  ai_edge_quantizer/recipe.py,sha256=FR0uJceumZrnle2VRSOQZ1uXup4S1cTYKRH-N53mWRo,2919
19
19
  ai_edge_quantizer/recipe_manager.py,sha256=qcGUD7e7BISKdsY9WH2rdaRR3acmzSA5qMezGNbzlpo,8931
@@ -34,8 +34,8 @@ ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha
34
34
  ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=sT5eX5TLZEHTtPfnSkCPDlS0sQxlTFWbCsbvOuj--yY,8889
35
35
  ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py,sha256=U3h5scCHSOdqHA-pb1C3pNgwumT4ydGbtkCSM0ORhrs,12740
36
36
  ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py,sha256=5VUxlaKP1jz4HV-LcKxXMMtmb6eWamq0A6qWJd63cR4,10179
37
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=8_tNLTbOWTKId4DfHBjkOR9RvELUyIpxlGxKu7tv5Ko,7556
38
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=zoF_EHjYqsKkuev8wfuutIITEmp_maa70IpJI_Df3ck,7431
37
+ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1sB2j1vlvvWDKyjcGvA_JLCpN2KbCmMslGCBUc4--V4,8461
38
+ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
39
39
  ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
40
40
  ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
41
41
  ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=0d5XdNbjR2pVsAc-gWX3ik_pAIL-bZ-zemEz_jS2d0c,16531
@@ -68,8 +68,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
68
68
  ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
69
69
  ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
70
70
  ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
71
- ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
72
- ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info/METADATA,sha256=xTO8cST-KD3qLcf9Ts-E51tmKAOQJscoUJ983f-owr0,1528
73
- ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
74
- ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
75
- ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info/RECORD,,
71
+ ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
72
+ ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info/METADATA,sha256=zDKLqFrxvrjKJM46l8OaqwP7YriogP6dqsvY_8Y3O-I,1528
73
+ ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
74
+ ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
75
+ ai_edge_quantizer_nightly-0.3.0.dev20250807.dist-info/RECORD,,