ai-edge-quantizer-nightly 0.3.0.dev20250813__py3-none-any.whl → 0.3.0.dev20250815__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithm_manager.py +2 -0
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +19 -0
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +22 -7
- ai_edge_quantizer/calibrator_test.py +1 -1
- ai_edge_quantizer/default_policy.py +2 -0
- ai_edge_quantizer/qtyping.py +1 -0
- ai_edge_quantizer/quantizer.py +104 -0
- ai_edge_quantizer/quantizer_test.py +70 -0
- ai_edge_quantizer/recipe.py +154 -42
- ai_edge_quantizer/recipe_manager.py +143 -0
- ai_edge_quantizer/recipe_manager_test.py +44 -0
- ai_edge_quantizer/recipe_test.py +86 -16
- ai_edge_quantizer/utils/tfl_flatbuffer_utils.py +1 -0
- {ai_edge_quantizer_nightly-0.3.0.dev20250813.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250815.dist-info}/METADATA +1 -1
- {ai_edge_quantizer_nightly-0.3.0.dev20250813.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250815.dist-info}/RECORD +18 -18
- {ai_edge_quantizer_nightly-0.3.0.dev20250813.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250815.dist-info}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.3.0.dev20250813.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250815.dist-info}/WHEEL +0 -0
- {ai_edge_quantizer_nightly-0.3.0.dev20250813.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250815.dist-info}/top_level.txt +0 -0
ai_edge_quantizer/algorithm_manager.py
CHANGED
@@ -102,6 +102,7 @@ MIN_MAX_OP_NAME_MATERIALIZE_FUNC_DICT = {
     _TFLOpName.LOGISTIC: common_quantize.materialize_softmax_and_logistic,
     _TFLOpName.SLICE: common_quantize.materialize_slice,
     _TFLOpName.SUM: common_quantize.materialize_sum,
+    _TFLOpName.SELECT: common_quantize.materialize_select,
     _TFLOpName.SELECT_V2: common_quantize.materialize_select_v2,
     _TFLOpName.DYNAMIC_UPDATE_SLICE: (
         common_quantize.materialize_dynamic_update_slice
@@ -250,6 +251,7 @@ _OCTAV_OP_NAME_MATERIALIZE_FUNC_DICT = immutabledict({
     _TFLOpName.LOGISTIC: common_quantize.materialize_softmax_and_logistic,
     _TFLOpName.SLICE: common_quantize.materialize_slice,
     _TFLOpName.SUM: common_quantize.materialize_sum,
+    _TFLOpName.SELECT: common_quantize.materialize_select,
     _TFLOpName.SELECT_V2: common_quantize.materialize_select_v2,
     _TFLOpName.DYNAMIC_UPDATE_SLICE: (
         common_quantize.materialize_dynamic_update_slice
ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py
CHANGED
@@ -371,6 +371,25 @@ def materialize_slice(
   )


+def materialize_select(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.select."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_OUTPUT_SCALE,
+      inputs_to_ignore=[
+          0,
+      ],  # Condition tensor does not need to be quantized.
+  )
+
+
 def materialize_select_v2(
     get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
     op_info: qtyping.OpInfo,
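The new handler registers tfl.select under the SAME_AS_OUTPUT_SCALE constraint and skips input 0 (the boolean condition). Conceptually, that constraint suggests the two value inputs reuse the output tensor's quantization parameters, so the quantized select is a plain elementwise pick with no rescaling. A minimal numpy sketch of that idea, not the library's implementation; the scale and zero point below are made-up illustration values:

import numpy as np

# Hypothetical quantization parameters chosen for the select op's output.
output_scale, output_zero_point = 0.05, -3

def quantize(values, scale, zero_point, qmin=-128, qmax=127):
  """Uniform affine quantization to int8."""
  q = np.round(values / scale) + zero_point
  return np.clip(q, qmin, qmax).astype(np.int8)

condition = np.array([True, False, True])          # input 0: stays unquantized.
a = np.array([0.4, -1.2, 2.5], dtype=np.float32)   # input 1
b = np.array([1.0, 0.3, -0.7], dtype=np.float32)   # input 2

# Both value inputs share the output's (scale, zero_point), so select reduces
# to an elementwise pick in the quantized domain.
qa = quantize(a, output_scale, output_zero_point)
qb = quantize(b, output_scale, output_zero_point)
q_out = np.where(condition, qa, qb)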
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py
CHANGED
@@ -387,22 +387,37 @@ def tensor_zp_scale_from_min_max(
   )
   qmin, qmax = get_quantized_range(qtype)
   min_bound = 1e-4  # 1e-6 precision for int8 and 1e-8 for int16.
+  pos_clipping_values = None if clipping_values is None else clipping_values
+  neg_clipping_values = None if clipping_values is None else -clipping_values

   if granularity == qtyping.QuantGranularity.BLOCKWISE:
-    # Blockwise quantization uses float16 scale,
-    # so the maximum
-
-
+    # Blockwise quantization uses float16 scale,
+    # with 7 bit mantissa, so the maximum scale value is 65280 and maximum
+    # representable range is [-65280 * (2 ** num_bits),
+    # 65280 * (2 ** num_bits - 1)].
+    # Note that we have one extra value on the negative side.
+    float16_max = np.broadcast_to(
+        np.array(65280) * (2**num_bits - 1), max_value.shape
+    )
+    float16_min = np.broadcast_to(
+        np.array(-65280) * (2**num_bits), min_value.shape
+    )
+    pos_clipping_values = (
         float16_max
-        if
-        else np.minimum(
+        if pos_clipping_values is None
+        else np.minimum(pos_clipping_values, float16_max)
+    )
+    neg_clipping_values = (
+        float16_min
+        if neg_clipping_values is None
+        else np.maximum(neg_clipping_values, float16_min)
     )

   if symmetric:
     bound = np.maximum(np.abs(min_value), np.abs(max_value))
     bound = np.maximum(bound, min_bound)
     if clipping_values is not None:
-      bound = np.clip(bound,
+      bound = np.clip(bound, neg_clipping_values, pos_clipping_values)
     if not qtype.signed:
       half_q = (qmax - 1) / 2
       scale = bound / half_q
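The new comment pins down the clipping range for blockwise quantization with float16 scales. A rough illustration of the arithmetic, assuming num_bits=4 as an example value; this is not the library's exact code path:

import numpy as np

num_bits = 4
max_scale = 65280.0  # largest float16 scale value used by the blockwise scheme

range_max = max_scale * (2**num_bits - 1)  # 65280 * 15  =  979200.0
range_min = -max_scale * (2**num_bits)     # -65280 * 16 = -1044480.0 (one extra negative step)

# A symmetric bound derived from per-block min/max statistics gets clamped
# into [range_min, range_max] before the scale is derived from it.
bound = np.array([2.5e6, 2.0e3])
clipped = np.clip(bound, range_min, range_max)  # -> [979200., 2000.]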
ai_edge_quantizer/calibrator_test.py
CHANGED
@@ -302,7 +302,7 @@ class CalibratorToyGemma2Test(googletest.TestCase):
         self._toy_gemma2_calibration_dataset,
         model_recipe_manager=recipe_mngr,
     )
-    self.assertLen(calib.get_model_qsvs(),
+    self.assertLen(calib.get_model_qsvs(), 290)


 if __name__ == "__main__":
ai_edge_quantizer/default_policy.py
CHANGED
@@ -180,6 +180,7 @@ DEFAULT_JSON_POLICY = """
       "SLICE",
       "EMBEDDING_LOOKUP",
       "SUM",
+      "SELECT",
      "SELECT_V2",
       "DYNAMIC_UPDATE_SLICE",
       "SELECT_V2",
@@ -222,6 +223,7 @@ DEFAULT_JSON_POLICY = """
       "SLICE",
       "EMBEDDING_LOOKUP",
       "SUM",
+      "SELECT",
       "SELECT_V2",
       "DYNAMIC_UPDATE_SLICE",
       "SELECT_V2",
ai_edge_quantizer/qtyping.py
CHANGED
ai_edge_quantizer/quantizer.py
CHANGED
@@ -35,6 +35,7 @@ from ai_edge_quantizer.utils import tfl_interpreter_utils
 from ai_edge_quantizer.utils import validation_utils
 from tensorflow.python.platform import gfile  # pylint: disable=g-direct-tensorflow-import

+
 # Expose algorithm names to users.
 AlgorithmName = algorithm_manager.AlgorithmName

@@ -220,6 +221,109 @@ class Quantizer:
         regex, operation_name, op_config, algorithm_key
     )

+  def add_dynamic_config(
+      self,
+      regex: str,
+      operation_name: _TFLOpName,
+      num_bits: int,
+      granularity: qtyping.QuantGranularity = qtyping.QuantGranularity.CHANNELWISE,
+      algorithm_key: str = algorithm_manager.AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+  ):
+    """Adds a dynamic quantization configuration to the recipe.
+
+    During dynamic quantization, activations are not processed by AEQ and
+    remain in float format. The runtime kernel is expected to quantize these
+    activations on-the-fly, as indicated by compute_precision=Integer and
+    explicit_dequantize=False.
+
+    The model quality may suffer due to the on-the-fly quantization. If quality
+    is a concern, consider using weight-only quantization.
+
+    Args:
+      regex: Regular expression for layer name (op's output tensor name)
+        matching.
+      operation_name: Target TFLite operation.
+      num_bits: Number of bits for quantization.
+      granularity: Granularity of quantization.
+      algorithm_key: Algorithm key to be applied.
+    """
+    self._recipe_manager.add_dynamic_config(
+        regex, operation_name, num_bits, granularity, algorithm_key
+    )
+
+  def add_weight_only_config(
+      self,
+      regex: str,
+      operation_name: _TFLOpName,
+      num_bits: int,
+      granularity: qtyping.QuantGranularity = qtyping.QuantGranularity.CHANNELWISE,
+      algorithm_key: str = algorithm_manager.AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+  ):
+    """Adds a weight only quantization configuration to the recipe.
+
+    In weight-only quantization, weights are quantized, but the actual operation
+    (op) computation remains in float. The quantized weight is explicitly
+    dequantized before being fed into the op. This is achieved by inserting a
+    dequantize op between the quantized weight and the consuming op. To enable
+    this, both compute_precision will be set to Float and explicit_dequantize to
+    True.
+
+    Weight-only quantization is useful for reducing model size but may
+    not decrease latency due to float computation. However, quantized model
+    generally has better quality than other quantization options (e.g., dynamic
+    range quantization) due to no loss of precision on activations. If latency
+    is a concern, consider using dynamic quantization.
+
+    Args:
+      regex: Regular expression for layer name matching.
+      operation_name: Target TFLite operation.
+      num_bits: Number of bits for quantization.
+      granularity: Granularity of quantization.
+      algorithm_key: Algorithm key to be applied.
+    """
+    self._recipe_manager.add_weight_only_config(
+        regex, operation_name, num_bits, granularity, algorithm_key
+    )
+
+  def add_static_config(
+      self,
+      regex: str,
+      operation_name: _TFLOpName,
+      activation_num_bits: int,
+      weight_num_bits: int,
+      weight_granularity: qtyping.QuantGranularity = qtyping.QuantGranularity.CHANNELWISE,
+      algorithm_key: str = algorithm_manager.AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+  ):
+    """Adds a static quantization configuration to the recipe.
+
+    In static quantization, both weights and activations are quantized. This
+    requires a calibration step to determine the quantization parameters (e.g.,
+    min/max ranges) for activations. The quantized model uses integer arithmetic
+    for computations, which can lead to significant latency reductions.
+
+    However, calibration is needed to determine the quantization parameters for
+    activations, which requires sample data and may lead to quality loss. If
+    there is no hardware requirement for full integer quantization, consider
+    using dynamic quantization for simplicity.
+
+    Args:
+      regex: Regular expression for layer name matching.
+      operation_name: Target TFLite operation.
+      activation_num_bits: Number of bits for activation quantization.
+      weight_num_bits: Number of bits for weight quantization.
+      weight_granularity: Granularity of weight quantization.
+      algorithm_key: Algorithm key to be applied.
+    """
+    self._recipe_manager.add_static_config(
+        regex,
+        operation_name,
+        activation_num_bits,
+        weight_num_bits,
+        weight_granularity,
+        algorithm_key,
+    )
+
   @property
   def need_calibration(self) -> bool:
     """Checks if the current recipe needs calibration."""
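These methods are thin wrappers over the matching RecipeManager helpers added in recipe_manager.py further down. A minimal usage sketch; the model path and layer regex below are placeholders, not assets shipped with the package:

from ai_edge_quantizer import quantizer
from ai_edge_quantizer import qtyping

qt = quantizer.Quantizer('model.tflite')  # placeholder path to a float TFLite model

# Dynamic-range int8 for fully-connected ops whose output tensor name matches
# the (hypothetical) scope below.
qt.add_dynamic_config(
    regex='.*/Dense/.*',
    operation_name=qtyping.TFLOperationName.FULLY_CONNECTED,
    num_bits=8,
)
result = qt.quantize()
# Configs added via add_static_config would instead require qt.calibrate(...)
# with sample inputs first, then qt.quantize(calibration_result).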
ai_edge_quantizer/quantizer_test.py
CHANGED
@@ -92,6 +92,76 @@ class QuantizerTest(parameterized.TestCase):
         new_op_config.compute_precision,
     )

+  def test_add_dynamic_config_succeeds(self):
+    self._quantizer.load_quantization_recipe(self._test_recipe_path)
+    scope_regex = '.*/Dense/.*'
+    self._quantizer.add_dynamic_config(
+        regex=scope_regex,
+        operation_name=qtyping.TFLOperationName.FULLY_CONNECTED,
+        num_bits=8,
+    )
+    updated_recipe = self._quantizer.get_quantization_recipe()
+    self.assertLen(updated_recipe, 2)
+
+    added_config = updated_recipe[-1]
+    self.assertEqual(added_config['regex'], scope_regex)
+    self.assertEqual(
+        added_config['op_config']['compute_precision'],
+        qtyping.ComputePrecision.INTEGER,
+    )
+    self.assertFalse(added_config['op_config']['explicit_dequantize'])
+    self.assertEqual(
+        added_config['op_config']['weight_tensor_config']['num_bits'], 8
+    )
+
+  def test_add_weight_only_config_succeeds(self):
+    self._quantizer.load_quantization_recipe(self._test_recipe_path)
+    scope_regex = '.*/Dense/.*'
+    self._quantizer.add_weight_only_config(
+        regex=scope_regex,
+        operation_name=qtyping.TFLOperationName.FULLY_CONNECTED,
+        num_bits=4,
+    )
+    updated_recipe = self._quantizer.get_quantization_recipe()
+    self.assertLen(updated_recipe, 2)
+
+    added_config = updated_recipe[-1]
+    self.assertEqual(added_config['regex'], scope_regex)
+    self.assertEqual(
+        added_config['op_config']['compute_precision'],
+        qtyping.ComputePrecision.FLOAT,
+    )
+    self.assertTrue(added_config['op_config']['explicit_dequantize'])
+    self.assertEqual(
+        added_config['op_config']['weight_tensor_config']['num_bits'], 4
+    )
+
+  def test_add_static_config_succeeds(self):
+    self._quantizer.load_quantization_recipe(self._test_recipe_path)
+    scope_regex = '.*/Dense/.*'
+    self._quantizer.add_static_config(
+        regex=scope_regex,
+        operation_name=qtyping.TFLOperationName.FULLY_CONNECTED,
+        activation_num_bits=8,
+        weight_num_bits=4,
+    )
+    updated_recipe = self._quantizer.get_quantization_recipe()
+    self.assertLen(updated_recipe, 2)
+
+    added_config = updated_recipe[-1]
+    self.assertEqual(added_config['regex'], scope_regex)
+    self.assertEqual(
+        added_config['op_config']['compute_precision'],
+        qtyping.ComputePrecision.INTEGER,
+    )
+    self.assertFalse(added_config['op_config']['explicit_dequantize'])
+    self.assertEqual(
+        added_config['op_config']['activation_tensor_config']['num_bits'], 8
+    )
+    self.assertEqual(
+        added_config['op_config']['weight_tensor_config']['num_bits'], 4
+    )
+
   def test_load_quantization_recipe_succeeds(self):
     qt = quantizer.Quantizer(self._test_model_path, None)
     qt.load_quantization_recipe(self._test_recipe_path)
ai_edge_quantizer/recipe.py
CHANGED
@@ -15,51 +15,163 @@

 """Quantization recipe module."""

+from ai_edge_quantizer import algorithm_manager
+from ai_edge_quantizer import qtyping
+from ai_edge_quantizer import recipe_manager

-
-  """Returns a dynamic quantization recipe with int8 weights and float32 activation."""
-  return [
-      dict({
-          'regex': '.*',
-          'operation': '*',
-          'algorithm_key': 'min_max_uniform_quantize',
-          'op_config': {
-              'weight_tensor_config': {
-                  'num_bits': 8,
-                  'symmetric': True,
-                  'granularity': 'CHANNELWISE',
-                  'dtype': 'INT',
-                  'block_size': 0,
-              },
-              'compute_precision': 'INTEGER',
-              'explicit_dequantize': False,
-              'skip_checks': False,
-          },
-      })
-  ]
+AlgorithmName = algorithm_manager.AlgorithmName


-def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+def dynamic_wi8_afp32(
+    algorithm_key: AlgorithmName = AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+):
+  """Returns a dynamic quantization recipe with int8 weights and float32 activation.
+
+  All supported ops will be quantized with int8 weights and float32 activations,
+  which will be dynamically quantized to int8 during inference to enable int8
+  compute. The model quality may suffer due to the on-the-fly quantization. If
+  quality is a concern, consider using weight-only quantization.
+
+  Args:
+    algorithm_key: The algorithm to use for quantization.
+
+  Returns:
+    A dynamic quantization recipe.
+  """
+  rp_manager = recipe_manager.RecipeManager()
+  rp_manager.add_dynamic_config(
+      regex='.*',
+      operation_name=qtyping.TFLOperationName.ALL_SUPPORTED,
+      num_bits=8,
+      algorithm_key=algorithm_key,
+  )
+  return rp_manager.get_quantization_recipe()
+
+
+def dynamic_wi4_afp32(
+    algorithm_key: AlgorithmName = AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+):
+  """Returns a dynamic quantization recipe with int4 weights and float32 activation.
+
+  All supported ops will be quantized with int4 weights and float32 activations,
+  which will be dynamically quantized to int4 during inference to enable int4
+  compute.
+
+  Args:
+    algorithm_key: The algorithm to use for quantization.
+
+  Returns:
+    A dynamic quantization recipe.
+  """
+  rp_manager = recipe_manager.RecipeManager()
+  rp_manager.add_dynamic_config(
+      regex='.*',
+      operation_name=qtyping.TFLOperationName.ALL_SUPPORTED,
+      num_bits=4,
+      algorithm_key=algorithm_key,
+  )
+  return rp_manager.get_quantization_recipe()
+
+
+def weight_only_wi8_afp32(
+    algorithm_key: AlgorithmName = AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+):
+  """Returns a weight-only quantization recipe with int8 weights and float32 activation.
+
+  All supported ops will be quantized with int8 weights and float32 activations.
+  The weights will be explicitly dequantized before being fed into the op to
+  enable float compute thus retain model quality. If latency is a concern,
+  consider using dynamic range quantization.
+
+  Args:
+    algorithm_key: The algorithm to use for quantization.
+
+  Returns:
+    A weight-only quantization recipe.
+  """
+  rp_manager = recipe_manager.RecipeManager()
+  rp_manager.add_weight_only_config(
+      regex='.*',
+      operation_name=qtyping.TFLOperationName.ALL_SUPPORTED,
+      num_bits=8,
+      algorithm_key=algorithm_key,
+  )
+  return rp_manager.get_quantization_recipe()
+
+
+def weight_only_wi4_afp32(
+    algorithm_key: AlgorithmName = AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+):
+  """Returns a weight-only quantization recipe with int4 weights and float32 activation.
+
+  All supported ops will be quantized with int4 weights and float32 activations.
+  The weights will be explicitly dequantized before being fed into the op to
+  enable float compute thus retain model quality.
+
+  Args:
+    algorithm_key: The algorithm to use for quantization.
+
+  Returns:
+    A weight-only quantization recipe.
+  """
+  rp_manager = recipe_manager.RecipeManager()
+  rp_manager.add_weight_only_config(
+      regex='.*',
+      operation_name=qtyping.TFLOperationName.ALL_SUPPORTED,
+      num_bits=4,
+      algorithm_key=algorithm_key,
+  )
+  return rp_manager.get_quantization_recipe()
+
+
+def static_wi8_ai8(
+    algorithm_key: AlgorithmName = AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+):
+  """Returns a static quantization recipe with int8 weights and int8 activations.
+
+  All supported ops will be quantized with int8 weights and int8 activations.
+  Calibration is needed to use this recipe.
+
+  Args:
+    algorithm_key: The algorithm to use for quantization.
+
+  Returns:
+    A static quantization recipe.
+  """
+  rp_manager = recipe_manager.RecipeManager()
+  rp_manager.add_static_config(
+      regex='.*',
+      operation_name=qtyping.TFLOperationName.ALL_SUPPORTED,
+      activation_num_bits=8,
+      weight_num_bits=8,
+      algorithm_key=algorithm_key,
+  )
+  return rp_manager.get_quantization_recipe()
+
+
+def static_wi8_ai16(
+    algorithm_key: AlgorithmName = AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+):
+  """Returns a static quantization recipe with int8 weights and int16 activations.
+
+  All supported ops will be quantized with int8 weights and int16 activations.
+  Calibration is needed to use this recipe.
+
+  Args:
+    algorithm_key: The algorithm to use for quantization.
+
+  Returns:
+    A static quantization recipe.
+  """
+  rp_manager = recipe_manager.RecipeManager()
+  rp_manager.add_static_config(
+      regex='.*',
+      operation_name=qtyping.TFLOperationName.ALL_SUPPORTED,
+      activation_num_bits=16,
+      weight_num_bits=8,
+      algorithm_key=algorithm_key,
+  )
+  return rp_manager.get_quantization_recipe()


 def dynamic_legacy_wi8_afp32():
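A hedged sketch of driving the Quantizer with one of these prebuilt recipes; the model path is a placeholder, and static recipes additionally need calibration data, as the updated recipe_test.py below demonstrates:

from ai_edge_quantizer import quantizer
from ai_edge_quantizer import recipe

qt = quantizer.Quantizer('model.tflite')  # placeholder path to a float TFLite model
qt.load_quantization_recipe(recipe.dynamic_wi8_afp32())
result = qt.quantize()  # dynamic recipes do not require calibration

# For static recipes (recipe.static_wi8_ai8() or recipe.static_wi8_ai16()):
#   calibration_result = qt.calibrate(calibration_dataset)
#   result = qt.quantize(calibration_result)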
ai_edge_quantizer/recipe_manager.py
CHANGED
@@ -243,3 +243,146 @@ class RecipeManager:
     ):
       return True
     return False
+
+  def add_dynamic_config(
+      self,
+      regex: str,
+      operation_name: _TFLOpName,
+      num_bits: int,
+      granularity: qtyping.QuantGranularity = qtyping.QuantGranularity.CHANNELWISE,
+      algorithm_key: str = algorithm_manager.AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+  ):
+    """Adds a dynamic quantization configuration to the recipe.
+
+    During dynamic quantization, activations are not processed by AEQ and
+    remain in float format. The runtime kernel is expected to quantize these
+    activations on-the-fly, as indicated by compute_precision=Integer and
+    explicit_dequantize=False.
+
+    The model quality may suffer due to the on-the-fly quantization. If quality
+    is a concern, consider using weight-only quantization.
+
+    Args:
+      regex: Regular expression for layer name matching.
+      operation_name: Target TFLite operation.
+      num_bits: Number of bits for quantization.
+      granularity: Granularity of quantization.
+      algorithm_key: Algorithm key to be applied.
+    """
+    weight_config = qtyping.TensorQuantizationConfig(
+        num_bits=num_bits,
+        symmetric=True,  # LiteRT kernels only support symmetric quantized weights.
+        granularity=granularity,
+    )
+    self.add_quantization_config(
+        regex,
+        operation_name,
+        op_config=_OpQuantizationConfig(
+            weight_tensor_config=weight_config,
+            compute_precision=qtyping.ComputePrecision.INTEGER,
+            explicit_dequantize=False,
+        ),
+        algorithm_key=algorithm_key,
+    )
+
+  def add_weight_only_config(
+      self,
+      regex: str,
+      operation_name: _TFLOpName,
+      num_bits: int,
+      granularity: qtyping.QuantGranularity = qtyping.QuantGranularity.CHANNELWISE,
+      algorithm_key: str = algorithm_manager.AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+  ):
+    """Adds a weight only quantization configuration to the recipe.
+
+    In weight-only quantization, weights are quantized, but the actual operation
+    (op) computation remains in float. The quantized weight is explicitly
+    dequantized before being fed into the op. This is achieved by inserting a
+    dequantize op between the quantized weight and the consuming op. To enable
+    this, both compute_precision will be set to Float and explicit_dequantize to
+    True.
+
+    Weight-only quantization is useful for reducing model size but may
+    not decrease latency due to float computation. However, quantized model
+    generally has better quality than other quantization options (e.g., dynamic
+    range quantization) due to no loss of precision on activations. If latency
+    is a concern, consider using dynamic quantization.
+
+    Args:
+      regex: Regular expression for layer name matching.
+      operation_name: Target TFLite operation.
+      num_bits: Number of bits for quantization.
+      granularity: Granularity of quantization.
+      algorithm_key: Algorithm key to be applied.
+    """
+    weight_config = qtyping.TensorQuantizationConfig(
+        num_bits=num_bits,
+        symmetric=True,  # TFL kernels only support symmetric quantized weights.
+        granularity=granularity,
+    )
+    self.add_quantization_config(
+        regex,
+        operation_name,
+        op_config=_OpQuantizationConfig(
+            weight_tensor_config=weight_config,
+            compute_precision=qtyping.ComputePrecision.FLOAT,
+            explicit_dequantize=True,
+        ),
+        algorithm_key=algorithm_key,
+    )
+
+  def add_static_config(
+      self,
+      regex: str,
+      operation_name: _TFLOpName,
+      activation_num_bits: int,
+      weight_num_bits: int,
+      weight_granularity: qtyping.QuantGranularity = qtyping.QuantGranularity.CHANNELWISE,
+      algorithm_key: str = algorithm_manager.AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+  ):
+    """Adds a static range quantization configuration to the recipe.
+
+    In static quantization, both weights and activations are quantized. This
+    requires a calibration step to determine the quantization parameters (e.g.,
+    min/max ranges) for activations. The quantized model uses integer arithmetic
+    for computations, which can lead to significant latency reductions.
+
+    However, calibration is needed to determine the quantization parameters for
+    activations, which requires sample data and may lead to quality loss. If
+    there is no hardware requirement for full integer quantization, consider
+    using dynamic quantization for simplicity.
+
+    Args:
+      regex: Regular expression for layer name matching.
+      operation_name: Target TFLite operation.
+      activation_num_bits: Number of bits for activation quantization.
+      weight_num_bits: Number of bits for weight quantization.
+      weight_granularity: Granularity of weight quantization.
+      algorithm_key: Algorithm key to be applied.
+    """
+    if activation_num_bits not in [16, 8]:
+      raise ValueError(
+          'Activation quantization is only supported for 16 or 8 bits.'
+      )
+    # INT16 is symmetric and INT8 is asymmetric due to LiteRT kernel limitations.
+    activation_symmetric = activation_num_bits == 16
+    activation_config = qtyping.TensorQuantizationConfig(
+        num_bits=activation_num_bits, symmetric=activation_symmetric
+    )
+    weight_config = qtyping.TensorQuantizationConfig(
+        num_bits=weight_num_bits,
+        symmetric=True,  # TFL kernels only support symmetric quantized weights.
+        granularity=weight_granularity,
+    )
+    self.add_quantization_config(
+        regex,
+        operation_name,
+        op_config=_OpQuantizationConfig(
+            activation_tensor_config=activation_config,
+            weight_tensor_config=weight_config,
+            compute_precision=qtyping.ComputePrecision.INTEGER,
+            explicit_dequantize=False,
+        ),
+        algorithm_key=algorithm_key,
+    )
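add_static_config also encodes a kernel convention noted in the code: int16 activations are quantized symmetrically, int8 activations asymmetrically, and weights are always symmetric. A small sketch of building a config programmatically and inspecting the result; the layer name is arbitrary and the attribute names follow the fields asserted in the tests below:

from ai_edge_quantizer import qtyping
from ai_edge_quantizer import recipe_manager

manager = recipe_manager.RecipeManager()
manager.add_static_config(
    regex='.*',  # apply to every matching layer
    operation_name=qtyping.TFLOperationName.FULLY_CONNECTED,
    activation_num_bits=16,
    weight_num_bits=8,
)
_, op_config = manager.get_quantization_configs(
    qtyping.TFLOperationName.FULLY_CONNECTED, 'some/layer/name'
)
print(op_config.activation_tensor_config.symmetric)  # True for 16-bit activations
print(op_config.weight_tensor_config.symmetric)      # weights are always symmetric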
ai_edge_quantizer/recipe_manager_test.py
CHANGED
@@ -293,6 +293,50 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
     # DRQ check.
     self.assertEqual(op_config.compute_precision, _ComputePrecision.INTEGER)

+  def test_add_dynamic_config(self):
+    self._recipe_manager.add_dynamic_config(
+        regex='.*/Dense/.*',
+        operation_name=_TFLOpName.FULLY_CONNECTED,
+        num_bits=8,
+    )
+    alg_key, op_config = self._recipe_manager.get_quantization_configs(
+        _TFLOpName.FULLY_CONNECTED, 'model/Dense/op'
+    )
+    self.assertEqual(alg_key, _AlgorithmName.MIN_MAX_UNIFORM_QUANT)
+    self.assertEqual(op_config.compute_precision, _ComputePrecision.INTEGER)
+    self.assertFalse(op_config.explicit_dequantize)
+    self.assertIsNone(op_config.activation_tensor_config)
+    weight_tensor_config = op_config.weight_tensor_config
+    self.assertIsNotNone(weight_tensor_config)
+    self.assertEqual(weight_tensor_config.num_bits, 8)
+    self.assertTrue(weight_tensor_config.symmetric)
+    self.assertEqual(
+        weight_tensor_config.granularity,
+        _QuantGranularity.CHANNELWISE,
+    )
+
+  def test_add_weight_only_config(self):
+    self._recipe_manager.add_weight_only_config(
+        regex='.*/Dense/.*',
+        operation_name=_TFLOpName.FULLY_CONNECTED,
+        num_bits=4,
+    )
+    alg_key, op_config = self._recipe_manager.get_quantization_configs(
+        _TFLOpName.FULLY_CONNECTED, 'model/Dense/op'
+    )
+    self.assertEqual(alg_key, _AlgorithmName.MIN_MAX_UNIFORM_QUANT)
+    self.assertEqual(op_config.compute_precision, _ComputePrecision.FLOAT)
+    self.assertTrue(op_config.explicit_dequantize)
+    self.assertIsNone(op_config.activation_tensor_config)
+    weight_tensor_config = op_config.weight_tensor_config
+    self.assertIsNotNone(weight_tensor_config)
+    self.assertEqual(weight_tensor_config.num_bits, 4)
+    self.assertTrue(weight_tensor_config.symmetric)
+    self.assertEqual(
+        weight_tensor_config.granularity,
+        _QuantGranularity.CHANNELWISE,
+    )
+
   def test_set_full_integer_quantization_config(self):
     _add_default_int8xint8_integer_recipe(self._recipe_manager)
     # Full integer setting is global
ai_edge_quantizer/recipe_test.py
CHANGED
@@ -21,6 +21,7 @@ from tensorflow.python.platform import googletest
 from ai_edge_quantizer import quantizer
 from ai_edge_quantizer import recipe
 from ai_edge_quantizer.utils import test_utils
+from ai_edge_quantizer.utils import tfl_interpreter_utils


 _TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile('')
@@ -30,21 +31,63 @@ class RecipeTest(parameterized.TestCase):

   def setUp(self):
     super().setUp()
-
+    # Weights has < 1024 elements so legacy recipe will not quantize it.
+    self._small_model_path = os.path.join(
         _TEST_DATA_PREFIX_PATH,
         'tests/models/single_conv2d_transpose_bias.tflite',
     )
+    self._test_model_path = os.path.join(
+        _TEST_DATA_PREFIX_PATH,
+        'tests/models/conv_fc_mnist.tflite',
+    )

-  def _quantize_with_recipe_func(self, recipe_func):
-    qt = quantizer.Quantizer(
+  def _quantize_with_recipe_func(self, recipe_func, test_model_path):
+    qt = quantizer.Quantizer(test_model_path)
     qt.load_quantization_recipe(recipe_func())
     self.assertIsNone(qt._result.quantized_model)
-
-
-
+    if qt.need_calibration:
+      calibration_data = tfl_interpreter_utils.create_random_normal_input_data(
+          qt.float_model,
+          num_samples=1,
+      )
+      calibration_result = qt.calibrate(calibration_data)
+      quantization_result = qt.quantize(calibration_result)
+    else:
+      quantization_result = qt.quantize()
+    self.assertIsNotNone(quantization_result.quantized_model)
+    return quantization_result

   def test_quantization_from_dynamic_wi8_afp32_func_succeeds(self):
-    quant_result = self._quantize_with_recipe_func(
+    quant_result = self._quantize_with_recipe_func(
+        recipe.dynamic_wi8_afp32, self._test_model_path
+    )
+    self.assertLess(
+        len(quant_result.quantized_model),
+        os.path.getsize(self._test_model_path),
+    )
+
+  def test_quantization_from_dynamic_wi4_afp32_func_succeeds(self):
+    quant_result = self._quantize_with_recipe_func(
+        recipe.dynamic_wi4_afp32, self._test_model_path
+    )
+    self.assertLess(
+        len(quant_result.quantized_model),
+        os.path.getsize(self._test_model_path),
+    )
+
+  def test_quantization_from_weight_only_wi8_afp32_func_succeeds(self):
+    quant_result = self._quantize_with_recipe_func(
+        recipe.weight_only_wi8_afp32, self._test_model_path
+    )
+    self.assertLess(
+        len(quant_result.quantized_model),
+        os.path.getsize(self._test_model_path),
+    )
+
+  def test_quantization_from_weight_only_wi4_afp32_func_succeeds(self):
+    quant_result = self._quantize_with_recipe_func(
+        recipe.weight_only_wi4_afp32, self._test_model_path
+    )
     self.assertLess(
         len(quant_result.quantized_model),
         os.path.getsize(self._test_model_path),
@@ -52,11 +95,12 @@ class RecipeTest(parameterized.TestCase):

   def test_quantization_from_dynamic_legacy_wi8_afp32_func_succeeds(self):
     quant_result = self._quantize_with_recipe_func(
-        recipe.dynamic_legacy_wi8_afp32
+        recipe.dynamic_legacy_wi8_afp32,
+        self._small_model_path,
     )
     self.assertLen(
         quant_result.quantized_model,
-        os.path.getsize(self.
+        os.path.getsize(self._small_model_path),
     )

   @parameterized.named_parameters(
@@ -65,28 +109,54 @@ class RecipeTest(parameterized.TestCase):
           recipe_json_path='recipes/dynamic_wi8_afp32_recipe.json',
           recipe_func=recipe.dynamic_wi8_afp32,
       ),
+      dict(
+          testcase_name='weight_only_wi8_afp32',
+          recipe_json_path='recipes/default_af32w8float_recipe.json',
+          recipe_func=recipe.weight_only_wi8_afp32,
+      ),
+      dict(
+          testcase_name='weight_only_wi4_afp32',
+          recipe_json_path='recipes/default_af32w4float_recipe.json',
+          recipe_func=recipe.weight_only_wi4_afp32,
+      ),
       dict(
           testcase_name='dynamic_legacy_wi8_afp32',
           recipe_json_path='recipes/dynamic_legacy_wi8_afp32_recipe.json',
           recipe_func=recipe.dynamic_legacy_wi8_afp32,
       ),
+      dict(
+          testcase_name='a8w8',
+          recipe_json_path='recipes/default_a8w8_recipe.json',
+          recipe_func=recipe.static_wi8_ai8,
+      ),
+      dict(
+          testcase_name='a16w8',
+          recipe_json_path='recipes/default_a16w8_recipe.json',
+          recipe_func=recipe.static_wi8_ai16,
+      ),
   )
   def test_recipe_func_and_json_matches(self, recipe_json_path, recipe_func):
     # Quantize with recipe from function in recipe module.
-    quant_result_from_func = self._quantize_with_recipe_func(
+    quant_result_from_func = self._quantize_with_recipe_func(
+        recipe_func, self._test_model_path
+    )

     # Quantize with recipe from json file.
     qt_json = quantizer.Quantizer(self._test_model_path)
     json_recipe_path = os.path.join(_TEST_DATA_PREFIX_PATH, recipe_json_path)
     qt_json.load_quantization_recipe(json_recipe_path)
-
+    if qt_json.need_calibration:
+      calibration_data = tfl_interpreter_utils.create_random_normal_input_data(
+          qt_json.float_model,
+          num_samples=1,
+      )
+      calibration_result = qt_json.calibrate(calibration_data)
+      quant_result_from_json = qt_json.quantize(calibration_result)
+    else:
+      quant_result_from_json = qt_json.quantize()
     self.assertIsNotNone(quant_result_from_json.quantized_model)

-    # Check if the
-    self.assertEqual(
-        quant_result_from_func.recipe,
-        quant_result_from_json.recipe,
-    )
+    # Check if the quantized models match.
     self.assertEqual(
         len(quant_result_from_func.quantized_model),
         len(quant_result_from_json.quantized_model),
ai_edge_quantizer/utils/tfl_flatbuffer_utils.py
CHANGED
@@ -51,6 +51,7 @@ TFL_OP_NAME_TO_CODE = immutabledict.immutabledict({
     _TFLOpName.LOGISTIC: schema.BuiltinOperator.LOGISTIC,
     _TFLOpName.SLICE: schema.BuiltinOperator.SLICE,
     _TFLOpName.SUM: schema.BuiltinOperator.SUM,
+    _TFLOpName.SELECT: schema.BuiltinOperator.SELECT,
     _TFLOpName.SELECT_V2: schema.BuiltinOperator.SELECT_V2,
     _TFLOpName.STABLEHLO_COMPOSITE: schema.BuiltinOperator.STABLEHLO_COMPOSITE,
     _TFLOpName.DYNAMIC_UPDATE_SLICE: (
{ai_edge_quantizer_nightly-0.3.0.dev20250813.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250815.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-quantizer-nightly
-Version: 0.3.0.dev20250813
+Version: 0.3.0.dev20250815
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
{ai_edge_quantizer_nightly-0.3.0.dev20250813.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250815.dist-info}/RECORD
CHANGED
@@ -1,24 +1,24 @@
 ai_edge_quantizer/__init__.py,sha256=4pFSkukSwahYyzwqia0yPRyz8TnFQfGRthVJhYpMWas,793
-ai_edge_quantizer/algorithm_manager.py,sha256=
+ai_edge_quantizer/algorithm_manager.py,sha256=O_psY-4R0ARmgTQHwfH2px81AJY8PmfamHtE7xJDRjQ,13424
 ai_edge_quantizer/algorithm_manager_api.py,sha256=u903TG0s1uIDhJqfeJne3CFl8A93phZrwgV2-hwdcXU,9247
 ai_edge_quantizer/algorithm_manager_api_test.py,sha256=w6bSONvXkX6bzXAGc0-7b6gNDt9oz9ieq97KP8Sg_JU,7666
 ai_edge_quantizer/calibrator.py,sha256=Sms7_AIHPH9G5xFaz5Ef3a5gPhxuIWQI8d2LUM8C96I,12071
-ai_edge_quantizer/calibrator_test.py,sha256=
+ai_edge_quantizer/calibrator_test.py,sha256=ZLzIMWB2FSFU4TOatDioYuwp_kLh8iSCefZ5_Q9FU7s,11900
 ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
-ai_edge_quantizer/default_policy.py,sha256=
+ai_edge_quantizer/default_policy.py,sha256=LXEdwdr0SiCfWo6ZwbHQ8ykoqA40GV6fGAT1aofry3o,11556
 ai_edge_quantizer/model_modifier.py,sha256=teGa8I6kGvn6TQY6Xv53YFIc_pQEhNvM9Zb4bvhezyw,7110
 ai_edge_quantizer/model_modifier_test.py,sha256=cJd04SLOG-fQZZNZPcisoBLx3cLtWEwGqUBbLb-pif4,4751
 ai_edge_quantizer/model_validator.py,sha256=Hj0_5o-Oa3dSlJ3ryVjRhvsyelHNyek1GrtG9buMczg,13153
 ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
 ai_edge_quantizer/params_generator.py,sha256=hcgMHJlERZERUyIAEi6AHJcLJ8gsKIBAEojzFFz-tqk,20098
 ai_edge_quantizer/params_generator_test.py,sha256=RDYoRZDJfEZRtjlTAU2kZ_4t3JHOqEHxfJX9V4ETAhg,40597
-ai_edge_quantizer/qtyping.py,sha256=
-ai_edge_quantizer/quantizer.py,sha256=
-ai_edge_quantizer/quantizer_test.py,sha256=
-ai_edge_quantizer/recipe.py,sha256=
-ai_edge_quantizer/recipe_manager.py,sha256=
-ai_edge_quantizer/recipe_manager_test.py,sha256=
-ai_edge_quantizer/recipe_test.py,sha256=
+ai_edge_quantizer/qtyping.py,sha256=zXXmLBZUT-cfjnQrqDkytDZaGg3z_yy1wWhKr34_XVg,16792
+ai_edge_quantizer/quantizer.py,sha256=ckAEOnnBxuCKZuvlzdChevCKPuE-IeDPHCNtFTWr250,17857
+ai_edge_quantizer/quantizer_test.py,sha256=m6f4ayyaF3yQb9i4V0aFAbmGw0OKZ2Zam1RoTPh-u24,22917
+ai_edge_quantizer/recipe.py,sha256=MEkfQ2Sg3KAE9LAORHWcbjYNPg06EUbwc1d-VspQA2U,6461
+ai_edge_quantizer/recipe_manager.py,sha256=6dgbE-IZfEetzXH3p3Qm_9eQutNDOpZnMpiaLTbP-ZQ,14744
+ai_edge_quantizer/recipe_manager_test.py,sha256=H-B75vwPN5ND-nUa3pOXizeHTv4mufPiC5cL_OlDIYU,34040
+ai_edge_quantizer/recipe_test.py,sha256=GKuo6N65wKLS2xwSpjd-BWWeVRpF1zc7Yt7phSMYSxA,5905
 ai_edge_quantizer/transformation_instruction_generator.py,sha256=iMGXy7_ufqgQRzu4drAfO31VGdze35peEFh1BMZlVHk,27714
 ai_edge_quantizer/transformation_instruction_generator_test.py,sha256=Zw3EOSnvzjuB4NWeo129eJZxK_EHno9oF9OtEQ-0dnM,48905
 ai_edge_quantizer/transformation_performer.py,sha256=o4J6OUbI0dLoobVYjkOFw5Po3yH0gZJXrfuTIYais4o,13029
@@ -28,7 +28,7 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCP
 ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
 ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=EqIHGEZ1LgUrTN7zf880RuAzEv3Qy7kgh5ivObJGHSo,22646
 ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
-ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=
+ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=rkf7jLPVDKpx2ju1LyyP7bxc6n34cLD2E3w2mxLd6qE,35344
 ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=GGf_n3wIeg3GB_eGsmyNJ0fTcxgpeMMbugTMRONK6TQ,3553
 ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=BDdn_uBZakfHyzdMJPKadsOqxqyC-s6W2ZzFH99L4fE,8652
 ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=sT5eX5TLZEHTtPfnSkCPDlS0sQxlTFWbCsbvOuj--yY,8889
@@ -38,7 +38,7 @@ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1
 ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
 ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
 ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
-ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=
+ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=uCREMXi0U2ckhXXfgGVzwSgjFZc0IbtnFU-OjlG9IO8,17146
 ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=7kHluzpteMv36hFD6LD_qnwwMoE1GKUP4bGmGMFbOdA,12755
 ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
 ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=QrEeCuvA7gY_vK1nbKtqassNDClyAjN1ClZIiw63k5U,35895
@@ -62,14 +62,14 @@ ai_edge_quantizer/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V
 ai_edge_quantizer/utils/calibration_utils.py,sha256=e3dG7Nm94Ix0hkTWTWPUhEG6a8QR_cAM3PSwblfJV5g,15106
 ai_edge_quantizer/utils/calibration_utils_test.py,sha256=4BlksXl7b4yptL8xPR67hmJCnjhN9V10a2PunzfHrUE,9372
 ai_edge_quantizer/utils/test_utils.py,sha256=a4Nk-wbeB09dFjTDZiA0K67d26j5DD0UDH_GIVmVG_4,8685
-ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=
+ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=RL6oq6FzZj-xV0Zgh0UBn7-fOQaRXSxZ-PPG_LmtyUY,11384
 ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py,sha256=K1SbK8q92qYVtiVj0I0GtugsPTkpIpEKv9zakvFV_Sc,8555
 ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOihexmizeJqt4SQcET9aA,14925
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
 ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
 ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
-ai_edge_quantizer_nightly-0.3.0.
-ai_edge_quantizer_nightly-0.3.0.
-ai_edge_quantizer_nightly-0.3.0.
-ai_edge_quantizer_nightly-0.3.0.
-ai_edge_quantizer_nightly-0.3.0.
+ai_edge_quantizer_nightly-0.3.0.dev20250815.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.3.0.dev20250815.dist-info/METADATA,sha256=62GLJcnK95nOeJiUr5ktSM88aCPNDQKGZvlPIEj8p0Q,1528
+ai_edge_quantizer_nightly-0.3.0.dev20250815.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_quantizer_nightly-0.3.0.dev20250815.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.3.0.dev20250815.dist-info/RECORD,,