ai-edge-quantizer-nightly 0.1.0.dev20250411__py3-none-any.whl → 0.1.0.dev20250413__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithm_manager_api_test.py +7 -0
- ai_edge_quantizer/default_policy.py +31 -9
- ai_edge_quantizer/params_generator.py +138 -59
- ai_edge_quantizer/params_generator_test.py +156 -62
- {ai_edge_quantizer_nightly-0.1.0.dev20250411.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250413.dist-info}/METADATA +1 -1
- {ai_edge_quantizer_nightly-0.1.0.dev20250411.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250413.dist-info}/RECORD +9 -9
- {ai_edge_quantizer_nightly-0.1.0.dev20250411.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250413.dist-info}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250411.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250413.dist-info}/WHEEL +0 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250411.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250413.dist-info}/top_level.txt +0 -0
@@ -18,6 +18,7 @@
|
|
18
18
|
from absl.testing import parameterized
|
19
19
|
from tensorflow.python.platform import googletest
|
20
20
|
from ai_edge_quantizer import algorithm_manager_api
|
21
|
+
from ai_edge_quantizer import default_policy
|
21
22
|
from ai_edge_quantizer import qtyping
|
22
23
|
|
23
24
|
_TFLOpName = qtyping.TFLOperationName
|
@@ -205,6 +206,12 @@ class AlgorithmManagerApiTest(parameterized.TestCase):
|
|
205
206
|
self._alg_manager._config_check_policy_registry[test_algorithm_name]
|
206
207
|
)
|
207
208
|
|
209
|
+
def test_default_policy_not_empty(self):
|
210
|
+
"""Tests that the default policy is not empty & no empty policy is generated."""
|
211
|
+
self.assertNotEmpty(default_policy.DEFAULT_CONFIG_CHECK_POLICY)
|
212
|
+
for policy in default_policy.DEFAULT_CONFIG_CHECK_POLICY.values():
|
213
|
+
self.assertNotEmpty(policy)
|
214
|
+
|
208
215
|
|
209
216
|
if __name__ == "__main__":
|
210
217
|
googletest.main()
|
@@ -57,6 +57,17 @@ DEFAULT_JSON_POLICY = """
|
|
57
57
|
"explicit_dequantize": false,
|
58
58
|
"compute_precision": "INTEGER"
|
59
59
|
},
|
60
|
+
"dynamic_wi4_afp32_blockwise": {
|
61
|
+
"weight_tensor_config": {
|
62
|
+
"num_bits": 4,
|
63
|
+
"symmetric": [true],
|
64
|
+
"granularity": ["BLOCKWISE"],
|
65
|
+
"dtype": "INT",
|
66
|
+
"block_size": [32, 64, 96, 128, 256]
|
67
|
+
},
|
68
|
+
"explicit_dequantize": false,
|
69
|
+
"compute_precision": "INTEGER"
|
70
|
+
},
|
60
71
|
"static_wi8_ai16": {
|
61
72
|
"activation_tensor_config": {
|
62
73
|
"num_bits": 16,
|
@@ -216,6 +227,7 @@ DEFAULT_JSON_POLICY = """
|
|
216
227
|
"FULLY_CONNECTED"
|
217
228
|
],
|
218
229
|
"dynamic_wi4_afp32": ["FULLY_CONNECTED", "EMBEDDING_LOOKUP", "CONV_2D"],
|
230
|
+
"dynamic_wi4_afp32_blockwise": ["EMBEDDING_LOOKUP", "FULLY_CONNECTED"],
|
219
231
|
"weightonly_wi8_afp32": [
|
220
232
|
"BATCH_MATMUL",
|
221
233
|
"CONV_2D",
|
@@ -259,6 +271,7 @@ def _unroll_json_config(
|
|
259
271
|
|
260
272
|
# Then unroll weight configs and turn them into quantization configs.
|
261
273
|
quant_configs = []
|
274
|
+
weight_configs = []
|
262
275
|
for symmetric in json_config["weight_tensor_config"]["symmetric"]:
|
263
276
|
for granularity in json_config["weight_tensor_config"]["granularity"]:
|
264
277
|
tensor_config = {
|
@@ -267,6 +280,16 @@ def _unroll_json_config(
|
|
267
280
|
"granularity": granularity,
|
268
281
|
"dtype": json_config["weight_tensor_config"]["dtype"],
|
269
282
|
}
|
283
|
+
if "block_size" in json_config["weight_tensor_config"]:
|
284
|
+
for block_size in json_config["weight_tensor_config"]["block_size"]:
|
285
|
+
tensor_config["block_size"] = block_size
|
286
|
+
weight_configs.append(
|
287
|
+
qtyping.TensorQuantizationConfig.from_dict(tensor_config)
|
288
|
+
)
|
289
|
+
else:
|
290
|
+
weight_configs.append(
|
291
|
+
qtyping.TensorQuantizationConfig.from_dict(tensor_config)
|
292
|
+
)
|
270
293
|
|
271
294
|
if activation_configs:
|
272
295
|
for activation_config in activation_configs:
|
@@ -281,15 +304,14 @@ def _unroll_json_config(
|
|
281
304
|
)
|
282
305
|
)
|
283
306
|
else:
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
)
|
307
|
+
for weight_config in weight_configs:
|
308
|
+
quant_configs.append(
|
309
|
+
qtyping.OpQuantizationConfig(
|
310
|
+
weight_tensor_config=weight_config,
|
311
|
+
compute_precision=json_config["compute_precision"],
|
312
|
+
explicit_dequantize=json_config["explicit_dequantize"],
|
313
|
+
)
|
314
|
+
)
|
293
315
|
|
294
316
|
return quant_configs
|
295
317
|
|
@@ -161,7 +161,7 @@ class ParamsGenerator:
|
|
161
161
|
RuntimeError: If the tensors sharing the same buffer have different
|
162
162
|
quantization settings.
|
163
163
|
"""
|
164
|
-
self.
|
164
|
+
self._check_and_fix_buffer_sharing()
|
165
165
|
|
166
166
|
def _update_model_quant_results(
|
167
167
|
self,
|
@@ -273,9 +273,11 @@ class ParamsGenerator:
|
|
273
273
|
"""Mark tensors that require buffer duplication.
|
274
274
|
|
275
275
|
Marking a tensor means adding a DUPLICATE_BUFFER transformation as the first
|
276
|
-
transformation to be applied for each consumer of the tensor.
|
276
|
+
transformation to be applied for each consumer of the tensor. Need to do
|
277
|
+
that for each consumer to preserve a zero layer and not affect the
|
278
|
+
horizontal optimization later in the transformation instructions generator.
|
277
279
|
|
278
|
-
|
280
|
+
Marks all tensors within each of the provided buffers as requiring buffer
|
279
281
|
duplication, except for the last tensor. The order of tensors is assumed to
|
280
282
|
be the same during both the marking and transformation performer steps, as
|
281
283
|
determined by `self.buffer_to_tensors`. This allows the final tensor to
|
@@ -292,65 +294,148 @@ class ParamsGenerator:
|
|
292
294
|
0, _QuantTrans.DUPLICATE_BUFFER
|
293
295
|
)
|
294
296
|
|
295
|
-
def
|
296
|
-
|
297
|
+
def _mark_tensors_requiring_tensor_duplication(
|
298
|
+
self, tensor_names_to_duplicate
|
299
|
+
) -> None:
|
300
|
+
"""Mark tensors that require tensor duplication.
|
301
|
+
|
302
|
+
Marking a tensor means adding a DUPLICATE_TENSOR transformation as the first
|
303
|
+
transformation to be applied for each consumer of the tensor. Need to do
|
304
|
+
that for each consumer to preserve a zero layer and not affect the
|
305
|
+
horizontal optimization later in the transformation instructions generator.
|
306
|
+
|
307
|
+
Args:
|
308
|
+
tensor_names_to_duplicate: Names of tensors to duplicate.
|
309
|
+
"""
|
310
|
+
for tensor_name in tensor_names_to_duplicate:
|
311
|
+
for consumer_params in self.model_quant_results[tensor_name].consumers:
|
312
|
+
consumer_params.transformations.insert(0, _QuantTrans.DUPLICATE_TENSOR)
|
313
|
+
|
314
|
+
def _check_buffer_sharing_for_tensor(self, tensor: Any) -> bool:
|
315
|
+
"""Check buffer sharing for the tensor against itself.
|
316
|
+
|
317
|
+
Args:
|
318
|
+
tensor: The tensor to check.
|
319
|
+
|
320
|
+
Returns:
|
321
|
+
Whether the tensor has compatible quantization parameters.
|
297
322
|
|
298
323
|
Raises:
|
299
|
-
RuntimeError: If the
|
300
|
-
|
324
|
+
RuntimeError: If the tensor has incompatible quantization parameters
|
325
|
+
and the buffer is not constant.
|
301
326
|
"""
|
302
|
-
|
303
|
-
|
304
|
-
|
327
|
+
tensor_params = self.model_quant_results.get(
|
328
|
+
tfl_flatbuffer_utils.get_tensor_name(tensor), None
|
329
|
+
)
|
330
|
+
if tensor_params is None:
|
331
|
+
return True
|
332
|
+
|
333
|
+
if _are_tensor_consumer_params_compatible(tensor_params):
|
334
|
+
return True
|
335
|
+
elif _is_constant_tensor(tensor, self.flatbuffer_model.buffers):
|
336
|
+
return False
|
337
|
+
else:
|
338
|
+
error_msg = (
|
339
|
+
f'The tensor {tensor.name} consumers do not have the same'
|
340
|
+
' quantization parameters. Please modify your quantization recipe to'
|
341
|
+
' make sure the two tensors have the same quantization settings.'
|
305
342
|
)
|
343
|
+
raise RuntimeError(error_msg)
|
306
344
|
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
345
|
+
def _check_buffer_sharing_for_self_compatible_tensors(
|
346
|
+
self, tensor1: Any, tensor2: Any
|
347
|
+
) -> bool:
|
348
|
+
"""Check a pair of self compatible tensors have the same quantization params.
|
311
349
|
|
312
|
-
|
313
|
-
|
314
|
-
if first_tensor_params is None:
|
315
|
-
continue
|
350
|
+
Self compatible means that all of the tensor's consumers have the same quantization
|
351
|
+
parameters.
|
316
352
|
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
continue
|
353
|
+
Args:
|
354
|
+
tensor1: The first tensor to check.
|
355
|
+
tensor2: The second tensor to check.
|
321
356
|
|
322
|
-
|
323
|
-
|
324
|
-
):
|
325
|
-
if _are_distinct_tensors_with_shared_buffer(
|
326
|
-
first_tensor, tensor, self.flatbuffer_model.buffers
|
327
|
-
):
|
328
|
-
buffers_to_duplicate.append(first_tensor.buffer)
|
329
|
-
break
|
330
|
-
else:
|
331
|
-
error_msg = (
|
332
|
-
f'The tensors {first_tensor.name} and {tensor.name} do not have'
|
333
|
-
' the same quantization parameters even though they share the'
|
334
|
-
' same buffer. Please modify your quantization recipe to make'
|
335
|
-
' sure the two tensors have the same quantization settings.'
|
336
|
-
)
|
337
|
-
raise RuntimeError(error_msg)
|
357
|
+
Returns:
|
358
|
+
Whether the tensors have compatible quantization parameters.
|
338
359
|
|
339
|
-
|
360
|
+
Raises:
|
361
|
+
RuntimeError: If the tensors have incompatible quantization parameters
|
362
|
+
and the buffer is not constant.
|
363
|
+
"""
|
364
|
+
tensor1_params = self.model_quant_results.get(
|
365
|
+
tfl_flatbuffer_utils.get_tensor_name(tensor1), None
|
366
|
+
)
|
367
|
+
tensor2_params = self.model_quant_results.get(
|
368
|
+
tfl_flatbuffer_utils.get_tensor_name(tensor2), None
|
369
|
+
)
|
340
370
|
|
371
|
+
if tensor1_params is None or tensor2_params is None:
|
372
|
+
return True
|
341
373
|
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
374
|
+
if _are_self_compatible_tensors_compatible_to_each_other(
|
375
|
+
tensor1_params, tensor2_params
|
376
|
+
):
|
377
|
+
return True
|
378
|
+
elif _is_constant_tensor(tensor1, self.flatbuffer_model.buffers):
|
379
|
+
return False
|
380
|
+
else:
|
381
|
+
error_msg = (
|
382
|
+
f'The tensors {tensor1.name} and {tensor2.name} do not have'
|
383
|
+
' the same quantization parameters even though they share the'
|
384
|
+
' same buffer. Please modify your quantization recipe to make'
|
385
|
+
' sure the two tensors have the same quantization settings.'
|
352
386
|
)
|
353
|
-
|
387
|
+
raise RuntimeError(error_msg)
|
388
|
+
|
389
|
+
def _check_and_fix_buffer_sharing(self) -> None:
|
390
|
+
"""Check and fix tensor/buffer sharing issues when possible.
|
391
|
+
|
392
|
+
This function checks if tensors sharing the same buffer have the same
|
393
|
+
quantization settings. If not, when it's possible, it will fix it by marking
|
394
|
+
such tensors or buffers to be duplicated. Otherwise, it will raise an error.
|
395
|
+
|
396
|
+
Possible cases that can be fixed by duplication:
|
397
|
+
1. A constant tensor receives different quantization parameters from its
|
398
|
+
consumers. In this case, the tensor is marked for duplication.
|
399
|
+
2. Two or more tensors share the same constant buffer and have different
|
400
|
+
quantization parameters. In this case, the buffer is marked for
|
401
|
+
duplication.
|
402
|
+
|
403
|
+
Raises:
|
404
|
+
RuntimeError: If the tensors sharing the same buffer have different
|
405
|
+
quantization settings and it can't be resolved by duplicating the
|
406
|
+
buffer/tensor.
|
407
|
+
"""
|
408
|
+
buffers_to_duplicate = []
|
409
|
+
tensor_names_to_duplicate = []
|
410
|
+
for buffer_idx, tensors in self.buffer_to_tensors.items():
|
411
|
+
if not tensors:
|
412
|
+
continue
|
413
|
+
# Check if any of the tensors needs to be duplicated.
|
414
|
+
for tensor in tensors:
|
415
|
+
if not self._check_buffer_sharing_for_tensor(tensor):
|
416
|
+
tensor_names_to_duplicate.append(
|
417
|
+
tfl_flatbuffer_utils.get_tensor_name(tensor)
|
418
|
+
)
|
419
|
+
# Check if the buffer needs to be duplicated.
|
420
|
+
tensor_1 = tensors[0]
|
421
|
+
tensor_name_1 = tfl_flatbuffer_utils.get_tensor_name(tensor_1)
|
422
|
+
if tensor_name_1 in tensor_names_to_duplicate:
|
423
|
+
buffers_to_duplicate.append(buffer_idx)
|
424
|
+
continue
|
425
|
+
for tensor_2 in tensors[1:]:
|
426
|
+
tensor_name_2 = tfl_flatbuffer_utils.get_tensor_name(tensor_2)
|
427
|
+
if (
|
428
|
+
tensor_name_2 in tensor_names_to_duplicate
|
429
|
+
or not self._check_buffer_sharing_for_self_compatible_tensors(
|
430
|
+
tensor_1, tensor_2
|
431
|
+
)
|
432
|
+
):
|
433
|
+
buffers_to_duplicate.append(buffer_idx)
|
434
|
+
break
|
435
|
+
|
436
|
+
# Fix the buffer sharing issues.
|
437
|
+
self._mark_tensors_requiring_buffer_duplication(buffers_to_duplicate)
|
438
|
+
self._mark_tensors_requiring_tensor_duplication(tensor_names_to_duplicate)
|
354
439
|
|
355
440
|
|
356
441
|
def _are_tensor_consumer_params_compatible(
|
@@ -447,12 +532,6 @@ def _compatible_tensor_params(
|
|
447
532
|
return False
|
448
533
|
|
449
534
|
|
450
|
-
def
|
451
|
-
|
452
|
-
|
453
|
-
"""Check if two tensors are different and share a constant buffer."""
|
454
|
-
are_different_tensors = tensor1.name != tensor2.name
|
455
|
-
do_share_buffer = tensor1.buffer == tensor2.buffer
|
456
|
-
is_constant_buffer = buffers[tensor1.buffer].data is not None
|
457
|
-
|
458
|
-
return are_different_tensors and do_share_buffer and is_constant_buffer
|
535
|
+
def _is_constant_tensor(tensor: Any, buffers: Sequence[Any]) -> bool:
|
536
|
+
"""Check if the tensor is a constant tensor."""
|
537
|
+
return buffers[tensor.buffer].data is not None
|
@@ -28,7 +28,6 @@ from ai_edge_quantizer import recipe_manager
|
|
28
28
|
from ai_edge_quantizer.utils import test_utils
|
29
29
|
from ai_edge_quantizer.utils import tfl_flatbuffer_utils
|
30
30
|
from ai_edge_quantizer.utils import tfl_interpreter_utils
|
31
|
-
from ai_edge_litert import schema_py_generated # pylint: disable=g-direct-tensorflow-import
|
32
31
|
|
33
32
|
|
34
33
|
_ComputePrecision = qtyping.ComputePrecision
|
@@ -654,12 +653,164 @@ class ParamsGeneratorTest(parameterized.TestCase):
|
|
654
653
|
consumer.transformations[0],
|
655
654
|
_QTransf.DUPLICATE_BUFFER,
|
656
655
|
)
|
657
|
-
elif quant_params[tensor_name].consumers is not None:
|
658
|
-
for consumer in quant_params[tensor_name].consumers:
|
659
656
|
self.assertNotIn(
|
660
|
-
_QTransf.DUPLICATE_BUFFER,
|
661
|
-
consumer.transformations,
|
657
|
+
_QTransf.DUPLICATE_BUFFER, consumer.transformations[1:]
|
662
658
|
)
|
659
|
+
elif quant_params[tensor_name].consumers is not None:
|
660
|
+
for consumer in quant_params[tensor_name].consumers:
|
661
|
+
self.assertNotIn(_QTransf.DUPLICATE_BUFFER, consumer.transformations)
|
662
|
+
|
663
|
+
def _get_fc_recipe_entry(self, regex: str, num_bits: int):
|
664
|
+
return {
|
665
|
+
'regex': regex,
|
666
|
+
'operation': 'FULLY_CONNECTED',
|
667
|
+
'algorithm_key': 'min_max_uniform_quantize',
|
668
|
+
'op_config': {
|
669
|
+
'weight_tensor_config': {
|
670
|
+
'num_bits': num_bits,
|
671
|
+
'symmetric': True,
|
672
|
+
'granularity': 'CHANNELWISE',
|
673
|
+
'dtype': 'INT',
|
674
|
+
'block_size': 0,
|
675
|
+
},
|
676
|
+
'compute_precision': 'INTEGER',
|
677
|
+
'explicit_dequantize': False,
|
678
|
+
'skip_checks': False,
|
679
|
+
'min_weight_elements': 0,
|
680
|
+
},
|
681
|
+
}
|
682
|
+
|
683
|
+
@parameterized.named_parameters(
|
684
|
+
dict(
|
685
|
+
testcase_name='fc1_quant_fc2_no_quant',
|
686
|
+
fc1_num_bits=8,
|
687
|
+
fc2_num_bits=None,
|
688
|
+
),
|
689
|
+
dict(
|
690
|
+
testcase_name='fc1_no_quant_fc2_quant',
|
691
|
+
fc1_num_bits=None,
|
692
|
+
fc2_num_bits=8,
|
693
|
+
),
|
694
|
+
dict(
|
695
|
+
testcase_name='fc1_quant_fc2_quant_different_params',
|
696
|
+
fc1_num_bits=8,
|
697
|
+
fc2_num_bits=4,
|
698
|
+
),
|
699
|
+
)
|
700
|
+
def test_generate_params_marks_correct_buffers_tensors_for_duplication(
|
701
|
+
self,
|
702
|
+
fc1_num_bits,
|
703
|
+
fc2_num_bits,
|
704
|
+
):
|
705
|
+
model_path = os.path.join(
|
706
|
+
TEST_DATA_PREFIX_PATH,
|
707
|
+
'tests/models/constant_tensor_and_buffer_only_sharing_weight_fcs.tflite',
|
708
|
+
)
|
709
|
+
sig1_fc1_regex = 'BatchMatMulV3;'
|
710
|
+
sig1_fc2_regex = 'PartitionedCall:0;'
|
711
|
+
recipe = []
|
712
|
+
if fc1_num_bits is not None:
|
713
|
+
recipe.append(self._get_fc_recipe_entry(sig1_fc1_regex, fc1_num_bits))
|
714
|
+
if fc2_num_bits is not None:
|
715
|
+
recipe.append(self._get_fc_recipe_entry(sig1_fc2_regex, fc2_num_bits))
|
716
|
+
self._recipe_manager.load_quantization_recipe(recipe)
|
717
|
+
pg = params_generator.ParamsGenerator(model_path)
|
718
|
+
quant_params = pg.generate_quantization_parameters(self._recipe_manager)
|
719
|
+
|
720
|
+
expected_tensor = 'arith.constant'
|
721
|
+
consumers = quant_params[expected_tensor].consumers
|
722
|
+
self.assertLen(consumers, 2)
|
723
|
+
|
724
|
+
# Check FC1 transformations.
|
725
|
+
if fc1_num_bits is None:
|
726
|
+
fc1_quant_transformation = _QTransf.NO_QUANTIZE
|
727
|
+
else:
|
728
|
+
fc1_quant_transformation = _QTransf.QUANTIZE_TENSOR
|
729
|
+
self.assertEqual(
|
730
|
+
consumers[0].transformations,
|
731
|
+
[
|
732
|
+
_QTransf.DUPLICATE_TENSOR,
|
733
|
+
_QTransf.DUPLICATE_BUFFER,
|
734
|
+
fc1_quant_transformation,
|
735
|
+
],
|
736
|
+
)
|
737
|
+
# Check FC2 transformations.
|
738
|
+
if fc2_num_bits is None:
|
739
|
+
fc2_quant_transformation = _QTransf.NO_QUANTIZE
|
740
|
+
else:
|
741
|
+
fc2_quant_transformation = _QTransf.QUANTIZE_TENSOR
|
742
|
+
self.assertEqual(
|
743
|
+
consumers[1].transformations,
|
744
|
+
[
|
745
|
+
_QTransf.DUPLICATE_TENSOR,
|
746
|
+
_QTransf.DUPLICATE_BUFFER,
|
747
|
+
fc2_quant_transformation,
|
748
|
+
],
|
749
|
+
)
|
750
|
+
# Check that no other tensor has tensor or buffer duplication
|
751
|
+
# transformations.
|
752
|
+
for tensor_name, params in quant_params.items():
|
753
|
+
if tensor_name == expected_tensor:
|
754
|
+
continue
|
755
|
+
for consumer in params.consumers:
|
756
|
+
self.assertNotIn(_QTransf.DUPLICATE_TENSOR, consumer.transformations)
|
757
|
+
self.assertNotIn(_QTransf.DUPLICATE_BUFFER, consumer.transformations)
|
758
|
+
|
759
|
+
def test_generate_params_returns_valid_results_when_multiple_tensor_duplication_for_one_buffer(
|
760
|
+
self,
|
761
|
+
):
|
762
|
+
model_path = os.path.join(
|
763
|
+
TEST_DATA_PREFIX_PATH,
|
764
|
+
'tests/models/constant_tensor_and_buffer_only_sharing_weight_fcs.tflite',
|
765
|
+
)
|
766
|
+
sig1_fc1_regex = 'BatchMatMulV3;'
|
767
|
+
sig1_fc2_regex = 'PartitionedCall:0;'
|
768
|
+
sig2_fc1_regex = 'BatchMatMulV31;'
|
769
|
+
sig2_fc2_regex = 'PartitionedCall_1:0;'
|
770
|
+
recipe = [
|
771
|
+
self._get_fc_recipe_entry(sig1_fc1_regex, num_bits=8),
|
772
|
+
self._get_fc_recipe_entry(sig1_fc2_regex, num_bits=4),
|
773
|
+
self._get_fc_recipe_entry(sig2_fc1_regex, num_bits=8),
|
774
|
+
self._get_fc_recipe_entry(sig2_fc2_regex, num_bits=4),
|
775
|
+
]
|
776
|
+
self._recipe_manager.load_quantization_recipe(recipe)
|
777
|
+
pg = params_generator.ParamsGenerator(model_path)
|
778
|
+
quant_params = pg.generate_quantization_parameters(self._recipe_manager)
|
779
|
+
# Check transformations for sig1.
|
780
|
+
sig1_expected_tensor = 'arith.constant'
|
781
|
+
sig1_consumers = quant_params[sig1_expected_tensor].consumers
|
782
|
+
self.assertLen(sig1_consumers, 2)
|
783
|
+
sig1_expected_transformations = [
|
784
|
+
_QTransf.DUPLICATE_TENSOR,
|
785
|
+
_QTransf.DUPLICATE_BUFFER,
|
786
|
+
_QTransf.QUANTIZE_TENSOR,
|
787
|
+
]
|
788
|
+
for sig1_consumer in sig1_consumers:
|
789
|
+
self.assertEqual(
|
790
|
+
sig1_consumer.transformations,
|
791
|
+
sig1_expected_transformations,
|
792
|
+
)
|
793
|
+
# Check transformations for sig2.
|
794
|
+
sig2_expected_tensor = 'arith.constant1'
|
795
|
+
sig2_consumers = quant_params[sig2_expected_tensor].consumers
|
796
|
+
self.assertLen(sig2_consumers, 2)
|
797
|
+
sig2_expected_transformations = [
|
798
|
+
_QTransf.DUPLICATE_TENSOR,
|
799
|
+
_QTransf.QUANTIZE_TENSOR,
|
800
|
+
]
|
801
|
+
for sig2_consumer in sig2_consumers:
|
802
|
+
self.assertEqual(
|
803
|
+
sig2_consumer.transformations,
|
804
|
+
sig2_expected_transformations,
|
805
|
+
)
|
806
|
+
# Check that no other tensor has tensor or buffer duplication
|
807
|
+
# transformations.
|
808
|
+
for tensor_name, params in quant_params.items():
|
809
|
+
if tensor_name in [sig1_expected_tensor, sig2_expected_tensor]:
|
810
|
+
continue
|
811
|
+
for consumer in params.consumers:
|
812
|
+
self.assertNotIn(_QTransf.DUPLICATE_TENSOR, consumer.transformations)
|
813
|
+
self.assertNotIn(_QTransf.DUPLICATE_BUFFER, consumer.transformations)
|
663
814
|
|
664
815
|
@parameterized.named_parameters(
|
665
816
|
dict(
|
@@ -996,62 +1147,5 @@ class ParamsGeneratorAlreadyQuantizedModelTest(googletest.TestCase):
|
|
996
1147
|
_ = params_generator.ParamsGenerator(test_model_path)
|
997
1148
|
|
998
1149
|
|
999
|
-
def _create_tensor(name: str, buffer_idx: int) -> schema_py_generated.TensorT:
|
1000
|
-
tensor = schema_py_generated.TensorT()
|
1001
|
-
tensor.name = name.encode('utf-8')
|
1002
|
-
tensor.buffer = buffer_idx
|
1003
|
-
return tensor
|
1004
|
-
|
1005
|
-
|
1006
|
-
def _create_buffer(data: Any) -> schema_py_generated.BufferT:
|
1007
|
-
buffer = schema_py_generated.BufferT()
|
1008
|
-
buffer.data = data
|
1009
|
-
return buffer
|
1010
|
-
|
1011
|
-
|
1012
|
-
class ParamsGeneratorUtilsTest(parameterized.TestCase):
|
1013
|
-
|
1014
|
-
@parameterized.named_parameters(
|
1015
|
-
dict(
|
1016
|
-
testcase_name='same_tensors',
|
1017
|
-
tensor1=_create_tensor(name='tensor1', buffer_idx=0),
|
1018
|
-
tensor2=_create_tensor(name='tensor1', buffer_idx=0),
|
1019
|
-
buffers=[_create_buffer(data=np.array([1, 2, 3]))],
|
1020
|
-
expected=False,
|
1021
|
-
),
|
1022
|
-
dict(
|
1023
|
-
testcase_name='tensors_do_not_share_buffer',
|
1024
|
-
tensor1=_create_tensor(name='tensor1', buffer_idx=0),
|
1025
|
-
tensor2=_create_tensor(name='tensor2', buffer_idx=1),
|
1026
|
-
buffers=[
|
1027
|
-
_create_buffer(data=np.array([1, 2, 3])),
|
1028
|
-
_create_buffer(data=np.array([4, 5, 6])),
|
1029
|
-
],
|
1030
|
-
expected=False,
|
1031
|
-
),
|
1032
|
-
dict(
|
1033
|
-
testcase_name='different_tensors_share_non_constant_buffer',
|
1034
|
-
tensor1=_create_tensor(name='tensor1', buffer_idx=0),
|
1035
|
-
tensor2=_create_tensor(name='tensor2', buffer_idx=0),
|
1036
|
-
buffers=[_create_buffer(data=None)],
|
1037
|
-
expected=False,
|
1038
|
-
),
|
1039
|
-
dict(
|
1040
|
-
testcase_name='different_tensors_share_constant_buffer',
|
1041
|
-
tensor1=_create_tensor(name='tensor1', buffer_idx=0),
|
1042
|
-
tensor2=_create_tensor(name='tensor2', buffer_idx=0),
|
1043
|
-
buffers=[_create_buffer(data=np.array([1, 2, 3]))],
|
1044
|
-
expected=True,
|
1045
|
-
),
|
1046
|
-
)
|
1047
|
-
def test__are_distinct_tensors_with_shared_buffer(
|
1048
|
-
self, tensor1, tensor2, buffers, expected
|
1049
|
-
):
|
1050
|
-
got = params_generator._are_distinct_tensors_with_shared_buffer(
|
1051
|
-
tensor1=tensor1, tensor2=tensor2, buffers=buffers
|
1052
|
-
)
|
1053
|
-
self.assertEqual(expected, got)
|
1054
|
-
|
1055
|
-
|
1056
1150
|
if __name__ == '__main__':
|
1057
1151
|
googletest.main()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ai-edge-quantizer-nightly
|
3
|
-
Version: 0.1.0.dev20250411
|
3
|
+
Version: 0.1.0.dev20250413
|
4
4
|
Summary: A quantizer for advanced developers to quantize converted AI Edge models.
|
5
5
|
Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
|
6
6
|
Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
|
@@ -1,17 +1,17 @@
|
|
1
1
|
ai_edge_quantizer/__init__.py,sha256=4pFSkukSwahYyzwqia0yPRyz8TnFQfGRthVJhYpMWas,793
|
2
2
|
ai_edge_quantizer/algorithm_manager.py,sha256=0uootLsVD6h9ph9TrnXZMI-ExkX8UvXSV0lbWxBLybU,10492
|
3
3
|
ai_edge_quantizer/algorithm_manager_api.py,sha256=u903TG0s1uIDhJqfeJne3CFl8A93phZrwgV2-hwdcXU,9247
|
4
|
-
ai_edge_quantizer/algorithm_manager_api_test.py,sha256=
|
4
|
+
ai_edge_quantizer/algorithm_manager_api_test.py,sha256=w6bSONvXkX6bzXAGc0-7b6gNDt9oz9ieq97KP8Sg_JU,7666
|
5
5
|
ai_edge_quantizer/calibrator.py,sha256=n7AD9j7UScR-CieoI6DQRMeiG_fhLBfSLRiM4460xaM,11895
|
6
6
|
ai_edge_quantizer/calibrator_test.py,sha256=C_oWOaRugPKYX74jF-eRFH-k6nGOdA8I9_uPiocaOuE,11900
|
7
7
|
ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
|
8
|
-
ai_edge_quantizer/default_policy.py,sha256=
|
8
|
+
ai_edge_quantizer/default_policy.py,sha256=81z4cruBK7mGFt8xFRZK5LKya65axuZwo2zpbcYSicc,11099
|
9
9
|
ai_edge_quantizer/model_modifier.py,sha256=SPt9X-xBzRvcd4xIS24zLHt3aUS2QwsNDqweFqitCAo,7109
|
10
10
|
ai_edge_quantizer/model_modifier_test.py,sha256=cJd04SLOG-fQZZNZPcisoBLx3cLtWEwGqUBbLb-pif4,4751
|
11
11
|
ai_edge_quantizer/model_validator.py,sha256=fRNz0jO54cthPTibsCuViUXUuFRHl_fbvEiCukIVy20,13030
|
12
12
|
ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
|
13
|
-
ai_edge_quantizer/params_generator.py,sha256=
|
14
|
-
ai_edge_quantizer/params_generator_test.py,sha256=
|
13
|
+
ai_edge_quantizer/params_generator.py,sha256=NEZeHVVIeynmhRzPjl9o-acvWfauFgCS4i45pWFw3V8,20052
|
14
|
+
ai_edge_quantizer/params_generator_test.py,sha256=RDYoRZDJfEZRtjlTAU2kZ_4t3JHOqEHxfJX9V4ETAhg,40597
|
15
15
|
ai_edge_quantizer/qtyping.py,sha256=FqelZu7j0fGBRSCv_VVsuf3VmbfVlYJGgsjvdMXGgaw,15284
|
16
16
|
ai_edge_quantizer/quantizer.py,sha256=g3DMqFMrMpt9jQttCE0WcdNbMtk0JZnmN5MmCHrNdyM,13202
|
17
17
|
ai_edge_quantizer/quantizer_test.py,sha256=K_HBA56JkFI3HL8VLWCqGEfC0ISh5ldMKoNyBdGRAJg,20368
|
@@ -66,8 +66,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=x2xA2CFPpe_2trcV8v5xGaBE
|
|
66
66
|
ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=Op3JxtOqlrjzmYF18jnnstL1k9xiY9kKJ8S2vklKGkc,11327
|
67
67
|
ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
|
68
68
|
ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
|
69
|
-
ai_edge_quantizer_nightly-0.1.0.
|
70
|
-
ai_edge_quantizer_nightly-0.1.0.
|
71
|
-
ai_edge_quantizer_nightly-0.1.0.
|
72
|
-
ai_edge_quantizer_nightly-0.1.0.
|
73
|
-
ai_edge_quantizer_nightly-0.1.0.
|
69
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250413.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
70
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250413.dist-info/METADATA,sha256=6aamAYoiKg6FpaZrdP1NXEL-fIWLbvhdkqgc326TYtw,1527
|
71
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250413.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
72
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250413.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
73
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250413.dist-info/RECORD,,
|
File without changes
|
File without changes
|