ai-edge-quantizer-nightly 0.0.1.dev20250313__py3-none-any.whl → 0.0.1.dev20250315__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/model_modifier.py +35 -1
- ai_edge_quantizer/params_generator.py +53 -7
- ai_edge_quantizer/params_generator_test.py +112 -19
- ai_edge_quantizer/quantizer_test.py +4 -10
- ai_edge_quantizer/transformation_instruction_generator_test.py +62 -0
- ai_edge_quantizer/transformation_performer.py +19 -10
- {ai_edge_quantizer_nightly-0.0.1.dev20250313.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250315.dist-info}/METADATA +1 -1
- {ai_edge_quantizer_nightly-0.0.1.dev20250313.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250315.dist-info}/RECORD +11 -11
- {ai_edge_quantizer_nightly-0.0.1.dev20250313.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250315.dist-info}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20250313.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250315.dist-info}/WHEEL +0 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20250313.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250315.dist-info}/top_level.txt +0 -0
@@ -15,6 +15,7 @@
|
|
15
15
|
|
16
16
|
"""Model Modifier class that produce the final quantized TFlite model."""
|
17
17
|
|
18
|
+
from collections.abc import Sequence
|
18
19
|
import copy
|
19
20
|
|
20
21
|
import numpy as np
|
@@ -22,6 +23,7 @@ import numpy as np
|
|
22
23
|
from ai_edge_quantizer import qtyping
|
23
24
|
from ai_edge_quantizer import transformation_instruction_generator
|
24
25
|
from ai_edge_quantizer import transformation_performer
|
26
|
+
from ai_edge_quantizer.utils import tfl_flatbuffer_utils
|
25
27
|
from ai_edge_litert import schema_py_generated # pylint: disable=g-direct-tensorflow-import
|
26
28
|
from tensorflow.lite.tools import flatbuffer_utils # pylint: disable=g-direct-tensorflow-import
|
27
29
|
|
@@ -46,6 +48,35 @@ class ModelModifier:
|
|
46
48
|
transformation_performer.TransformationPerformer()
|
47
49
|
)
|
48
50
|
|
51
|
+
def _get_tensor_processing_order(
|
52
|
+
self,
|
53
|
+
tensor_names: Sequence[str],
|
54
|
+
flatbuffer_model: schema_py_generated.ModelT,
|
55
|
+
) -> list[str]:
|
56
|
+
"""Get the tensor processing order obtained from `buffer_to_tensors`.
|
57
|
+
|
58
|
+
The processing order is used to ensure that last tensor in a buffer is
|
59
|
+
processed the last. This is required for the correctness of buffer
|
60
|
+
duplication, as the last tensor in a buffer won't be duplicated.
|
61
|
+
|
62
|
+
Args:
|
63
|
+
tensor_names: Names of the tensors that need to be processed.
|
64
|
+
flatbuffer_model: TFlite model.
|
65
|
+
|
66
|
+
Returns:
|
67
|
+
A list of tensor names in the processing order.
|
68
|
+
"""
|
69
|
+
buffer_to_tensors = tfl_flatbuffer_utils.buffer_to_tensors(flatbuffer_model)
|
70
|
+
|
71
|
+
processing_order = []
|
72
|
+
for buffer_tensors in buffer_to_tensors.values():
|
73
|
+
for tensor in buffer_tensors:
|
74
|
+
tensor_name = tfl_flatbuffer_utils.get_tensor_name(tensor)
|
75
|
+
if tensor_name in tensor_names:
|
76
|
+
processing_order.append(tensor_name)
|
77
|
+
|
78
|
+
return processing_order
|
79
|
+
|
49
80
|
def modify_model(
|
50
81
|
self, params: dict[str, qtyping.TensorTransformationParams]
|
51
82
|
) -> bytearray:
|
@@ -66,8 +97,11 @@ class ModelModifier:
|
|
66
97
|
params, quantized_model
|
67
98
|
)
|
68
99
|
|
100
|
+
tensor_processing_order = self._get_tensor_processing_order(
|
101
|
+
list(instructions.keys()), quantized_model
|
102
|
+
)
|
69
103
|
self._transformation_performer.transform_graph(
|
70
|
-
instructions, quantized_model
|
104
|
+
instructions, quantized_model, tensor_processing_order
|
71
105
|
)
|
72
106
|
constant_buffer_size = self._process_constant_map(quantized_model)
|
73
107
|
# we leave 64MB for the model architecture.
|
@@ -15,6 +15,7 @@
|
|
15
15
|
|
16
16
|
"""Generate model tensor level quantization config."""
|
17
17
|
|
18
|
+
from collections.abc import Sequence
|
18
19
|
import copy
|
19
20
|
from typing import Any, Optional, Union
|
20
21
|
|
@@ -266,6 +267,31 @@ class ParamsGenerator:
|
|
266
267
|
tensor_params.append(output_tensor_params)
|
267
268
|
return tensor_params
|
268
269
|
|
270
|
+
def _mark_tensors_requiring_buffer_duplication(
|
271
|
+
self, buffers_to_duplicate: Sequence[int]
|
272
|
+
) -> None:
|
273
|
+
"""Mark tensors that require buffer duplication.
|
274
|
+
|
275
|
+
Marking a tensor means adding a DUPLICATE_BUFFER transformation as the first
|
276
|
+
transformation to be applied for each consumer of the tensor.
|
277
|
+
|
278
|
+
Mark all tensors within each of the provided buffers as requiring buffer
|
279
|
+
duplication, except for the last tensor. The order of tensors is assumed to
|
280
|
+
be the same during both the marking and transformation performer steps, as
|
281
|
+
determined by `self.buffer_to_tensors`. This allows the final tensor to
|
282
|
+
reuse the original buffer, as it is not marked for duplication.
|
283
|
+
|
284
|
+
Args:
|
285
|
+
buffers_to_duplicate: Indices of the buffers to duplicate.
|
286
|
+
"""
|
287
|
+
for buffer_idx in buffers_to_duplicate:
|
288
|
+
for tensor in self.buffer_to_tensors[buffer_idx][:-1]:
|
289
|
+
tensor_name = tfl_flatbuffer_utils.get_tensor_name(tensor)
|
290
|
+
for consumer_params in self.model_quant_results[tensor_name].consumers:
|
291
|
+
consumer_params.transformations.insert(
|
292
|
+
0, _QuantTrans.DUPLICATE_BUFFER
|
293
|
+
)
|
294
|
+
|
269
295
|
def _check_buffer_sharing(self) -> None:
|
270
296
|
"""Check if tensors sharing the same buffer have the same quantization.
|
271
297
|
|
@@ -278,6 +304,7 @@ class ParamsGenerator:
|
|
278
304
|
tfl_flatbuffer_utils.get_tensor_name(tensor), None
|
279
305
|
)
|
280
306
|
|
307
|
+
buffers_to_duplicate = []
|
281
308
|
for tensors in self.buffer_to_tensors.values():
|
282
309
|
if len(tensors) <= 1:
|
283
310
|
continue
|
@@ -295,13 +322,21 @@ class ParamsGenerator:
|
|
295
322
|
if not _compatible_tensor_transformation_params(
|
296
323
|
first_tensor_params, tensor_params
|
297
324
|
):
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
325
|
+
if _are_distinct_tensors_with_shared_buffer(
|
326
|
+
first_tensor, tensor, self.flatbuffer_model.buffers
|
327
|
+
):
|
328
|
+
buffers_to_duplicate.append(first_tensor.buffer)
|
329
|
+
break
|
330
|
+
else:
|
331
|
+
error_msg = (
|
332
|
+
f'The tensors {first_tensor.name} and {tensor.name} do not have'
|
333
|
+
' the same quantization parameters even though they share the'
|
334
|
+
' same buffer. Please modify your quantization recipe to make'
|
335
|
+
' sure the two tensors have the same quantization settings.'
|
336
|
+
)
|
337
|
+
raise RuntimeError(error_msg)
|
338
|
+
|
339
|
+
self._mark_tensors_requiring_buffer_duplication(buffers_to_duplicate)
|
305
340
|
|
306
341
|
|
307
342
|
def _compatible_tensor_transformation_params(
|
@@ -381,3 +416,14 @@ def _compatible_tensor_params(
|
|
381
416
|
):
|
382
417
|
return True
|
383
418
|
return False
|
419
|
+
|
420
|
+
|
421
|
+
def _are_distinct_tensors_with_shared_buffer(
|
422
|
+
tensor1: Any, tensor2: Any, buffers: list[Any]
|
423
|
+
) -> bool:
|
424
|
+
"""Check if two tensors are different and share a constant buffer."""
|
425
|
+
are_different_tensors = tensor1.name != tensor2.name
|
426
|
+
do_share_buffer = tensor1.buffer == tensor2.buffer
|
427
|
+
is_constant_buffer = buffers[tensor1.buffer].data is not None
|
428
|
+
|
429
|
+
return are_different_tensors and do_share_buffer and is_constant_buffer
|
@@ -13,8 +13,6 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
# ==============================================================================
|
15
15
|
|
16
|
-
"""Tests for params_generator."""
|
17
|
-
|
18
16
|
from collections.abc import Generator
|
19
17
|
import os
|
20
18
|
from typing import Any
|
@@ -30,6 +28,7 @@ from ai_edge_quantizer import recipe_manager
|
|
30
28
|
from ai_edge_quantizer.utils import test_utils
|
31
29
|
from ai_edge_quantizer.utils import tfl_flatbuffer_utils
|
32
30
|
from ai_edge_quantizer.utils import tfl_interpreter_utils
|
31
|
+
from ai_edge_litert import schema_py_generated # pylint: disable=g-direct-tensorflow-import
|
33
32
|
|
34
33
|
|
35
34
|
_ComputePrecision = qtyping.ComputePrecision
|
@@ -570,9 +569,27 @@ class ParamsGeneratorTest(parameterized.TestCase):
|
|
570
569
|
)
|
571
570
|
self.assertLen(quant_params, 6)
|
572
571
|
|
573
|
-
@parameterized.
|
574
|
-
|
575
|
-
|
572
|
+
@parameterized.named_parameters(
|
573
|
+
dict(
|
574
|
+
testcase_name='different_quant_config_fc2_no_quant',
|
575
|
+
fc_2_num_bits=None,
|
576
|
+
expected_tensor_with_buffer_duplication='BatchMatMulV3',
|
577
|
+
),
|
578
|
+
dict(
|
579
|
+
testcase_name='different_quant_config_fc2_int4',
|
580
|
+
fc_2_num_bits=4,
|
581
|
+
expected_tensor_with_buffer_duplication='BatchMatMulV3',
|
582
|
+
),
|
583
|
+
dict(
|
584
|
+
testcase_name='same_quant_config',
|
585
|
+
fc_2_num_bits=8,
|
586
|
+
expected_tensor_with_buffer_duplication=None,
|
587
|
+
),
|
588
|
+
)
|
589
|
+
def test_generate_params_marks_correct_buffers_for_duplication_when_distinct_tensors_share_constant_buffer(
|
590
|
+
self,
|
591
|
+
fc_2_num_bits,
|
592
|
+
expected_tensor_with_buffer_duplication,
|
576
593
|
):
|
577
594
|
model_path = os.path.join(
|
578
595
|
TEST_DATA_PREFIX_PATH, 'tests/models/weight_sharing_fcs.tflite'
|
@@ -580,33 +597,52 @@ class ParamsGeneratorTest(parameterized.TestCase):
|
|
580
597
|
# Setup the quantization config for the first FC.
|
581
598
|
self._recipe_manager.add_quantization_config(
|
582
599
|
regex='PartitionedCall:0',
|
583
|
-
operation_name=qtyping.TFLOperationName.
|
600
|
+
operation_name=qtyping.TFLOperationName.FULLY_CONNECTED,
|
584
601
|
op_config=qtyping.OpQuantizationConfig(
|
585
|
-
weight_tensor_config=_TensorQuantConfig(
|
602
|
+
weight_tensor_config=_TensorQuantConfig(
|
603
|
+
num_bits=8, granularity=qtyping.QuantGranularity.CHANNELWISE
|
604
|
+
),
|
586
605
|
compute_precision=_ComputePrecision.INTEGER,
|
587
606
|
),
|
588
607
|
)
|
589
608
|
# Setup the quantization config for the second FC (weight shared with the
|
590
609
|
# first FC).
|
591
|
-
if
|
592
|
-
pass
|
593
|
-
elif the_other_fc_difference == 'num_bits':
|
610
|
+
if fc_2_num_bits is not None:
|
594
611
|
self._recipe_manager.add_quantization_config(
|
595
612
|
regex='PartitionedCall_1:0',
|
596
|
-
operation_name=qtyping.TFLOperationName.
|
613
|
+
operation_name=qtyping.TFLOperationName.FULLY_CONNECTED,
|
597
614
|
op_config=qtyping.OpQuantizationConfig(
|
598
|
-
weight_tensor_config=_TensorQuantConfig(
|
615
|
+
weight_tensor_config=_TensorQuantConfig(
|
616
|
+
num_bits=fc_2_num_bits,
|
617
|
+
granularity=qtyping.QuantGranularity.CHANNELWISE,
|
618
|
+
),
|
599
619
|
compute_precision=_ComputePrecision.INTEGER,
|
600
620
|
),
|
601
621
|
)
|
602
622
|
pg = params_generator.ParamsGenerator(model_path)
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
)
|
607
|
-
|
608
|
-
|
609
|
-
|
623
|
+
quant_params = pg.generate_quantization_parameters(
|
624
|
+
self._recipe_manager,
|
625
|
+
)
|
626
|
+
self.assertLen(quant_params, 6)
|
627
|
+
|
628
|
+
# Check that the expected tensor has buffer duplication transformation as
|
629
|
+
# the first one to be applied. And no other tensor has buffer duplication
|
630
|
+
# transformation at all.
|
631
|
+
for tensor_name in quant_params:
|
632
|
+
if tensor_name == expected_tensor_with_buffer_duplication:
|
633
|
+
self.assertIsNotNone(quant_params[tensor_name].consumers)
|
634
|
+
for consumer in quant_params[tensor_name].consumers:
|
635
|
+
self.assertNotEmpty(consumer.transformations)
|
636
|
+
self.assertEqual(
|
637
|
+
consumer.transformations[0],
|
638
|
+
qtyping.QuantTransformation.DUPLICATE_BUFFER,
|
639
|
+
)
|
640
|
+
elif quant_params[tensor_name].consumers is not None:
|
641
|
+
for consumer in quant_params[tensor_name].consumers:
|
642
|
+
self.assertNotIn(
|
643
|
+
qtyping.QuantTransformation.DUPLICATE_BUFFER,
|
644
|
+
consumer.transformations,
|
645
|
+
)
|
610
646
|
|
611
647
|
@parameterized.named_parameters(
|
612
648
|
dict(
|
@@ -1037,5 +1073,62 @@ class ParamsGeneratorAlreadyQuantizedModelTest(googletest.TestCase):
|
|
1037
1073
|
_ = params_generator.ParamsGenerator(test_model_path)
|
1038
1074
|
|
1039
1075
|
|
1076
|
+
def _create_tensor(name: str, buffer_idx: int) -> schema_py_generated.TensorT:
|
1077
|
+
tensor = schema_py_generated.TensorT()
|
1078
|
+
tensor.name = name.encode('utf-8')
|
1079
|
+
tensor.buffer = buffer_idx
|
1080
|
+
return tensor
|
1081
|
+
|
1082
|
+
|
1083
|
+
def _create_buffer(data: Any) -> schema_py_generated.BufferT:
|
1084
|
+
buffer = schema_py_generated.BufferT()
|
1085
|
+
buffer.data = data
|
1086
|
+
return buffer
|
1087
|
+
|
1088
|
+
|
1089
|
+
class ParamsGeneratorUtilsTest(parameterized.TestCase):
|
1090
|
+
|
1091
|
+
@parameterized.named_parameters(
|
1092
|
+
dict(
|
1093
|
+
testcase_name='same_tensors',
|
1094
|
+
tensor1=_create_tensor(name='tensor1', buffer_idx=0),
|
1095
|
+
tensor2=_create_tensor(name='tensor1', buffer_idx=0),
|
1096
|
+
buffers=[_create_buffer(data=np.array([1, 2, 3]))],
|
1097
|
+
expected=False,
|
1098
|
+
),
|
1099
|
+
dict(
|
1100
|
+
testcase_name='tensors_do_not_share_buffer',
|
1101
|
+
tensor1=_create_tensor(name='tensor1', buffer_idx=0),
|
1102
|
+
tensor2=_create_tensor(name='tensor2', buffer_idx=1),
|
1103
|
+
buffers=[
|
1104
|
+
_create_buffer(data=np.array([1, 2, 3])),
|
1105
|
+
_create_buffer(data=np.array([4, 5, 6])),
|
1106
|
+
],
|
1107
|
+
expected=False,
|
1108
|
+
),
|
1109
|
+
dict(
|
1110
|
+
testcase_name='different_tensors_share_non_constant_buffer',
|
1111
|
+
tensor1=_create_tensor(name='tensor1', buffer_idx=0),
|
1112
|
+
tensor2=_create_tensor(name='tensor2', buffer_idx=0),
|
1113
|
+
buffers=[_create_buffer(data=None)],
|
1114
|
+
expected=False,
|
1115
|
+
),
|
1116
|
+
dict(
|
1117
|
+
testcase_name='different_tensors_share_constant_buffer',
|
1118
|
+
tensor1=_create_tensor(name='tensor1', buffer_idx=0),
|
1119
|
+
tensor2=_create_tensor(name='tensor2', buffer_idx=0),
|
1120
|
+
buffers=[_create_buffer(data=np.array([1, 2, 3]))],
|
1121
|
+
expected=True,
|
1122
|
+
),
|
1123
|
+
)
|
1124
|
+
def test__are_distinct_tensors_with_shared_buffer(
|
1125
|
+
self, tensor1, tensor2, buffers, expected
|
1126
|
+
):
|
1127
|
+
got = params_generator._are_distinct_tensors_with_shared_buffer(
|
1128
|
+
tensor1=tensor1, tensor2=tensor2, buffers=buffers
|
1129
|
+
)
|
1130
|
+
self.assertEqual(expected, got)
|
1131
|
+
|
1132
|
+
|
1040
1133
|
if __name__ == '__main__':
|
1041
1134
|
googletest.main()
|
@@ -412,7 +412,9 @@ class QuantizerMultiSignatureModelTest(parameterized.TestCase):
|
|
412
412
|
available_signatures = validation_result.available_signature_keys()
|
413
413
|
self.assertLen(available_signatures, 2)
|
414
414
|
|
415
|
-
def
|
415
|
+
def test_constant_buffer_shared_by_tensors_with_different_quantization_params_succeeds(
|
416
|
+
self,
|
417
|
+
):
|
416
418
|
recipe = [
|
417
419
|
dict({
|
418
420
|
'regex': '.*',
|
@@ -439,17 +441,9 @@ class QuantizerMultiSignatureModelTest(parameterized.TestCase):
|
|
439
441
|
},
|
440
442
|
})
|
441
443
|
]
|
442
|
-
|
443
444
|
qt = quantizer.Quantizer(self._test_model_path, recipe)
|
444
445
|
calib_result = qt.calibrate(_MULTI_SIGNATURE_CALIBRATION_DATASET)
|
445
|
-
|
446
|
-
error_message = (
|
447
|
-
"The tensors b'Add/y' and b'Mul/y' do not have the same quantization"
|
448
|
-
)
|
449
|
-
with self.assertRaisesWithPredicateMatch(
|
450
|
-
RuntimeError, lambda err: error_message in str(err)
|
451
|
-
):
|
452
|
-
qt.quantize(calib_result)
|
446
|
+
self.assertIsNotNone(qt.quantize(calib_result).quantized_model)
|
453
447
|
|
454
448
|
def test_quantization_with_insufficient_calibration(self):
|
455
449
|
# Run calibration for one signature only.
|
@@ -1077,6 +1077,68 @@ class InstructionGeneratorTest(parameterized.TestCase):
|
|
1077
1077
|
self.assertLen(instructions, 1)
|
1078
1078
|
self.assertEqual(instructions["tfl.quantize"], expected_instructions)
|
1079
1079
|
|
1080
|
+
def test_instruction_generator_keeps_buffer_duplication_as_first_transformation(
|
1081
|
+
self,
|
1082
|
+
):
|
1083
|
+
test_tensor_name = "test_tensor"
|
1084
|
+
|
1085
|
+
dummy_quant_params = qtyping.UniformQuantParams(
|
1086
|
+
8, None, np.array([1]), np.array([0])
|
1087
|
+
)
|
1088
|
+
consumer_params_1 = qtyping.OpToTensorParams(
|
1089
|
+
subgraph_op_id=0,
|
1090
|
+
transformations=[
|
1091
|
+
qtyping.QuantTransformation.DUPLICATE_BUFFER,
|
1092
|
+
qtyping.QuantTransformation.ADD_QUANTIZE,
|
1093
|
+
],
|
1094
|
+
parameters=dummy_quant_params,
|
1095
|
+
)
|
1096
|
+
consumer_params_2 = qtyping.OpToTensorParams(
|
1097
|
+
subgraph_op_id=2,
|
1098
|
+
transformations=[
|
1099
|
+
qtyping.QuantTransformation.DUPLICATE_BUFFER,
|
1100
|
+
qtyping.QuantTransformation.ADD_QUANTIZE,
|
1101
|
+
qtyping.QuantTransformation.ADD_DEQUANTIZE,
|
1102
|
+
],
|
1103
|
+
parameters=dummy_quant_params,
|
1104
|
+
)
|
1105
|
+
|
1106
|
+
quant_parameters = {
|
1107
|
+
test_tensor_name: qtyping.TensorTransformationParams(
|
1108
|
+
tensor_name=test_tensor_name,
|
1109
|
+
producer=None,
|
1110
|
+
consumers=[consumer_params_1, consumer_params_2],
|
1111
|
+
),
|
1112
|
+
}
|
1113
|
+
instruction_gen = (
|
1114
|
+
instruction_generator.TransformationInstructionsGenerator()
|
1115
|
+
)
|
1116
|
+
# _tensor_name_to_graph_info has to have an entry for the test tensor for
|
1117
|
+
# `quant_params_to_transformation_insts` to work. But the values do not
|
1118
|
+
# matter for this test.
|
1119
|
+
instruction_gen._tensor_name_to_graph_info[test_tensor_name] = (
|
1120
|
+
instruction_generator.TransformationInstructionsGenerator.TensorGraphInfo(
|
1121
|
+
tensor_id=1,
|
1122
|
+
subgraph_id=0,
|
1123
|
+
producer=0,
|
1124
|
+
consumers=[2],
|
1125
|
+
)
|
1126
|
+
)
|
1127
|
+
instructions = instruction_gen.quant_params_to_transformation_insts(
|
1128
|
+
quant_parameters
|
1129
|
+
)
|
1130
|
+
self.assertLen(instructions, 1)
|
1131
|
+
instructions = instructions[test_tensor_name].instructions
|
1132
|
+
self.assertGreater(len(instructions), 1)
|
1133
|
+
self.assertEqual(
|
1134
|
+
instructions[0].transformation,
|
1135
|
+
qtyping.QuantTransformation.DUPLICATE_BUFFER,
|
1136
|
+
)
|
1137
|
+
self.assertNotIn(
|
1138
|
+
qtyping.QuantTransformation.DUPLICATE_BUFFER,
|
1139
|
+
instructions[1:],
|
1140
|
+
)
|
1141
|
+
|
1080
1142
|
|
1081
1143
|
if __name__ == "__main__":
|
1082
1144
|
googletest.main()
|
@@ -15,7 +15,11 @@
|
|
15
15
|
|
16
16
|
"""Python manager for transformations to be applied to TFlite models."""
|
17
17
|
|
18
|
+
from collections.abc import Sequence
|
19
|
+
from typing import Optional
|
20
|
+
|
18
21
|
import numpy as np
|
22
|
+
|
19
23
|
from ai_edge_quantizer import qtyping
|
20
24
|
from ai_edge_quantizer.transformations import dequant_insert
|
21
25
|
from ai_edge_quantizer.transformations import duplicate_buffer
|
@@ -265,19 +269,24 @@ class TransformationPerformer:
|
|
265
269
|
self,
|
266
270
|
transformation_instructions: dict[str, qtyping.TensorTransformationInsts],
|
267
271
|
tflite_model: schema_py_generated.ModelT,
|
268
|
-
|
269
|
-
|
272
|
+
tensor_processing_order: Optional[Sequence[str]] = None,
|
273
|
+
) -> None:
|
274
|
+
"""Apply all transformations to the given tflite_model in place.
|
270
275
|
|
271
276
|
Args:
|
272
|
-
transformation_instructions:
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
277
|
+
transformation_instructions: Mapping from tensor name to its
|
278
|
+
transformation instructions, produced by
|
279
|
+
transformation_instruction_generator.
|
280
|
+
tflite_model: The tflite model to apply quantization to.
|
281
|
+
tensor_processing_order: The order of tensors to process. If not provided,
|
282
|
+
the order will be inferred from `transformation_instructions`.
|
278
283
|
"""
|
279
284
|
self._original_op_id_map = []
|
280
285
|
self._added_op_id_map = []
|
281
286
|
self._create_op_id_map(tflite_model)
|
282
|
-
|
283
|
-
|
287
|
+
if tensor_processing_order is None:
|
288
|
+
tensor_processing_order = transformation_instructions.keys()
|
289
|
+
for tensor_name in tensor_processing_order:
|
290
|
+
self._apply_transformations(
|
291
|
+
transformation_instructions[tensor_name], tflite_model
|
292
|
+
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ai-edge-quantizer-nightly
|
3
|
-
Version: 0.0.1.dev20250313
|
3
|
+
Version: 0.0.1.dev20250315
|
4
4
|
Summary: A quantizer for advanced developers to quantize converted AI Edge models.
|
5
5
|
Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
|
6
6
|
Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
|
@@ -6,22 +6,22 @@ ai_edge_quantizer/calibrator.py,sha256=n7AD9j7UScR-CieoI6DQRMeiG_fhLBfSLRiM4460x
|
|
6
6
|
ai_edge_quantizer/calibrator_test.py,sha256=C_oWOaRugPKYX74jF-eRFH-k6nGOdA8I9_uPiocaOuE,11900
|
7
7
|
ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
|
8
8
|
ai_edge_quantizer/default_policy.py,sha256=2y9p7iZIESB4ozPwjiodgPTLlnmHxQKkwKcYSfc80JI,10277
|
9
|
-
ai_edge_quantizer/model_modifier.py,sha256=
|
9
|
+
ai_edge_quantizer/model_modifier.py,sha256=SPt9X-xBzRvcd4xIS24zLHt3aUS2QwsNDqweFqitCAo,7109
|
10
10
|
ai_edge_quantizer/model_modifier_test.py,sha256=cJd04SLOG-fQZZNZPcisoBLx3cLtWEwGqUBbLb-pif4,4751
|
11
11
|
ai_edge_quantizer/model_validator.py,sha256=fRNz0jO54cthPTibsCuViUXUuFRHl_fbvEiCukIVy20,13030
|
12
12
|
ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
|
13
|
-
ai_edge_quantizer/params_generator.py,sha256=
|
14
|
-
ai_edge_quantizer/params_generator_test.py,sha256=
|
13
|
+
ai_edge_quantizer/params_generator.py,sha256=jOCK2UuEsnnbqtbmLxUBpjmhsXZXjZZto2vdsPifliU,16039
|
14
|
+
ai_edge_quantizer/params_generator_test.py,sha256=zmDS6jG5zKhHL_hzJw2wlMTx1LLcNCK6S5WlwogWF-A,41122
|
15
15
|
ai_edge_quantizer/qtyping.py,sha256=UBZ3HgO8IDLY6VJmO05rGtFv_idMD3Os3WWsnriA0NA,15235
|
16
16
|
ai_edge_quantizer/quantizer.py,sha256=g3DMqFMrMpt9jQttCE0WcdNbMtk0JZnmN5MmCHrNdyM,13202
|
17
|
-
ai_edge_quantizer/quantizer_test.py,sha256=
|
17
|
+
ai_edge_quantizer/quantizer_test.py,sha256=K_HBA56JkFI3HL8VLWCqGEfC0ISh5ldMKoNyBdGRAJg,20368
|
18
18
|
ai_edge_quantizer/recipe.py,sha256=r5tJiUs-ihZFzeK_jP2sUIUgTqZsL5SWvbUokuIUPDo,2251
|
19
19
|
ai_edge_quantizer/recipe_manager.py,sha256=qcGUD7e7BISKdsY9WH2rdaRR3acmzSA5qMezGNbzlpo,8931
|
20
20
|
ai_edge_quantizer/recipe_manager_test.py,sha256=LulVxsYp6TBGFI2PLCUCd4VsFq8ELpC7kMNkUjsLgbo,32230
|
21
21
|
ai_edge_quantizer/recipe_test.py,sha256=Fg_sfxovI2fRjk5qdu18ghOvXdUvhDR1TxbE0GHDczc,3381
|
22
22
|
ai_edge_quantizer/transformation_instruction_generator.py,sha256=WkECCO85lLs4cEnjZF5eVGbtuul4P8N77gUxUCK9ESY,21605
|
23
|
-
ai_edge_quantizer/transformation_instruction_generator_test.py,sha256
|
24
|
-
ai_edge_quantizer/transformation_performer.py,sha256=
|
23
|
+
ai_edge_quantizer/transformation_instruction_generator_test.py,sha256=-6ycvqpLoAMRvHuLiAwNBZNhsWqwWTsi9tqFxV9Gfq0,41218
|
24
|
+
ai_edge_quantizer/transformation_performer.py,sha256=y7kBTwXO_ORTBiijBv3y-L85Y-NwaDEIx3_OdI0uhUI,11551
|
25
25
|
ai_edge_quantizer/transformation_performer_test.py,sha256=m3V6nd6jsjd6jVId5wTBNuyDB2h2p4tHlMWhlnomlJo,13341
|
26
26
|
ai_edge_quantizer/algorithms/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
27
27
|
ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
@@ -62,8 +62,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=x2xA2CFPpe_2trcV8v5xGaBE
|
|
62
62
|
ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=Op3JxtOqlrjzmYF18jnnstL1k9xiY9kKJ8S2vklKGkc,11327
|
63
63
|
ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
|
64
64
|
ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
|
65
|
-
ai_edge_quantizer_nightly-0.0.1.dev20250313.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
66
|
-
ai_edge_quantizer_nightly-0.0.1.
|
67
|
-
ai_edge_quantizer_nightly-0.0.1.dev20250313.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
68
|
-
ai_edge_quantizer_nightly-0.0.1.dev20250313.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
69
|
-
ai_edge_quantizer_nightly-0.0.1.dev20250313.dist-info/RECORD,,
|
65
|
+
ai_edge_quantizer_nightly-0.0.1.dev20250315.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
66
|
+
ai_edge_quantizer_nightly-0.0.1.dev20250315.dist-info/METADATA,sha256=SRo0h_ZOLgM6BuC3goBLldKwIUjSrCchPWUWCAIunvU,1528
|
67
|
+
ai_edge_quantizer_nightly-0.0.1.dev20250315.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
68
|
+
ai_edge_quantizer_nightly-0.0.1.dev20250315.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
69
|
+
ai_edge_quantizer_nightly-0.0.1.dev20250315.dist-info/RECORD,,
|
File without changes
|
File without changes
|