ai-edge-quantizer-nightly 0.1.0.dev20250404__py3-none-any.whl → 0.1.0.dev20250406__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/qtyping.py +2 -0
- ai_edge_quantizer/transformation_instruction_generator.py +175 -11
- ai_edge_quantizer/transformation_instruction_generator_test.py +230 -6
- ai_edge_quantizer/transformation_performer.py +33 -23
- ai_edge_quantizer/transformation_performer_test.py +50 -0
- ai_edge_quantizer/transformations/duplicate_tensor.py +61 -0
- ai_edge_quantizer/transformations/duplicate_tensor_test.py +131 -0
- ai_edge_quantizer/transformations/transformation_utils.py +7 -2
- {ai_edge_quantizer_nightly-0.1.0.dev20250404.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info}/METADATA +1 -1
- {ai_edge_quantizer_nightly-0.1.0.dev20250404.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info}/RECORD +13 -11
- {ai_edge_quantizer_nightly-0.1.0.dev20250404.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250404.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info}/WHEEL +0 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250404.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info}/top_level.txt +0 -0
ai_edge_quantizer/qtyping.py
CHANGED
@@ -27,6 +27,9 @@ from ai_edge_quantizer.utils import tfl_flatbuffer_utils
|
|
27
27
|
from ai_edge_litert import schema_py_generated # pylint: disable=g-direct-tensorflow-import
|
28
28
|
|
29
29
|
|
30
|
+
_QuantTransformation = qtyping.QuantTransformation
|
31
|
+
|
32
|
+
|
30
33
|
# When a tensor has no producer, we'll assign -1 to the producer field
|
31
34
|
# When a tensor is a graph output, we'll also include a -1 in the consumer list
|
32
35
|
def check_horizontal_optimization(
|
@@ -454,6 +457,53 @@ class TransformationInstructionsGenerator:
|
|
454
457
|
transformations.insert(0, producer_trans_rule)
|
455
458
|
return transformations
|
456
459
|
|
460
|
+
def _remove_last_tensor_duplication(
    self, tensor_trans_insts: qtyping.TensorTransformationInsts
) -> None:
  """Drops the final DUPLICATE_TENSOR instruction, if any, in place.

  Removing the last duplication lets the original tensor be reused instead of
  creating one more copy. At most one instruction is removed; the relative
  order of the remaining instructions is unchanged.

  Args:
    tensor_trans_insts: Transformation instructions for a tensor. Its
      `instructions` list is modified in place.
  """
  instructions = tensor_trans_insts.instructions
  if not instructions:
    return
  # Scan from the back so the first hit is the last DUPLICATE_TENSOR.
  last_duplication_idx = next(
      (
          idx
          for idx in reversed(range(len(instructions)))
          if instructions[idx].transformation
          == _QuantTransformation.DUPLICATE_TENSOR
      ),
      None,
  )
  if last_duplication_idx is not None:
    del instructions[last_duplication_idx]
|
474
|
+
|
475
|
+
def _remove_unnecessary_buffer_duplication(
    self, tensor_trans_insts: qtyping.TensorTransformationInsts
) -> None:
  """Drops buffer duplications made redundant by a tensor duplication.

  When a tensor is duplicated, a new buffer is created for it. A
  DUPLICATE_BUFFER instruction is therefore unnecessary when every one of its
  consumers also appears in some DUPLICATE_TENSOR instruction of the same
  tensor. The instruction list is filtered in place.

  Args:
    tensor_trans_insts: Transformation instructions for a tensor.
  """
  instructions = tensor_trans_insts.instructions
  if not instructions:
    return

  # Gather every consumer that is covered by a tensor duplication.
  covered_consumers = set()
  for inst in instructions:
    if inst.transformation == _QuantTransformation.DUPLICATE_TENSOR:
      covered_consumers |= set(inst.consumers)
  if not covered_consumers:
    return

  def is_redundant(inst) -> bool:
    # A buffer duplication whose consumers are all covered adds nothing.
    return (
        inst.transformation == _QuantTransformation.DUPLICATE_BUFFER
        and covered_consumers.issuperset(inst.consumers)
    )

  # Slice assignment keeps the same list object, so callers holding a
  # reference to it observe the removal.
  instructions[:] = [inst for inst in instructions if not is_redundant(inst)]
|
506
|
+
|
457
507
|
def _quant_params_to_transformation_insts(
|
458
508
|
self,
|
459
509
|
param: qtyping.TensorTransformationParams,
|
@@ -510,27 +560,119 @@ class TransformationInstructionsGenerator:
|
|
510
560
|
# Adding other consumers rules.
|
511
561
|
transformations += other_consumer_transformations
|
512
562
|
tensor_trans_insts.instructions = transformations
|
563
|
+
|
564
|
+
# Now, when all optimizations are done, we can remove the last tensor
|
565
|
+
# duplication instruction, so the original tensor can be reused.
|
566
|
+
self._remove_last_tensor_duplication(tensor_trans_insts)
|
567
|
+
# With the tensor duplication instructions finalized, we can remove
|
568
|
+
# unnecessary buffer duplications applied to the same duplicated tensors.
|
569
|
+
# This is not a part of a vertical optimization because vertical
|
570
|
+
# optimization only works between producers & consumers, and this is between
|
571
|
+
# the consumer only. Also this can't be done during the params generation
|
572
|
+
# because removing last tensor duplication has to happen first.
|
573
|
+
self._remove_unnecessary_buffer_duplication(tensor_trans_insts)
|
574
|
+
|
513
575
|
# Check the generated transformation instructions are valid, the function
|
514
576
|
# will raise an error if the instructions are not valid.
|
515
577
|
self._check_tensor_transformation_instructions_valid(tensor_trans_insts)
|
516
578
|
|
517
579
|
return tensor_trans_insts
|
518
580
|
|
519
|
-
def
|
520
|
-
self,
|
521
|
-
|
522
|
-
|
581
|
+
def _split_instructions_by_tensor_duplication(
    self,
    instructions: qtyping.TensorTransformationInsts,
) -> list[list[qtyping.TransformationInst]]:
  """Split the instructions into subsets by tensor duplication.

  Splits the instructions into subsets based on which tensor (original or one
  of duplicated ones) they will be applied to.

  The first subset is for the original tensor. The following subsets are for
  the duplicated tensors. The order of instructions in each subset is
  preserved.

  Enforced constraints for each duplicated tensor's instructions subset:
    1. The first instruction must be a `DUPLICATE_TENSOR` one.
    2. No other `DUPLICATE_TENSOR` instructions can be present.

  A non-duplication instruction is routed by its first consumer only. An
  instruction without any consumers is assigned to the original tensor's
  subset, since no duplication can have claimed it.

  For the following instructions:
    [
        (transformation=DUPLICATE_TENSOR, consumers=[1, 2, 3]),
        (transformation=DUPLICATE_TENSOR, consumers=[4]),
        (transformation=T1, consumers=[1, 2]),
        (transformation=T2, consumers=[3]),
        (transformation=T3, consumers=[4]),
        (transformation=T4, consumers=[5])
    ]

  `instruction_subsets` will be:
    [
        [(transformation=T4, consumers=[5])],
        [
            (transformation=DUPLICATE_TENSOR, consumers=[1, 2, 3]),
            (transformation=T1, consumers=[1, 2]),
            (transformation=T2, consumers=[3])
        ],
        [
            (transformation=DUPLICATE_TENSOR, consumers=[4]),
            (transformation=T3, consumers=[4])
        ]
    ]

  Args:
    instructions: Transformation instructions for a tensor.

  Returns:
    A list of subsets of transformation instructions, where the first subset
    is for the original tensor, and the following subsets are for the
    duplicated tensors.

  Raises:
    ValueError: If DUPLICATE_TENSOR is found and it's not the first
      transformation for its consumers.
  """
  original_tensor_subset_idx = 0
  instruction_subsets = [[]]
  consumer_to_subset_idx = {}
  for instruction in instructions.instructions:
    if instruction.transformation == _QuantTransformation.DUPLICATE_TENSOR:
      # Every duplication opens a new subset that starts with itself.
      subset_idx = len(instruction_subsets)
      instruction_subsets.append([instruction])
      for consumer in instruction.consumers:
        if consumer in consumer_to_subset_idx:
          raise ValueError(
              f"Tensor {instructions.tensor_name} : duplicate tensor should"
              " be the first instruction for its consumers."
          )
        consumer_to_subset_idx[consumer] = subset_idx
      continue

    if not instruction.consumers:
      # Nothing to route by: indexing consumers[0] would raise IndexError, so
      # keep the instruction with the original tensor.
      instruction_subsets[original_tensor_subset_idx].append(instruction)
      continue

    # A consumer not claimed by any duplication belongs to the original
    # tensor. Registering it here also makes a later DUPLICATE_TENSOR for the
    # same consumer fail the "must be first" check above.
    subset_idx = consumer_to_subset_idx.setdefault(
        instruction.consumers[0], original_tensor_subset_idx
    )
    instruction_subsets[subset_idx].append(instruction)

  return instruction_subsets
|
657
|
+
|
658
|
+
def _check_subset_of_tensor_transformation_instructions_valid(
|
659
|
+
self,
|
660
|
+
instructions: Optional[list[qtyping.TransformationInst]],
|
661
|
+
tensor_name: str,
|
662
|
+
):
|
663
|
+
"""Check if a subset of tensor transformation instructions is valid.
|
664
|
+
|
665
|
+
Args:
|
666
|
+
instructions: A subset of transformation instructions for a tensor.
|
667
|
+
tensor_name: The name of the tensor.
|
668
|
+
|
669
|
+
Raises:
|
670
|
+
ValueError: If the subset of instructions are not valid.
|
529
671
|
"""
|
530
672
|
is_tensor_unquantized = False
|
531
673
|
is_tensor_quantized = False
|
532
674
|
is_operator_emulated = False
|
533
|
-
for instruction in instructions
|
675
|
+
for instruction in instructions:
|
534
676
|
transform_type = instruction.transformation
|
535
677
|
if transform_type == qtyping.QuantTransformation.NO_QUANTIZE:
|
536
678
|
is_tensor_unquantized = True
|
@@ -543,14 +685,36 @@ class TransformationInstructionsGenerator:
|
|
543
685
|
is_operator_emulated = True
|
544
686
|
if is_tensor_unquantized and is_tensor_quantized:
|
545
687
|
raise ValueError(
|
546
|
-
"Tensor %s can not be both quantized and unquantized"
|
547
|
-
% instructions.tensor_name
|
688
|
+
"Tensor %s can not be both quantized and unquantized" % tensor_name
|
548
689
|
)
|
549
|
-
if is_operator_emulated and len(instructions
|
690
|
+
if is_operator_emulated and len(instructions) > 1:
|
550
691
|
raise ValueError(
|
551
692
|
"Tensor %s : op replacement transformation can not be combined with"
|
552
|
-
" other transformations."
|
553
|
-
|
693
|
+
" other transformations." % tensor_name
|
694
|
+
)
|
695
|
+
|
696
|
+
def _check_tensor_transformation_instructions_valid(
    self,
    instructions: qtyping.TensorTransformationInsts,
):
  """Validates the transformation instructions generated for one tensor.

  The instructions are first grouped by the tensor (the original or one of
  its duplicates) they will be applied to, and each group is then validated
  independently.

  Args:
    instructions: Transformation instructions for a tensor.

  Raises:
    ValueError: If the instructions are not valid.
  """
  subsets = self._split_instructions_by_tensor_duplication(instructions)
  tensor_name = instructions.tensor_name
  for subset in subsets:
    self._check_subset_of_tensor_transformation_instructions_valid(
        subset, tensor_name
    )
|
555
719
|
|
556
720
|
def quant_params_to_transformation_insts(
|
@@ -27,6 +27,8 @@ from ai_edge_quantizer.utils import test_utils
|
|
27
27
|
|
28
28
|
TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile(".")
|
29
29
|
|
30
|
+
_QTransf = qtyping.QuantTransformation
|
31
|
+
|
30
32
|
|
31
33
|
class InstructionGeneratorTest(parameterized.TestCase):
|
32
34
|
|
@@ -1130,14 +1132,236 @@ class InstructionGeneratorTest(parameterized.TestCase):
|
|
1130
1132
|
self.assertLen(instructions, 1)
|
1131
1133
|
instructions = instructions[test_tensor_name].instructions
|
1132
1134
|
self.assertGreater(len(instructions), 1)
|
1133
|
-
self.assertEqual(
|
1134
|
-
|
1135
|
-
|
1135
|
+
self.assertEqual(instructions[0].transformation, _QTransf.DUPLICATE_BUFFER)
|
1136
|
+
self.assertNotIn(_QTransf.DUPLICATE_BUFFER, instructions[1:])
|
1137
|
+
|
1138
|
+
def _get_test_instruction(self, transformation, consumers=None):
  """Builds a TransformationInst for tests; only the two arguments matter.

  Args:
    transformation: The transformation the instruction should carry.
    consumers: Consumer ids; a fresh empty list is used when omitted.

  Returns:
    A TransformationInst whose remaining fields hold dummy values.
  """
  return qtyping.TransformationInst(
      transformation=transformation,
      consumers=[] if consumers is None else consumers,
      # Dummy values below.
      tensor_id=0,
      producer=None,
      parameters=None,
  )
|
1149
|
+
|
1150
|
+
def test__remove_last_tensor_duplication_succeeds(self):
  """Only the last DUPLICATE_TENSOR instruction is dropped."""
  initial_transformations = [
      _QTransf.DUPLICATE_TENSOR,
      _QTransf.ADD_QUANTIZE,
      _QTransf.DUPLICATE_TENSOR,
      _QTransf.ADD_DEQUANTIZE,
  ]
  tensor_instructions = qtyping.TensorTransformationInsts(
      tensor_name="test_tensor",
      subgraph_id=0,
      instructions=[
          self._get_test_instruction(transformation)
          for transformation in initial_transformations
      ],
  )

  generator = instruction_generator.TransformationInstructionsGenerator()
  generator._remove_last_tensor_duplication(tensor_instructions)

  self.assertLen(tensor_instructions.instructions, 3)
  remaining_transformations = [
      inst.transformation for inst in tensor_instructions.instructions
  ]
  self.assertEqual(
      remaining_transformations,
      [
          _QTransf.DUPLICATE_TENSOR,
          _QTransf.ADD_QUANTIZE,
          _QTransf.ADD_DEQUANTIZE,
      ],
  )
|
1177
|
+
|
1178
|
+
def test__remove_unnecessary_buffer_duplication_succeeds(
    self,
):
  """Buffer duplications covered by a tensor duplication are removed."""
  make = self._get_test_instruction
  tensor_instructions = qtyping.TensorTransformationInsts(
      tensor_name="test_tensor",
      subgraph_id=0,
      instructions=[
          make(_QTransf.DUPLICATE_TENSOR, consumers=[1]),
          make(_QTransf.DUPLICATE_BUFFER, consumers=[1]),
          make(_QTransf.ADD_QUANTIZE),
          make(_QTransf.DUPLICATE_BUFFER, consumers=[1]),
          make(_QTransf.ADD_DEQUANTIZE),
          # Consumer 2 has no tensor duplication, so this one must stay.
          make(_QTransf.DUPLICATE_BUFFER, consumers=[2]),
          make(_QTransf.DUPLICATE_TENSOR, consumers=[3, 4]),
          make(_QTransf.ADD_QUANTIZE),
          make(_QTransf.DUPLICATE_BUFFER, consumers=[3, 4]),
      ],
  )

  generator = instruction_generator.TransformationInstructionsGenerator()
  generator._remove_unnecessary_buffer_duplication(tensor_instructions)

  self.assertLen(tensor_instructions.instructions, 6)
  remaining_transformations = [
      inst.transformation for inst in tensor_instructions.instructions
  ]
  self.assertEqual(
      remaining_transformations,
      [
          _QTransf.DUPLICATE_TENSOR,
          _QTransf.ADD_QUANTIZE,
          _QTransf.ADD_DEQUANTIZE,
          _QTransf.DUPLICATE_BUFFER,
          _QTransf.DUPLICATE_TENSOR,
          _QTransf.ADD_QUANTIZE,
      ],
  )
|
1216
|
+
|
1217
|
+
def test__instruction_generator_removes_unnecessary_tensor_and_buffer_duplication(
    self,
):
  """End-to-end check that redundant duplications are pruned.

  Both consumers request DUPLICATE_TENSOR and DUPLICATE_BUFFER. The expected
  result keeps the tensor duplication only for op 0 and the buffer
  duplication only for op 1.
  """
  tensor_name = "arith.constant"
  params_8_bits = qtyping.UniformQuantParams(
      8, None, np.array([1]), np.array([0])
  )
  params_4_bits = qtyping.UniformQuantParams(
      4, None, np.array([1]), np.array([0])
  )
  # Two FCs share a weight tensor `arith.constant`.
  quant_parameters = {
      tensor_name: qtyping.TensorTransformationParams(
          tensor_name=tensor_name,
          producer=None,
          consumers=[
              qtyping.OpToTensorParams(
                  subgraph_op_id=0,
                  transformations=[
                      _QTransf.DUPLICATE_TENSOR,
                      _QTransf.DUPLICATE_BUFFER,  # Expected to be removed.
                      _QTransf.QUANTIZE_TENSOR,
                  ],
                  parameters=params_8_bits,
              ),
              qtyping.OpToTensorParams(
                  subgraph_op_id=1,
                  transformations=[
                      _QTransf.DUPLICATE_TENSOR,  # Expected to be removed.
                      _QTransf.DUPLICATE_BUFFER,
                      _QTransf.QUANTIZE_TENSOR,
                  ],
                  parameters=params_4_bits,
              ),
          ],
      )
  }

  test_model_path = os.path.join(
      TEST_DATA_PREFIX_PATH,
      "tests/models/constant_tensor_and_buffer_only_sharing_weight_fcs.tflite",
  )
  generator = instruction_generator.TransformationInstructionsGenerator(
      test_model_path
  )
  instructions = generator.quant_params_to_transformation_insts(
      quant_parameters
  )

  def expected_inst(transformation, consumer, params):
    # Each expected instruction targets a single consumer of tensor 1.
    return qtyping.TransformationInst(
        transformation=transformation,
        consumers=[consumer],
        tensor_id=1,
        producer=-1,
        parameters=params,
    )

  expected_instructions = qtyping.TensorTransformationInsts(
      tensor_name=tensor_name,
      subgraph_id=0,
      instructions=[
          expected_inst(_QTransf.DUPLICATE_TENSOR, 0, params_8_bits),
          expected_inst(_QTransf.DUPLICATE_BUFFER, 1, params_4_bits),
          expected_inst(_QTransf.QUANTIZE_TENSOR, 0, params_8_bits),
          expected_inst(_QTransf.QUANTIZE_TENSOR, 1, params_4_bits),
      ],
  )
  self.assertLen(instructions, 1)
  self.assertEqual(instructions[tensor_name], expected_instructions)
|
1292
|
+
|
1293
|
+
def test__split_instructions_by_tensor_duplication_returns_expected_subsets(
    self,
):
  """Instructions are grouped per original/duplicated tensor, in order."""
  make = self._get_test_instruction
  tensor_instructions = qtyping.TensorTransformationInsts(
      tensor_name="test_tensor",
      subgraph_id=0,
      instructions=[
          make(_QTransf.DUPLICATE_TENSOR, consumers=[1, 2, 3]),
          make(_QTransf.DUPLICATE_TENSOR, consumers=[4]),
          make(_QTransf.ADD_QUANTIZE, consumers=[1, 2]),
          make(_QTransf.ADD_QUANTIZE, consumers=[3]),
          make(_QTransf.ADD_QUANTIZE, consumers=[4]),
          make(_QTransf.ADD_QUANTIZE, consumers=[5]),
      ],
  )

  generator = instruction_generator.TransformationInstructionsGenerator()
  got = generator._split_instructions_by_tensor_duplication(
      tensor_instructions
  )

  # Subset 0 belongs to the original tensor; one more per duplication.
  original_tensor_subset = [make(_QTransf.ADD_QUANTIZE, consumers=[5])]
  first_duplicate_subset = [
      make(_QTransf.DUPLICATE_TENSOR, consumers=[1, 2, 3]),
      make(_QTransf.ADD_QUANTIZE, consumers=[1, 2]),
      make(_QTransf.ADD_QUANTIZE, consumers=[3]),
  ]
  second_duplicate_subset = [
      make(_QTransf.DUPLICATE_TENSOR, consumers=[4]),
      make(_QTransf.ADD_QUANTIZE, consumers=[4]),
  ]
  expected = [
      original_tensor_subset,
      first_duplicate_subset,
      second_duplicate_subset,
  ]
  self.assertEqual(got, expected)
|
1326
|
+
|
1327
|
+
def test__check_tensor_transformation_instructions_valid_succeeds_on_q_dq_with_duplication(
    self,
):
  """NO_QUANTIZE and QUANTIZE_TENSOR may coexist across a duplication."""
  make = self._get_test_instruction
  tensor_instructions = qtyping.TensorTransformationInsts(
      tensor_name="test_tensor",
      subgraph_id=0,
      instructions=[
          # Consumer 1 gets its own tensor copy and leaves it unquantized.
          make(_QTransf.DUPLICATE_TENSOR, consumers=[1]),
          make(_QTransf.NO_QUANTIZE, consumers=[1]),
          # Consumer 2 quantizes the original tensor.
          make(_QTransf.QUANTIZE_TENSOR, consumers=[2]),
      ],
  )
  generator = instruction_generator.TransformationInstructionsGenerator()
  # Passes as long as no exception is raised.
  generator._check_tensor_transformation_instructions_valid(
      tensor_instructions
  )
|
1344
|
+
|
1345
|
+
def test__check_tensor_transformation_instructions_valid_fails_when_q_noq_wo_duplication(
    self,
):
  """Mixing NO_QUANTIZE and QUANTIZE_TENSOR on one tensor is rejected."""
  make = self._get_test_instruction
  conflicting_instructions = [
      make(_QTransf.NO_QUANTIZE, consumers=[1]),
      make(_QTransf.QUANTIZE_TENSOR, consumers=[2]),
  ]
  tensor_instructions = qtyping.TensorTransformationInsts(
      tensor_name="test_tensor",
      subgraph_id=0,
      instructions=conflicting_instructions,
  )
  generator = instruction_generator.TransformationInstructionsGenerator()
  with self.assertRaisesRegex(
      ValueError, "can not be both quantized and unquantized"
  ):
    generator._check_tensor_transformation_instructions_valid(
        tensor_instructions
    )
|
1141
1365
|
|
1142
1366
|
|
1143
1367
|
if __name__ == "__main__":
|
@@ -23,6 +23,7 @@ import numpy as np
|
|
23
23
|
from ai_edge_quantizer import qtyping
|
24
24
|
from ai_edge_quantizer.transformations import dequant_insert
|
25
25
|
from ai_edge_quantizer.transformations import duplicate_buffer
|
26
|
+
from ai_edge_quantizer.transformations import duplicate_tensor
|
26
27
|
from ai_edge_quantizer.transformations import emulated_subchannel
|
27
28
|
from ai_edge_quantizer.transformations import quant_insert
|
28
29
|
from ai_edge_quantizer.transformations import quantize_tensor
|
@@ -76,6 +77,9 @@ class TransformationPerformer:
|
|
76
77
|
qtyping.QuantTransformation.DUPLICATE_BUFFER: (
|
77
78
|
duplicate_buffer.duplicate_buffer
|
78
79
|
),
|
80
|
+
qtyping.QuantTransformation.DUPLICATE_TENSOR: (
|
81
|
+
duplicate_tensor.duplicate_tensor
|
82
|
+
),
|
79
83
|
}
|
80
84
|
# transformations are separated in two categories:
|
81
85
|
# op_insertion_transformations are transformations that only insert ops
|
@@ -86,6 +90,7 @@ class TransformationPerformer:
|
|
86
90
|
qtyping.QuantTransformation.QUANTIZE_TENSOR,
|
87
91
|
qtyping.QuantTransformation.ADD_QUANTIZE,
|
88
92
|
qtyping.QuantTransformation.DUPLICATE_BUFFER,
|
93
|
+
qtyping.QuantTransformation.DUPLICATE_TENSOR,
|
89
94
|
])
|
90
95
|
self._op_replacement_transformations = set(
|
91
96
|
[qtyping.QuantTransformation.EMULATED_SUBCHANNEL]
|
@@ -132,42 +137,47 @@ class TransformationPerformer:
|
|
132
137
|
transformations: list[qtyping.TransformationInst],
|
133
138
|
subgraph_id: int,
|
134
139
|
trans_info: qtyping.TransformationInfo,
|
135
|
-
):
|
136
|
-
"""Update the instructions after the graph is modified.
|
140
|
+
) -> None:
|
141
|
+
"""Update the instructions in-place after the graph is modified.
|
137
142
|
|
138
|
-
After an op is inserted, the topology is changed
|
139
|
-
following transformation to be applied. So we need
|
140
|
-
that have yet to be applied.
|
143
|
+
After an op is inserted or a tensor is duplicated, the topology is changed
|
144
|
+
and this may impact the following transformation to be applied. So we need
|
145
|
+
to update instructions that have yet to be applied.
|
141
146
|
|
142
147
|
Args:
|
143
|
-
prev_transformation_index:
|
144
|
-
transformations:
|
145
|
-
subgraph_id:
|
146
|
-
trans_info:
|
147
|
-
|
148
|
-
Returns:
|
149
|
-
None, modifies the transformation in place
|
148
|
+
prev_transformation_index: The index of the last applied transformation.
|
149
|
+
transformations: The list of transformations we're applying.
|
150
|
+
subgraph_id: The subgraph where the provided instructions belong to.
|
151
|
+
trans_info: Transformation info returned by a transformation.
|
150
152
|
"""
|
151
|
-
# if no ops were added, then no need for update
|
152
|
-
if trans_info.num_ops_added == 0:
|
153
|
-
return
|
154
153
|
prev_transformation = transformations[prev_transformation_index]
|
155
|
-
|
156
|
-
|
154
|
+
is_prev_not_duplicate_tensor = (
|
155
|
+
prev_transformation.transformation
|
156
|
+
!= qtyping.QuantTransformation.DUPLICATE_TENSOR
|
157
157
|
)
|
158
|
+
was_op_added = trans_info.num_ops_added > 0
|
159
|
+
if not was_op_added and is_prev_not_duplicate_tensor:
|
160
|
+
return
|
161
|
+
|
162
|
+
if was_op_added:
|
163
|
+
self._added_op_id_map[subgraph_id].append(
|
164
|
+
trans_info.op_id + trans_info.num_ops_added - 1
|
165
|
+
)
|
166
|
+
|
158
167
|
for transformations_index in range(
|
159
168
|
prev_transformation_index + 1, len(transformations)
|
160
169
|
):
|
161
170
|
transformation = transformations[transformations_index]
|
162
171
|
for consumer_index in transformation.consumers:
|
163
|
-
#
|
172
|
+
# If the consumer needs to use newly added ops, then the new added op
|
164
173
|
# index needs to be outside of the range of the original op ids.
|
165
174
|
if consumer_index in prev_transformation.consumers:
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
175
|
+
if was_op_added:
|
176
|
+
transformation.producer = (
|
177
|
+
len(self._original_op_id_map[subgraph_id])
|
178
|
+
+ len(self._added_op_id_map[subgraph_id])
|
179
|
+
- 1
|
180
|
+
)
|
171
181
|
transformation.tensor_id = trans_info.output_tensor_id
|
172
182
|
|
173
183
|
def _apply_single_transformation(
|
@@ -15,6 +15,7 @@
|
|
15
15
|
|
16
16
|
"""Tests for transformation_performer."""
|
17
17
|
|
18
|
+
import copy
|
18
19
|
import os
|
19
20
|
|
20
21
|
import numpy as np
|
@@ -26,6 +27,9 @@ from ai_edge_quantizer import transformation_performer
|
|
26
27
|
from ai_edge_quantizer.utils import test_utils
|
27
28
|
from ai_edge_quantizer.utils import tfl_flatbuffer_utils
|
28
29
|
|
30
|
+
_QTransf = qtyping.QuantTransformation
|
31
|
+
|
32
|
+
|
29
33
|
TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile(".")
|
30
34
|
|
31
35
|
|
@@ -267,6 +271,52 @@ class TransformationPerformerTest(parameterized.TestCase):
|
|
267
271
|
expected_added_op_id_map,
|
268
272
|
)
|
269
273
|
|
274
|
+
def test__update_instructions_updates_tensor_id_after_duplicate_tensor(self):
  """Only instructions sharing the duplication's consumers get the new id."""

  def get_test_instruction(transformation, consumers):
    return qtyping.TransformationInst(
        transformation=transformation,
        consumers=consumers,
        # Dummy values below.
        tensor_id=0,
        producer=0,
        parameters=qtyping.UniformQuantParams(
            8, None, np.array([1]), np.array([0])
        ),
    )

  instructions = [
      get_test_instruction(_QTransf.DUPLICATE_TENSOR, consumers=[1]),
      get_test_instruction(_QTransf.ADD_QUANTIZE, consumers=[1]),
      get_test_instruction(_QTransf.ADD_DEQUANTIZE, consumers=[1]),
      get_test_instruction(_QTransf.QUANTIZE_TENSOR, consumers=[2]),
  ]
  # Simulate a situation as if the first instruction (duplicate tensor) was
  # applied.
  subgraph_id = 0
  duplicated_tensor_id = 13
  prev_trans_idx = 0
  trans_info = qtyping.TransformationInfo(
      # Copy of what duplicate_tensor.py returns.
      op_id=0,
      num_ops_added=0,
      output_tensor_id=duplicated_tensor_id,
  )
  # Snapshot the expectation BEFORE the in-place update. Copying afterwards
  # would bake any unintended mutation (e.g. of the consumer-2 instruction)
  # into the expected value and hide it from the comparison.
  expected_instructions = copy.deepcopy(instructions)
  # Expecting the ops with the same consumers as in the DUPLICATE_TENSOR
  # instruction to use the new tensor id.
  expected_instructions[1].tensor_id = duplicated_tensor_id
  expected_instructions[2].tensor_id = duplicated_tensor_id

  self._transformation_performer._create_op_id_map(self._test_model)
  self._transformation_performer._update_instructions(
      prev_trans_idx, instructions, subgraph_id, trans_info
  )

  self.assertSequenceEqual(instructions, expected_instructions)
  # Expecting no change to the op id map.
  self.assertListEqual(
      self._transformation_performer._added_op_id_map,
      [[]],
  )
|
319
|
+
|
270
320
|
def test_transform_graph(self):
|
271
321
|
"""test for transform_graph."""
|
272
322
|
instructions = {
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# Copyright 2024 The AI Edge Quantizer Authors.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
# ==============================================================================
|
15
|
+
|
16
|
+
"""Duplicate tensor transformation."""
|
17
|
+
|
18
|
+
from ai_edge_quantizer import qtyping
|
19
|
+
from ai_edge_quantizer.transformations import transformation_utils
|
20
|
+
from ai_edge_quantizer.utils import tfl_flatbuffer_utils
|
21
|
+
|
22
|
+
|
23
|
+
def duplicate_tensor(
    transformation_input: transformation_utils.TransformationInput,
) -> qtyping.TransformationInfo:
  """Duplicates a constant tensor and rewires the given consumers to the copy.

  A new constant tensor holding the same data, type and shape is added to the
  subgraph. For each op listed in `transformation_input.consumers`, the first
  input that refers to the original tensor is redirected to the new tensor.

  Args:
    transformation_input: Input of the transformation; its `consumers` are
      assumed to be all and only the ops that should use the duplicate.

  Returns:
    A TransformationInfo with `num_ops_added=0` whose `output_tensor_id` is
    the id of the newly created tensor.

  Raises:
    ValueError: If the buffer of the source tensor holds no data, i.e. the
      tensor is not constant.
  """
  source_id = transformation_input.tensor_id
  graph = transformation_input.subgraph
  source = graph.tensors[source_id]
  source_name = tfl_flatbuffer_utils.get_tensor_name(source)
  payload = transformation_input.buffers[source.buffer].data
  if payload is None:
    raise ValueError(
        'Duplicate Tensor transformation supports only constant tensors.'
        f' Tensor {source_name} is not constant.'
    )

  duplicate_id = transformation_utils.add_new_constant_tensor(
      tensor_name=f'{source_name}_duplicated',
      data=payload,
      tensor_type=source.type,
      tensor_shape=source.shape,
      subgraph=graph,
      buffers=transformation_input.buffers,
  )
  # Suffix the name with the new tensor id to avoid name collisions when the
  # same tensor is duplicated multiple times.
  graph.tensors[duplicate_id].name += f'_{duplicate_id}'

  # Point each requested consumer at the duplicate.
  for op_idx in transformation_input.consumers:
    op_inputs = graph.operators[op_idx].inputs
    for slot, input_id in enumerate(op_inputs):
      if input_id == source_id:
        op_inputs[slot] = duplicate_id
        # Only the first matching input of an op is redirected.
        break

  return qtyping.TransformationInfo(
      op_id=0, num_ops_added=0, output_tensor_id=duplicate_id
  )
|
@@ -0,0 +1,131 @@
|
|
1
|
+
# Copyright 2024 The AI Edge Quantizer Authors.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
# ==============================================================================
|
15
|
+
|
16
|
+
import os
|
17
|
+
import numpy as np
|
18
|
+
from tensorflow.python.platform import googletest
|
19
|
+
from ai_edge_quantizer import qtyping
|
20
|
+
from ai_edge_quantizer.transformations import duplicate_tensor
|
21
|
+
from ai_edge_quantizer.transformations import transformation_utils
|
22
|
+
from ai_edge_quantizer.utils import test_utils
|
23
|
+
from ai_edge_quantizer.utils import tfl_flatbuffer_utils
|
24
|
+
|
25
|
+
TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile('..')
|
26
|
+
|
27
|
+
|
28
|
+
class DuplicateTensorTest(googletest.TestCase):
  """Tests for the `duplicate_tensor` transformation."""

  def setUp(self):
    super().setUp()
    self.model = tfl_flatbuffer_utils.read_model(
        os.path.join(
            TEST_DATA_PREFIX_PATH, 'tests/models/weight_sharing_fcs.tflite'
        )
    )

  def _get_transformation_input(
      self,
      subgraph_idx: int,
      tensor_idx: int,
      consumers: list[int],
  ) -> transformation_utils.TransformationInput:
    """Builds a TransformationInput for a tensor of the loaded test model.

    Args:
      subgraph_idx: Index of the subgraph that contains the tensor.
      tensor_idx: Index of the tensor within that subgraph.
      consumers: Operator indices to pass as the tensor's consumers.

    Returns:
      The TransformationInput; fields other than tensor id, buffers and
      consumers are filled with dummy values.
    """
    # Dummy quantization params; only needed to construct the input.
    dummy_quant_params = qtyping.UniformQuantParams(
        num_bits=8,
        quantized_dimension=None,
        scale=np.ones(1),
        zero_point=np.zeros(1),
    )
    return transformation_utils.TransformationInput(
        tensor_id=tensor_idx,
        buffers=self.model.buffers,
        consumers=consumers,
        # Dummy params below.
        op_codes=self.model.operatorCodes,
        subgraph=self.model.subgraphs[subgraph_idx],
        producer=-1,
        quant_params=dummy_quant_params,
    )

  def test_constant_tensor_is_correctly_duplicated(self):
    # Duplicate the FC weight tensor in the second subgraph for the first FC.
    subgraph_idx = 1
    fc1_op_idx = 0
    source_weight_idx = 1
    subgraph = self.model.subgraphs[subgraph_idx]

    # Snapshot the state of the model before the transformation runs.
    fc1_inputs_before = list(subgraph.operators[fc1_op_idx].inputs)
    weight_slot = fc1_inputs_before.index(source_weight_idx)
    tensor_count_before = len(subgraph.tensors)
    source_buffer_id = subgraph.tensors[source_weight_idx].buffer
    buffer_count_before = len(self.model.buffers)

    info = duplicate_tensor.duplicate_tensor(
        self._get_transformation_input(
            subgraph_idx, source_weight_idx, consumers=[fc1_op_idx]
        )
    )

    self.assertEqual(info.op_id, 0)
    self.assertEqual(info.num_ops_added, 0)
    # Exactly one tensor and one buffer must have been added.
    self.assertLen(subgraph.tensors, tensor_count_before + 1)
    self.assertLen(self.model.buffers, buffer_count_before + 1)
    # The duplicate is expected to be the last tensor in the subgraph.
    duplicate_idx = info.output_tensor_id
    self.assertEqual(duplicate_idx, len(subgraph.tensors) - 1)

    # Name, type and shape of the duplicate must follow the source tensor.
    source_tensor = subgraph.tensors[source_weight_idx]
    source_name = tfl_flatbuffer_utils.get_tensor_name(source_tensor)
    duplicate = subgraph.tensors[duplicate_idx]
    self.assertEqual(
        duplicate.name,
        f'{source_name}_duplicated_{duplicate_idx}',
    )
    self.assertEqual(duplicate.type, source_tensor.type)
    self.assertTrue(np.all(duplicate.shape == source_tensor.shape))

    # The duplicate must be backed by the newly appended buffer.
    new_buffer_id = len(self.model.buffers) - 1
    self.assertEqual(duplicate.buffer, new_buffer_id)
    # The new buffer must hold the same data as the source buffer.
    duplicate_data = np.frombuffer(
        self.model.buffers[new_buffer_id].data,
        dtype=np.float32,
    )
    source_data = np.frombuffer(
        self.model.buffers[source_buffer_id].data,
        dtype=np.float32,
    )
    self.assertTrue(np.all(duplicate_data == source_data))

    # The first FC must now read its weight from the duplicate.
    self.assertEqual(
        subgraph.operators[fc1_op_idx].inputs[weight_slot],
        duplicate_idx,
    )

  def test_duplicate_tensor_raises_error_when_tensor_is_not_constant(self):
    # Try to duplicate the FC input tensor in the second subgraph.
    subgraph_idx = 1
    input_tensor_idx = 0
    non_constant_input = self._get_transformation_input(
        subgraph_idx, input_tensor_idx, consumers=[0]
    )
    with self.assertRaisesRegex(
        ValueError,
        'Duplicate Tensor transformation supports only constant tensors.',
    ):
      duplicate_tensor.duplicate_tensor(non_constant_input)
|
129
|
+
|
130
|
+
if __name__ == '__main__':
|
131
|
+
googletest.main()
|
@@ -16,7 +16,7 @@
|
|
16
16
|
"""Utility functions for graph transformations."""
|
17
17
|
|
18
18
|
import dataclasses
|
19
|
-
from typing import Union
|
19
|
+
from typing import Optional, Union
|
20
20
|
|
21
21
|
import numpy as np
|
22
22
|
|
@@ -98,6 +98,7 @@ def add_new_constant_tensor(
|
|
98
98
|
tensor_type: schema_py_generated.TensorType,
|
99
99
|
subgraph: schema_py_generated.SubGraphT,
|
100
100
|
buffers: list[schema_py_generated.BufferT],
|
101
|
+
tensor_shape: Optional[list[int]] = None,
|
101
102
|
) -> int:
|
102
103
|
"""Add a new constant tensor to the model.
|
103
104
|
|
@@ -107,6 +108,8 @@ def add_new_constant_tensor(
|
|
107
108
|
tensor_type: The type of the new tensor.
|
108
109
|
subgraph: The subgraph where the new tensor is added.
|
109
110
|
buffers: The buffers of the model.
|
111
|
+
tensor_shape: The shape of the new tensor. If not provided, the shape of the
|
112
|
+
data will be used.
|
110
113
|
|
111
114
|
Returns:
|
112
115
|
The index of the new tensor in the subgraph.
|
@@ -114,7 +117,9 @@ def add_new_constant_tensor(
|
|
114
117
|
new_buffer_id = add_new_constant_buffer(data, buffers)
|
115
118
|
|
116
119
|
new_tensor = schema_py_generated.TensorT()
|
117
|
-
|
120
|
+
if tensor_shape is None:
|
121
|
+
tensor_shape = data.shape
|
122
|
+
new_tensor.shape = tensor_shape
|
118
123
|
new_tensor.buffer = new_buffer_id
|
119
124
|
new_tensor.type = tensor_type
|
120
125
|
new_tensor.name = tensor_name
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ai-edge-quantizer-nightly
|
3
|
-
Version: 0.1.0.
|
3
|
+
Version: 0.1.0.dev20250406
|
4
4
|
Summary: A quantizer for advanced developers to quantize converted AI Edge models.
|
5
5
|
Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
|
6
6
|
Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
|
@@ -12,17 +12,17 @@ ai_edge_quantizer/model_validator.py,sha256=fRNz0jO54cthPTibsCuViUXUuFRHl_fbvEiC
|
|
12
12
|
ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
|
13
13
|
ai_edge_quantizer/params_generator.py,sha256=46XDjnP4R3m4xsoXNp7brv0sNQPdQMg217_CbEl-Wgg,15780
|
14
14
|
ai_edge_quantizer/params_generator_test.py,sha256=9WTUl87XqbM4NruX5ypLuVRtuhcw-CmxndsMOUzZ92Q,43171
|
15
|
-
ai_edge_quantizer/qtyping.py,sha256=
|
15
|
+
ai_edge_quantizer/qtyping.py,sha256=FqelZu7j0fGBRSCv_VVsuf3VmbfVlYJGgsjvdMXGgaw,15284
|
16
16
|
ai_edge_quantizer/quantizer.py,sha256=g3DMqFMrMpt9jQttCE0WcdNbMtk0JZnmN5MmCHrNdyM,13202
|
17
17
|
ai_edge_quantizer/quantizer_test.py,sha256=K_HBA56JkFI3HL8VLWCqGEfC0ISh5ldMKoNyBdGRAJg,20368
|
18
18
|
ai_edge_quantizer/recipe.py,sha256=FR0uJceumZrnle2VRSOQZ1uXup4S1cTYKRH-N53mWRo,2919
|
19
19
|
ai_edge_quantizer/recipe_manager.py,sha256=qcGUD7e7BISKdsY9WH2rdaRR3acmzSA5qMezGNbzlpo,8931
|
20
20
|
ai_edge_quantizer/recipe_manager_test.py,sha256=LulVxsYp6TBGFI2PLCUCd4VsFq8ELpC7kMNkUjsLgbo,32230
|
21
21
|
ai_edge_quantizer/recipe_test.py,sha256=Fg_sfxovI2fRjk5qdu18ghOvXdUvhDR1TxbE0GHDczc,3381
|
22
|
-
ai_edge_quantizer/transformation_instruction_generator.py,sha256=
|
23
|
-
ai_edge_quantizer/transformation_instruction_generator_test.py,sha256
|
24
|
-
ai_edge_quantizer/transformation_performer.py,sha256=
|
25
|
-
ai_edge_quantizer/transformation_performer_test.py,sha256=
|
22
|
+
ai_edge_quantizer/transformation_instruction_generator.py,sha256=R7A90Qj6iQQROrznXmXLJd-5yXq0PRHbLOdNY51dEu4,27913
|
23
|
+
ai_edge_quantizer/transformation_instruction_generator_test.py,sha256=E0QSDCav6N6izlJ-a1ZJOsb2VEUxuxBmTbt0-EgDdxY,49890
|
24
|
+
ai_edge_quantizer/transformation_performer.py,sha256=PIrylVhuWZCpnXEl7qSw2BlxRrY7lqj6aQvagJVCVts,11989
|
25
|
+
ai_edge_quantizer/transformation_performer_test.py,sha256=n9xI6QMqvrj9KUul2LuObIsF7YdLSqgMg4X6d4BkFP8,15219
|
26
26
|
ai_edge_quantizer/algorithms/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
27
27
|
ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
28
28
|
ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
|
@@ -46,13 +46,15 @@ ai_edge_quantizer/transformations/dequant_insert.py,sha256=sL1LHFVzBDSd9jgrzlHz3
|
|
46
46
|
ai_edge_quantizer/transformations/dequant_insert_test.py,sha256=NJ18PnG71_AvUPz3Cr_TmG6URMeBfa7IiDDyddfTkKQ,10830
|
47
47
|
ai_edge_quantizer/transformations/duplicate_buffer.py,sha256=sEod0EtmcHX0VDqBCI4BYCX9CSRyDtx2vmjtOentFiY,1743
|
48
48
|
ai_edge_quantizer/transformations/duplicate_buffer_test.py,sha256=YYWl3Q5WF60s8T8pLzzA8TCSxz-i7dqc03dJt1LtMw4,3880
|
49
|
+
ai_edge_quantizer/transformations/duplicate_tensor.py,sha256=HF1uuKFm5kFF6X0XUpdYlLPoikSRd7pIPK1oxN7TuHY,2455
|
50
|
+
ai_edge_quantizer/transformations/duplicate_tensor_test.py,sha256=s-RqSxNBMfVJyCunXz2eb7-KA6UiBmbOmL7phLslENQ,5056
|
49
51
|
ai_edge_quantizer/transformations/emulated_subchannel.py,sha256=HVaRxoC8PCAvy3xeMv3OIymukUy_yW1zK0xN8Ann6I4,13602
|
50
52
|
ai_edge_quantizer/transformations/emulated_subchannel_test.py,sha256=gZP6u9NdPXl7s19qB_Un8evou9ZZV6I9Gy0E1rdobHM,7722
|
51
53
|
ai_edge_quantizer/transformations/quant_insert.py,sha256=jn6HsJaV-sqBiFPY-Aqbd64t8zgcYVkEkZI375x_FWY,3958
|
52
54
|
ai_edge_quantizer/transformations/quant_insert_test.py,sha256=X9ptPDvJCFkR5tejKnD1SlHFGPazQTW-wNNMV9MEAuw,10107
|
53
55
|
ai_edge_quantizer/transformations/quantize_tensor.py,sha256=y6As38mTzhva50YvNQ7p0SFpuWet3LPqFwE3qIO0gEQ,8231
|
54
56
|
ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=mHLO3_MRt36A8-ZN8ADn5tBBJlqjTWa7ZUN8Mmu5Rcw,9116
|
55
|
-
ai_edge_quantizer/transformations/transformation_utils.py,sha256=
|
57
|
+
ai_edge_quantizer/transformations/transformation_utils.py,sha256=5w0fG6TP362elTHs-JZokl24fuK4Gv6DGyIpybQYb3g,4885
|
56
58
|
ai_edge_quantizer/transformations/transformation_utils_test.py,sha256=xH64SF3UHDh84vYbt-WvmXNjM-Jg-mefES1ACO1tkqw,6269
|
57
59
|
ai_edge_quantizer/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
58
60
|
ai_edge_quantizer/utils/calibration_utils.py,sha256=1Fj9MIO6aLZIRgyd4axvZN4S_O64nB_-Miu1WP664js,2536
|
@@ -64,8 +66,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=x2xA2CFPpe_2trcV8v5xGaBE
|
|
64
66
|
ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=Op3JxtOqlrjzmYF18jnnstL1k9xiY9kKJ8S2vklKGkc,11327
|
65
67
|
ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
|
66
68
|
ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
|
67
|
-
ai_edge_quantizer_nightly-0.1.0.
|
68
|
-
ai_edge_quantizer_nightly-0.1.0.
|
69
|
-
ai_edge_quantizer_nightly-0.1.0.
|
70
|
-
ai_edge_quantizer_nightly-0.1.0.
|
71
|
-
ai_edge_quantizer_nightly-0.1.0.
|
69
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
70
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info/METADATA,sha256=W7h2q3SS2TX0imvGdEIJiCocHydrTH813QV1behoKQU,1527
|
71
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
72
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
73
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250406.dist-info/RECORD,,
|
File without changes
|
File without changes
|