ai-edge-quantizer-nightly 0.1.0.dev20250327__py3-none-any.whl → 0.1.0.dev20250328__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/params_generator.py +1 -8
- ai_edge_quantizer/params_generator_test.py +52 -0
- ai_edge_quantizer/transformations/quantize_tensor.py +25 -1
- {ai_edge_quantizer_nightly-0.1.0.dev20250327.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250328.dist-info}/METADATA +1 -1
- {ai_edge_quantizer_nightly-0.1.0.dev20250327.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250328.dist-info}/RECORD +8 -8
- {ai_edge_quantizer_nightly-0.1.0.dev20250327.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250328.dist-info}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250327.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250328.dist-info}/WHEEL +0 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250327.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250328.dist-info}/top_level.txt +0 -0
@@ -394,14 +394,6 @@ def _compatible_tensor_params(
|
|
394
394
|
]
|
395
395
|
if _same_tensor_params_except_id(params1, params2):
|
396
396
|
return True
|
397
|
-
if (
|
398
|
-
params1.transformations[0] != _QuantTrans.NO_QUANTIZE
|
399
|
-
and params2.transformations[0] != _QuantTrans.NO_QUANTIZE
|
400
|
-
):
|
401
|
-
# NO_QUANTIZE has no parameters. So only if both params aren't NO_QUANTIZE
|
402
|
-
# do we expect the parameters to be the same.
|
403
|
-
if params1.parameters != params2.parameters:
|
404
|
-
return False
|
405
397
|
# We only need to check the first transformation because transformations are
|
406
398
|
# applied in order, and as long as the one that's immediately after the tensor
|
407
399
|
# is the same, it's compatible.
|
@@ -413,6 +405,7 @@ def _compatible_tensor_params(
|
|
413
405
|
if (
|
414
406
|
params1.transformations[0] in quantized_source_transformations
|
415
407
|
and params2.transformations[0] in quantized_source_transformations
|
408
|
+
and params1.parameters == params2.parameters
|
416
409
|
):
|
417
410
|
return True
|
418
411
|
return False
|
@@ -914,6 +914,58 @@ class ParamsGeneratorTest(parameterized.TestCase):
|
|
914
914
|
),
|
915
915
|
expected=True,
|
916
916
|
),
|
917
|
+
dict(
|
918
|
+
testcase_name='compatible_no_numeric_check',
|
919
|
+
param1=qtyping.TensorTransformationParams(
|
920
|
+
tensor_name='tfl.quantize',
|
921
|
+
producer=None,
|
922
|
+
consumers=[
|
923
|
+
qtyping.OpToTensorParams(
|
924
|
+
subgraph_op_id=4,
|
925
|
+
transformations=[
|
926
|
+
qtyping.QuantTransformation.ADD_QUANTIZE,
|
927
|
+
],
|
928
|
+
parameters=qtyping.UniformQuantParams(
|
929
|
+
8, None, np.array([0.00028806]), np.array([0])
|
930
|
+
),
|
931
|
+
),
|
932
|
+
qtyping.OpToTensorParams(
|
933
|
+
subgraph_op_id=5,
|
934
|
+
transformations=[
|
935
|
+
qtyping.QuantTransformation.ADD_QUANTIZE,
|
936
|
+
],
|
937
|
+
parameters=qtyping.UniformQuantParams(
|
938
|
+
8, None, np.array([0.00027501]), np.array([0])
|
939
|
+
),
|
940
|
+
),
|
941
|
+
],
|
942
|
+
),
|
943
|
+
param2=qtyping.TensorTransformationParams(
|
944
|
+
tensor_name='tfl.quantize',
|
945
|
+
producer=None,
|
946
|
+
consumers=[
|
947
|
+
qtyping.OpToTensorParams(
|
948
|
+
subgraph_op_id=4,
|
949
|
+
transformations=[
|
950
|
+
qtyping.QuantTransformation.ADD_QUANTIZE,
|
951
|
+
],
|
952
|
+
parameters=qtyping.UniformQuantParams(
|
953
|
+
8, None, np.array([0.00028806]), np.array([0])
|
954
|
+
),
|
955
|
+
),
|
956
|
+
qtyping.OpToTensorParams(
|
957
|
+
subgraph_op_id=5,
|
958
|
+
transformations=[
|
959
|
+
qtyping.QuantTransformation.ADD_QUANTIZE,
|
960
|
+
],
|
961
|
+
parameters=qtyping.UniformQuantParams(
|
962
|
+
8, None, np.array([0.00027501]), np.array([0])
|
963
|
+
),
|
964
|
+
),
|
965
|
+
],
|
966
|
+
),
|
967
|
+
expected=True,
|
968
|
+
),
|
917
969
|
)
|
918
970
|
def test_params_compatible(self, param1, param2, expected):
|
919
971
|
# adding a test to make production coverage happy.
|
@@ -121,6 +121,26 @@ def _perform_channelwise_quantization(
|
|
121
121
|
return flatbuffer_quantization
|
122
122
|
|
123
123
|
|
124
|
+
def _downcast_and_truncate_scale(input_scale: np.ndarray) -> np.ndarray:
|
125
|
+
"""Given a fp32 scale, downcast it to fp16 and truncate mantissa to 7 bits.
|
126
|
+
|
127
|
+
CPU kernel can only utilize 7 bits of mantissa for fp16, so we want to produce
|
128
|
+
scale this way to unify behaviours across different platforms.
|
129
|
+
|
130
|
+
Args:
|
131
|
+
input_scale: The input scale in fp32.
|
132
|
+
|
133
|
+
Returns:
|
134
|
+
The downcasted & truncated scale in fp16.
|
135
|
+
"""
|
136
|
+
|
137
|
+
# A regular fp16 has 10 bits of mantissa, so we need to zero out the 3 least
|
138
|
+
# significant bits.
|
139
|
+
return (
|
140
|
+
input_scale.astype(np.float16).view(dtype=np.uint16) & np.uint16(0xFFF8)
|
141
|
+
).view(dtype=np.float16)
|
142
|
+
|
143
|
+
|
124
144
|
def _perform_blockwise_quantization(
|
125
145
|
transformation_input: transformation_utils.TransformationInput,
|
126
146
|
) -> schema_py_generated.QuantizationParametersT():
|
@@ -142,9 +162,13 @@ def _perform_blockwise_quantization(
|
|
142
162
|
)
|
143
163
|
tensor = transformation_input.subgraph.tensors[transformation_input.tensor_id]
|
144
164
|
blockwise_details = schema_py_generated.BlockwiseQuantizationT()
|
165
|
+
# Downcast and truncate the scale to fp16.
|
166
|
+
downcasted_scale = _downcast_and_truncate_scale(
|
167
|
+
transformation_input.quant_params.scale
|
168
|
+
)
|
145
169
|
scale_tensor_id = transformation_utils.add_new_constant_tensor(
|
146
170
|
tensor.name + b"_scales",
|
147
|
-
|
171
|
+
downcasted_scale,
|
148
172
|
schema_py_generated.TensorType.FLOAT16,
|
149
173
|
transformation_input.subgraph,
|
150
174
|
transformation_input.buffers,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ai-edge-quantizer-nightly
|
3
|
-
Version: 0.1.0.dev20250327
|
3
|
+
Version: 0.1.0.dev20250328
|
4
4
|
Summary: A quantizer for advanced developers to quantize converted AI Edge models.
|
5
5
|
Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
|
6
6
|
Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
|
@@ -10,8 +10,8 @@ ai_edge_quantizer/model_modifier.py,sha256=SPt9X-xBzRvcd4xIS24zLHt3aUS2QwsNDqweF
|
|
10
10
|
ai_edge_quantizer/model_modifier_test.py,sha256=cJd04SLOG-fQZZNZPcisoBLx3cLtWEwGqUBbLb-pif4,4751
|
11
11
|
ai_edge_quantizer/model_validator.py,sha256=fRNz0jO54cthPTibsCuViUXUuFRHl_fbvEiCukIVy20,13030
|
12
12
|
ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
|
13
|
-
ai_edge_quantizer/params_generator.py,sha256=
|
14
|
-
ai_edge_quantizer/params_generator_test.py,sha256=
|
13
|
+
ai_edge_quantizer/params_generator.py,sha256=46XDjnP4R3m4xsoXNp7brv0sNQPdQMg217_CbEl-Wgg,15780
|
14
|
+
ai_edge_quantizer/params_generator_test.py,sha256=9WTUl87XqbM4NruX5ypLuVRtuhcw-CmxndsMOUzZ92Q,43171
|
15
15
|
ai_edge_quantizer/qtyping.py,sha256=UBZ3HgO8IDLY6VJmO05rGtFv_idMD3Os3WWsnriA0NA,15235
|
16
16
|
ai_edge_quantizer/quantizer.py,sha256=g3DMqFMrMpt9jQttCE0WcdNbMtk0JZnmN5MmCHrNdyM,13202
|
17
17
|
ai_edge_quantizer/quantizer_test.py,sha256=K_HBA56JkFI3HL8VLWCqGEfC0ISh5ldMKoNyBdGRAJg,20368
|
@@ -50,7 +50,7 @@ ai_edge_quantizer/transformations/emulated_subchannel.py,sha256=HVaRxoC8PCAvy3xe
|
|
50
50
|
ai_edge_quantizer/transformations/emulated_subchannel_test.py,sha256=gZP6u9NdPXl7s19qB_Un8evou9ZZV6I9Gy0E1rdobHM,7722
|
51
51
|
ai_edge_quantizer/transformations/quant_insert.py,sha256=jn6HsJaV-sqBiFPY-Aqbd64t8zgcYVkEkZI375x_FWY,3958
|
52
52
|
ai_edge_quantizer/transformations/quant_insert_test.py,sha256=X9ptPDvJCFkR5tejKnD1SlHFGPazQTW-wNNMV9MEAuw,10107
|
53
|
-
ai_edge_quantizer/transformations/quantize_tensor.py,sha256=
|
53
|
+
ai_edge_quantizer/transformations/quantize_tensor.py,sha256=y6As38mTzhva50YvNQ7p0SFpuWet3LPqFwE3qIO0gEQ,8231
|
54
54
|
ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=mHLO3_MRt36A8-ZN8ADn5tBBJlqjTWa7ZUN8Mmu5Rcw,9116
|
55
55
|
ai_edge_quantizer/transformations/transformation_utils.py,sha256=R42OIbzwQ7JYJ-Qt46jsqwb6u4MfDGiIPCRZCUGLVCw,4664
|
56
56
|
ai_edge_quantizer/transformations/transformation_utils_test.py,sha256=xH64SF3UHDh84vYbt-WvmXNjM-Jg-mefES1ACO1tkqw,6269
|
@@ -64,8 +64,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=x2xA2CFPpe_2trcV8v5xGaBE
|
|
64
64
|
ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=Op3JxtOqlrjzmYF18jnnstL1k9xiY9kKJ8S2vklKGkc,11327
|
65
65
|
ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
|
66
66
|
ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
|
67
|
-
ai_edge_quantizer_nightly-0.1.0.dev20250327.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
68
|
-
ai_edge_quantizer_nightly-0.1.0.
|
69
|
-
ai_edge_quantizer_nightly-0.1.0.dev20250327.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
70
|
-
ai_edge_quantizer_nightly-0.1.0.dev20250327.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
71
|
-
ai_edge_quantizer_nightly-0.1.0.dev20250327.dist-info/RECORD,,
|
67
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250328.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
68
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250328.dist-info/METADATA,sha256=ajE8NjYUjSmr3VZQWBzARMc9MLqLkeHooAmK-fCg7Ms,1527
|
69
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250328.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
70
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250328.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
71
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250328.dist-info/RECORD,,
|
File without changes
|
File without changes
|