ai-edge-quantizer-nightly 0.3.0.dev20250805__py3-none-any.whl → 0.3.0.dev20250806__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithm_manager.py +2 -0
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +17 -0
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +9 -2
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +9 -1
- ai_edge_quantizer/default_policy.py +4 -2
- ai_edge_quantizer/qtyping.py +1 -0
- ai_edge_quantizer/utils/tfl_flatbuffer_utils.py +1 -0
- {ai_edge_quantizer_nightly-0.3.0.dev20250805.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info}/METADATA +1 -1
- {ai_edge_quantizer_nightly-0.3.0.dev20250805.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info}/RECORD +12 -12
- {ai_edge_quantizer_nightly-0.3.0.dev20250805.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.3.0.dev20250805.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info}/WHEEL +0 -0
- {ai_edge_quantizer_nightly-0.3.0.dev20250805.dist-info → ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info}/top_level.txt +0 -0
@@ -119,6 +119,7 @@ MIN_MAX_OP_NAME_MATERIALIZE_FUNC_DICT = {
|
|
119
119
|
_TFLOpName.DIV: common_quantize.materialize_div,
|
120
120
|
_TFLOpName.BROADCAST_TO: common_quantize.materialize_broadcast_to,
|
121
121
|
_TFLOpName.SQRT: common_quantize.materialize_sqrt,
|
122
|
+
_TFLOpName.GATHER: common_quantize.materialize_gather,
|
122
123
|
}
|
123
124
|
for op_name, materialize_func in MIN_MAX_OP_NAME_MATERIALIZE_FUNC_DICT.items():
|
124
125
|
register_quantized_op(
|
@@ -264,6 +265,7 @@ _OCTAV_OP_NAME_MATERIALIZE_FUNC_DICT = immutabledict({
|
|
264
265
|
_TFLOpName.DIV: common_quantize.materialize_div,
|
265
266
|
_TFLOpName.BROADCAST_TO: common_quantize.materialize_broadcast_to,
|
266
267
|
_TFLOpName.SQRT: common_quantize.materialize_sqrt,
|
268
|
+
_TFLOpName.GATHER: common_quantize.materialize_gather,
|
267
269
|
})
|
268
270
|
|
269
271
|
for op_name, materialize_func in _OCTAV_OP_NAME_MATERIALIZE_FUNC_DICT.items():
|
@@ -841,6 +841,23 @@ def materialize_sqrt(
|
|
841
841
|
)
|
842
842
|
|
843
843
|
|
844
|
+
def materialize_gather(
|
845
|
+
get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
|
846
|
+
op_info: qtyping.OpInfo,
|
847
|
+
graph_info: qtyping.GraphInfo,
|
848
|
+
tensor_name_to_qsv: dict[str, Any],
|
849
|
+
) -> list[qtyping.TensorTransformationParams]:
|
850
|
+
"""Materialize tensors in tfl.gather."""
|
851
|
+
return common_utils.materialize_standard_op(
|
852
|
+
op_info,
|
853
|
+
graph_info,
|
854
|
+
tensor_name_to_qsv,
|
855
|
+
get_tensor_quant_params_fn,
|
856
|
+
constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
|
857
|
+
inputs_to_ignore=[1], # Indices do not need to be quantized.
|
858
|
+
)
|
859
|
+
|
860
|
+
|
844
861
|
def _get_tensor_shape_for_blockwise(
|
845
862
|
tensor_shape: Sequence[int], quantized_dim: int, block_size: int
|
846
863
|
) -> list[int]:
|
@@ -256,8 +256,15 @@ def uniform_quantize(
|
|
256
256
|
inverse_scales = 1.0 / scales
|
257
257
|
# TODO: b/332574603 - support unsigned data type.
|
258
258
|
qtype = IntType(quantization_params.num_bits, signed=True)
|
259
|
-
#
|
260
|
-
|
259
|
+
# For quantization with more than 8 bits, symmetric narrow-range quantization
|
260
|
+
# is required due to assumptions made by legacy TFLite kernels. However, this
|
261
|
+
# method is not ideal for low-bit quantization (e.g., 2-bit quantization,
|
262
|
+
# which only has 4 bins), as it wastes a bin and there are no kernel
|
263
|
+
# requirements for a narrow range when < 8 bits because the data is unpacked
|
264
|
+
# to int8 before being used in the kernel.
|
265
|
+
narrow_range = (
|
266
|
+
quantization_params.symmetric and quantization_params.num_bits >= 8
|
267
|
+
)
|
261
268
|
required_dtype = np.signedinteger if qtype.signed else np.unsignedinteger
|
262
269
|
if not np.issubdtype(zero_points.dtype, required_dtype):
|
263
270
|
raise ValueError(
|
@@ -123,6 +123,14 @@ class TensorUtilsTest(parameterized.TestCase):
|
|
123
123
|
False,
|
124
124
|
[-24, 10, 19, 127],
|
125
125
|
),
|
126
|
+
(
|
127
|
+
[-16.0, 1.3, 2.4, 16.0],
|
128
|
+
[0.12598425],
|
129
|
+
[0],
|
130
|
+
8,
|
131
|
+
True,
|
132
|
+
[-127, 10, 19, 127], # int8 symmetric is narrow range, -127 to 127
|
133
|
+
),
|
126
134
|
(
|
127
135
|
[-3.0, 1.3, 2.4, 16.0],
|
128
136
|
[1.2666667],
|
@@ -137,7 +145,7 @@ class TensorUtilsTest(parameterized.TestCase):
|
|
137
145
|
[-6],
|
138
146
|
4,
|
139
147
|
True,
|
140
|
-
[-
|
148
|
+
[-8, -5, -4, 7], # int4 symmetric is not narrow range, -8 to 7
|
141
149
|
),
|
142
150
|
)
|
143
151
|
def test_uniform_quantize(
|
@@ -192,7 +192,8 @@ DEFAULT_JSON_POLICY = """
|
|
192
192
|
"UNPACK",
|
193
193
|
"DIV",
|
194
194
|
"BROADCAST_TO",
|
195
|
-
"SQRT"
|
195
|
+
"SQRT",
|
196
|
+
"GATHER"
|
196
197
|
],
|
197
198
|
"static_wi8_ai8": [
|
198
199
|
"ADD",
|
@@ -233,7 +234,8 @@ DEFAULT_JSON_POLICY = """
|
|
233
234
|
"UNPACK",
|
234
235
|
"DIV",
|
235
236
|
"BROADCAST_TO",
|
236
|
-
"SQRT"
|
237
|
+
"SQRT",
|
238
|
+
"GATHER"
|
237
239
|
],
|
238
240
|
"static_wi4_ai8": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT", "EMBEDDING_LOOKUP"],
|
239
241
|
"static_wi4_ai16": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT", "EMBEDDING_LOOKUP"],
|
ai_edge_quantizer/qtyping.py
CHANGED
@@ -66,6 +66,7 @@ TFL_OP_NAME_TO_CODE = immutabledict.immutabledict({
|
|
66
66
|
_TFLOpName.DIV: schema.BuiltinOperator.DIV,
|
67
67
|
_TFLOpName.BROADCAST_TO: schema.BuiltinOperator.BROADCAST_TO,
|
68
68
|
_TFLOpName.SQRT: schema.BuiltinOperator.SQRT,
|
69
|
+
_TFLOpName.GATHER: schema.BuiltinOperator.GATHER,
|
69
70
|
})
|
70
71
|
|
71
72
|
TFL_OP_CODE_TO_NAME = immutabledict.immutabledict(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ai-edge-quantizer-nightly
|
3
|
-
Version: 0.3.0.dev20250805
|
3
|
+
Version: 0.3.0.dev20250806
|
4
4
|
Summary: A quantizer for advanced developers to quantize converted AI Edge models.
|
5
5
|
Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
|
6
6
|
Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
|
@@ -1,18 +1,18 @@
|
|
1
1
|
ai_edge_quantizer/__init__.py,sha256=4pFSkukSwahYyzwqia0yPRyz8TnFQfGRthVJhYpMWas,793
|
2
|
-
ai_edge_quantizer/algorithm_manager.py,sha256=
|
2
|
+
ai_edge_quantizer/algorithm_manager.py,sha256=40dFnpjsFKeEYL6eJ_oPwciU-rZ6hPw-esrc3hifvx8,13050
|
3
3
|
ai_edge_quantizer/algorithm_manager_api.py,sha256=u903TG0s1uIDhJqfeJne3CFl8A93phZrwgV2-hwdcXU,9247
|
4
4
|
ai_edge_quantizer/algorithm_manager_api_test.py,sha256=w6bSONvXkX6bzXAGc0-7b6gNDt9oz9ieq97KP8Sg_JU,7666
|
5
5
|
ai_edge_quantizer/calibrator.py,sha256=Sms7_AIHPH9G5xFaz5Ef3a5gPhxuIWQI8d2LUM8C96I,12071
|
6
6
|
ai_edge_quantizer/calibrator_test.py,sha256=ejKc5YC7id8J1Ll9HAYCzMnKzxd0FUENSD06zkSSV0c,11900
|
7
7
|
ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
|
8
|
-
ai_edge_quantizer/default_policy.py,sha256=
|
8
|
+
ai_edge_quantizer/default_policy.py,sha256=qrEkZpe1OfzR2VvsQvzdsBzga1R5k6LyktarY9ETp7U,11470
|
9
9
|
ai_edge_quantizer/model_modifier.py,sha256=teGa8I6kGvn6TQY6Xv53YFIc_pQEhNvM9Zb4bvhezyw,7110
|
10
10
|
ai_edge_quantizer/model_modifier_test.py,sha256=cJd04SLOG-fQZZNZPcisoBLx3cLtWEwGqUBbLb-pif4,4751
|
11
11
|
ai_edge_quantizer/model_validator.py,sha256=Hj0_5o-Oa3dSlJ3ryVjRhvsyelHNyek1GrtG9buMczg,13153
|
12
12
|
ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
|
13
13
|
ai_edge_quantizer/params_generator.py,sha256=hcgMHJlERZERUyIAEi6AHJcLJ8gsKIBAEojzFFz-tqk,20098
|
14
14
|
ai_edge_quantizer/params_generator_test.py,sha256=RDYoRZDJfEZRtjlTAU2kZ_4t3JHOqEHxfJX9V4ETAhg,40597
|
15
|
-
ai_edge_quantizer/qtyping.py,sha256=
|
15
|
+
ai_edge_quantizer/qtyping.py,sha256=t7S5wTjg6VFmKYzeM_qymu36tT18rnkmY-4YOXMQxzc,16722
|
16
16
|
ai_edge_quantizer/quantizer.py,sha256=g3DMqFMrMpt9jQttCE0WcdNbMtk0JZnmN5MmCHrNdyM,13202
|
17
17
|
ai_edge_quantizer/quantizer_test.py,sha256=K_HBA56JkFI3HL8VLWCqGEfC0ISh5ldMKoNyBdGRAJg,20368
|
18
18
|
ai_edge_quantizer/recipe.py,sha256=FR0uJceumZrnle2VRSOQZ1uXup4S1cTYKRH-N53mWRo,2919
|
@@ -28,7 +28,7 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCP
|
|
28
28
|
ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
|
29
29
|
ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=EqIHGEZ1LgUrTN7zf880RuAzEv3Qy7kgh5ivObJGHSo,22646
|
30
30
|
ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
31
|
-
ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=
|
31
|
+
ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=Ip9qF_P1LsTuTo0P6XnmWOqnXwxo6IKkzbRuveDKL8Q,33803
|
32
32
|
ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=GGf_n3wIeg3GB_eGsmyNJ0fTcxgpeMMbugTMRONK6TQ,3553
|
33
33
|
ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=BDdn_uBZakfHyzdMJPKadsOqxqyC-s6W2ZzFH99L4fE,8652
|
34
34
|
ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=sT5eX5TLZEHTtPfnSkCPDlS0sQxlTFWbCsbvOuj--yY,8889
|
@@ -38,8 +38,8 @@ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=8
|
|
38
38
|
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=zoF_EHjYqsKkuev8wfuutIITEmp_maa70IpJI_Df3ck,7431
|
39
39
|
ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
|
40
40
|
ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
|
41
|
-
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=
|
42
|
-
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=
|
41
|
+
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=0d5XdNbjR2pVsAc-gWX3ik_pAIL-bZ-zemEz_jS2d0c,16531
|
42
|
+
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=7kHluzpteMv36hFD6LD_qnwwMoE1GKUP4bGmGMFbOdA,12755
|
43
43
|
ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
44
44
|
ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=QrEeCuvA7gY_vK1nbKtqassNDClyAjN1ClZIiw63k5U,35895
|
45
45
|
ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
|
@@ -62,14 +62,14 @@ ai_edge_quantizer/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V
|
|
62
62
|
ai_edge_quantizer/utils/calibration_utils.py,sha256=e3dG7Nm94Ix0hkTWTWPUhEG6a8QR_cAM3PSwblfJV5g,15106
|
63
63
|
ai_edge_quantizer/utils/calibration_utils_test.py,sha256=4BlksXl7b4yptL8xPR67hmJCnjhN9V10a2PunzfHrUE,9372
|
64
64
|
ai_edge_quantizer/utils/test_utils.py,sha256=a4Nk-wbeB09dFjTDZiA0K67d26j5DD0UDH_GIVmVG_4,8685
|
65
|
-
ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=
|
65
|
+
ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=TdbqU_BuXh6jLDpxQ9u9SJ70SsTl6l-Fzh_JoU8ru7o,11212
|
66
66
|
ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py,sha256=K1SbK8q92qYVtiVj0I0GtugsPTkpIpEKv9zakvFV_Sc,8555
|
67
67
|
ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOihexmizeJqt4SQcET9aA,14925
|
68
68
|
ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
|
69
69
|
ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
|
70
70
|
ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
|
71
|
-
ai_edge_quantizer_nightly-0.3.0.
|
72
|
-
ai_edge_quantizer_nightly-0.3.0.
|
73
|
-
ai_edge_quantizer_nightly-0.3.0.
|
74
|
-
ai_edge_quantizer_nightly-0.3.0.
|
75
|
-
ai_edge_quantizer_nightly-0.3.0.
|
71
|
+
ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
72
|
+
ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info/METADATA,sha256=xTO8cST-KD3qLcf9Ts-E51tmKAOQJscoUJ983f-owr0,1528
|
73
|
+
ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
74
|
+
ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
75
|
+
ai_edge_quantizer_nightly-0.3.0.dev20250806.dist-info/RECORD,,
|
File without changes
|
File without changes
|