ai-edge-quantizer-nightly 0.0.1.dev20241121__py3-none-any.whl → 0.0.1.dev20241123__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithm_manager.py +2 -0
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +17 -0
- ai_edge_quantizer/default_policy.py +4 -2
- ai_edge_quantizer/qtyping.py +1 -0
- ai_edge_quantizer/transformations/quantize_tensor.py +10 -10
- ai_edge_quantizer/transformations/quantize_tensor_test.py +23 -18
- ai_edge_quantizer/utils/tfl_flatbuffer_utils.py +1 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20241121.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20241123.dist-info}/METADATA +1 -1
- {ai_edge_quantizer_nightly-0.0.1.dev20241121.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20241123.dist-info}/RECORD +12 -12
- {ai_edge_quantizer_nightly-0.0.1.dev20241121.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20241123.dist-info}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20241121.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20241123.dist-info}/WHEEL +0 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20241121.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20241123.dist-info}/top_level.txt +0 -0
@@ -91,6 +91,7 @@ for op_name, materialize_func in zip(
|
|
91
91
|
_TFLOpName.LOGISTIC, # Sigmoid
|
92
92
|
_TFLOpName.SLICE,
|
93
93
|
_TFLOpName.SUM,
|
94
|
+
_TFLOpName.SELECT_V2,
|
94
95
|
),
|
95
96
|
(
|
96
97
|
naive_min_max_quantize.materialize_input,
|
@@ -118,6 +119,7 @@ for op_name, materialize_func in zip(
|
|
118
119
|
naive_min_max_quantize.materialize_softmax_and_logistic,
|
119
120
|
naive_min_max_quantize.materialize_slice,
|
120
121
|
naive_min_max_quantize.materialize_sum,
|
122
|
+
naive_min_max_quantize.materialize_select_v2,
|
121
123
|
),
|
122
124
|
):
|
123
125
|
register_quantized_op(
|
@@ -325,6 +325,23 @@ def materialize_slice(
|
|
325
325
|
)
|
326
326
|
|
327
327
|
|
328
|
+
def materialize_select_v2(
|
329
|
+
op_info: qtyping.OpInfo,
|
330
|
+
graph_info: qtyping.GraphInfo,
|
331
|
+
tensor_name_to_qsv: dict[str, Any],
|
332
|
+
) -> list[qtyping.TensorTransformationParams]:
|
333
|
+
"""Materialize tensors in tfl.select_v2."""
|
334
|
+
return utils.materialize_standard_op(
|
335
|
+
op_info,
|
336
|
+
graph_info,
|
337
|
+
tensor_name_to_qsv,
|
338
|
+
constraint=_OpQuantConstraint.SAME_AS_OUTPUT_SCALE,
|
339
|
+
inputs_to_ignore=[
|
340
|
+
0,
|
341
|
+
], # Condition tensor does not need to be quantized.
|
342
|
+
)
|
343
|
+
|
344
|
+
|
328
345
|
def materialize_sum(
|
329
346
|
op_info: qtyping.OpInfo,
|
330
347
|
graph_info: qtyping.GraphInfo,
|
@@ -166,7 +166,8 @@ DEFAULT_JSON_POLICY = """
|
|
166
166
|
"OUTPUT",
|
167
167
|
"SLICE",
|
168
168
|
"EMBEDDING_LOOKUP",
|
169
|
-
"SUM"
|
169
|
+
"SUM",
|
170
|
+
"SELECT_V2"
|
170
171
|
],
|
171
172
|
"static_wi8_ai8": [
|
172
173
|
"ADD",
|
@@ -193,7 +194,8 @@ DEFAULT_JSON_POLICY = """
|
|
193
194
|
"OUTPUT",
|
194
195
|
"SLICE",
|
195
196
|
"EMBEDDING_LOOKUP",
|
196
|
-
"SUM"
|
197
|
+
"SUM",
|
198
|
+
"SELECT_V2"
|
197
199
|
],
|
198
200
|
"static_wi4_ai8": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT"],
|
199
201
|
"static_wi4_ai16": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT"],
|
ai_edge_quantizer/qtyping.py
CHANGED
@@ -22,19 +22,19 @@ from ai_edge_quantizer.transformations import transformation_utils
|
|
22
22
|
from ai_edge_litert import schema_py_generated # pylint: disable=g-direct-tensorflow-import
|
23
23
|
|
24
24
|
|
25
|
-
# TODO: b/335014051 -
|
25
|
+
# TODO: b/335014051 - Support distinguishing INT, FLOAT & UINT, BFLOAT.
|
26
26
|
def quant_params_to_tflite_type(
|
27
27
|
bitwidth: int,
|
28
28
|
) -> Optional[schema_py_generated.TensorType]:
|
29
|
-
"""Given specifications from quant param return the corresponding
|
29
|
+
"""Given specifications from quant param return the corresponding TFLite dtype.
|
30
30
|
|
31
31
|
Args:
|
32
|
-
bitwidth:
|
32
|
+
bitwidth: Bit width from UniformQuantParams.
|
33
33
|
|
34
34
|
Returns:
|
35
|
-
|
35
|
+
The corresponding TFLite tensor type.
|
36
36
|
"""
|
37
|
-
if bitwidth
|
37
|
+
if bitwidth == 4:
|
38
38
|
return schema_py_generated.TensorType.INT4
|
39
39
|
elif bitwidth <= 8:
|
40
40
|
return schema_py_generated.TensorType.INT8
|
@@ -68,19 +68,19 @@ def nonlinear_quant_params_to_tflite_type(
|
|
68
68
|
|
69
69
|
|
70
70
|
def _pack_data(bitwidth: int, flattened_data: np.ndarray) -> np.ndarray:
|
71
|
-
"""Pack the data to the corresponding
|
71
|
+
"""Pack the data to the corresponding bit width.
|
72
72
|
|
73
|
-
If no packing is needed, the original data is
|
74
|
-
|
73
|
+
Currently only support 4 bits. If no packing is needed, the original data is
|
74
|
+
returned.
|
75
75
|
|
76
76
|
Args:
|
77
|
-
bitwidth:
|
77
|
+
bitwidth: Bit width from NonLinearQuantParams.
|
78
78
|
flattened_data: The data to be packed.
|
79
79
|
|
80
80
|
Returns:
|
81
81
|
Packed data.
|
82
82
|
"""
|
83
|
-
if bitwidth
|
83
|
+
if bitwidth == 4:
|
84
84
|
even_data = flattened_data[::2] & 0x0F
|
85
85
|
odd_data = np.left_shift(flattened_data[1::2], 4).astype(np.uint8)
|
86
86
|
if odd_data.shape[0] == even_data.shape[0] - 1:
|
@@ -18,6 +18,7 @@
|
|
18
18
|
import os
|
19
19
|
import numpy as np
|
20
20
|
from tensorflow.python.platform import googletest
|
21
|
+
from absl.testing import parameterized
|
21
22
|
from ai_edge_quantizer import qtyping
|
22
23
|
from ai_edge_quantizer.transformations import quantize_tensor
|
23
24
|
from ai_edge_quantizer.transformations import transformation_utils
|
@@ -28,7 +29,7 @@ from ai_edge_litert import schema_py_generated # pylint: disable=g-direct-tenso
|
|
28
29
|
TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile("..")
|
29
30
|
|
30
31
|
|
31
|
-
class QuantizeTensorTest(
|
32
|
+
class QuantizeTensorTest(parameterized.TestCase):
|
32
33
|
|
33
34
|
def setUp(self):
|
34
35
|
super().setUp()
|
@@ -179,40 +180,44 @@ class QuantizeTensorTest(googletest.TestCase):
|
|
179
180
|
np.testing.assert_array_equal(quant_param.zeroPoint, [1])
|
180
181
|
self.assertEqual(quant_param.quantizedDimension, 0)
|
181
182
|
|
182
|
-
|
183
|
+
@parameterized.named_parameters(
|
184
|
+
dict(
|
185
|
+
testcase_name="int5",
|
186
|
+
num_bits=5,
|
187
|
+
),
|
188
|
+
dict(
|
189
|
+
testcase_name="int2",
|
190
|
+
num_bits=2,
|
191
|
+
),
|
192
|
+
)
|
193
|
+
def test_int_constant_not_packed(self, num_bits):
|
183
194
|
subgraph = self._model.subgraphs[0]
|
184
195
|
model = self._model
|
185
|
-
|
186
|
-
|
187
|
-
0x0,
|
188
|
-
0x1,
|
189
|
-
0x2,
|
190
|
-
0x3,
|
191
|
-
0x4,
|
192
|
-
0x5,
|
193
|
-
0x6,
|
194
|
-
0x7,
|
195
|
-
],
|
196
|
-
dtype=np.int8,
|
197
|
-
)
|
196
|
+
tensor_id = 7
|
197
|
+
data = np.array([0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7], dtype=np.int8)
|
198
198
|
expected = np.array([0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7])
|
199
199
|
ret = quantize_tensor.quantize_tensor(
|
200
200
|
transformation_utils.TransformationInput(
|
201
|
-
tensor_id=
|
201
|
+
tensor_id=tensor_id,
|
202
202
|
op_codes=model.operatorCodes,
|
203
203
|
buffers=model.buffers,
|
204
204
|
subgraph=subgraph,
|
205
205
|
producer=-1,
|
206
206
|
consumers=[4],
|
207
207
|
quant_params=qtyping.UniformQuantParams(
|
208
|
-
|
208
|
+
num_bits=num_bits,
|
209
|
+
quantized_dimension=None,
|
210
|
+
scale=np.ones(1),
|
211
|
+
zero_point=np.ones(1),
|
212
|
+
symmetric=True,
|
213
|
+
quantized_data=data,
|
209
214
|
),
|
210
215
|
)
|
211
216
|
)
|
212
217
|
self.assertEqual(ret.op_id, 0)
|
213
218
|
self.assertEqual(ret.num_ops_added, 0)
|
214
219
|
np.testing.assert_array_equal(model.buffers[8].data, expected)
|
215
|
-
quant_param = subgraph.tensors[
|
220
|
+
quant_param = subgraph.tensors[tensor_id].quantization
|
216
221
|
np.testing.assert_array_equal(quant_param.scale, [1])
|
217
222
|
np.testing.assert_array_equal(quant_param.zeroPoint, [1])
|
218
223
|
self.assertEqual(quant_param.quantizedDimension, 0)
|
@@ -61,6 +61,7 @@ TFL_OP_NAME_TO_CODE = immutabledict.immutabledict({
|
|
61
61
|
_TFLOpName.LOGISTIC: schema_py_generated.BuiltinOperator.LOGISTIC,
|
62
62
|
_TFLOpName.SLICE: schema_py_generated.BuiltinOperator.SLICE,
|
63
63
|
_TFLOpName.SUM: schema_py_generated.BuiltinOperator.SUM,
|
64
|
+
_TFLOpName.SELECT_V2: schema_py_generated.BuiltinOperator.SELECT_V2,
|
64
65
|
})
|
65
66
|
|
66
67
|
TFL_OP_CODE_TO_NAME = immutabledict.immutabledict(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ai-edge-quantizer-nightly
|
3
|
-
Version: 0.0.1.
|
3
|
+
Version: 0.0.1.dev20241123
|
4
4
|
Summary: A quantizer for advanced developers to quantize converted AI Edge models.
|
5
5
|
Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
|
6
6
|
Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
|
@@ -1,18 +1,18 @@
|
|
1
1
|
ai_edge_quantizer/__init__.py,sha256=4pFSkukSwahYyzwqia0yPRyz8TnFQfGRthVJhYpMWas,793
|
2
|
-
ai_edge_quantizer/algorithm_manager.py,sha256=
|
2
|
+
ai_edge_quantizer/algorithm_manager.py,sha256=9nd4Txfl2z-14rFHmL7vqSfnkAQeagCRKyCIQ7ru0_Y,5981
|
3
3
|
ai_edge_quantizer/algorithm_manager_api.py,sha256=u903TG0s1uIDhJqfeJne3CFl8A93phZrwgV2-hwdcXU,9247
|
4
4
|
ai_edge_quantizer/algorithm_manager_api_test.py,sha256=tL_ozYFTsOPX8qGcti0KTz37nVsCxf0SSG5C45SyT-g,7319
|
5
5
|
ai_edge_quantizer/calibrator.py,sha256=BSu0DPzVhAgFFA0JsHZtawPFlr0YPirRxItuA9SerNg,11007
|
6
6
|
ai_edge_quantizer/calibrator_test.py,sha256=5DGvKWRRjjU3L5wZoN56AyOVljmxOitwhuBUp6GL_bU,11354
|
7
7
|
ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
|
8
|
-
ai_edge_quantizer/default_policy.py,sha256=
|
8
|
+
ai_edge_quantizer/default_policy.py,sha256=TQ9yY8jtrSpMsTBsTyKW6TY-voGH_psvwGZoFglAbiA,9079
|
9
9
|
ai_edge_quantizer/model_modifier.py,sha256=Z8EYtrz4zhCFpzd1zVwl2AetVE3BGBf5OvB2DbVQuds,5850
|
10
10
|
ai_edge_quantizer/model_modifier_test.py,sha256=cJd04SLOG-fQZZNZPcisoBLx3cLtWEwGqUBbLb-pif4,4751
|
11
11
|
ai_edge_quantizer/model_validator.py,sha256=QvlG1TewSBo9FMwzDYPFGqR4mOa_Xhn21wi2OFAvbCI,12593
|
12
12
|
ai_edge_quantizer/model_validator_test.py,sha256=ctvVmMHvnmFbkG4o8Jaa6kXXRrGHzhYpNylgLSmOboA,12951
|
13
13
|
ai_edge_quantizer/params_generator.py,sha256=FvBub5yM2q98k7wNLgEyRerf8sVIETvGbrFcXFPUPdA,13523
|
14
14
|
ai_edge_quantizer/params_generator_test.py,sha256=d9JwR-yxNJgg1SW-m8sFFPkIRdhgsDwMpVKsBQFL0gg,37658
|
15
|
-
ai_edge_quantizer/qtyping.py,sha256=
|
15
|
+
ai_edge_quantizer/qtyping.py,sha256=bue_WfK05QTkQcoyVVWeIxh8LRVGhHMWruXk3cgpFpw,14577
|
16
16
|
ai_edge_quantizer/quantizer.py,sha256=OYfSo06JcoursXbJBRfHQbR2-Pa4sHnZB2n9od9OzEY,13039
|
17
17
|
ai_edge_quantizer/quantizer_test.py,sha256=38oTMJwMmxwPDeqT3eaVbazjtuIUIzMQ3mJNKh_eNQY,20493
|
18
18
|
ai_edge_quantizer/recipe.py,sha256=r5tJiUs-ihZFzeK_jP2sUIUgTqZsL5SWvbUokuIUPDo,2251
|
@@ -28,7 +28,7 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCP
|
|
28
28
|
ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
|
29
29
|
ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=s64eDDH9bmRWy6Bl1peHnhGewLnFJjvnhYOdjo1zYOA,22625
|
30
30
|
ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
31
|
-
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=
|
31
|
+
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=oU4EhsDhWoipR0M28yILGdRcs2duWQBU5RpDyGiAGHk,21065
|
32
32
|
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=MfN2JFogBVsPIJ_Fdqf3ot4vn-WLIjcnyL2oBnmrwiI,6739
|
33
33
|
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=B-s1KMfb9tqvaDhHJV-M2zRR078z5Mwv-P9h77S3Mis,12229
|
34
34
|
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=WZ4_bvbG999nOtCIqn7mrMnpRdoJOdiyzxhsL_QiPHA,11395
|
@@ -42,22 +42,22 @@ ai_edge_quantizer/transformations/emulated_subchannel.py,sha256=HVaRxoC8PCAvy3xe
|
|
42
42
|
ai_edge_quantizer/transformations/emulated_subchannel_test.py,sha256=gZP6u9NdPXl7s19qB_Un8evou9ZZV6I9Gy0E1rdobHM,7722
|
43
43
|
ai_edge_quantizer/transformations/quant_insert.py,sha256=jn6HsJaV-sqBiFPY-Aqbd64t8zgcYVkEkZI375x_FWY,3958
|
44
44
|
ai_edge_quantizer/transformations/quant_insert_test.py,sha256=X9ptPDvJCFkR5tejKnD1SlHFGPazQTW-wNNMV9MEAuw,10107
|
45
|
-
ai_edge_quantizer/transformations/quantize_tensor.py,sha256=
|
46
|
-
ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=
|
45
|
+
ai_edge_quantizer/transformations/quantize_tensor.py,sha256=KsJbvhoyBu3D1G5R4nkl54w0TbdYPyit6JfABwlvtbw,5437
|
46
|
+
ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=xfbVNdMbvfJXQcl0vPtmyqKhifVxNZlhu_Xq7RLL2NI,7638
|
47
47
|
ai_edge_quantizer/transformations/transformation_utils.py,sha256=BaKy5LYWgqli62XGo3AGRDNtHjwpBNp5VF5XgFbfVmg,4298
|
48
48
|
ai_edge_quantizer/transformations/transformation_utils_test.py,sha256=ks81nNvruOC88Tjdk3_qwku0V8p54p3gOqfObzNhWMM,5371
|
49
49
|
ai_edge_quantizer/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
50
50
|
ai_edge_quantizer/utils/calibration_utils.py,sha256=1Fj9MIO6aLZIRgyd4axvZN4S_O64nB_-Miu1WP664js,2536
|
51
51
|
ai_edge_quantizer/utils/calibration_utils_test.py,sha256=Z-AcdTieesWFKyKBb08ZXm4Mgu6cvJ4bg2-MJ7hLD10,2856
|
52
52
|
ai_edge_quantizer/utils/test_utils.py,sha256=95BDAdjE4Zvd6JZ90fG8FE3wKWE-Lu0ZIE3hQ1B6adI,3616
|
53
|
-
ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=
|
53
|
+
ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=F6_AkCSv35FAhJX2qel8VTARhGOVwaeo7_mqRZygrpA,10126
|
54
54
|
ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py,sha256=AbyDxoM62k4ojD8gPdkWo--xe5hlX3t0kobQSA80kuk,7740
|
55
55
|
ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=GzrsaL3fkOXN5iPRJv7lqhNISY6lnrBVTotWDHzI5m8,10344
|
56
56
|
ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=Op3JxtOqlrjzmYF18jnnstL1k9xiY9kKJ8S2vklKGkc,11327
|
57
57
|
ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
|
58
58
|
ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
|
59
|
-
ai_edge_quantizer_nightly-0.0.1.
|
60
|
-
ai_edge_quantizer_nightly-0.0.1.
|
61
|
-
ai_edge_quantizer_nightly-0.0.1.
|
62
|
-
ai_edge_quantizer_nightly-0.0.1.
|
63
|
-
ai_edge_quantizer_nightly-0.0.1.
|
59
|
+
ai_edge_quantizer_nightly-0.0.1.dev20241123.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
60
|
+
ai_edge_quantizer_nightly-0.0.1.dev20241123.dist-info/METADATA,sha256=kbiV3ZINS_65wIAUbDdCofe67N4ofeY241iKW0nhmH0,1484
|
61
|
+
ai_edge_quantizer_nightly-0.0.1.dev20241123.dist-info/WHEEL,sha256=bFJAMchF8aTQGUgMZzHJyDDMPTO3ToJ7x23SLJa1SVo,92
|
62
|
+
ai_edge_quantizer_nightly-0.0.1.dev20241123.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
63
|
+
ai_edge_quantizer_nightly-0.0.1.dev20241123.dist-info/RECORD,,
|
File without changes
|
File without changes
|