ai-edge-quantizer-nightly 0.0.1.dev20241119__py3-none-any.whl → 0.0.1.dev20241125__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -90,6 +90,8 @@ for op_name, materialize_func in zip(
90
90
  _TFLOpName.SPLIT,
91
91
  _TFLOpName.LOGISTIC, # Sigmoid
92
92
  _TFLOpName.SLICE,
93
+ _TFLOpName.SUM,
94
+ _TFLOpName.SELECT_V2,
93
95
  ),
94
96
  (
95
97
  naive_min_max_quantize.materialize_input,
@@ -116,6 +118,8 @@ for op_name, materialize_func in zip(
116
118
  naive_min_max_quantize.materialize_split,
117
119
  naive_min_max_quantize.materialize_softmax_and_logistic,
118
120
  naive_min_max_quantize.materialize_slice,
121
+ naive_min_max_quantize.materialize_sum,
122
+ naive_min_max_quantize.materialize_select_v2,
119
123
  ),
120
124
  ):
121
125
  register_quantized_op(
@@ -325,6 +325,38 @@ def materialize_slice(
325
325
  )
326
326
 
327
327
 
328
+ def materialize_select_v2(
329
+ op_info: qtyping.OpInfo,
330
+ graph_info: qtyping.GraphInfo,
331
+ tensor_name_to_qsv: dict[str, Any],
332
+ ) -> list[qtyping.TensorTransformationParams]:
333
+ """Materialize tensors in tfl.select_v2."""
334
+ return utils.materialize_standard_op(
335
+ op_info,
336
+ graph_info,
337
+ tensor_name_to_qsv,
338
+ constraint=_OpQuantConstraint.SAME_AS_OUTPUT_SCALE,
339
+ inputs_to_ignore=[
340
+ 0,
341
+ ], # Condition tensor does not need to be quantized.
342
+ )
343
+
344
+
345
+ def materialize_sum(
346
+ op_info: qtyping.OpInfo,
347
+ graph_info: qtyping.GraphInfo,
348
+ tensor_name_to_qsv: dict[str, Any],
349
+ ) -> list[qtyping.TensorTransformationParams]:
350
+ """Materialize tensors in tfl.sum."""
351
+ return utils.materialize_standard_op(
352
+ op_info,
353
+ graph_info,
354
+ tensor_name_to_qsv,
355
+ constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
356
+ inputs_to_ignore=[1], # Axis index does not need to be quantized.
357
+ )
358
+
359
+
328
360
  def materialize_fc_conv(
329
361
  op_info: qtyping.OpInfo,
330
362
  graph_info: qtyping.GraphInfo,
@@ -290,7 +290,7 @@ class CalibratorToyGemma2Test(googletest.TestCase):
290
290
  self._toy_gemma2_calibration_dataset,
291
291
  model_recipe_manager=recipe_mngr,
292
292
  )
293
- self.assertLen(calib.get_model_qsvs(), 274)
293
+ self.assertLen(calib.get_model_qsvs(), 282)
294
294
 
295
295
 
296
296
  if __name__ == "__main__":
@@ -165,7 +165,9 @@ DEFAULT_JSON_POLICY = """
165
165
  "INPUT",
166
166
  "OUTPUT",
167
167
  "SLICE",
168
- "EMBEDDING_LOOKUP"
168
+ "EMBEDDING_LOOKUP",
169
+ "SUM",
170
+ "SELECT_V2"
169
171
  ],
170
172
  "static_wi8_ai8": [
171
173
  "ADD",
@@ -191,7 +193,9 @@ DEFAULT_JSON_POLICY = """
191
193
  "INPUT",
192
194
  "OUTPUT",
193
195
  "SLICE",
194
- "EMBEDDING_LOOKUP"
196
+ "EMBEDDING_LOOKUP",
197
+ "SUM",
198
+ "SELECT_V2"
195
199
  ],
196
200
  "static_wi4_ai8": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT"],
197
201
  "static_wi4_ai16": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT"],
@@ -58,6 +58,8 @@ class TFLOperationName(str, enum.Enum):
58
58
  SPLIT = 'SPLIT'
59
59
  LOGISTIC = 'LOGISTIC'
60
60
  SLICE = 'SLICE'
61
+ SUM = 'SUM'
62
+ SELECT_V2 = 'SELECT_V2'
61
63
 
62
64
 
63
65
  class QuantizeMode(enum.Enum):
@@ -22,19 +22,19 @@ from ai_edge_quantizer.transformations import transformation_utils
22
22
  from ai_edge_litert import schema_py_generated # pylint: disable=g-direct-tensorflow-import
23
23
 
24
24
 
25
- # TODO: b/335014051 - support distinguishing INT, FLOAT & UINT, BFLOAT
25
+ # TODO: b/335014051 - Support distinguishing INT, FLOAT & UINT, BFLOAT.
26
26
  def quant_params_to_tflite_type(
27
27
  bitwidth: int,
28
28
  ) -> Optional[schema_py_generated.TensorType]:
29
- """Given specifications from quant param return the corresponding tflite dtype.
29
+ """Given specifications from quant param return the corresponding TFLite dtype.
30
30
 
31
31
  Args:
32
- bitwidth: bitwidth from UniformQuantParams
32
+ bitwidth: Bit width from UniformQuantParams.
33
33
 
34
34
  Returns:
35
- the corresponding tflite tensortype
35
+ The corresponding TFLite tensor type.
36
36
  """
37
- if bitwidth <= 4:
37
+ if bitwidth == 4:
38
38
  return schema_py_generated.TensorType.INT4
39
39
  elif bitwidth <= 8:
40
40
  return schema_py_generated.TensorType.INT8
@@ -68,19 +68,19 @@ def nonlinear_quant_params_to_tflite_type(
68
68
 
69
69
 
70
70
  def _pack_data(bitwidth: int, flattened_data: np.ndarray) -> np.ndarray:
71
- """Pack the data to the corresponding bitwidth.
71
+ """Pack the data to the corresponding bit width.
72
72
 
73
- If no packing is needed, the original data is returned. Any bitwidth equal or
74
- less than 4 bits will be packed to 4 bits.
73
+ Currently only support 4 bits. If no packing is needed, the original data is
74
+ returned.
75
75
 
76
76
  Args:
77
- bitwidth: Bitwidth from NonLinearQuantParams.
77
+ bitwidth: Bit width from NonLinearQuantParams.
78
78
  flattened_data: The data to be packed.
79
79
 
80
80
  Returns:
81
81
  Packed data.
82
82
  """
83
- if bitwidth <= 4:
83
+ if bitwidth == 4:
84
84
  even_data = flattened_data[::2] & 0x0F
85
85
  odd_data = np.left_shift(flattened_data[1::2], 4).astype(np.uint8)
86
86
  if odd_data.shape[0] == even_data.shape[0] - 1:
@@ -18,6 +18,7 @@
18
18
  import os
19
19
  import numpy as np
20
20
  from tensorflow.python.platform import googletest
21
+ from absl.testing import parameterized
21
22
  from ai_edge_quantizer import qtyping
22
23
  from ai_edge_quantizer.transformations import quantize_tensor
23
24
  from ai_edge_quantizer.transformations import transformation_utils
@@ -28,7 +29,7 @@ from ai_edge_litert import schema_py_generated # pylint: disable=g-direct-tenso
28
29
  TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile("..")
29
30
 
30
31
 
31
- class QuantizeTensorTest(googletest.TestCase):
32
+ class QuantizeTensorTest(parameterized.TestCase):
32
33
 
33
34
  def setUp(self):
34
35
  super().setUp()
@@ -179,40 +180,44 @@ class QuantizeTensorTest(googletest.TestCase):
179
180
  np.testing.assert_array_equal(quant_param.zeroPoint, [1])
180
181
  self.assertEqual(quant_param.quantizedDimension, 0)
181
182
 
182
- def test_int5_constant_not_packed(self):
183
+ @parameterized.named_parameters(
184
+ dict(
185
+ testcase_name="int5",
186
+ num_bits=5,
187
+ ),
188
+ dict(
189
+ testcase_name="int2",
190
+ num_bits=2,
191
+ ),
192
+ )
193
+ def test_int_constant_not_packed(self, num_bits):
183
194
  subgraph = self._model.subgraphs[0]
184
195
  model = self._model
185
- data = np.array(
186
- [
187
- 0x0,
188
- 0x1,
189
- 0x2,
190
- 0x3,
191
- 0x4,
192
- 0x5,
193
- 0x6,
194
- 0x7,
195
- ],
196
- dtype=np.int8,
197
- )
196
+ tensor_id = 7
197
+ data = np.array([0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7], dtype=np.int8)
198
198
  expected = np.array([0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7])
199
199
  ret = quantize_tensor.quantize_tensor(
200
200
  transformation_utils.TransformationInput(
201
- tensor_id=7,
201
+ tensor_id=tensor_id,
202
202
  op_codes=model.operatorCodes,
203
203
  buffers=model.buffers,
204
204
  subgraph=subgraph,
205
205
  producer=-1,
206
206
  consumers=[4],
207
207
  quant_params=qtyping.UniformQuantParams(
208
- 5, None, np.ones(1), np.ones(1), True, data
208
+ num_bits=num_bits,
209
+ quantized_dimension=None,
210
+ scale=np.ones(1),
211
+ zero_point=np.ones(1),
212
+ symmetric=True,
213
+ quantized_data=data,
209
214
  ),
210
215
  )
211
216
  )
212
217
  self.assertEqual(ret.op_id, 0)
213
218
  self.assertEqual(ret.num_ops_added, 0)
214
219
  np.testing.assert_array_equal(model.buffers[8].data, expected)
215
- quant_param = subgraph.tensors[7].quantization
220
+ quant_param = subgraph.tensors[tensor_id].quantization
216
221
  np.testing.assert_array_equal(quant_param.scale, [1])
217
222
  np.testing.assert_array_equal(quant_param.zeroPoint, [1])
218
223
  self.assertEqual(quant_param.quantizedDimension, 0)
@@ -60,6 +60,8 @@ TFL_OP_NAME_TO_CODE = immutabledict.immutabledict({
60
60
  _TFLOpName.SPLIT: schema_py_generated.BuiltinOperator.SPLIT,
61
61
  _TFLOpName.LOGISTIC: schema_py_generated.BuiltinOperator.LOGISTIC,
62
62
  _TFLOpName.SLICE: schema_py_generated.BuiltinOperator.SLICE,
63
+ _TFLOpName.SUM: schema_py_generated.BuiltinOperator.SUM,
64
+ _TFLOpName.SELECT_V2: schema_py_generated.BuiltinOperator.SELECT_V2,
63
65
  })
64
66
 
65
67
  TFL_OP_CODE_TO_NAME = immutabledict.immutabledict(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ai-edge-quantizer-nightly
3
- Version: 0.0.1.dev20241119
3
+ Version: 0.0.1.dev20241125
4
4
  Summary: A quantizer for advanced developers to quantize converted AI Edge models.
5
5
  Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
6
6
  Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -1,18 +1,18 @@
1
1
  ai_edge_quantizer/__init__.py,sha256=4pFSkukSwahYyzwqia0yPRyz8TnFQfGRthVJhYpMWas,793
2
- ai_edge_quantizer/algorithm_manager.py,sha256=PiRvUH6CY4EzPBLJhMyYR5u7_pMYvg0eOGTupdfyxTA,5825
2
+ ai_edge_quantizer/algorithm_manager.py,sha256=9nd4Txfl2z-14rFHmL7vqSfnkAQeagCRKyCIQ7ru0_Y,5981
3
3
  ai_edge_quantizer/algorithm_manager_api.py,sha256=u903TG0s1uIDhJqfeJne3CFl8A93phZrwgV2-hwdcXU,9247
4
4
  ai_edge_quantizer/algorithm_manager_api_test.py,sha256=tL_ozYFTsOPX8qGcti0KTz37nVsCxf0SSG5C45SyT-g,7319
5
5
  ai_edge_quantizer/calibrator.py,sha256=BSu0DPzVhAgFFA0JsHZtawPFlr0YPirRxItuA9SerNg,11007
6
- ai_edge_quantizer/calibrator_test.py,sha256=U7aiy9COsuDMV0xT5le1PxON8eP8lUeiF_mzkPL9k9c,11354
6
+ ai_edge_quantizer/calibrator_test.py,sha256=5DGvKWRRjjU3L5wZoN56AyOVljmxOitwhuBUp6GL_bU,11354
7
7
  ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
8
- ai_edge_quantizer/default_policy.py,sha256=MYURzrGnXaGOjckAFc5cIKcW3bF8bDrHQYRUpkL2-oI,9015
8
+ ai_edge_quantizer/default_policy.py,sha256=TQ9yY8jtrSpMsTBsTyKW6TY-voGH_psvwGZoFglAbiA,9079
9
9
  ai_edge_quantizer/model_modifier.py,sha256=Z8EYtrz4zhCFpzd1zVwl2AetVE3BGBf5OvB2DbVQuds,5850
10
10
  ai_edge_quantizer/model_modifier_test.py,sha256=cJd04SLOG-fQZZNZPcisoBLx3cLtWEwGqUBbLb-pif4,4751
11
11
  ai_edge_quantizer/model_validator.py,sha256=QvlG1TewSBo9FMwzDYPFGqR4mOa_Xhn21wi2OFAvbCI,12593
12
12
  ai_edge_quantizer/model_validator_test.py,sha256=ctvVmMHvnmFbkG4o8Jaa6kXXRrGHzhYpNylgLSmOboA,12951
13
13
  ai_edge_quantizer/params_generator.py,sha256=FvBub5yM2q98k7wNLgEyRerf8sVIETvGbrFcXFPUPdA,13523
14
14
  ai_edge_quantizer/params_generator_test.py,sha256=d9JwR-yxNJgg1SW-m8sFFPkIRdhgsDwMpVKsBQFL0gg,37658
15
- ai_edge_quantizer/qtyping.py,sha256=L2-G6k1cZh3uxAkjJ2zmqF5Q0TyZB7vm0i7LGGeBFdk,14537
15
+ ai_edge_quantizer/qtyping.py,sha256=bue_WfK05QTkQcoyVVWeIxh8LRVGhHMWruXk3cgpFpw,14577
16
16
  ai_edge_quantizer/quantizer.py,sha256=OYfSo06JcoursXbJBRfHQbR2-Pa4sHnZB2n9od9OzEY,13039
17
17
  ai_edge_quantizer/quantizer_test.py,sha256=38oTMJwMmxwPDeqT3eaVbazjtuIUIzMQ3mJNKh_eNQY,20493
18
18
  ai_edge_quantizer/recipe.py,sha256=r5tJiUs-ihZFzeK_jP2sUIUgTqZsL5SWvbUokuIUPDo,2251
@@ -28,7 +28,7 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCP
28
28
  ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
29
29
  ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=s64eDDH9bmRWy6Bl1peHnhGewLnFJjvnhYOdjo1zYOA,22625
30
30
  ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
31
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=Oc_B0KSNoPj1SPkXW5drxN4zZMDoVAC5UeuLsHilgb4,20133
31
+ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=oU4EhsDhWoipR0M28yILGdRcs2duWQBU5RpDyGiAGHk,21065
32
32
  ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=MfN2JFogBVsPIJ_Fdqf3ot4vn-WLIjcnyL2oBnmrwiI,6739
33
33
  ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=B-s1KMfb9tqvaDhHJV-M2zRR078z5Mwv-P9h77S3Mis,12229
34
34
  ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=WZ4_bvbG999nOtCIqn7mrMnpRdoJOdiyzxhsL_QiPHA,11395
@@ -42,22 +42,22 @@ ai_edge_quantizer/transformations/emulated_subchannel.py,sha256=HVaRxoC8PCAvy3xe
42
42
  ai_edge_quantizer/transformations/emulated_subchannel_test.py,sha256=gZP6u9NdPXl7s19qB_Un8evou9ZZV6I9Gy0E1rdobHM,7722
43
43
  ai_edge_quantizer/transformations/quant_insert.py,sha256=jn6HsJaV-sqBiFPY-Aqbd64t8zgcYVkEkZI375x_FWY,3958
44
44
  ai_edge_quantizer/transformations/quant_insert_test.py,sha256=X9ptPDvJCFkR5tejKnD1SlHFGPazQTW-wNNMV9MEAuw,10107
45
- ai_edge_quantizer/transformations/quantize_tensor.py,sha256=6lLJHA0G7tf9nrydnSPnWj1rYRN17dH-x5aCkEy3YDQ,5464
46
- ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=QAyV3IrvCc9puIWdDz-iONNCuKob7ZejgTbSvT5K3YA,7335
45
+ ai_edge_quantizer/transformations/quantize_tensor.py,sha256=KsJbvhoyBu3D1G5R4nkl54w0TbdYPyit6JfABwlvtbw,5437
46
+ ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=xfbVNdMbvfJXQcl0vPtmyqKhifVxNZlhu_Xq7RLL2NI,7638
47
47
  ai_edge_quantizer/transformations/transformation_utils.py,sha256=BaKy5LYWgqli62XGo3AGRDNtHjwpBNp5VF5XgFbfVmg,4298
48
48
  ai_edge_quantizer/transformations/transformation_utils_test.py,sha256=ks81nNvruOC88Tjdk3_qwku0V8p54p3gOqfObzNhWMM,5371
49
49
  ai_edge_quantizer/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
50
50
  ai_edge_quantizer/utils/calibration_utils.py,sha256=1Fj9MIO6aLZIRgyd4axvZN4S_O64nB_-Miu1WP664js,2536
51
51
  ai_edge_quantizer/utils/calibration_utils_test.py,sha256=Z-AcdTieesWFKyKBb08ZXm4Mgu6cvJ4bg2-MJ7hLD10,2856
52
52
  ai_edge_quantizer/utils/test_utils.py,sha256=95BDAdjE4Zvd6JZ90fG8FE3wKWE-Lu0ZIE3hQ1B6adI,3616
53
- ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=2aSyXNeS1V0gCbw72GVJplj-0qiLbKVJLAymc3j-t2k,9992
53
+ ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=F6_AkCSv35FAhJX2qel8VTARhGOVwaeo7_mqRZygrpA,10126
54
54
  ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py,sha256=AbyDxoM62k4ojD8gPdkWo--xe5hlX3t0kobQSA80kuk,7740
55
55
  ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=GzrsaL3fkOXN5iPRJv7lqhNISY6lnrBVTotWDHzI5m8,10344
56
56
  ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=Op3JxtOqlrjzmYF18jnnstL1k9xiY9kKJ8S2vklKGkc,11327
57
57
  ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
58
58
  ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
59
- ai_edge_quantizer_nightly-0.0.1.dev20241119.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
60
- ai_edge_quantizer_nightly-0.0.1.dev20241119.dist-info/METADATA,sha256=YA9hk0ihn6J3h_tew_SjSEOlLN-GOI1C-xDEPSp1e5s,1484
61
- ai_edge_quantizer_nightly-0.0.1.dev20241119.dist-info/WHEEL,sha256=bFJAMchF8aTQGUgMZzHJyDDMPTO3ToJ7x23SLJa1SVo,92
62
- ai_edge_quantizer_nightly-0.0.1.dev20241119.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
63
- ai_edge_quantizer_nightly-0.0.1.dev20241119.dist-info/RECORD,,
59
+ ai_edge_quantizer_nightly-0.0.1.dev20241125.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
60
+ ai_edge_quantizer_nightly-0.0.1.dev20241125.dist-info/METADATA,sha256=24moeOFxxWCBOchvK6RDViIhwKbRdvcLL68HOZrbbrQ,1484
61
+ ai_edge_quantizer_nightly-0.0.1.dev20241125.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
62
+ ai_edge_quantizer_nightly-0.0.1.dev20241125.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
63
+ ai_edge_quantizer_nightly-0.0.1.dev20241125.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.45.0)
2
+ Generator: bdist_wheel (0.45.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5