ai-edge-quantizer-nightly 0.4.0.dev20250827__py3-none-any.whl → 0.4.0.dev20250829__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

ai_edge_quantizer/default_policy.py
@@ -266,7 +266,11 @@ DEFAULT_JSON_POLICY = """
   }
 }
 """
-QUANTIZABLE_COMPOSITES = ["od" + "ml.npu_call", "od" + "ml.rms_norm"]
+QUANTIZABLE_COMPOSITES = [
+    "od" + "ml.npu_call",
+    "od" + "ml.rms_norm",
+    "od" + "ml.l2_norm",
+]
 
 
 def _unroll_json_config(
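
For readability only (restated here, not part of the package contents): the concatenated string literals in the new list evaluate to the following composite op names, with "odml.l2_norm" being the addition in this release.

    ["odml.npu_call", "odml.rms_norm", "odml.l2_norm"]
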
ai_edge_quantizer/utils/calibration_utils.py
@@ -20,13 +20,11 @@ from typing import Any, Union
 
 import numpy as np
 
-from ai_edge_quantizer import algorithm_manager
 from ai_edge_quantizer import qtyping
-from ai_edge_quantizer.algorithms.uniform_quantize import common_quantize
 from ai_edge_quantizer.algorithms.utils import common_utils
+from ai_edge_quantizer.utils import constrained_ops_utils
 from ai_edge_quantizer.utils import tfl_flatbuffer_utils
 from ai_edge_quantizer.utils import tfl_interpreter_utils
-from ai_edge_litert import schema_py_generated as schema_fb  # pylint: disable=g-direct-tensorflow-import
 from tensorflow.lite.tools import flatbuffer_utils  # pylint: disable=g-direct-tensorflow-import
 
 
@@ -133,7 +131,11 @@ class CalibrationQsvAlignmentUtils:
   """
 
   def __init__(self, model_path: str):
-    self._same_as_input_scale_ops = []
+    self._same_as_input_scale_ops = (
+        constrained_ops_utils.get_constrained_op_list(
+            _OpQuantConstraint.SAME_AS_INPUT_SCALE
+        )
+    )
 
     tfl_interpreter = tfl_interpreter_utils.create_tfl_interpreter(model_path)
     self._flatbuffer_object = tfl_flatbuffer_utils.read_model(model_path)
@@ -146,87 +148,6 @@ class CalibrationQsvAlignmentUtils:
       signature_runner = tfl_interpreter.get_signature_runner(signature_key)
       self._signature_runners[signature_key] = signature_runner
 
-    # Make a list of `SAME_AS_INPUT_SCALE` operators. This is used to identify
-    # the operators that need to be constrained to the same scale as the input.
-    self._build_same_as_input_scale_op_list()
-
-  def _build_same_as_input_scale_op_list(self, verbose: bool = False):
-    """Constructs a list of SAME_AS_INPUT_SCALE operators.
-
-    This is achieved by invoking all materialization functions and extracting
-    the constraint argument, using monkey patching to redirect logic to wrapper
-    functions.
-
-    Args:
-      verbose: Flag to enable verbose output.
-    """
-
-    def materialize_standard_op_wrapper(
-        op_info: qtyping.OpInfo,
-        *_args,
-        constraint: _OpQuantConstraint = _OpQuantConstraint.NO_CONSTRAIN,
-        **_kwargs,
-    ) -> list[qtyping.TensorTransformationParams]:
-      if constraint == _OpQuantConstraint.SAME_AS_INPUT_SCALE:
-        self._same_as_input_scale_ops.append(op_info.op_name)
-      # Return dummy values to avoid exceptions.
-      dummy_value = [qtyping.TensorTransformationParams("")] * 2
-      return dummy_value
-
-    # Dummy implementation of the `_are_weights_too_small` function to support
-    # `materialize_standard_op_wrapper` above.
-    def are_weights_too_small_wrapper(*_args, **_kwargs) -> bool:
-      return False
-
-    # Dummy implementation of the `_materialize_bias_for_conv_ops` function to
-    # support `materialize_standard_op_wrapper` above.
-    def materialize_bias_for_conv_ops_wrapper(*_args, **_kwargs):
-      return
-
-    # Do monkey patch to intercept the `materialize_standard_op` function to
-    # support `materialize_standard_op_wrapper` above.
-    original_materialize_standard_op = common_utils.materialize_standard_op
-    original_are_weights_too_small = common_quantize._are_weights_too_small  # pylint: disable=protected-access
-    original_materialize_bias_for_conv_ops = (
-        common_quantize._materialize_bias_for_conv_ops  # pylint: disable=protected-access
-    )
-    common_utils.materialize_standard_op = materialize_standard_op_wrapper
-    common_quantize._are_weights_too_small = are_weights_too_small_wrapper  # pylint: disable=protected-access
-    common_quantize._materialize_bias_for_conv_ops = (  # pylint: disable=protected-access
-        materialize_bias_for_conv_ops_wrapper
-    )
-    minmax_func_dict = algorithm_manager.MIN_MAX_OP_NAME_MATERIALIZE_FUNC_DICT
-
-    # Loop over all available materialization functions to build up a list of
-    # `SAME_AS_INPUT_SCALE` constrained ops.
-    for op, materialize_fn in minmax_func_dict.items():
-      # Create a dummy op info to trigger the materialization.
-      mock_op = schema_fb.OperatorT()
-      mock_op.inputs = [0]
-      mock_op.outputs = [0]
-      op_info = qtyping.OpInfo(
-          op=mock_op,
-          op_name=op,
-          subgraph_op_index=0,
-          op_quant_config=qtyping.OpQuantizationConfig(),
-      )
-      materialize_fn(
-          get_tensor_quant_params_fn=None,
-          op_info=op_info,
-          graph_info=None,
-          tensor_name_to_qsv=None,
-      )
-
-    if verbose:
-      print(f" Constrained op list: {self._same_as_input_scale_ops}")
-
-    # Restore the original functions.
-    common_utils.materialize_standard_op = original_materialize_standard_op
-    common_quantize._are_weights_too_small = original_are_weights_too_small  # pylint: disable=protected-access
-    common_quantize._materialize_bias_for_conv_ops = (  # pylint: disable=protected-access
-        original_materialize_bias_for_conv_ops
-    )
-
   def _search_tensor_by_signature_name(
       self, signature_key: str, signature_input_output_name: str, verbose=False
   ) -> list[str]:

ai_edge_quantizer/utils/constrained_ops_utils.py (new file)
@@ -0,0 +1,111 @@
+# Copyright 2024 The AI Edge Quantizer Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Utils for handling operators with quantization constraints."""
+
+from ai_edge_quantizer import algorithm_manager
+from ai_edge_quantizer import qtyping
+from ai_edge_quantizer.algorithms.uniform_quantize import common_quantize
+from ai_edge_quantizer.algorithms.utils import common_utils
+from ai_edge_litert import schema_py_generated as schema_fb  # pylint: disable=g-direct-tensorflow-import
+
+
+_OpQuantConstraint = common_utils.OpQuantConstraint
+
+
+def get_constrained_op_list(
+    quant_constraint: _OpQuantConstraint, verbose: bool = False
+) -> list[str]:
+  """Constructs and returns a list of constrained operators.
+
+  This is achieved by invoking all materialization functions and extracting
+  the constraint argument, using monkey patching to redirect logic to wrapper
+  functions.
+
+  Args:
+    quant_constraint: The quantization constraint to filter operators by.
+    verbose: Flag to enable verbose output.
+
+  Returns:
+    A list containing operators with the specified constraint.
+  """
+  constrained_ops = []
+
+  def materialize_standard_op_wrapper(
+      op_info: qtyping.OpInfo,
+      *_args,
+      constraint: _OpQuantConstraint = _OpQuantConstraint.NO_CONSTRAIN,
+      **_kwargs,
+  ) -> list[qtyping.TensorTransformationParams]:
+    if constraint == quant_constraint:
+      constrained_ops.append(op_info.op_name)
+    # Return dummy values to avoid exceptions.
+    dummy_value = [qtyping.TensorTransformationParams("")] * 2
+    return dummy_value
+
+  # Dummy implementation of the `_are_weights_too_small` function to support
+  # `materialize_standard_op_wrapper` above.
+  def are_weights_too_small_wrapper(*_args, **_kwargs) -> bool:
+    return False
+
+  # Dummy implementation of the `_materialize_bias_for_conv_ops` function to
+  # support `materialize_standard_op_wrapper` above.
+  def materialize_bias_for_conv_ops_wrapper(*_args, **_kwargs):
+    return
+
+  # Do monkey patch to intercept the `materialize_standard_op` function to
+  # support `materialize_standard_op_wrapper` above.
+  original_materialize_standard_op = common_utils.materialize_standard_op
+  original_are_weights_too_small = common_quantize._are_weights_too_small  # pylint: disable=protected-access
+  original_materialize_bias_for_conv_ops = (
+      common_quantize._materialize_bias_for_conv_ops  # pylint: disable=protected-access
+  )
+  common_utils.materialize_standard_op = materialize_standard_op_wrapper
+  common_quantize._are_weights_too_small = are_weights_too_small_wrapper  # pylint: disable=protected-access
+  common_quantize._materialize_bias_for_conv_ops = (  # pylint: disable=protected-access
+      materialize_bias_for_conv_ops_wrapper
+  )
+  minmax_func_dict = algorithm_manager.MIN_MAX_OP_NAME_MATERIALIZE_FUNC_DICT
+
+  # Loop over all available materialization functions to build up a list of
+  # ops with the given constraint.
+  for op, materialize_fn in minmax_func_dict.items():
+    # Create a dummy op info to trigger the materialization.
+    mock_op = schema_fb.OperatorT()
+    mock_op.inputs = [0]
+    mock_op.outputs = [0]
+    op_info = qtyping.OpInfo(
+        op=mock_op,
+        op_name=op,
+        subgraph_op_index=0,
+        op_quant_config=qtyping.OpQuantizationConfig(),
+    )
+    materialize_fn(
+        get_tensor_quant_params_fn=None,
+        op_info=op_info,
+        graph_info=None,
+        tensor_name_to_qsv=None,
+    )
+
+  if verbose:
+    print(f" {quant_constraint} op list: {constrained_ops}")
+
+  # Restore the original functions.
+  common_utils.materialize_standard_op = original_materialize_standard_op
+  common_quantize._are_weights_too_small = original_are_weights_too_small  # pylint: disable=protected-access
+  common_quantize._materialize_bias_for_conv_ops = (  # pylint: disable=protected-access
+      original_materialize_bias_for_conv_ops
+  )
+  return constrained_ops
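
For context, a minimal usage sketch of the new helper (not part of the wheel; it mirrors the call that calibration_utils.py now makes in CalibrationQsvAlignmentUtils.__init__ and assumes the package is installed):

    from ai_edge_quantizer.algorithms.utils import common_utils
    from ai_edge_quantizer.utils import constrained_ops_utils

    # Collect every op whose materialization function declares the
    # SAME_AS_INPUT_SCALE constraint.
    same_as_input_scale_ops = constrained_ops_utils.get_constrained_op_list(
        common_utils.OpQuantConstraint.SAME_AS_INPUT_SCALE
    )
    # The accompanying test below expects 14 op names for this constraint.
    print(same_as_input_scale_ops)
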
ai_edge_quantizer/utils/constrained_ops_utils_test.py (new file)
@@ -0,0 +1,50 @@
+# Copyright 2024 The AI Edge Quantizer Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from tensorflow.python.platform import googletest
+from absl.testing import parameterized
+from ai_edge_quantizer.algorithms.utils import common_utils
+from ai_edge_quantizer.utils import constrained_ops_utils
+
+
+_OpQuantConstraint = common_utils.OpQuantConstraint
+
+
+class ConstrainedOpsUtilsTest(parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      dict(
+          testcase_name="same_as_input_scale",
+          constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
+          expected_num_ops=14,
+      ),
+      dict(
+          testcase_name="same_as_output_scale",
+          constraint=_OpQuantConstraint.SAME_AS_OUTPUT_SCALE,
+          expected_num_ops=6,
+      ),
+      dict(
+          testcase_name="no_constrain",
+          constraint=_OpQuantConstraint.NO_CONSTRAIN,
+          expected_num_ops=22,
+      ),
+  )
+  def test_get_constrained_op_list(self, constraint, expected_num_ops):
+    constrained_ops = constrained_ops_utils.get_constrained_op_list(constraint)
+    self.assertLen(constrained_ops, expected_num_ops)
+
+
+if __name__ == "__main__":
+  googletest.main()

ai_edge_quantizer_nightly-0.4.0.dev20250829.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-quantizer-nightly
-Version: 0.4.0.dev20250827
+Version: 0.4.0.dev20250829
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI

ai_edge_quantizer_nightly-0.4.0.dev20250829.dist-info/RECORD
@@ -5,7 +5,7 @@ ai_edge_quantizer/algorithm_manager_api_test.py,sha256=w6bSONvXkX6bzXAGc0-7b6gND
 ai_edge_quantizer/calibrator.py,sha256=Sms7_AIHPH9G5xFaz5Ef3a5gPhxuIWQI8d2LUM8C96I,12071
 ai_edge_quantizer/calibrator_test.py,sha256=ZLzIMWB2FSFU4TOatDioYuwp_kLh8iSCefZ5_Q9FU7s,11900
 ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
-ai_edge_quantizer/default_policy.py,sha256=LXEdwdr0SiCfWo6ZwbHQ8ykoqA40GV6fGAT1aofry3o,11556
+ai_edge_quantizer/default_policy.py,sha256=G_JZtZaQAnrWyfCusDWXwO27iLysk27RS91GlS61m_Q,11592
 ai_edge_quantizer/model_modifier.py,sha256=teGa8I6kGvn6TQY6Xv53YFIc_pQEhNvM9Zb4bvhezyw,7110
 ai_edge_quantizer/model_modifier_test.py,sha256=cJd04SLOG-fQZZNZPcisoBLx3cLtWEwGqUBbLb-pif4,4751
 ai_edge_quantizer/model_validator.py,sha256=Hj0_5o-Oa3dSlJ3ryVjRhvsyelHNyek1GrtG9buMczg,13153
@@ -59,8 +59,10 @@ ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=mHLO3_MRt36A8-Z
 ai_edge_quantizer/transformations/transformation_utils.py,sha256=efJdAkA24wlg6Vj5NFO7_7MDuvQLSNn-l11Vs_JPktI,7123
 ai_edge_quantizer/transformations/transformation_utils_test.py,sha256=MWgq29t7rvxRQIfi4ny9IoODFCTcbpjnIwoCL40zDKk,8698
 ai_edge_quantizer/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
-ai_edge_quantizer/utils/calibration_utils.py,sha256=e3dG7Nm94Ix0hkTWTWPUhEG6a8QR_cAM3PSwblfJV5g,15106
+ai_edge_quantizer/utils/calibration_utils.py,sha256=iMf_bSCf-O86MzDt5D9hLKqbTydqLwirluaC6BJ9yHo,11553
 ai_edge_quantizer/utils/calibration_utils_test.py,sha256=4BlksXl7b4yptL8xPR67hmJCnjhN9V10a2PunzfHrUE,9372
+ai_edge_quantizer/utils/constrained_ops_utils.py,sha256=EAITCf7Ku_PFZcw3K-wd-8hGbyuRd5W5UtNdGvalwAE,4478
+ai_edge_quantizer/utils/constrained_ops_utils_test.py,sha256=6k_AqfB-NmiLkW5WwEV5NSuswFWky2sL0xBGmV6Fdwk,1756
 ai_edge_quantizer/utils/test_utils.py,sha256=a4Nk-wbeB09dFjTDZiA0K67d26j5DD0UDH_GIVmVG_4,8685
 ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=RL6oq6FzZj-xV0Zgh0UBn7-fOQaRXSxZ-PPG_LmtyUY,11384
 ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py,sha256=K1SbK8q92qYVtiVj0I0GtugsPTkpIpEKv9zakvFV_Sc,8555
@@ -68,8 +70,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
 ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
 ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
-ai_edge_quantizer_nightly-0.4.0.dev20250827.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ai_edge_quantizer_nightly-0.4.0.dev20250827.dist-info/METADATA,sha256=aGdUBHwbY4eeSd9c7pQyQLu2D5LoOEDXmKmM06XN-yI,1535
-ai_edge_quantizer_nightly-0.4.0.dev20250827.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_quantizer_nightly-0.4.0.dev20250827.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
-ai_edge_quantizer_nightly-0.4.0.dev20250827.dist-info/RECORD,,
+ai_edge_quantizer_nightly-0.4.0.dev20250829.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.4.0.dev20250829.dist-info/METADATA,sha256=fg5k0J7zQJc0ufSBvuidEZKz57iydiIhRI4teV-7AZI,1535
+ai_edge_quantizer_nightly-0.4.0.dev20250829.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_quantizer_nightly-0.4.0.dev20250829.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.4.0.dev20250829.dist-info/RECORD,,