ai-edge-quantizer-nightly 0.0.1.dev20250204__py3-none-any.whl → 0.0.1.dev20250205__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -281,7 +281,13 @@ class Calibrator:
281
281
  algorithm_name, op_key
282
282
  )
283
283
  op_info = qtyping.OpInfo(op, op_key, subgraph_op_id, op_quant_config)
284
- op_qsvs = qsv_init_func(op_info, graph_info)
284
+ # Ignore the input tensors where any dimension of the shape is 0.
285
+ inputs_to_ignore = [
286
+ idx
287
+ for idx in op.inputs
288
+ if not np.all(graph_info.subgraph_tensors[idx].shape)
289
+ ]
290
+ op_qsvs = qsv_init_func(op_info, graph_info, inputs_to_ignore)
285
291
  # Step3: initialize tensor qsvs.
286
292
  for tensor_name, qsv in op_qsvs.items():
287
293
  if tensor_name not in self._model_qsvs:
@@ -228,6 +228,18 @@ class CalibratorTest(googletest.TestCase):
228
228
  )
229
229
  self.assertLen(test_calibrator.get_cached_output(), 10)
230
230
 
231
+ def test_calibrate_reshape_with_empty_shape_success(self):
232
+ test_model_path = os.path.join(
233
+ TEST_DATA_PREFIX_PATH, "tests/models/reshape_with_empty_shape.tflite"
234
+ )
235
+ test_calibrator = calibrator.Calibrator(test_model_path)
236
+ _add_default_int8xint8_integer_recipe(self._recipe_manager)
237
+ calib_data = test_utils.create_random_normal_input_data(
238
+ test_model_path, num_samples=4
239
+ )
240
+ test_calibrator.calibrate(calib_data, self._recipe_manager)
241
+ self.assertNotEmpty(test_calibrator.get_model_qsvs())
242
+
231
243
 
232
244
  class CalibratorAlreadyQuantizedModelTest(googletest.TestCase):
233
245
 
@@ -120,6 +120,8 @@ class UniformQuantParams:
120
120
  zero_point: The zero point of the quantization.
121
121
  symmetric: Whether the quantization is symmetric (force zero_point to be 0).
122
122
  quantized_data: The quantized data.
123
+ block_size: The block size for blockwise quantization, block_size=0 meaning
124
+ no blockwise quantization.
123
125
  """
124
126
 
125
127
  num_bits: int
@@ -128,6 +130,7 @@ class UniformQuantParams:
128
130
  zero_point: np.ndarray
129
131
  symmetric: bool = True
130
132
  quantized_data: Optional[np.ndarray] = None
133
+ block_size: int = 0
131
134
 
132
135
  @classmethod
133
136
  def from_tfl_tensor_details(cls, tensor_detail) -> 'UniformQuantParams':
@@ -170,6 +173,7 @@ class UniformQuantParams:
170
173
  and np.array_equal(self.zero_point, other.zero_point)
171
174
  and self.symmetric == other.symmetric
172
175
  and _compare_array_or_none(self.quantized_data, other.quantized_data)
176
+ and self.block_size == other.block_size
173
177
  )
174
178
 
175
179
 
@@ -90,24 +90,99 @@ def _pack_data(bitwidth: int, flattened_data: np.ndarray) -> np.ndarray:
90
90
  return flattened_data
91
91
 
92
92
 
93
+ def _perform_channelwise_quantization(
94
+ transformation_input: transformation_utils.TransformationInput,
95
+ ) -> schema_py_generated.QuantizationParametersT():
96
+ """Perform channelwise quantization and fill the quantization parameters.
97
+
98
+ Args:
99
+ transformation_input: Input structure that contains all information needed
100
+ for the transformation.
101
+
102
+ Returns:
103
+ The quantization parameters.
104
+ """
105
+ assert isinstance(
106
+ transformation_input.quant_params, qtyping.UniformQuantParams
107
+ )
108
+ flatbuffer_quantization = schema_py_generated.QuantizationParametersT()
109
+ flatbuffer_quantization.scale = list(
110
+ transformation_input.quant_params.scale.flatten().astype(np.float32)
111
+ ) # Flatbuffer requires scale as list[float].
112
+ if transformation_input.quant_params.zero_point is not None:
113
+ flatbuffer_quantization.zeroPoint = list(
114
+ transformation_input.quant_params.zero_point.flatten().astype(np.int64)
115
+ ) # Flatbuffer requires zeroPoint as list[int64]
116
+ if transformation_input.quant_params.quantized_dimension is not None:
117
+ flatbuffer_quantization.quantizedDimension = (
118
+ transformation_input.quant_params.quantized_dimension
119
+ )
120
+
121
+ return flatbuffer_quantization
122
+
123
+
124
+ def _perform_blockwise_quantization(
125
+ transformation_input: transformation_utils.TransformationInput,
126
+ ) -> schema_py_generated.QuantizationParametersT():
127
+ """Perform blockwise quantization and fill the quantization parameters.
128
+
129
+ Args:
130
+ transformation_input: Input structure that contains all information needed
131
+ for the transformation.
132
+
133
+ Returns:
134
+ The quantization parameters.
135
+ """
136
+ assert isinstance(
137
+ transformation_input.quant_params, qtyping.UniformQuantParams
138
+ )
139
+ flatbuffer_quantization = schema_py_generated.QuantizationParametersT()
140
+ flatbuffer_quantization.detailsType = (
141
+ schema_py_generated.QuantizationDetails.BlockwiseQuantization
142
+ )
143
+ tensor = transformation_input.subgraph.tensors[transformation_input.tensor_id]
144
+ blockwise_details = schema_py_generated.BlockwiseQuantizationT()
145
+ scale_tensor_id = transformation_utils.add_new_constant_tensor(
146
+ tensor.name + b"_scale",
147
+ transformation_input.quant_params.scale,
148
+ schema_py_generated.TensorType.FLOAT16,
149
+ transformation_input.subgraph,
150
+ transformation_input.buffers,
151
+ )
152
+ blockwise_details.scale = scale_tensor_id
153
+ blockwise_details.blockSize = transformation_input.quant_params.block_size
154
+ # blockwise quantization allows optional zero point.
155
+ if transformation_input.quant_params.zero_point is not None:
156
+ zero_point_tensor_id = transformation_utils.add_new_constant_tensor(
157
+ tensor.name + b"_zero_point",
158
+ transformation_input.quant_params.zero_point,
159
+ schema_py_generated.TensorType.INT32,
160
+ transformation_input.subgraph,
161
+ transformation_input.buffers,
162
+ )
163
+ blockwise_details.zeroPoint = zero_point_tensor_id
164
+ flatbuffer_quantization.details = blockwise_details
165
+ return flatbuffer_quantization
166
+
167
+
93
168
  def quantize_tensor(
94
169
  transformation_input: transformation_utils.TransformationInput,
95
170
  ) -> qtyping.TransformationInfo:
96
171
  """Quantize the tensor at the tensor_id in the given subgraph.
97
172
 
98
173
  Args:
99
- transformation_input: input structure that contains all information needed
174
+ transformation_input: Input structure that contains all information needed
100
175
  for the transformation.
101
176
 
102
177
  Returns:
103
178
  TransformationInfo:
104
- op_id: the producer index for tensor
105
- num_ops_added: the total number of ops inserted by this operation, which
106
- is 0
179
+ op_id: The producer index for tensor.
180
+ num_ops_added: The total number of ops inserted by this operation, which
181
+ is 0.
107
182
  """
108
183
  tensor = transformation_input.subgraph.tensors[transformation_input.tensor_id]
109
- # TODO: b/336385820 - suppport quantize buffer directly when quantized_data
110
- # is not provided
184
+ # TODO: b/336385820 - Support quantize buffer directly when quantized_data
185
+ # is not provided.
111
186
  if tensor.buffer:
112
187
  if transformation_input.quant_params.quantized_data is not None:
113
188
  transformation_input.buffers[tensor.buffer].data = _pack_data(
@@ -121,29 +196,18 @@ def quantize_tensor(
121
196
  )
122
197
 
123
198
  if isinstance(transformation_input.quant_params, qtyping.UniformQuantParams):
124
- flatbuffer_quantization = schema_py_generated.QuantizationParametersT()
125
- flatbuffer_quantization.scale = list(
126
- transformation_input.quant_params.scale.flatten().astype(np.float32)
127
- ) # flatbuffer requires scale as list[float]
128
- flatbuffer_quantization.zeroPoint = list(
129
- transformation_input.quant_params.zero_point.flatten().astype(np.int64)
130
- ) # flatbuffer requires zeroPoint as list[int64]
131
- if transformation_input.quant_params.quantized_dimension is not None:
132
- flatbuffer_quantization.quantizedDimension = (
133
- transformation_input.quant_params.quantized_dimension
199
+ if transformation_input.quant_params.block_size == 0:
200
+ flatbuffer_quantization = _perform_channelwise_quantization(
201
+ transformation_input
202
+ )
203
+ else:
204
+ flatbuffer_quantization = _perform_blockwise_quantization(
205
+ transformation_input
134
206
  )
135
207
  tensor.quantization = flatbuffer_quantization
136
208
  tensor.type = quant_params_to_tflite_type(
137
209
  transformation_input.quant_params.num_bits
138
210
  )
139
-
140
- if isinstance(
141
- transformation_input.quant_params, qtyping.NonLinearQuantParams
142
- ):
143
- tensor.type = nonlinear_quant_params_to_tflite_type(
144
- transformation_input.quant_params.num_bits
145
- )
146
-
147
211
  if isinstance(
148
212
  transformation_input.quant_params, qtyping.NonLinearQuantParams
149
213
  ):
@@ -42,7 +42,7 @@ class QuantizeTensorTest(parameterized.TestCase):
42
42
  """test quantizing a constant tensor."""
43
43
  subgraph = self._model.subgraphs[0]
44
44
  model = self._model
45
- data = np.ones([1, 112, 112, 3], dtype=np.int8)
45
+ data = np.ones([1, 112, 112, 32], dtype=np.int8)
46
46
  ret = quantize_tensor.quantize_tensor(
47
47
  transformation_utils.TransformationInput(
48
48
  7,
@@ -135,6 +135,42 @@ class QuantizeTensorTest(parameterized.TestCase):
135
135
  subgraph.tensors[4].type, schema_py_generated.TensorType.FLOAT16
136
136
  )
137
137
 
138
+ def test_blockwise_quantization_with_zero_point(self):
139
+ """Test blockwise quantization with explicit zero point."""
140
+ subgraph = self._model.subgraphs[0]
141
+ model = self._model
142
+ tensor_wh = 112
143
+ test_tensor_id = 7
144
+ data = np.ones([1, tensor_wh, tensor_wh, 32]).astype(np.int8)
145
+ quantize_tensor.quantize_tensor(
146
+ transformation_utils.TransformationInput(
147
+ tensor_id=test_tensor_id,
148
+ op_codes=model.operatorCodes,
149
+ buffers=model.buffers,
150
+ subgraph=subgraph,
151
+ producer=1,
152
+ consumers=[3],
153
+ quant_params=qtyping.UniformQuantParams(
154
+ num_bits=8,
155
+ quantized_dimension=None,
156
+ scale=np.ones([1, tensor_wh, tensor_wh, 1]).astype(np.float16),
157
+ zero_point=np.zeros([1, tensor_wh, tensor_wh, 1]),
158
+ symmetric=True,
159
+ quantized_data=data,
160
+ block_size=32,
161
+ ),
162
+ )
163
+ )
164
+ quant_param = subgraph.tensors[test_tensor_id].quantization
165
+ self.assertEqual(
166
+ quant_param.detailsType,
167
+ schema_py_generated.QuantizationDetails.BlockwiseQuantization,
168
+ )
169
+ self.assertEqual(quant_param.details.blockSize, 32)
170
+ # Check if the scale and zero point tensors are inserted correctly.
171
+ self.assertEqual(quant_param.details.scale, 9)
172
+ self.assertEqual(quant_param.details.zeroPoint, 10)
173
+
138
174
  def test_int4_constant_packed_correctly(self):
139
175
  subgraph = self._model.subgraphs[0]
140
176
  model = self._model
@@ -188,9 +188,14 @@ def get_tensor_name_to_content_map(
188
188
  """
189
189
  tensors = {}
190
190
  for tensor_detail in tflite_interpreter.get_tensor_details(subgraph_index):
191
- # Don't return temporary, unnamed tensors
191
+ # Don't return temporary, unnamed tensors.
192
192
  if not tensor_detail["name"]:
193
193
  continue
194
+
195
+ # Don't return tensors where any dimension of the shape is 0.
196
+ if not np.all(tensor_detail["shape"]):
197
+ continue
198
+
194
199
  tensors[tensor_detail["name"]] = get_tensor_data(
195
200
  tflite_interpreter, tensor_detail, subgraph_index, dequantize
196
201
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ai-edge-quantizer-nightly
3
- Version: 0.0.1.dev20250204
3
+ Version: 0.0.1.dev20250205
4
4
  Summary: A quantizer for advanced developers to quantize converted AI Edge models.
5
5
  Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
6
6
  Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -2,8 +2,8 @@ ai_edge_quantizer/__init__.py,sha256=4pFSkukSwahYyzwqia0yPRyz8TnFQfGRthVJhYpMWas
2
2
  ai_edge_quantizer/algorithm_manager.py,sha256=9nd4Txfl2z-14rFHmL7vqSfnkAQeagCRKyCIQ7ru0_Y,5981
3
3
  ai_edge_quantizer/algorithm_manager_api.py,sha256=u903TG0s1uIDhJqfeJne3CFl8A93phZrwgV2-hwdcXU,9247
4
4
  ai_edge_quantizer/algorithm_manager_api_test.py,sha256=tL_ozYFTsOPX8qGcti0KTz37nVsCxf0SSG5C45SyT-g,7319
5
- ai_edge_quantizer/calibrator.py,sha256=0zAWrSpl_08u6BNLVgG_TQeNcT16wJ-oLeQgznziGoo,11079
6
- ai_edge_quantizer/calibrator_test.py,sha256=5DGvKWRRjjU3L5wZoN56AyOVljmxOitwhuBUp6GL_bU,11354
5
+ ai_edge_quantizer/calibrator.py,sha256=IqNMnKpZy24Fl_-94AIsp77KzqypMeyHkkxitMBxl58,11325
6
+ ai_edge_quantizer/calibrator_test.py,sha256=hQk61YUvw1X02CDVAddm2n6Dnyk9GWoDgSpO6nuSJiY,11889
7
7
  ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
8
8
  ai_edge_quantizer/default_policy.py,sha256=TQ9yY8jtrSpMsTBsTyKW6TY-voGH_psvwGZoFglAbiA,9079
9
9
  ai_edge_quantizer/model_modifier.py,sha256=Z8EYtrz4zhCFpzd1zVwl2AetVE3BGBf5OvB2DbVQuds,5850
@@ -12,7 +12,7 @@ ai_edge_quantizer/model_validator.py,sha256=oZk0b1qGczaEm5erJFm4SbwadDnl7DFhC0bX
12
12
  ai_edge_quantizer/model_validator_test.py,sha256=ctvVmMHvnmFbkG4o8Jaa6kXXRrGHzhYpNylgLSmOboA,12951
13
13
  ai_edge_quantizer/params_generator.py,sha256=FvBub5yM2q98k7wNLgEyRerf8sVIETvGbrFcXFPUPdA,13523
14
14
  ai_edge_quantizer/params_generator_test.py,sha256=d9JwR-yxNJgg1SW-m8sFFPkIRdhgsDwMpVKsBQFL0gg,37658
15
- ai_edge_quantizer/qtyping.py,sha256=bue_WfK05QTkQcoyVVWeIxh8LRVGhHMWruXk3cgpFpw,14577
15
+ ai_edge_quantizer/qtyping.py,sha256=eZNwNNjXf67OjIhTDGpmJe-4HuaohS6BYB8v7Tnq34A,14760
16
16
  ai_edge_quantizer/quantizer.py,sha256=Gny7WLuRibiIuDtcRn_g8RCD-zAm_fuDG7WmGq5dRx8,13238
17
17
  ai_edge_quantizer/quantizer_test.py,sha256=38oTMJwMmxwPDeqT3eaVbazjtuIUIzMQ3mJNKh_eNQY,20493
18
18
  ai_edge_quantizer/recipe.py,sha256=r5tJiUs-ihZFzeK_jP2sUIUgTqZsL5SWvbUokuIUPDo,2251
@@ -42,8 +42,8 @@ ai_edge_quantizer/transformations/emulated_subchannel.py,sha256=HVaRxoC8PCAvy3xe
42
42
  ai_edge_quantizer/transformations/emulated_subchannel_test.py,sha256=gZP6u9NdPXl7s19qB_Un8evou9ZZV6I9Gy0E1rdobHM,7722
43
43
  ai_edge_quantizer/transformations/quant_insert.py,sha256=jn6HsJaV-sqBiFPY-Aqbd64t8zgcYVkEkZI375x_FWY,3958
44
44
  ai_edge_quantizer/transformations/quant_insert_test.py,sha256=X9ptPDvJCFkR5tejKnD1SlHFGPazQTW-wNNMV9MEAuw,10107
45
- ai_edge_quantizer/transformations/quantize_tensor.py,sha256=KsJbvhoyBu3D1G5R4nkl54w0TbdYPyit6JfABwlvtbw,5437
46
- ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=xfbVNdMbvfJXQcl0vPtmyqKhifVxNZlhu_Xq7RLL2NI,7638
45
+ ai_edge_quantizer/transformations/quantize_tensor.py,sha256=6CyUFR7fGmzbS-mSuDlSSCJJGxY9X_WnCmEuKqL4LzQ,7864
46
+ ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=QnJmQ_-XN5X0oR57FoY9bWGTp7migf11psbdO9R2pLg,9050
47
47
  ai_edge_quantizer/transformations/transformation_utils.py,sha256=BaKy5LYWgqli62XGo3AGRDNtHjwpBNp5VF5XgFbfVmg,4298
48
48
  ai_edge_quantizer/transformations/transformation_utils_test.py,sha256=ks81nNvruOC88Tjdk3_qwku0V8p54p3gOqfObzNhWMM,5371
49
49
  ai_edge_quantizer/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
@@ -52,12 +52,12 @@ ai_edge_quantizer/utils/calibration_utils_test.py,sha256=Z-AcdTieesWFKyKBb08ZXm4
52
52
  ai_edge_quantizer/utils/test_utils.py,sha256=95BDAdjE4Zvd6JZ90fG8FE3wKWE-Lu0ZIE3hQ1B6adI,3616
53
53
  ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=F6_AkCSv35FAhJX2qel8VTARhGOVwaeo7_mqRZygrpA,10126
54
54
  ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py,sha256=AbyDxoM62k4ojD8gPdkWo--xe5hlX3t0kobQSA80kuk,7740
55
- ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=yta7b_VmhVZmntwHK27vqVnie3XRejN459P0uJHbpb8,10431
55
+ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=SM8H4i7Jq_nfdsJpImopHndNCJznlLr-6ptUbp5bVWA,10558
56
56
  ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=Op3JxtOqlrjzmYF18jnnstL1k9xiY9kKJ8S2vklKGkc,11327
57
57
  ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
58
58
  ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
59
- ai_edge_quantizer_nightly-0.0.1.dev20250204.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
60
- ai_edge_quantizer_nightly-0.0.1.dev20250204.dist-info/METADATA,sha256=8OfFupnLdT4RmcSu0nr8uDfNWQayk1KTb1hdMKqAEBc,1484
61
- ai_edge_quantizer_nightly-0.0.1.dev20250204.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
62
- ai_edge_quantizer_nightly-0.0.1.dev20250204.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
63
- ai_edge_quantizer_nightly-0.0.1.dev20250204.dist-info/RECORD,,
59
+ ai_edge_quantizer_nightly-0.0.1.dev20250205.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
60
+ ai_edge_quantizer_nightly-0.0.1.dev20250205.dist-info/METADATA,sha256=fy1r2mGtmXIX4XYgkpQePrnnTgMiuHw7c393xq_5OWI,1484
61
+ ai_edge_quantizer_nightly-0.0.1.dev20250205.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
62
+ ai_edge_quantizer_nightly-0.0.1.dev20250205.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
63
+ ai_edge_quantizer_nightly-0.0.1.dev20250205.dist-info/RECORD,,