ai-edge-quantizer-nightly 0.4.0.dev20251007__py3-none-any.whl → 0.4.0.dev20251009__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -119,6 +119,7 @@ def fix_quantization_params_rank(
       symmetric=quantization_params.symmetric,
       quantized_dimension=quantization_params.quantized_dimension,
       quantized_data=quantization_params.quantized_data,
+      block_size=quantization_params.block_size,
   )


@@ -209,13 +210,16 @@ def _broadcast_scale_zp_for_blockwise(
       ),
       tensor_content.shape,
   )
-  expanded_zp = np.reshape(
-      np.broadcast_to(
-          np.expand_dims(quant_params.zero_point, quantized_dim + 1),
-          expanded_tensor_shape,
-      ),
-      tensor_content.shape,
-  )
+  if quant_params.zero_point is None or quant_params.zero_point.size == 0:
+    expanded_zp = np.zeros_like(tensor_content, dtype=np.int32)
+  else:
+    expanded_zp = np.reshape(
+        np.broadcast_to(
+            np.expand_dims(quant_params.zero_point, quantized_dim + 1),
+            expanded_tensor_shape,
+        ),
+        tensor_content.shape,
+    )
   return qtyping.UniformQuantParams(
       scale=expanded_scale,
       zero_point=expanded_zp,
@@ -295,6 +299,26 @@ def uniform_dequantize(
   Returns:
     The dequantized tensor.
   """
+  if quantization_params.block_size != 0:
+    # b/443830202: The quantized dimension is currently increased by 1 because
+    # AEQ expects 1 and XNNPack expects 0.
+    quantization_params = dataclasses.replace(
+        quantization_params,
+        quantized_dimension=quantization_params.quantized_dimension + 1,
+    )
+    scale_shape = list(tensor_data.shape)
+    scale_shape[quantization_params.quantized_dimension] = (
+        scale_shape[quantization_params.quantized_dimension]
+        // quantization_params.block_size
+    )
+    quantization_params = dataclasses.replace(
+        quantization_params,
+        scale=quantization_params.scale.reshape(scale_shape),
+    )
+    quantization_params = _broadcast_scale_zp_for_blockwise(
+        tensor_data, quantization_params
+    )
+
   # quant params in flatbuffer is flattened, expand the rank to be the same
   # as the tensor rank to avoid ambiguous broadcasting.
   quantization_params = fix_quantization_params_rank(
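For intuition, the new block_size branch reshapes the flat per-block scales to match the blocked tensor and broadcasts each scale across its block before the regular dequantization path runs. Below is a minimal standalone numpy sketch of that idea (illustrative only, not the library code; the values match the test added further down):

import numpy as np

# Blockwise dequantization with block_size=2 along the last axis, assuming a
# symmetric scheme (zero point 0).
quantized = np.array([[-8, -5, -4, 7], [-4, 7, -8, -5]], dtype=np.int32)
block_size = 2
scales = np.full((2, 2), 1.2666667)  # one scale per block of two values

# Repeat each block scale across its block so it lines up with the tensor,
# mirroring the broadcast step performed by _broadcast_scale_zp_for_blockwise.
expanded_scales = np.repeat(scales, block_size, axis=1)
dequantized = quantized.astype(np.float32) * expanded_scales
# dequantized[0] is approximately [-10.1333, -6.3333, -5.0667, 8.8667]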
@@ -300,6 +300,30 @@ class TensorUtilsTest(parameterized.TestCase):
         ),
     )

+  def test_uniform_dequantize_blockwise(self):
+    quantized_tensor = np.array([[-8, -5, -4, 7], [-4, 7, -8, -5]])
+    expected_output_tensor = np.array([
+        [-10.1333336, -6.3333335, -5.0666668, 8.8666669],
+        [-5.0666668, 8.8666669, -10.1333336, -6.3333335],
+    ])
+    quant_params = qtyping.UniformQuantParams(
+        # b/443830202:
+        quantized_dimension=0,
+        num_bits=4,
+        scale=np.array([[[1.2666667, 1.2666667], [1.2666667, 1.2666667]]]),
+        zero_point=np.array([[0]]),
+        symmetric=True,
+        block_size=2,
+    )
+
+    dequantized_tensor = uniform_quantize_tensor.uniform_dequantize(
+        np.array(quantized_tensor), quant_params
+    )
+
+    self.assertSequenceAlmostEqual(
+        expected_output_tensor.flatten(), dequantized_tensor.flatten(), places=4
+    )
+
   @parameterized.parameters(
       (8, 8, True, True),
       (8, 4, False, True),
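For reference, the expected values in this test follow directly from the symmetric per-block scheme: each pair of values shares one scale of 1.2666667, so -8 dequantizes to -8 * 1.2666667 = -10.1333336 and 7 dequantizes to 7 * 1.2666667 = 8.8666669.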
@@ -219,6 +219,7 @@ class UniformQuantParams:
         scale=quant_params['scales'],
         zero_point=quant_params['zero_points'],
         symmetric=symmetric,
+        block_size=quant_params['block_size'],
     )

   def __eq__(self, other):
@@ -126,12 +126,16 @@ class Quantizer:
     float_model: TFLite model file path or bytearray.
     quantization_recipe: Quantization recipe .json filepath or in loaded json
       format.
+    previous_quantized_model: Optional previously quantized TFLite model file
+      path or bytearray. This is useful for validating a quantized model
+      without quantizing it again.
   """

   def __init__(
       self,
       float_model: Union[str, bytearray],
       quantization_recipe: Optional[Union[str, _QuantRecipe]] = None,
+      previous_quantized_model: Optional[Union[str, bytearray]] = None,
   ):
     """Initializes the quantizer.

@@ -139,6 +143,9 @@ class Quantizer:
       float_model: Path to the float tflite model.
       quantization_recipe: Quantization recipe in .json filepath or loaded json
         format.
+      previous_quantized_model: Path to an optional previously quantized tflite
+        model. This is useful for validating a quantized model without
+        quantizing it again.
     """
     # Use `float model` as bytes for memory efficiency.
     self.float_model: bytes = (
@@ -146,6 +153,14 @@ class Quantizer:
         if isinstance(float_model, str)
        else float_model
     )
+    if previous_quantized_model is not None:
+      self.previous_quantized_model: bytes = (
+          tfl_flatbuffer_utils.get_model_content(previous_quantized_model)
+          if isinstance(previous_quantized_model, str)
+          else previous_quantized_model
+      )
+    else:
+      self.previous_quantized_model = None

     self._recipe_manager: recipe_manager.RecipeManager = (
         recipe_manager.RecipeManager()
@@ -153,6 +168,7 @@ class Quantizer:
     if quantization_recipe is not None:
       self.load_quantization_recipe(quantization_recipe)
     self._result: QuantizationResult = QuantizationResult([{}], None)
+    self._quantize_called = False

   def load_quantization_recipe(self, recipe: Union[str, _QuantRecipe]) -> None:
     """Loads a quantization recipe.
@@ -399,7 +415,7 @@ class Quantizer:
     Raises:
       RuntimeError: If quantization recipe is empty.
     """
-
+    self._quantize_called = True
     if calibration_result is not None:
       self._ensure_model_qsv_sufficient(calibration_result)

@@ -445,9 +461,16 @@ class Quantizer:
       test_data = tfl_interpreter_utils.create_random_normal_input_data(
          self.float_model, num_samples=1
       )
+    if self._quantize_called:
+      quantized_model = self._result.quantized_model
+    else:
+      quantized_model = self.previous_quantized_model
+
+    if quantized_model is None:
+      raise ValueError('No quantized model available to validate.')
     return model_validator.compare_model(
         self.float_model,
-        self._result.quantized_model,
+        quantized_model,
         test_data,
         error_metrics,
         validation_utils.get_validation_func(error_metrics),
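The practical effect of this change is that validate() can now run against a model quantized in an earlier session, supplied via the new constructor argument. A hedged usage sketch (the .tflite file names are placeholders, not files from this package):

from ai_edge_quantizer import quantizer

# Sketch: validate a previously quantized model without re-quantizing.
qt = quantizer.Quantizer(
    float_model='model_float32.tflite',
    previous_quantized_model='model_already_quantized.tflite',
)
# quantize() is never called on this instance, so validate() falls back to
# previous_quantized_model instead of raising.
validation_result = qt.validate()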
@@ -337,6 +337,21 @@ class QuantizerTest(parameterized.TestCase):
         'sequential/dense_1/MatMul', validation_result.intermediate_tensors
     )

+  def test_validate_with_quantized_model_arg_succeeds(self):
+    self._quantizer.quantize()
+    quantized_model = self._quantizer._result.quantized_model
+    self.assertIsNotNone(quantized_model)
+
+    new_quantizer = quantizer.Quantizer(
+        self._test_model_path, previous_quantized_model=quantized_model
+    )
+    validation_result = new_quantizer.validate()
+    validation_result = validation_result.get_signature_comparison_result()
+    self.assertIsNotNone(validation_result)
+    self.assertIn(
+        'sequential/dense_1/MatMul', validation_result.intermediate_tensors
+    )
+
   def test_load_custom_policies_succeeds(self):

     test_op_config = qtyping.OpQuantizationConfig(
@@ -131,9 +131,14 @@ def _perform_blockwise_quantization(
       transformation_input.buffers,
   )
   blockwise_details.scales = scale_tensor_id
-  blockwise_details.blockSize = transformation_input.quant_params.block_size
+  # Blockwise quantization does not support zero point yet, so this points to
+  # a -1 buffer index.
   # TODO: b/404909258 - Add optional zero point to blockwise quantization.
+  blockwise_details.zeroPoints = -1
+  blockwise_details.blockSize = transformation_input.quant_params.block_size
   flatbuffer_quantization.details = blockwise_details
+  # TODO: b/443830202 - Hardcoding to 0 for now.
+  flatbuffer_quantization.quantizedDimension = 0
   return flatbuffer_quantization


@@ -170,7 +170,7 @@ class QuantizeTensorTest(parameterized.TestCase):
     # Check if the scale and zero point tensors are inserted correctly.
     self.assertEqual(quant_param.details.scales, 9)
     # So far we don't have zero point in blockwise quantization.
-    self.assertEqual(quant_param.details.zeroPoints, 0)
+    self.assertEqual(quant_param.details.zeroPoints, -1)

  def test_int4_constant_packed_correctly(self):
    subgraph = self._model.subgraphs[0]
@@ -102,7 +102,8 @@ def median_diff_ratio(


 def cosine_similarity(
-    data1: np._typing.ArrayLike, data2: np._typing.ArrayLike
+    data1: np._typing.ArrayLike,
+    data2: np._typing.ArrayLike,
 ) -> float:
   """Calculates the cosine similarity between data1 & data2.

@@ -123,9 +124,14 @@ def cosine_similarity(
   # special handling for tensor of size 0
   if data1.size == 0:
     return float(0)
-  return float(
-      np.dot(data1, data2) / (np.linalg.norm(data1) * np.linalg.norm(data2))
-  )
+  norm_data1 = np.linalg.norm(data1)
+  norm_data2 = np.linalg.norm(data2)
+  # special handling for tensor of length 0
+  if norm_data1 == 0 and norm_data2 == 0:
+    return 1.0
+  if norm_data1 == 0 or norm_data2 == 0:
+    return 0.0
+  return np.dot(data1, data2) / (norm_data1 * norm_data2)


 def _preprocess_same_size_arrays(
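In words: the previous implementation divided by the product of the norms unconditionally, so an all-zero tensor produced nan. The following standalone sketch is assumed equivalent in spirit to the patched function (not a copy of it) and shows the new zero-norm behavior:

import numpy as np

def cosine_similarity_sketch(data1, data2) -> float:
  # Standalone illustration of the zero-norm handling added above.
  data1 = np.asarray(data1, dtype=np.float64).flatten()
  data2 = np.asarray(data2, dtype=np.float64).flatten()
  if data1.size == 0:
    return 0.0
  norm1, norm2 = np.linalg.norm(data1), np.linalg.norm(data2)
  if norm1 == 0 and norm2 == 0:
    return 1.0  # two all-zero tensors are treated as a perfect match
  if norm1 == 0 or norm2 == 0:
    return 0.0  # only one all-zero tensor: no similarity
  return float(np.dot(data1, data2) / (norm1 * norm2))

cosine_similarity_sketch([0.0, 0.0], [0.0, 0.0])  # 1.0 instead of nan
cosine_similarity_sketch([0.0, 0.0], [1.0, 2.0])  # 0.0 instead of nan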
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-quantizer-nightly
-Version: 0.4.0.dev20251007
+Version: 0.4.0.dev20251009
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -12,9 +12,9 @@ ai_edge_quantizer/model_validator.py,sha256=Hj0_5o-Oa3dSlJ3ryVjRhvsyelHNyek1GrtG
 ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
 ai_edge_quantizer/params_generator.py,sha256=0w-sDGk84sVNkXoduon1wDqq30sGOHVgBVbdg44QVF4,20153
 ai_edge_quantizer/params_generator_test.py,sha256=RDYoRZDJfEZRtjlTAU2kZ_4t3JHOqEHxfJX9V4ETAhg,40597
-ai_edge_quantizer/qtyping.py,sha256=7aEMPA4qr4CGD3NXtZgG2fDoQX5NzK9jwSv1yWNqQV4,17149
-ai_edge_quantizer/quantizer.py,sha256=ckAEOnnBxuCKZuvlzdChevCKPuE-IeDPHCNtFTWr250,17857
-ai_edge_quantizer/quantizer_test.py,sha256=bh4IowxRF249p_XKIKQ0f17PmeDddfcOUzvQ2ht1L0E,26530
+ai_edge_quantizer/qtyping.py,sha256=rp2jdmCuSsP6Ay8rD7NxDCpbFkRNkbYP29Uwe0xBfnA,17196
+ai_edge_quantizer/quantizer.py,sha256=teYeONdIS31IAY6ubLujCRi1t6lYAd0LkC8dRPxQdbw,18919
+ai_edge_quantizer/quantizer_test.py,sha256=9BVwt7oyM8IsSC7jN73nI0O-4MikBkymm_FigJnSeCM,27117
 ai_edge_quantizer/recipe.py,sha256=MEkfQ2Sg3KAE9LAORHWcbjYNPg06EUbwc1d-VspQA2U,6461
 ai_edge_quantizer/recipe_manager.py,sha256=6l2uq8KL23KLu9OQDmPGkxrFiwHrdDB9xnn-ni8WdEM,15036
 ai_edge_quantizer/recipe_manager_test.py,sha256=qjgGUF-wggXnSXqZ5khmqrDMIQI5CShk52IVWTahq6s,36817
@@ -40,8 +40,8 @@ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1
 ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
 ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
 ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
-ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=so5pMeoclPdXV_5dDiqWaA_cZ0Ud-OWnXxRbzNh9x1E,18576
-ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=eBTi2I12e65_lxVZjGaN2TAiMzvsyyXAhWmEpKEmkLA,15126
+ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=0bLDAjCm5wxasGXKT3XiS4quk-zXlWK6JKb-14FQAd4,19570
+ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=0xOdoIWuEo9JlXvZdX_Gbq5lfwCwEcG7RwOxhXAFOOY,15939
 ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
 ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=4eAlGph6DDW18bUdoY0XcUoOXEr3P_3_W1ptidD8qK4,37611
 ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
@@ -58,8 +58,8 @@ ai_edge_quantizer/transformations/insert_hadamard_rotation.py,sha256=5D5WwrJCE6h
 ai_edge_quantizer/transformations/insert_hadamard_rotation_test.py,sha256=iV1p3nZfHUATV2YRoBOYurnu3pLy8n3aFppLWGQOPdA,7268
 ai_edge_quantizer/transformations/quant_insert.py,sha256=jn6HsJaV-sqBiFPY-Aqbd64t8zgcYVkEkZI375x_FWY,3958
 ai_edge_quantizer/transformations/quant_insert_test.py,sha256=X9ptPDvJCFkR5tejKnD1SlHFGPazQTW-wNNMV9MEAuw,10107
-ai_edge_quantizer/transformations/quantize_tensor.py,sha256=unqInO0we6_cgwPjtHB3tLWIHPajfNuJSLGW-IFnI9E,7029
-ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=mHLO3_MRt36A8-ZN8ADn5tBBJlqjTWa7ZUN8Mmu5Rcw,9116
+ai_edge_quantizer/transformations/quantize_tensor.py,sha256=VGTVpZWla9R-LPfhTzH1NVAp2soOqDF_duIm8ez_z3Y,7264
+ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=CD7OboBcIQxQY8OaRd5ISC1JcwQW726P_vneY4LKVpA,9117
 ai_edge_quantizer/transformations/transformation_utils.py,sha256=IKrtXJNH0msiTcI7KXkCYn2EkzmbZKWMMX_r5PMEx2U,8857
 ai_edge_quantizer/transformations/transformation_utils_test.py,sha256=MWgq29t7rvxRQIfi4ny9IoODFCTcbpjnIwoCL40zDKk,8698
 ai_edge_quantizer/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
@@ -72,10 +72,10 @@ ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=LN-WonrcJLP9bB4lULd5VIg_8
 ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py,sha256=K1SbK8q92qYVtiVj0I0GtugsPTkpIpEKv9zakvFV_Sc,8555
 ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOihexmizeJqt4SQcET9aA,14925
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
-ai_edge_quantizer/utils/validation_utils.py,sha256=yJH9Cvepr_XWn-3Hsh91j7HuC5iLQHAyskyQ48bGNoc,4797
+ai_edge_quantizer/utils/validation_utils.py,sha256=0sOdH4pzk_Pwh1r8O47iaECRng1Xn0ABn9GVc8UPNcY,4994
 ai_edge_quantizer/utils/validation_utils_test.py,sha256=1sblJWHLTYTbn1Qi9rwnrREOSXRy5KwHAWSwgI1e_aU,3697
-ai_edge_quantizer_nightly-0.4.0.dev20251007.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ai_edge_quantizer_nightly-0.4.0.dev20251007.dist-info/METADATA,sha256=RKb2qd_e3o9Vv-kxr5lF0ApMyfJEHfoVVaMwE24_ydw,1508
-ai_edge_quantizer_nightly-0.4.0.dev20251007.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_quantizer_nightly-0.4.0.dev20251007.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
-ai_edge_quantizer_nightly-0.4.0.dev20251007.dist-info/RECORD,,
+ai_edge_quantizer_nightly-0.4.0.dev20251009.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.4.0.dev20251009.dist-info/METADATA,sha256=4dOIX2nEFMfQ0kC7a4v9tfAHkpgqBFnr5W0fZyFWadY,1508
+ai_edge_quantizer_nightly-0.4.0.dev20251009.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_quantizer_nightly-0.4.0.dev20251009.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.4.0.dev20251009.dist-info/RECORD,,