ai-edge-quantizer-nightly 0.4.0.dev20251007__py3-none-any.whl → 0.4.0.dev20251008__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in the supported public registries; it is provided for informational purposes only.
@@ -119,6 +119,7 @@ def fix_quantization_params_rank(
       symmetric=quantization_params.symmetric,
       quantized_dimension=quantization_params.quantized_dimension,
       quantized_data=quantization_params.quantized_data,
+      block_size=quantization_params.block_size,
   )
 
 
@@ -209,13 +210,16 @@ def _broadcast_scale_zp_for_blockwise(
       ),
       tensor_content.shape,
   )
-  expanded_zp = np.reshape(
-      np.broadcast_to(
-          np.expand_dims(quant_params.zero_point, quantized_dim + 1),
-          expanded_tensor_shape,
-      ),
-      tensor_content.shape,
-  )
+  if quant_params.zero_point is None or quant_params.zero_point.size == 0:
+    expanded_zp = np.zeros_like(tensor_content, dtype=np.int32)
+  else:
+    expanded_zp = np.reshape(
+        np.broadcast_to(
+            np.expand_dims(quant_params.zero_point, quantized_dim + 1),
+            expanded_tensor_shape,
+        ),
+        tensor_content.shape,
+    )
   return qtyping.UniformQuantParams(
       scale=expanded_scale,
       zero_point=expanded_zp,
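For symmetric blockwise weights no zero points are serialized, so the new branch substitutes an all-zero zero-point array of the tensor's shape, which leaves the dequantized values unchanged ((q - 0) * scale = q * scale). A minimal numpy sketch of that fallback, using an illustrative helper name (expand_zero_point is not the library's API):

import numpy as np

def expand_zero_point(zero_point, tensor_content):
  # Illustration only: when no zero points are stored (None or empty),
  # behave as if every zero point were 0.
  if zero_point is None or zero_point.size == 0:
    return np.zeros_like(tensor_content, dtype=np.int32)
  # Otherwise the stored per-block zero points are broadcast to the
  # tensor's shape, mirroring how the scales are expanded.
  return np.broadcast_to(zero_point, tensor_content.shape)

print(expand_zero_point(np.array([]), np.ones((2, 4))))  # all zeros, shape (2, 4)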
@@ -295,6 +299,26 @@ def uniform_dequantize(
   Returns:
     The dequantized tensor.
   """
+  if quantization_params.block_size != 0:
+    # b/443830202: The quantized dimension is currently increased by 1 because
+    # AEQ expects 1 and XNNPack expects 0.
+    quantization_params = dataclasses.replace(
+        quantization_params,
+        quantized_dimension=quantization_params.quantized_dimension + 1,
+    )
+    scale_shape = list(tensor_data.shape)
+    scale_shape[quantization_params.quantized_dimension] = (
+        scale_shape[quantization_params.quantized_dimension]
+        // quantization_params.block_size
+    )
+    quantization_params = dataclasses.replace(
+        quantization_params,
+        scale=quantization_params.scale.reshape(scale_shape),
+    )
+    quantization_params = _broadcast_scale_zp_for_blockwise(
+        tensor_data, quantization_params
+    )
+
   # quant params in flatbuffer is flattened, expand the rank to be the same
   # as the tensor rank to avoid ambiguous broadcasting.
   quantization_params = fix_quantization_params_rank(
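Taken together, the new prologue lets uniform_dequantize handle blockwise parameters: the flat per-block scales are reshaped so they line up with the blocked dimension, each scale is broadcast across its block, and the usual (q - zero_point) * scale arithmetic follows. The same computation can be reproduced with plain numpy; the sketch below assumes symmetric quantization (zero point 0) and is not the library's implementation, but its output matches the test_uniform_dequantize_blockwise case added in the next hunk:

import numpy as np

def blockwise_dequantize_sketch(q, scales, block_size, blocked_dim=1):
  # Illustration: dequantize q block by block along `blocked_dim`.
  q = np.asarray(q, dtype=np.float32)
  num_blocks = q.shape[blocked_dim] // block_size
  # One scale per block along the blocked dimension.
  scale_shape = list(q.shape)
  scale_shape[blocked_dim] = num_blocks
  scales = np.asarray(scales, dtype=np.float32).reshape(scale_shape)
  # Repeat each block's scale across its block, then dequantize symmetrically.
  scales = np.repeat(scales, block_size, axis=blocked_dim)
  return q * scales

q = np.array([[-8, -5, -4, 7], [-4, 7, -8, -5]])
scales = np.full((2, 2), 1.2666667)  # 2 rows x 2 blocks of 2 values each
print(blockwise_dequantize_sketch(q, scales, block_size=2))
# [[-10.133  -6.333  -5.067   8.867]
#  [ -5.067   8.867 -10.133  -6.333]]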
@@ -300,6 +300,30 @@ class TensorUtilsTest(parameterized.TestCase):
         ),
     )
 
+  def test_uniform_dequantize_blockwise(self):
+    quantized_tensor = np.array([[-8, -5, -4, 7], [-4, 7, -8, -5]])
+    expected_output_tensor = np.array([
+        [-10.1333336, -6.3333335, -5.0666668, 8.8666669],
+        [-5.0666668, 8.8666669, -10.1333336, -6.3333335],
+    ])
+    quant_params = qtyping.UniformQuantParams(
+        # b/443830202:
+        quantized_dimension=0,
+        num_bits=4,
+        scale=np.array([[[1.2666667, 1.2666667], [1.2666667, 1.2666667]]]),
+        zero_point=np.array([[0]]),
+        symmetric=True,
+        block_size=2,
+    )
+
+    dequantized_tensor = uniform_quantize_tensor.uniform_dequantize(
+        np.array(quantized_tensor), quant_params
+    )
+
+    self.assertSequenceAlmostEqual(
+        expected_output_tensor.flatten(), dequantized_tensor.flatten(), places=4
+    )
+
   @parameterized.parameters(
       (8, 8, True, True),
       (8, 4, False, True),
@@ -219,6 +219,7 @@ class UniformQuantParams:
         scale=quant_params['scales'],
         zero_point=quant_params['zero_points'],
         symmetric=symmetric,
+        block_size=quant_params['block_size'],
     )
 
   def __eq__(self, other):
@@ -131,9 +131,14 @@ def _perform_blockwise_quantization(
       transformation_input.buffers,
   )
   blockwise_details.scales = scale_tensor_id
-  blockwise_details.blockSize = transformation_input.quant_params.block_size
+  # Blockwise quantization does not support zero point yet, so this points to
+  # a -1 buffer index.
   # TODO: b/404909258 - Add optional zero point to blockwise quantization.
+  blockwise_details.zeroPoints = -1
+  blockwise_details.blockSize = transformation_input.quant_params.block_size
   flatbuffer_quantization.details = blockwise_details
+  # TODO: b/443830202 - Hardcoding to 0 for now.
+  flatbuffer_quantization.quantizedDimension = 0
   return flatbuffer_quantization
 
 
@@ -170,7 +170,7 @@ class QuantizeTensorTest(parameterized.TestCase):
     # Check if the scale and zero point tensors are inserted correctly.
     self.assertEqual(quant_param.details.scales, 9)
     # So far we don't have zero point in blockwise quantization.
-    self.assertEqual(quant_param.details.zeroPoints, 0)
+    self.assertEqual(quant_param.details.zeroPoints, -1)
 
   def test_int4_constant_packed_correctly(self):
     subgraph = self._model.subgraphs[0]
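The flipped expectation reflects the serialization change above: the blockwise details reference the scale and zero-point data by index, and 0 is itself a valid index, so leaving zeroPoints at 0 would appear to point at whatever data sits in slot 0. Writing -1 uses the usual "not present" sentinel until optional zero points land (b/404909258). A toy illustration of the convention; the dataclass below is a stand-in, not the generated flatbuffer class, and the field values are examples only:

from dataclasses import dataclass

@dataclass
class BlockwiseDetailsSketch:
  # Illustrative stand-in for the flatbuffer blockwise-quantization details.
  scales: int = -1       # index of the per-block scale data
  zeroPoints: int = -1   # -1 = no zero-point data (symmetric blocks)
  blockSize: int = 0

details = BlockwiseDetailsSketch(scales=9, blockSize=32)  # 32 is an arbitrary example
assert details.zeroPoints == -1  # matches the updated test expectation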
@@ -102,7 +102,8 @@ def median_diff_ratio(
 
 
 def cosine_similarity(
-    data1: np._typing.ArrayLike, data2: np._typing.ArrayLike
+    data1: np._typing.ArrayLike,
+    data2: np._typing.ArrayLike,
 ) -> float:
   """Calculates the cosine similarity between data1 & data2.
 
@@ -123,9 +124,14 @@ def cosine_similarity(
   # special handling for tensor of size 0
   if data1.size == 0:
     return float(0)
-  return float(
-      np.dot(data1, data2) / (np.linalg.norm(data1) * np.linalg.norm(data2))
-  )
+  norm_data1 = np.linalg.norm(data1)
+  norm_data2 = np.linalg.norm(data2)
+  # special handling for tensor of length 0
+  if norm_data1 == 0 and norm_data2 == 0:
+    return 1.0
+  if norm_data1 == 0 or norm_data2 == 0:
+    return 0.0
+  return np.dot(data1, data2) / (norm_data1 * norm_data2)
 
 
 def _preprocess_same_size_arrays(
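These branches pin down the edge cases of the metric: two all-zero tensors now score 1.0 (treated as identical), and a zero tensor compared against a non-zero one scores 0.0, instead of dividing by a zero norm and propagating NaN. A standalone sketch of the same logic on plain numpy arrays (not the packaged function):

import numpy as np

def cosine_similarity_sketch(data1, data2) -> float:
  # Cosine similarity with explicit zero-norm handling (illustration only).
  data1 = np.asarray(data1, dtype=np.float64).flatten()
  data2 = np.asarray(data2, dtype=np.float64).flatten()
  if data1.size == 0:
    return 0.0
  norm1, norm2 = np.linalg.norm(data1), np.linalg.norm(data2)
  if norm1 == 0 and norm2 == 0:
    return 1.0  # both tensors are all zeros: treat as identical
  if norm1 == 0 or norm2 == 0:
    return 0.0  # only one is all zeros: no meaningful angle
  return float(np.dot(data1, data2) / (norm1 * norm2))

print(cosine_similarity_sketch([0.0, 0.0], [0.0, 0.0]))  # 1.0
print(cosine_similarity_sketch([0.0, 0.0], [1.0, 2.0]))  # 0.0
print(cosine_similarity_sketch([1.0, 2.0], [1.0, 2.0]))  # ~1.0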
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-quantizer-nightly
-Version: 0.4.0.dev20251007
+Version: 0.4.0.dev20251008
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -12,7 +12,7 @@ ai_edge_quantizer/model_validator.py,sha256=Hj0_5o-Oa3dSlJ3ryVjRhvsyelHNyek1GrtG
 ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
 ai_edge_quantizer/params_generator.py,sha256=0w-sDGk84sVNkXoduon1wDqq30sGOHVgBVbdg44QVF4,20153
 ai_edge_quantizer/params_generator_test.py,sha256=RDYoRZDJfEZRtjlTAU2kZ_4t3JHOqEHxfJX9V4ETAhg,40597
-ai_edge_quantizer/qtyping.py,sha256=7aEMPA4qr4CGD3NXtZgG2fDoQX5NzK9jwSv1yWNqQV4,17149
+ai_edge_quantizer/qtyping.py,sha256=rp2jdmCuSsP6Ay8rD7NxDCpbFkRNkbYP29Uwe0xBfnA,17196
 ai_edge_quantizer/quantizer.py,sha256=ckAEOnnBxuCKZuvlzdChevCKPuE-IeDPHCNtFTWr250,17857
 ai_edge_quantizer/quantizer_test.py,sha256=bh4IowxRF249p_XKIKQ0f17PmeDddfcOUzvQ2ht1L0E,26530
 ai_edge_quantizer/recipe.py,sha256=MEkfQ2Sg3KAE9LAORHWcbjYNPg06EUbwc1d-VspQA2U,6461
@@ -40,8 +40,8 @@ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1
 ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
 ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
 ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
-ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=so5pMeoclPdXV_5dDiqWaA_cZ0Ud-OWnXxRbzNh9x1E,18576
-ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=eBTi2I12e65_lxVZjGaN2TAiMzvsyyXAhWmEpKEmkLA,15126
+ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=0bLDAjCm5wxasGXKT3XiS4quk-zXlWK6JKb-14FQAd4,19570
+ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=0xOdoIWuEo9JlXvZdX_Gbq5lfwCwEcG7RwOxhXAFOOY,15939
 ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
 ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=4eAlGph6DDW18bUdoY0XcUoOXEr3P_3_W1ptidD8qK4,37611
 ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
@@ -58,8 +58,8 @@ ai_edge_quantizer/transformations/insert_hadamard_rotation.py,sha256=5D5WwrJCE6h
 ai_edge_quantizer/transformations/insert_hadamard_rotation_test.py,sha256=iV1p3nZfHUATV2YRoBOYurnu3pLy8n3aFppLWGQOPdA,7268
 ai_edge_quantizer/transformations/quant_insert.py,sha256=jn6HsJaV-sqBiFPY-Aqbd64t8zgcYVkEkZI375x_FWY,3958
 ai_edge_quantizer/transformations/quant_insert_test.py,sha256=X9ptPDvJCFkR5tejKnD1SlHFGPazQTW-wNNMV9MEAuw,10107
-ai_edge_quantizer/transformations/quantize_tensor.py,sha256=unqInO0we6_cgwPjtHB3tLWIHPajfNuJSLGW-IFnI9E,7029
-ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=mHLO3_MRt36A8-ZN8ADn5tBBJlqjTWa7ZUN8Mmu5Rcw,9116
+ai_edge_quantizer/transformations/quantize_tensor.py,sha256=VGTVpZWla9R-LPfhTzH1NVAp2soOqDF_duIm8ez_z3Y,7264
+ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=CD7OboBcIQxQY8OaRd5ISC1JcwQW726P_vneY4LKVpA,9117
 ai_edge_quantizer/transformations/transformation_utils.py,sha256=IKrtXJNH0msiTcI7KXkCYn2EkzmbZKWMMX_r5PMEx2U,8857
 ai_edge_quantizer/transformations/transformation_utils_test.py,sha256=MWgq29t7rvxRQIfi4ny9IoODFCTcbpjnIwoCL40zDKk,8698
 ai_edge_quantizer/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
@@ -72,10 +72,10 @@ ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=LN-WonrcJLP9bB4lULd5VIg_8
 ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py,sha256=K1SbK8q92qYVtiVj0I0GtugsPTkpIpEKv9zakvFV_Sc,8555
 ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOihexmizeJqt4SQcET9aA,14925
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
-ai_edge_quantizer/utils/validation_utils.py,sha256=yJH9Cvepr_XWn-3Hsh91j7HuC5iLQHAyskyQ48bGNoc,4797
+ai_edge_quantizer/utils/validation_utils.py,sha256=0sOdH4pzk_Pwh1r8O47iaECRng1Xn0ABn9GVc8UPNcY,4994
 ai_edge_quantizer/utils/validation_utils_test.py,sha256=1sblJWHLTYTbn1Qi9rwnrREOSXRy5KwHAWSwgI1e_aU,3697
-ai_edge_quantizer_nightly-0.4.0.dev20251007.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ai_edge_quantizer_nightly-0.4.0.dev20251007.dist-info/METADATA,sha256=RKb2qd_e3o9Vv-kxr5lF0ApMyfJEHfoVVaMwE24_ydw,1508
-ai_edge_quantizer_nightly-0.4.0.dev20251007.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_quantizer_nightly-0.4.0.dev20251007.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
-ai_edge_quantizer_nightly-0.4.0.dev20251007.dist-info/RECORD,,
+ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info/METADATA,sha256=OhwG-gg_4N_0x8rm_4M_-aCX0mBjdPxj__btANE2Sok,1508
+ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info/RECORD,,