ai-edge-quantizer-nightly 0.4.0.dev20251006__py3-none-any.whl → 0.4.0.dev20251008__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +31 -7
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +24 -0
- ai_edge_quantizer/qtyping.py +1 -0
- ai_edge_quantizer/transformations/quantize_tensor.py +6 -1
- ai_edge_quantizer/transformations/quantize_tensor_test.py +1 -1
- ai_edge_quantizer/utils/validation_utils.py +10 -4
- {ai_edge_quantizer_nightly-0.4.0.dev20251006.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info}/METADATA +1 -1
- {ai_edge_quantizer_nightly-0.4.0.dev20251006.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info}/RECORD +11 -11
- {ai_edge_quantizer_nightly-0.4.0.dev20251006.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20251006.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info}/WHEEL +0 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20251006.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info}/top_level.txt +0 -0
|
@@ -119,6 +119,7 @@ def fix_quantization_params_rank(
|
|
|
119
119
|
symmetric=quantization_params.symmetric,
|
|
120
120
|
quantized_dimension=quantization_params.quantized_dimension,
|
|
121
121
|
quantized_data=quantization_params.quantized_data,
|
|
122
|
+
block_size=quantization_params.block_size,
|
|
122
123
|
)
|
|
123
124
|
|
|
124
125
|
|
|
@@ -209,13 +210,16 @@ def _broadcast_scale_zp_for_blockwise(
|
|
|
209
210
|
),
|
|
210
211
|
tensor_content.shape,
|
|
211
212
|
)
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
213
|
+
if quant_params.zero_point is None or quant_params.zero_point.size == 0:
|
|
214
|
+
expanded_zp = np.zeros_like(tensor_content, dtype=np.int32)
|
|
215
|
+
else:
|
|
216
|
+
expanded_zp = np.reshape(
|
|
217
|
+
np.broadcast_to(
|
|
218
|
+
np.expand_dims(quant_params.zero_point, quantized_dim + 1),
|
|
219
|
+
expanded_tensor_shape,
|
|
220
|
+
),
|
|
221
|
+
tensor_content.shape,
|
|
222
|
+
)
|
|
219
223
|
return qtyping.UniformQuantParams(
|
|
220
224
|
scale=expanded_scale,
|
|
221
225
|
zero_point=expanded_zp,
|
|
@@ -295,6 +299,26 @@ def uniform_dequantize(
|
|
|
295
299
|
Returns:
|
|
296
300
|
The dequantized tensor.
|
|
297
301
|
"""
|
|
302
|
+
if quantization_params.block_size != 0:
|
|
303
|
+
# b/443830202: The quantized dimension is currently increased by 1 because
|
|
304
|
+
# AEQ expects 1 and XNNPack expects 0.
|
|
305
|
+
quantization_params = dataclasses.replace(
|
|
306
|
+
quantization_params,
|
|
307
|
+
quantized_dimension=quantization_params.quantized_dimension + 1,
|
|
308
|
+
)
|
|
309
|
+
scale_shape = list(tensor_data.shape)
|
|
310
|
+
scale_shape[quantization_params.quantized_dimension] = (
|
|
311
|
+
scale_shape[quantization_params.quantized_dimension]
|
|
312
|
+
// quantization_params.block_size
|
|
313
|
+
)
|
|
314
|
+
quantization_params = dataclasses.replace(
|
|
315
|
+
quantization_params,
|
|
316
|
+
scale=quantization_params.scale.reshape(scale_shape),
|
|
317
|
+
)
|
|
318
|
+
quantization_params = _broadcast_scale_zp_for_blockwise(
|
|
319
|
+
tensor_data, quantization_params
|
|
320
|
+
)
|
|
321
|
+
|
|
298
322
|
# quant params in flatbuffer is flattened, expand the rank to be the same
|
|
299
323
|
# as the tensor rank to avoid ambiguous broadcasting.
|
|
300
324
|
quantization_params = fix_quantization_params_rank(
|
|
@@ -300,6 +300,30 @@ class TensorUtilsTest(parameterized.TestCase):
|
|
|
300
300
|
),
|
|
301
301
|
)
|
|
302
302
|
|
|
303
|
+
def test_uniform_dequantize_blockwise(self):
|
|
304
|
+
quantized_tensor = np.array([[-8, -5, -4, 7], [-4, 7, -8, -5]])
|
|
305
|
+
expected_output_tensor = np.array([
|
|
306
|
+
[-10.1333336, -6.3333335, -5.0666668, 8.8666669],
|
|
307
|
+
[-5.0666668, 8.8666669, -10.1333336, -6.3333335],
|
|
308
|
+
])
|
|
309
|
+
quant_params = qtyping.UniformQuantParams(
|
|
310
|
+
# b/443830202:
|
|
311
|
+
quantized_dimension=0,
|
|
312
|
+
num_bits=4,
|
|
313
|
+
scale=np.array([[[1.2666667, 1.2666667], [1.2666667, 1.2666667]]]),
|
|
314
|
+
zero_point=np.array([[0]]),
|
|
315
|
+
symmetric=True,
|
|
316
|
+
block_size=2,
|
|
317
|
+
)
|
|
318
|
+
|
|
319
|
+
dequantized_tensor = uniform_quantize_tensor.uniform_dequantize(
|
|
320
|
+
np.array(quantized_tensor), quant_params
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
self.assertSequenceAlmostEqual(
|
|
324
|
+
expected_output_tensor.flatten(), dequantized_tensor.flatten(), places=4
|
|
325
|
+
)
|
|
326
|
+
|
|
303
327
|
@parameterized.parameters(
|
|
304
328
|
(8, 8, True, True),
|
|
305
329
|
(8, 4, False, True),
|
ai_edge_quantizer/qtyping.py
CHANGED
|
@@ -131,9 +131,14 @@ def _perform_blockwise_quantization(
|
|
|
131
131
|
transformation_input.buffers,
|
|
132
132
|
)
|
|
133
133
|
blockwise_details.scales = scale_tensor_id
|
|
134
|
-
|
|
134
|
+
# Blockwise quantization does not support zero point yet, so this points to
|
|
135
|
+
# a -1 buffer index.
|
|
135
136
|
# TODO: b/404909258 - Add optional zero point to blockwise quantization.
|
|
137
|
+
blockwise_details.zeroPoints = -1
|
|
138
|
+
blockwise_details.blockSize = transformation_input.quant_params.block_size
|
|
136
139
|
flatbuffer_quantization.details = blockwise_details
|
|
140
|
+
# TODO: b/443830202 - Hardcoding to 0 for now.
|
|
141
|
+
flatbuffer_quantization.quantizedDimension = 0
|
|
137
142
|
return flatbuffer_quantization
|
|
138
143
|
|
|
139
144
|
|
|
@@ -170,7 +170,7 @@ class QuantizeTensorTest(parameterized.TestCase):
|
|
|
170
170
|
# Check if the scale and zero point tensors are inserted correctly.
|
|
171
171
|
self.assertEqual(quant_param.details.scales, 9)
|
|
172
172
|
# So far we don't have zero point in blockwise quantization.
|
|
173
|
-
self.assertEqual(quant_param.details.zeroPoints,
|
|
173
|
+
self.assertEqual(quant_param.details.zeroPoints, -1)
|
|
174
174
|
|
|
175
175
|
def test_int4_constant_packed_correctly(self):
|
|
176
176
|
subgraph = self._model.subgraphs[0]
|
|
@@ -102,7 +102,8 @@ def median_diff_ratio(
|
|
|
102
102
|
|
|
103
103
|
|
|
104
104
|
def cosine_similarity(
|
|
105
|
-
data1: np._typing.ArrayLike, data2: np._typing.ArrayLike
|
|
105
|
+
data1: np._typing.ArrayLike,
|
|
106
|
+
data2: np._typing.ArrayLike,
|
|
106
107
|
) -> float:
|
|
107
108
|
"""Calculates the cosine similarity between data1 & data2.
|
|
108
109
|
|
|
@@ -123,9 +124,14 @@ def cosine_similarity(
|
|
|
123
124
|
# special handling for tensor of size 0
|
|
124
125
|
if data1.size == 0:
|
|
125
126
|
return float(0)
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
127
|
+
norm_data1 = np.linalg.norm(data1)
|
|
128
|
+
norm_data2 = np.linalg.norm(data2)
|
|
129
|
+
# special handling for tensor of length 0
|
|
130
|
+
if norm_data1 == 0 and norm_data2 == 0:
|
|
131
|
+
return 1.0
|
|
132
|
+
if norm_data1 == 0 or norm_data2 == 0:
|
|
133
|
+
return 0.0
|
|
134
|
+
return np.dot(data1, data2) / (norm_data1 * norm_data2)
|
|
129
135
|
|
|
130
136
|
|
|
131
137
|
def _preprocess_same_size_arrays(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ai-edge-quantizer-nightly
|
|
3
|
-
Version: 0.4.0.dev20251006
|
|
3
|
+
Version: 0.4.0.dev20251008
|
|
4
4
|
Summary: A quantizer for advanced developers to quantize converted AI Edge models.
|
|
5
5
|
Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
|
|
6
6
|
Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
|
|
@@ -12,7 +12,7 @@ ai_edge_quantizer/model_validator.py,sha256=Hj0_5o-Oa3dSlJ3ryVjRhvsyelHNyek1GrtG
|
|
|
12
12
|
ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
|
|
13
13
|
ai_edge_quantizer/params_generator.py,sha256=0w-sDGk84sVNkXoduon1wDqq30sGOHVgBVbdg44QVF4,20153
|
|
14
14
|
ai_edge_quantizer/params_generator_test.py,sha256=RDYoRZDJfEZRtjlTAU2kZ_4t3JHOqEHxfJX9V4ETAhg,40597
|
|
15
|
-
ai_edge_quantizer/qtyping.py,sha256=
|
|
15
|
+
ai_edge_quantizer/qtyping.py,sha256=rp2jdmCuSsP6Ay8rD7NxDCpbFkRNkbYP29Uwe0xBfnA,17196
|
|
16
16
|
ai_edge_quantizer/quantizer.py,sha256=ckAEOnnBxuCKZuvlzdChevCKPuE-IeDPHCNtFTWr250,17857
|
|
17
17
|
ai_edge_quantizer/quantizer_test.py,sha256=bh4IowxRF249p_XKIKQ0f17PmeDddfcOUzvQ2ht1L0E,26530
|
|
18
18
|
ai_edge_quantizer/recipe.py,sha256=MEkfQ2Sg3KAE9LAORHWcbjYNPg06EUbwc1d-VspQA2U,6461
|
|
@@ -40,8 +40,8 @@ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1
|
|
|
40
40
|
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
|
|
41
41
|
ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
|
|
42
42
|
ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
|
|
43
|
-
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=
|
|
44
|
-
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=
|
|
43
|
+
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=0bLDAjCm5wxasGXKT3XiS4quk-zXlWK6JKb-14FQAd4,19570
|
|
44
|
+
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=0xOdoIWuEo9JlXvZdX_Gbq5lfwCwEcG7RwOxhXAFOOY,15939
|
|
45
45
|
ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
|
46
46
|
ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=4eAlGph6DDW18bUdoY0XcUoOXEr3P_3_W1ptidD8qK4,37611
|
|
47
47
|
ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
|
|
@@ -58,8 +58,8 @@ ai_edge_quantizer/transformations/insert_hadamard_rotation.py,sha256=5D5WwrJCE6h
|
|
|
58
58
|
ai_edge_quantizer/transformations/insert_hadamard_rotation_test.py,sha256=iV1p3nZfHUATV2YRoBOYurnu3pLy8n3aFppLWGQOPdA,7268
|
|
59
59
|
ai_edge_quantizer/transformations/quant_insert.py,sha256=jn6HsJaV-sqBiFPY-Aqbd64t8zgcYVkEkZI375x_FWY,3958
|
|
60
60
|
ai_edge_quantizer/transformations/quant_insert_test.py,sha256=X9ptPDvJCFkR5tejKnD1SlHFGPazQTW-wNNMV9MEAuw,10107
|
|
61
|
-
ai_edge_quantizer/transformations/quantize_tensor.py,sha256=
|
|
62
|
-
ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=
|
|
61
|
+
ai_edge_quantizer/transformations/quantize_tensor.py,sha256=VGTVpZWla9R-LPfhTzH1NVAp2soOqDF_duIm8ez_z3Y,7264
|
|
62
|
+
ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=CD7OboBcIQxQY8OaRd5ISC1JcwQW726P_vneY4LKVpA,9117
|
|
63
63
|
ai_edge_quantizer/transformations/transformation_utils.py,sha256=IKrtXJNH0msiTcI7KXkCYn2EkzmbZKWMMX_r5PMEx2U,8857
|
|
64
64
|
ai_edge_quantizer/transformations/transformation_utils_test.py,sha256=MWgq29t7rvxRQIfi4ny9IoODFCTcbpjnIwoCL40zDKk,8698
|
|
65
65
|
ai_edge_quantizer/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
|
@@ -72,10 +72,10 @@ ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=LN-WonrcJLP9bB4lULd5VIg_8
|
|
|
72
72
|
ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py,sha256=K1SbK8q92qYVtiVj0I0GtugsPTkpIpEKv9zakvFV_Sc,8555
|
|
73
73
|
ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOihexmizeJqt4SQcET9aA,14925
|
|
74
74
|
ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
|
|
75
|
-
ai_edge_quantizer/utils/validation_utils.py,sha256=
|
|
75
|
+
ai_edge_quantizer/utils/validation_utils.py,sha256=0sOdH4pzk_Pwh1r8O47iaECRng1Xn0ABn9GVc8UPNcY,4994
|
|
76
76
|
ai_edge_quantizer/utils/validation_utils_test.py,sha256=1sblJWHLTYTbn1Qi9rwnrREOSXRy5KwHAWSwgI1e_aU,3697
|
|
77
|
-
ai_edge_quantizer_nightly-0.4.0.dev20251006.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
78
|
-
ai_edge_quantizer_nightly-0.4.0.
|
|
79
|
-
ai_edge_quantizer_nightly-0.4.0.dev20251006.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
|
80
|
-
ai_edge_quantizer_nightly-0.4.0.dev20251006.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
|
81
|
-
ai_edge_quantizer_nightly-0.4.0.dev20251006.dist-info/RECORD,,
|
|
77
|
+
ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
78
|
+
ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info/METADATA,sha256=OhwG-gg_4N_0x8rm_4M_-aCX0mBjdPxj__btANE2Sok,1508
|
|
79
|
+
ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
|
80
|
+
ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
|
81
|
+
ai_edge_quantizer_nightly-0.4.0.dev20251008.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|