ai-edge-quantizer-nightly 0.4.0.dev20250908__py3-none-any.whl → 0.4.0.dev20250910__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +21 -6
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +24 -1
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +51 -1
- {ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info}/METADATA +1 -1
- {ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info}/RECORD +8 -8
- {ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info}/WHEEL +0 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info}/top_level.txt +0 -0
@@ -309,13 +309,28 @@ def _materialize_bias_for_conv_ops(
|
|
309
309
|
bias_tensor,
|
310
310
|
graph_info.buffers,
|
311
311
|
)
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
)
|
312
|
+
input_consumer_params = (
|
313
|
+
op_tensor_params[op_input_index].consumers[0].parameters
|
314
|
+
)
|
315
|
+
weight_consumer_params = (
|
316
|
+
op_tensor_params[op_weight_index].consumers[0].parameters
|
318
317
|
)
|
318
|
+
try:
|
319
|
+
# Bias quantization is using fixed quantization scale:
|
320
|
+
# input_scale * weight_scale. To avoid hidden numerics error, we check
|
321
|
+
# the quantization error in bias quantization.
|
322
|
+
bias_quant_params = (
|
323
|
+
uniform_quantize_tensor.symmetric_quantize_bias_tensor(
|
324
|
+
bias_content,
|
325
|
+
input_consumer_params,
|
326
|
+
weight_consumer_params,
|
327
|
+
)
|
328
|
+
)
|
329
|
+
except ValueError as e:
|
330
|
+
raise ValueError(
|
331
|
+
f"Failed to quantize bias tensor for op {op_info.op_name} with op"
|
332
|
+
f" id {op_info.subgraph_op_index}."
|
333
|
+
) from e
|
319
334
|
# We only quantize bias under SRQ. Setting is_constant=True for SRQ only
|
320
335
|
# to avoid quantize bias for DRQ and weight-only cases.
|
321
336
|
is_constant = (
|
@@ -305,6 +305,7 @@ def symmetric_quantize_bias_tensor(
|
|
305
305
|
bias_content: np.ndarray,
|
306
306
|
input_tensor_quant_params: qtyping.UniformQuantParams,
|
307
307
|
weight_tensor_quant_params: qtyping.UniformQuantParams,
|
308
|
+
check_error: bool = True,
|
308
309
|
) -> qtyping.UniformQuantParams:
|
309
310
|
"""Quantize bias tensor (symmetrically, i.e., zero_point = 0).
|
310
311
|
|
@@ -316,6 +317,12 @@ def symmetric_quantize_bias_tensor(
|
|
316
317
|
bias_content: The bias content.
|
317
318
|
input_tensor_quant_params: The quantization parameters of input tensor.
|
318
319
|
weight_tensor_quant_params: The quantization parameters of weight tensor.
|
320
|
+
check_error: Whether to check if the quantization error (the difference
|
321
|
+
between the original and dequantized bias) is larger than the quantization
|
322
|
+
scale. This check is important because bias quantization parameters are
|
323
|
+
fixed (bias_scale = input_scale * weight_scale), which can lead to large
|
324
|
+
quantization errors. Raising an error when the quantization error is
|
325
|
+
larger than the scale helps to identify unexpected numerical issues.
|
319
326
|
|
320
327
|
Returns:
|
321
328
|
The quantized bias tensor.
|
@@ -330,7 +337,8 @@ def symmetric_quantize_bias_tensor(
|
|
330
337
|
|
331
338
|
# symmetric
|
332
339
|
bias_zp = np.zeros_like(effective_output_scale, dtype=np.int32)
|
333
|
-
|
340
|
+
# Fixed to 32 bits since most of the accelerators use int32 accumulator.
|
341
|
+
bias_number_bits = 32
|
334
342
|
symmetric = True
|
335
343
|
quantized_dimension = None if len(effective_output_scale) == 1 else 0
|
336
344
|
bias_quant_params = qtyping.UniformQuantParams(
|
@@ -342,6 +350,21 @@ def symmetric_quantize_bias_tensor(
|
|
342
350
|
)
|
343
351
|
|
344
352
|
quantized_vars = uniform_quantize(bias_content, bias_quant_params)
|
353
|
+
if check_error:
|
354
|
+
dequantized_bias = uniform_dequantize(quantized_vars, bias_quant_params)
|
355
|
+
quantization_error = np.abs(dequantized_bias - bias_content)
|
356
|
+
if np.any(quantization_error > effective_output_scale):
|
357
|
+
raise ValueError(
|
358
|
+
"Quantization error is too large for bias tensor quantization."
|
359
|
+
)
|
360
|
+
|
361
|
+
# Save the int32 quantized bias as int64 if the input tensor is quantized to
|
362
|
+
# 16 bits. This is to assume the matmul is using int64 accumulator (safe from
|
363
|
+
# overflow). For accelerators with int32 accumulator, it is safe to cast int64
|
364
|
+
# back to int32.
|
365
|
+
if input_tensor_quant_params.num_bits == 16:
|
366
|
+
quantized_vars = quantized_vars.astype(np.int64)
|
367
|
+
bias_number_bits = 64
|
345
368
|
|
346
369
|
# UniformQuantParams is frozen dataclass, need to recreate.
|
347
370
|
return qtyping.UniformQuantParams(
|
@@ -15,8 +15,11 @@
|
|
15
15
|
|
16
16
|
"""Tests for tensor_utils."""
|
17
17
|
|
18
|
+
import dataclasses
|
19
|
+
|
18
20
|
from absl.testing import parameterized
|
19
21
|
import numpy as np
|
22
|
+
|
20
23
|
from tensorflow.python.platform import googletest
|
21
24
|
from ai_edge_quantizer import qtyping
|
22
25
|
from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor
|
@@ -276,7 +279,10 @@ class TensorUtilsTest(parameterized.TestCase):
|
|
276
279
|
)
|
277
280
|
|
278
281
|
@parameterized.parameters(
|
279
|
-
(8, 8, True, True),
|
282
|
+
(8, 8, True, True),
|
283
|
+
(8, 4, False, True),
|
284
|
+
(16, 8, True, False),
|
285
|
+
(16, 8, True, True),
|
280
286
|
)
|
281
287
|
def test_quantize_bias_tensor(
|
282
288
|
self,
|
@@ -334,6 +340,26 @@ class TensorUtilsTest(parameterized.TestCase):
|
|
334
340
|
self.assertSequenceAlmostEqual(
|
335
341
|
list(dequantized_bias.flatten()), list(bias_tensor_data), places=5
|
336
342
|
)
|
343
|
+
|
344
|
+
if activation_num_bits == 16:
|
345
|
+
# Check if it is safe to cast int64 bias to int32. We save the int32
|
346
|
+
# quantized bias as int64 if the input tensor is quantized to 16 bits.
|
347
|
+
# This is to assume the matmul is using int64 accumulator (safe from
|
348
|
+
# overflow). For accelerators with int32 accumulator, it is safe to cast
|
349
|
+
# int64 back to int32.
|
350
|
+
quantized_bias = bias_quant_config.quantized_data
|
351
|
+
self.assertIsNotNone(quantized_bias)
|
352
|
+
self.assertEqual(quantized_bias.dtype, np.int64)
|
353
|
+
self.assertSequenceEqual(
|
354
|
+
list(quantized_bias.flatten()),
|
355
|
+
list(quantized_bias.astype(np.int32).flatten()),
|
356
|
+
)
|
357
|
+
|
358
|
+
bias_quant_config = dataclasses.replace(
|
359
|
+
bias_quant_config,
|
360
|
+
num_bits=32,
|
361
|
+
)
|
362
|
+
|
337
363
|
expected_quantized_data = uniform_quantize_tensor.uniform_quantize(
|
338
364
|
bias_tensor_data, bias_quant_config
|
339
365
|
)
|
@@ -342,6 +368,30 @@ class TensorUtilsTest(parameterized.TestCase):
|
|
342
368
|
list(bias_quant_config.quantized_data.flatten()), # pytype: disable=attribute-error
|
343
369
|
)
|
344
370
|
|
371
|
+
def test_quantize_bias_tensor_raises_error_for_large_quantization_error(self):
|
372
|
+
input_quant_config = qtyping.UniformQuantParams(
|
373
|
+
scale=np.array([0.1]),
|
374
|
+
zero_point=np.array([10]),
|
375
|
+
num_bits=8,
|
376
|
+
symmetric=False,
|
377
|
+
quantized_dimension=None,
|
378
|
+
)
|
379
|
+
weight_quant_config = qtyping.UniformQuantParams(
|
380
|
+
scale=np.array([0.1]),
|
381
|
+
zero_point=np.array([-1]),
|
382
|
+
num_bits=8,
|
383
|
+
symmetric=True,
|
384
|
+
quantized_dimension=None,
|
385
|
+
)
|
386
|
+
# This will result in quantized bias of 3e9, which is larger than int32 max.
|
387
|
+
bias_tensor_data = np.array([3e7])
|
388
|
+
with self.assertRaises(ValueError):
|
389
|
+
uniform_quantize_tensor.symmetric_quantize_bias_tensor(
|
390
|
+
bias_tensor_data,
|
391
|
+
input_quant_config,
|
392
|
+
weight_quant_config,
|
393
|
+
)
|
394
|
+
|
345
395
|
@parameterized.parameters((8, True), (16, False))
|
346
396
|
def test_tensor_zp_scale_from_min_max(self, num_bits, symmetric):
|
347
397
|
min_val = np.min(self._test_data, keepdims=True)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ai-edge-quantizer-nightly
|
3
|
-
Version: 0.4.0.dev20250908
|
3
|
+
Version: 0.4.0.dev20250910
|
4
4
|
Summary: A quantizer for advanced developers to quantize converted AI Edge models.
|
5
5
|
Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
|
6
6
|
Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
|
@@ -28,7 +28,7 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCP
|
|
28
28
|
ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
|
29
29
|
ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=EqIHGEZ1LgUrTN7zf880RuAzEv3Qy7kgh5ivObJGHSo,22646
|
30
30
|
ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
31
|
-
ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=
|
31
|
+
ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=TUxqc3cG66H77Rz0N3ynFnKKmFySDUAExK--3-VS7a4,36487
|
32
32
|
ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=GGf_n3wIeg3GB_eGsmyNJ0fTcxgpeMMbugTMRONK6TQ,3553
|
33
33
|
ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=BDdn_uBZakfHyzdMJPKadsOqxqyC-s6W2ZzFH99L4fE,8652
|
34
34
|
ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=sT5eX5TLZEHTtPfnSkCPDlS0sQxlTFWbCsbvOuj--yY,8889
|
@@ -38,8 +38,8 @@ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1
|
|
38
38
|
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
|
39
39
|
ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
|
40
40
|
ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
|
41
|
-
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=
|
42
|
-
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=
|
41
|
+
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=vsvBGEGFEEUP4kXRUh9hMpVXjsMBpfs6UDk8m4BNGTs,18375
|
42
|
+
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=Ympigz0BGcaO5x3OozxNxrRAGiF0to6V_HXAcxNNEpI,14399
|
43
43
|
ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
44
44
|
ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=4eAlGph6DDW18bUdoY0XcUoOXEr3P_3_W1ptidD8qK4,37611
|
45
45
|
ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
|
@@ -70,8 +70,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
|
|
70
70
|
ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
|
71
71
|
ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
|
72
72
|
ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
|
73
|
-
ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
74
|
-
ai_edge_quantizer_nightly-0.4.0.
|
75
|
-
ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
76
|
-
ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
77
|
-
ai_edge_quantizer_nightly-0.4.0.dev20250908.dist-info/RECORD,,
|
73
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
74
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info/METADATA,sha256=OHvvjpu55-8eASitbDgp6fKhpBkVhF-AXT652QFhswg,1508
|
75
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
76
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
77
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250910.dist-info/RECORD,,
|
File without changes
|
File without changes
|