ai-edge-quantizer-nightly 0.4.0.dev20250907__py3-none-any.whl → 0.4.0.dev20250909__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +9 -1
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +27 -1
- {ai_edge_quantizer_nightly-0.4.0.dev20250907.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info}/METADATA +1 -1
- {ai_edge_quantizer_nightly-0.4.0.dev20250907.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info}/RECORD +7 -7
- {ai_edge_quantizer_nightly-0.4.0.dev20250907.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20250907.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info}/WHEEL +0 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20250907.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info}/top_level.txt +0 -0
@@ -330,7 +330,8 @@ def symmetric_quantize_bias_tensor(
|
|
330
330
|
|
331
331
|
# symmetric
|
332
332
|
bias_zp = np.zeros_like(effective_output_scale, dtype=np.int32)
|
333
|
-
|
333
|
+
# Fixed to 32 bits since most of the accelerators use int32 accumulator.
|
334
|
+
bias_number_bits = 32
|
334
335
|
symmetric = True
|
335
336
|
quantized_dimension = None if len(effective_output_scale) == 1 else 0
|
336
337
|
bias_quant_params = qtyping.UniformQuantParams(
|
@@ -342,6 +343,13 @@ def symmetric_quantize_bias_tensor(
|
|
342
343
|
)
|
343
344
|
|
344
345
|
quantized_vars = uniform_quantize(bias_content, bias_quant_params)
|
346
|
+
# Save the int32 quantized bias as int64 if the input tensor is quantized to
|
347
|
+
# 16 bits. This is to assume the matmul is using int64 accumulator (safe from
|
348
|
+
# overflow). For accelerators with int32 accumulator, it is safe to cast int64
|
349
|
+
# back to int32.
|
350
|
+
if input_tensor_quant_params.num_bits == 16:
|
351
|
+
quantized_vars = quantized_vars.astype(np.int64)
|
352
|
+
bias_number_bits = 64
|
345
353
|
|
346
354
|
# UniformQuantParams is frozen dataclass, need to recreate.
|
347
355
|
return qtyping.UniformQuantParams(
|
@@ -15,8 +15,11 @@
|
|
15
15
|
|
16
16
|
"""Tests for tensor_utils."""
|
17
17
|
|
18
|
+
import dataclasses
|
19
|
+
|
18
20
|
from absl.testing import parameterized
|
19
21
|
import numpy as np
|
22
|
+
|
20
23
|
from tensorflow.python.platform import googletest
|
21
24
|
from ai_edge_quantizer import qtyping
|
22
25
|
from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor
|
@@ -276,7 +279,10 @@ class TensorUtilsTest(parameterized.TestCase):
|
|
276
279
|
)
|
277
280
|
|
278
281
|
@parameterized.parameters(
|
279
|
-
(8, 8, True, True),
|
282
|
+
(8, 8, True, True),
|
283
|
+
(8, 4, False, True),
|
284
|
+
(16, 8, True, False),
|
285
|
+
(16, 8, True, True),
|
280
286
|
)
|
281
287
|
def test_quantize_bias_tensor(
|
282
288
|
self,
|
@@ -334,6 +340,26 @@ class TensorUtilsTest(parameterized.TestCase):
|
|
334
340
|
self.assertSequenceAlmostEqual(
|
335
341
|
list(dequantized_bias.flatten()), list(bias_tensor_data), places=5
|
336
342
|
)
|
343
|
+
|
344
|
+
if activation_num_bits == 16:
|
345
|
+
# Check if it is safe to cast int64 bias to int32. We save the int32
|
346
|
+
# quantized bias as int64 if the input tensor is quantized to 16 bits.
|
347
|
+
# This is to assume the matmul is using int64 accumulator (safe from
|
348
|
+
# overflow). For accelerators with int32 accumulator, it is safe to cast
|
349
|
+
# int64 back to int32.
|
350
|
+
quantized_bias = bias_quant_config.quantized_data
|
351
|
+
self.assertIsNotNone(quantized_bias)
|
352
|
+
self.assertEqual(quantized_bias.dtype, np.int64)
|
353
|
+
self.assertSequenceEqual(
|
354
|
+
list(quantized_bias.flatten()),
|
355
|
+
list(quantized_bias.astype(np.int32).flatten()), # pytype: disable=attribute-error
|
356
|
+
)
|
357
|
+
|
358
|
+
bias_quant_config = dataclasses.replace(
|
359
|
+
bias_quant_config,
|
360
|
+
num_bits=32,
|
361
|
+
)
|
362
|
+
|
337
363
|
expected_quantized_data = uniform_quantize_tensor.uniform_quantize(
|
338
364
|
bias_tensor_data, bias_quant_config
|
339
365
|
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ai-edge-quantizer-nightly
|
3
|
-
Version: 0.4.0.dev20250907
|
3
|
+
Version: 0.4.0.dev20250909
|
4
4
|
Summary: A quantizer for advanced developers to quantize converted AI Edge models.
|
5
5
|
Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
|
6
6
|
Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
|
@@ -38,8 +38,8 @@ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1
|
|
38
38
|
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
|
39
39
|
ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
|
40
40
|
ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
|
41
|
-
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=
|
42
|
-
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=
|
41
|
+
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=_WfxguP9dVsWFjUc2l8cvvx6kK_PEiUS5HU1vfC7d5c,17554
|
42
|
+
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=OH1etR0btp2ep7PIP4uc7LCOydrouVBeeBJzkIkMF-k,13625
|
43
43
|
ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
44
44
|
ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=4eAlGph6DDW18bUdoY0XcUoOXEr3P_3_W1ptidD8qK4,37611
|
45
45
|
ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
|
@@ -70,8 +70,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
|
|
70
70
|
ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
|
71
71
|
ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
|
72
72
|
ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
|
73
|
-
ai_edge_quantizer_nightly-0.4.0.dev20250907.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
74
|
-
ai_edge_quantizer_nightly-0.4.0.
|
75
|
-
ai_edge_quantizer_nightly-0.4.0.dev20250907.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
76
|
-
ai_edge_quantizer_nightly-0.4.0.dev20250907.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
77
|
-
ai_edge_quantizer_nightly-0.4.0.dev20250907.dist-info/RECORD,,
|
73
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
74
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info/METADATA,sha256=-_Cqm5vEJXyhzkezt-hEOtZOZJ8kOtv8-0oUkMad7aQ,1508
|
75
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
76
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
77
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250909.dist-info/RECORD,,
|
File without changes
|
File without changes
|