ai-edge-quantizer-nightly 0.1.0.dev20250320__py3-none-any.whl → 0.1.0.dev20250322__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +5 -2
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +7 -4
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +3 -3
- ai_edge_quantizer/transformations/quantize_tensor.py +3 -12
- ai_edge_quantizer/transformations/quantize_tensor_test.py +2 -1
- ai_edge_quantizer/utils/tfl_flatbuffer_utils.py +5 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250320.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250322.dist-info}/METADATA +1 -1
- {ai_edge_quantizer_nightly-0.1.0.dev20250320.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250322.dist-info}/RECORD +11 -11
- {ai_edge_quantizer_nightly-0.1.0.dev20250320.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250322.dist-info}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250320.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250322.dist-info}/WHEEL +0 -0
- {ai_edge_quantizer_nightly-0.1.0.dev20250320.dist-info → ai_edge_quantizer_nightly-0.1.0.dev20250322.dist-info}/top_level.txt +0 -0
@@ -790,8 +790,6 @@ def init_tensor_min_max(
|
|
790
790
|
quantized_dim = None
|
791
791
|
if weight_tensor_config is not None and (
|
792
792
|
weight_tensor_config.granularity == qtyping.QuantGranularity.CHANNELWISE
|
793
|
-
or weight_tensor_config.granularity
|
794
|
-
== qtyping.QuantGranularity.BLOCKWISE
|
795
793
|
):
|
796
794
|
quantized_dim = common_utils.get_weight_quantized_dim(
|
797
795
|
op_info, tensor_data
|
@@ -801,6 +799,11 @@ def init_tensor_min_max(
|
|
801
799
|
and weight_tensor_config.granularity
|
802
800
|
== qtyping.QuantGranularity.BLOCKWISE
|
803
801
|
):
|
802
|
+
quantized_dim = (
|
803
|
+
tfl_flatbuffer_utils.TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM[
|
804
|
+
op_info.op_name
|
805
|
+
]
|
806
|
+
)
|
804
807
|
reshaped_data, reduce_dims = _reshape_data_for_blockwise(
|
805
808
|
tensor_data,
|
806
809
|
quantized_dim,
|
@@ -80,13 +80,16 @@ def get_tensor_quant_params(
|
|
80
80
|
tensor_quant_config.symmetric,
|
81
81
|
)
|
82
82
|
quantized_dim = None
|
83
|
-
if (
|
84
|
-
tensor_quant_config.granularity == qtyping.QuantGranularity.CHANNELWISE
|
85
|
-
or tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE
|
86
|
-
):
|
83
|
+
if tensor_quant_config.granularity == qtyping.QuantGranularity.CHANNELWISE:
|
87
84
|
quantized_dim = common_utils.get_weight_quantized_dim(
|
88
85
|
op_info, tensor_content
|
89
86
|
)
|
87
|
+
elif tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
|
88
|
+
quantized_dim = (
|
89
|
+
tfl_flatbuffer_utils.TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM[
|
90
|
+
op_info.op_name
|
91
|
+
]
|
92
|
+
)
|
90
93
|
quant_params = qtyping.UniformQuantParams(
|
91
94
|
scale=scale,
|
92
95
|
zero_point=zp,
|
@@ -187,8 +187,8 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
|
|
187
187
|
zp = quant_params.zero_point
|
188
188
|
expected_zp, expected_scale = (
|
189
189
|
uniform_quantize_tensor.tensor_zp_scale_from_min_max(
|
190
|
-
min_value=np.array([[-7, 4], [-4,
|
191
|
-
max_value=np.array([[
|
190
|
+
min_value=np.array([[-7], [-4], [-4], [7]]),
|
191
|
+
max_value=np.array([[7], [4], [4], [7]]),
|
192
192
|
num_bits=4,
|
193
193
|
symmetric=True,
|
194
194
|
)
|
@@ -200,7 +200,7 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
|
|
200
200
|
cast(np.ndarray, quant_params.quantized_data).shape, test_data.shape
|
201
201
|
)
|
202
202
|
self.assertEqual(quant_params.block_size, 2)
|
203
|
-
self.assertEqual(quant_params.quantized_dimension,
|
203
|
+
self.assertEqual(quant_params.quantized_dimension, 1)
|
204
204
|
|
205
205
|
|
206
206
|
if __name__ == "__main__":
|
@@ -143,24 +143,15 @@ def _perform_blockwise_quantization(
|
|
143
143
|
tensor = transformation_input.subgraph.tensors[transformation_input.tensor_id]
|
144
144
|
blockwise_details = schema_py_generated.BlockwiseQuantizationT()
|
145
145
|
scale_tensor_id = transformation_utils.add_new_constant_tensor(
|
146
|
-
tensor.name + b"
|
147
|
-
transformation_input.quant_params.scale,
|
146
|
+
tensor.name + b"_scales",
|
147
|
+
transformation_input.quant_params.scale.astype(np.float16),
|
148
148
|
schema_py_generated.TensorType.FLOAT16,
|
149
149
|
transformation_input.subgraph,
|
150
150
|
transformation_input.buffers,
|
151
151
|
)
|
152
152
|
blockwise_details.scales = scale_tensor_id
|
153
153
|
blockwise_details.blockSize = transformation_input.quant_params.block_size
|
154
|
-
#
|
155
|
-
if transformation_input.quant_params.zero_point is not None:
|
156
|
-
zero_point_tensor_id = transformation_utils.add_new_constant_tensor(
|
157
|
-
tensor.name + b"_zero_point",
|
158
|
-
transformation_input.quant_params.zero_point,
|
159
|
-
schema_py_generated.TensorType.INT32,
|
160
|
-
transformation_input.subgraph,
|
161
|
-
transformation_input.buffers,
|
162
|
-
)
|
163
|
-
blockwise_details.zeroPoints = zero_point_tensor_id
|
154
|
+
# TODO: b/404909258 - Add optional zero point to blockwise quantization.
|
164
155
|
flatbuffer_quantization.details = blockwise_details
|
165
156
|
return flatbuffer_quantization
|
166
157
|
|
@@ -169,7 +169,8 @@ class QuantizeTensorTest(parameterized.TestCase):
|
|
169
169
|
self.assertEqual(quant_param.details.blockSize, 32)
|
170
170
|
# Check if the scale and zero point tensors are inserted correctly.
|
171
171
|
self.assertEqual(quant_param.details.scales, 9)
|
172
|
-
|
172
|
+
# So far we don't have zero point in blockwise quantization.
|
173
|
+
self.assertEqual(quant_param.details.zeroPoints, 0)
|
173
174
|
|
174
175
|
def test_int4_constant_packed_correctly(self):
|
175
176
|
subgraph = self._model.subgraphs[0]
|
@@ -72,6 +72,11 @@ TFL_OP_TO_WEIGHT_QUANTIZED_DIM = immutabledict.immutabledict({
|
|
72
72
|
_TFLOpName.CONV_2D_TRANSPOSE: 0,
|
73
73
|
})
|
74
74
|
|
75
|
+
TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM = immutabledict.immutabledict({
|
76
|
+
_TFLOpName.FULLY_CONNECTED: 1,
|
77
|
+
_TFLOpName.EMBEDDING_LOOKUP: 1,
|
78
|
+
})
|
79
|
+
|
75
80
|
NUM_TFL_DATATYPES = 18
|
76
81
|
TENSOR_CODE_TO_TYPE = {}
|
77
82
|
for dtype_code in range(NUM_TFL_DATATYPES):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ai-edge-quantizer-nightly
|
3
|
-
Version: 0.1.0.dev20250320
|
3
|
+
Version: 0.1.0.dev20250322
|
4
4
|
Summary: A quantizer for advanced developers to quantize converted AI Edge models.
|
5
5
|
Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
|
6
6
|
Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
|
@@ -28,12 +28,12 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCP
|
|
28
28
|
ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
|
29
29
|
ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=s64eDDH9bmRWy6Bl1peHnhGewLnFJjvnhYOdjo1zYOA,22625
|
30
30
|
ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
31
|
-
ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=
|
31
|
+
ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=SVu1RSX5xOWhuNEi9hHqgIDGe_ywyHBZAczp7KAcl3k,27220
|
32
32
|
ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=qMmKbWqxrCoVKbLKHn9WuCrGKPfHkEyU0Nmhokh8Qeo,2597
|
33
33
|
ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=OTXjEZ3Ctq3ffYzisX-6HwgK_DuA7uos_aap5PiIUPE,8686
|
34
34
|
ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=y7BK11fkF63Ex_Jzg3fbIdy0D_Ca6HuvChVZR7Uwggc,8073
|
35
|
-
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=
|
36
|
-
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=
|
35
|
+
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=cbyyYAoQnEraOYSV00wZ557ElBndHduVGeHikYUEFCE,7995
|
36
|
+
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=B30SEISYZ9DPs3suKeG2elgXylR98pCEMWSEGgZo20o,7648
|
37
37
|
ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=e5wYtki-vl739gSVAZHAKcs2hA87GvFUjVoSUPlnkyM,6433
|
38
38
|
ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=IcTOaJ1pxtqsitqxOEP9LROVEP_19VFutHalqNied4I,6940
|
39
39
|
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=WmZzKQlzfu9gFr9SbUDoPY3rFqTl363om8-0rTLwotw,11629
|
@@ -50,22 +50,22 @@ ai_edge_quantizer/transformations/emulated_subchannel.py,sha256=HVaRxoC8PCAvy3xe
|
|
50
50
|
ai_edge_quantizer/transformations/emulated_subchannel_test.py,sha256=gZP6u9NdPXl7s19qB_Un8evou9ZZV6I9Gy0E1rdobHM,7722
|
51
51
|
ai_edge_quantizer/transformations/quant_insert.py,sha256=jn6HsJaV-sqBiFPY-Aqbd64t8zgcYVkEkZI375x_FWY,3958
|
52
52
|
ai_edge_quantizer/transformations/quant_insert_test.py,sha256=X9ptPDvJCFkR5tejKnD1SlHFGPazQTW-wNNMV9MEAuw,10107
|
53
|
-
ai_edge_quantizer/transformations/quantize_tensor.py,sha256=
|
54
|
-
ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=
|
53
|
+
ai_edge_quantizer/transformations/quantize_tensor.py,sha256=vzKtrXILqVsr1NGlribhdtKEIsXA93o37embLRe9TwQ,7493
|
54
|
+
ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=mHLO3_MRt36A8-ZN8ADn5tBBJlqjTWa7ZUN8Mmu5Rcw,9116
|
55
55
|
ai_edge_quantizer/transformations/transformation_utils.py,sha256=R42OIbzwQ7JYJ-Qt46jsqwb6u4MfDGiIPCRZCUGLVCw,4664
|
56
56
|
ai_edge_quantizer/transformations/transformation_utils_test.py,sha256=xH64SF3UHDh84vYbt-WvmXNjM-Jg-mefES1ACO1tkqw,6269
|
57
57
|
ai_edge_quantizer/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
58
58
|
ai_edge_quantizer/utils/calibration_utils.py,sha256=1Fj9MIO6aLZIRgyd4axvZN4S_O64nB_-Miu1WP664js,2536
|
59
59
|
ai_edge_quantizer/utils/calibration_utils_test.py,sha256=Z-AcdTieesWFKyKBb08ZXm4Mgu6cvJ4bg2-MJ7hLD10,2856
|
60
60
|
ai_edge_quantizer/utils/test_utils.py,sha256=HwZCIpO9fJRAhuN6t6voXKOYQtcioFtt_tpkAlDsAYk,6205
|
61
|
-
ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=
|
61
|
+
ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=51GRkwj7PK0XvAqohdv6mAepOWRk1AnW2y-9ne6LzWo,10628
|
62
62
|
ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py,sha256=AbyDxoM62k4ojD8gPdkWo--xe5hlX3t0kobQSA80kuk,7740
|
63
63
|
ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=x2xA2CFPpe_2trcV8v5xGaBETvVCfwAcJuq6yieGJ0Y,12687
|
64
64
|
ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=Op3JxtOqlrjzmYF18jnnstL1k9xiY9kKJ8S2vklKGkc,11327
|
65
65
|
ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
|
66
66
|
ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
|
67
|
-
ai_edge_quantizer_nightly-0.1.0.dev20250320.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
68
|
-
ai_edge_quantizer_nightly-0.1.0.
|
69
|
-
ai_edge_quantizer_nightly-0.1.0.dev20250320.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
70
|
-
ai_edge_quantizer_nightly-0.1.0.dev20250320.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
71
|
-
ai_edge_quantizer_nightly-0.1.0.dev20250320.dist-info/RECORD,,
|
67
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250322.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
68
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250322.dist-info/METADATA,sha256=paWcF2o3qHGKO-7DDgm1hB0tqqAK6F1AtnIpezmNI80,1527
|
69
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250322.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
70
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250322.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
71
|
+
ai_edge_quantizer_nightly-0.1.0.dev20250322.dist-info/RECORD,,
|
File without changes
|
File without changes
|