ai-edge-quantizer-nightly 0.4.0.dev20251102__py3-none-any.whl → 0.5.0.dev20251130__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +22 -25
- ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py +1 -1
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py +1 -2
- ai_edge_quantizer/algorithms/uniform_quantize/mse.py +5 -3
- ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py +1 -1
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +6 -11
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +18 -14
- ai_edge_quantizer/algorithms/uniform_quantize/octav.py +9 -5
- ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py +1 -2
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +34 -10
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +1 -1
- ai_edge_quantizer/algorithms/utils/common_utils.py +46 -33
- ai_edge_quantizer/calibrator.py +1 -50
- ai_edge_quantizer/calibrator_test.py +2 -67
- ai_edge_quantizer/default_policy.py +5 -13
- ai_edge_quantizer/qtyping.py +24 -3
- ai_edge_quantizer/quantizer_test.py +41 -6
- ai_edge_quantizer/recipe_manager_test.py +0 -6
- ai_edge_quantizer/utils/validation_utils.py +44 -8
- ai_edge_quantizer/utils/validation_utils_test.py +28 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20251102.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20251130.dist-info}/METADATA +1 -1
- {ai_edge_quantizer_nightly-0.4.0.dev20251102.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20251130.dist-info}/RECORD +25 -25
- {ai_edge_quantizer_nightly-0.4.0.dev20251102.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20251130.dist-info}/WHEEL +0 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20251102.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20251130.dist-info}/licenses/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20251102.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20251130.dist-info}/top_level.txt +0 -0
ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py CHANGED
@@ -1165,39 +1165,36 @@ def init_tensor_min_max(
     A dictionary containing the min/max values for the tensor, or an empty
     dictionary if the tensor data is None.
   """
-
+  weight_tensor_config = op_info.op_quant_config.weight_tensor_config
+  if tensor_data is None or weight_tensor_config is None:
     return {}
   else:
-
-
-
-
-
+    # Get reduce dimension for min/max calculation based on quantization
+    # granularity.
+    granularity = weight_tensor_config.granularity
+    if granularity == qtyping.QuantGranularity.TENSORWISE:
+      reduce_dims = None
+      keep_dims = True
+    elif granularity == qtyping.QuantGranularity.CHANNELWISE:
       quantized_dim = common_utils.get_weight_quantized_dim(
           op_info, tensor_data, weight_tensor_config.granularity
       )
-
-
-
-
-      ):
-
+      reduce_dims = common_utils.get_reduce_dims(
+          quantized_dim, tensor_data.shape
+      )
+      keep_dims = True
+    elif uniform_quantize_tensor.is_blockwise(granularity):
+      tensor_data, reduce_dims = (
          uniform_quantize_tensor.reshape_data_for_blockwise(
              tensor_data,
              op_info.op_name,
-
+             granularity,
          )
      )
-
-          "min": np.min(reshaped_data, axis=reduce_dims, keepdims=False),
-          "max": np.max(reshaped_data, axis=reduce_dims, keepdims=False),
-      }
-
+      keep_dims = False
    else:
-
-
-
-
-
-          "max": np.max(tensor_data, axis=reduce_dims, keepdims=True),
-      }
+      raise ValueError(f"Unsupported granularity: {granularity}")
+    return {
+        "min": np.min(tensor_data, axis=reduce_dims, keepdims=keep_dims),
+        "max": np.max(tensor_data, axis=reduce_dims, keepdims=keep_dims),
+    }
ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py CHANGED
@@ -158,7 +158,7 @@ def get_tensor_quant_params(
       op_info, tensor_quant_config, tensor_content, tensor_qsv
   )
 
-  if tensor_quant_config.granularity
+  if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
     raise ValueError(
         "Blockwise quantization is not supported for dequantized weight"
         " recovery."
ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py CHANGED
@@ -147,8 +147,7 @@ class HadamardRotationFullyConnectedTest(parameterized.TestCase):
         weight_tensor_config=_TensorQuantConfig(
             num_bits=8,
             symmetric=True,
-            granularity=qtyping.QuantGranularity.
-            block_size=32,
+            granularity=qtyping.QuantGranularity.BLOCKWISE_32,
         ),
     ),
 )
ai_edge_quantizer/algorithms/uniform_quantize/mse.py CHANGED
@@ -55,7 +55,7 @@ def get_tensor_quant_params(
     ValueError: `tensor_qsv` must contain min/max values, or `tensor_content`
       must be provided so that they can be inferred.
   """
-  if tensor_quant_config.granularity
+  if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
     raise ValueError(
         "Blockwise quantization is not supported for MSE quantization."
     )
@@ -113,13 +113,15 @@ def get_tensor_quant_params(
       num_bits=tensor_quant_config.num_bits,
       symmetric=tensor_quant_config.symmetric,
       quantized_dimension=quantized_dim,
-      block_size=
+      block_size=uniform_quantize_tensor.extract_block_size_from_granularity(
+          tensor_quant_config.granularity
+      ),
   )
 
   quantized_vars = uniform_quantize_tensor.uniform_quantize(
       tensor_content,
       quant_params,
-      tensor_quant_config.granularity
+      uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity),
   )
 
   return dataclasses.replace(quant_params, quantized_data=quantized_vars)
ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py CHANGED
@@ -84,7 +84,7 @@ class MseQuantizeTest(parameterized.TestCase):
         tensor_quant_config=qtyping.TensorQuantizationConfig(
             num_bits=4,
             symmetric=True,
-            granularity=qtyping.QuantGranularity.
+            granularity=qtyping.QuantGranularity.BLOCKWISE_32,
         ),
         tensor_content=test_data,
     )
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py CHANGED
@@ -15,6 +15,7 @@
 
 """Performs naive min/max uniform quantization."""
 
+import dataclasses
 from typing import Any, Optional
 import numpy as np
 from ai_edge_quantizer import qtyping
@@ -91,7 +92,9 @@ def get_tensor_quant_params(
       num_bits=tensor_quant_config.num_bits,
       symmetric=tensor_quant_config.symmetric,
       quantized_dimension=quantized_dim,
-      block_size=
+      block_size=uniform_quantize_tensor.extract_block_size_from_granularity(
+          tensor_quant_config.granularity
+      ),
   )
   if tensor_content is None:
     return quant_params
@@ -99,18 +102,10 @@ def get_tensor_quant_params(
   quantized_vars = uniform_quantize_tensor.uniform_quantize(
       tensor_content,
       quant_params,
-      tensor_quant_config.granularity
+      uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity),
   )
   # Update with quantized values.
-  return
-      scale=scale,
-      zero_point=zp,
-      num_bits=tensor_quant_config.num_bits,
-      symmetric=tensor_quant_config.symmetric,
-      quantized_dimension=quantized_dim,
-      quantized_data=quantized_vars,
-      block_size=tensor_quant_config.block_size,
-  )
+  return dataclasses.replace(quant_params, quantized_data=quantized_vars)
 
 
 # TODO: b/333731147 - Use named tuple to store min/max.
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py CHANGED
@@ -17,6 +17,7 @@ import os
 from typing import cast
 
 from absl.testing import parameterized
+import ml_dtypes
 import numpy as np
 
 from tensorflow.python.platform import googletest
@@ -165,8 +166,7 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
     weight_tensor_config = _TensorQuantConfig(
         num_bits=4,
         symmetric=True,
-        granularity=qtyping.QuantGranularity.
-        block_size=2,
+        granularity=qtyping.QuantGranularity.BLOCKWISE_32,
     )
     op_info = qtyping.OpInfo(
         op=fc_op,
@@ -176,28 +176,32 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
             weight_tensor_config=weight_tensor_config,
         ),
     )
-    test_data = np.
+    test_data = np.random.uniform(low=-10, high=10, size=(4, 32)).astype(
+        np.float32
+    )
     quant_params = naive_min_max_quantize.get_tensor_quant_params(
         op_info=op_info,
         tensor_quant_config=weight_tensor_config,
         tensor_content=test_data,
     )
-    scale = quant_params.scale
     zp = quant_params.zero_point
-
-
-
-
-
-
-
-
-
+    self.assertEqual(zp.shape, (4, 1))
+    self.assertTrue(np.array_equal(zp, np.zeros([4, 1])))
+
+    self.assertEqual(quant_params.scale.shape, (4, 1))
+    expected_scales = np.max(np.abs(test_data), axis=1, keepdims=True) / 7.0
+    expected_scales = (
+        expected_scales.astype(ml_dtypes.bfloat16)
+        .astype(np.float16)
+        .astype(np.float32)
+    )
+    self.assertTrue(np.allclose(quant_params.scale, expected_scales, atol=1e-5))
+
     self.assertIsNotNone(quant_params.quantized_data)
     self.assertTupleEqual(
         cast(np.ndarray, quant_params.quantized_data).shape, test_data.shape
     )
-    self.assertEqual(quant_params.block_size,
+    self.assertEqual(quant_params.block_size, 32)
     self.assertEqual(quant_params.quantized_dimension, 1)
 
   def test_calibrate_ignores_inf_min_max(self):
ai_edge_quantizer/algorithms/uniform_quantize/octav.py CHANGED
@@ -131,12 +131,12 @@ def get_tensor_quant_params(
   quantized_dim = common_utils.get_weight_quantized_dim(
       op_info, tensor_content, tensor_quant_config.granularity
   )
-  if tensor_quant_config.granularity
+  if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
     reshaped_data, reduce_dims = (
         uniform_quantize_tensor.reshape_data_for_blockwise(
             tensor_content,
             op_info.op_name,
-            tensor_quant_config.
+            tensor_quant_config.granularity,
         )
     )
   else:
@@ -154,7 +154,7 @@ def get_tensor_quant_params(
   # We created a new dimension in order to reduce properly for blockwise
   # quantization, so we need to reshape the clipping constants back to the
   # min/max shape for the next step.
-  if tensor_quant_config.granularity
+  if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
     clipping_constants = clipping_constants.reshape(tensor_min_max["min"].shape)
 
   zp, scale = uniform_quantize_tensor.tensor_zp_scale_from_min_max(
@@ -172,13 +172,17 @@ def get_tensor_quant_params(
       num_bits=tensor_quant_config.num_bits,
       symmetric=tensor_quant_config.symmetric,
       quantized_dimension=quantized_dim,
-      block_size=
+      block_size=uniform_quantize_tensor.extract_block_size_from_granularity(
+          tensor_quant_config.granularity
+      ),
   )
 
   quantized_vars = uniform_quantize_tensor.uniform_quantize(
       tensor_content,
       quant_params,
-
+      is_blockwise_quant=uniform_quantize_tensor.is_blockwise(
+          tensor_quant_config.granularity
+      ),
   )
 
   return dataclasses.replace(quant_params, quantized_data=quantized_vars)
ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py CHANGED
@@ -196,8 +196,7 @@ class OctavQuantizeTest(parameterized.TestCase):
     tensor_config = qtyping.TensorQuantizationConfig(
         num_bits=4,
         symmetric=True,
-        granularity=qtyping.QuantGranularity.
-        block_size=32,
+        granularity=qtyping.QuantGranularity.BLOCKWISE_32,
     )
     fc_op_info = qtyping.OpInfo(
        op=self._fc_op,
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py CHANGED
@@ -29,6 +29,11 @@ class IntType:
   signed: bool
 
 
+def is_blockwise(granularity: qtyping.QuantGranularity) -> bool:
+  """Checks if the quantization granularity is blockwise."""
+  return "BLOCKWISE" in str(granularity)
+
+
 def get_quantized_range(qtype: IntType) -> tuple[float, float]:
   """Calculates range of the quantized type."""
   if qtype.signed:
@@ -40,6 +45,22 @@ def get_quantized_range(qtype: IntType) -> tuple[float, float]:
   return float(qmin), float(qmax)
 
 
+def extract_block_size_from_granularity(
+    granularity: qtyping.QuantGranularity,
+) -> int:
+  """Get the block size for blockwise quantization."""
+  if granularity == qtyping.QuantGranularity.BLOCKWISE_32:
+    return 32
+  elif granularity == qtyping.QuantGranularity.BLOCKWISE_64:
+    return 64
+  elif granularity == qtyping.QuantGranularity.BLOCKWISE_128:
+    return 128
+  elif granularity == qtyping.QuantGranularity.BLOCKWISE_256:
+    return 256
+  else:
+    return 0
+
+
 def _round_and_clip(
     tensor: np.ndarray, qtype: IntType, narrow: bool
 ) -> np.ndarray:
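A minimal usage sketch of the two helpers added above (not part of the diff; import paths taken from this wheel's RECORD):

  from ai_edge_quantizer import qtyping
  from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor

  g = qtyping.QuantGranularity.BLOCKWISE_64
  # is_blockwise() keys off the "BLOCKWISE" prefix of the enum name.
  assert uniform_quantize_tensor.is_blockwise(g)
  # extract_block_size_from_granularity() decodes the block size from the name.
  assert uniform_quantize_tensor.extract_block_size_from_granularity(g) == 64
  # Non-blockwise granularities report a block size of 0.
  assert uniform_quantize_tensor.extract_block_size_from_granularity(
      qtyping.QuantGranularity.CHANNELWISE
  ) == 0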
@@ -157,14 +178,16 @@ def _get_tensor_shape_for_blockwise(
 
 
 def reshape_data_for_blockwise(
-    tensor_data: np.ndarray,
+    tensor_data: np.ndarray,
+    op_name: qtyping.TFLOperationName,
+    granularity: qtyping.QuantGranularity,
 ) -> tuple[np.ndarray, int]:
   """Reshapes data for blockwise quantization.
 
   Args:
     tensor_data: The original tensor data.
     op_name: The name of the TFL op.
-
+    granularity: The quantization granularity for the tensor.
 
   Returns:
     A tuple containing the reshaped tensor data and the new reduce dimension.
@@ -172,11 +195,11 @@ def reshape_data_for_blockwise(
   quantized_dim = tfl_flatbuffer_utils.TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM[
       op_name
   ]
+  block_size = extract_block_size_from_granularity(granularity)
   new_shape = _get_tensor_shape_for_blockwise(
       tensor_data.shape, quantized_dim, block_size
   )
-
-  return reshaped_data, quantized_dim + 1
+  return tensor_data.reshape(new_shape), quantized_dim + 1
 
 
 def _broadcast_scale_zp_for_blockwise(
@@ -233,21 +256,21 @@ def _broadcast_scale_zp_for_blockwise(
 def uniform_quantize(
     tensor_data: np.ndarray,
     quantization_params: qtyping.UniformQuantParams,
-
+    is_blockwise_quant: bool = False,
 ):
   """Uniform quantize a tensor.
 
   Args:
     tensor_data: The tensor to be quantized.
     quantization_params: The quantization parameters.
-
+    is_blockwise_quant: Whether the tensor is blockwise quantized.
 
   Returns:
     The quantized tensor.
   """
   # The reshaping for blockwise quantization is unique hence we do this here
   # to avoid unexpected broadcast behavior downstream.
-  if
+  if is_blockwise_quant:
     quantization_params = _broadcast_scale_zp_for_blockwise(
         tensor_data, quantization_params
     )
@@ -435,17 +458,18 @@ def tensor_zp_scale_from_min_max(
   Returns:
     The zero point and scale of the tensor.
   """
+
   # TODO: b/332574603 - support unsigned data type.
   qtype = IntType(
       num_bits,
       signed=True,
   )
   qmin, qmax = get_quantized_range(qtype)
-  min_bound = 1e-
+  min_bound = 1e-9  # Avoid zero scale.
   pos_clipping_values = None if clipping_values is None else clipping_values
   neg_clipping_values = None if clipping_values is None else -clipping_values
 
-  if granularity
+  if is_blockwise(granularity):
     # Blockwise quantization uses float16 scale,
     # with 7 bit mantissa, so the maximum scale value is 65280 and maximum
     # representable range is [-65280 * (2 ** num_bits),
@@ -493,7 +517,7 @@ def tensor_zp_scale_from_min_max(
     zp = qmin - bound_min / scale
     zp = np.rint(zp)
 
-  if granularity
+  if is_blockwise(granularity):
     # Round the scale values to 7 bit mantissa.
     scale = (
         scale.astype(ml_dtypes.bfloat16).astype(np.float16).astype(np.float32)
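The scale rounding in the last hunk can be reproduced standalone (a sketch, not part of the diff; uses only numpy and ml_dtypes):

  import ml_dtypes
  import numpy as np

  scale = np.array([0.1234567, 3.1415927], dtype=np.float32)
  # Casting through bfloat16 keeps only a 7-bit stored mantissa, so the
  # float16 scales written for blockwise quantization stay exactly
  # representable with that precision.
  rounded = scale.astype(ml_dtypes.bfloat16).astype(np.float16).astype(np.float32)
  # The rounding is idempotent: a second pass through bfloat16 changes nothing.
  assert np.all(rounded == rounded.astype(ml_dtypes.bfloat16).astype(np.float32))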
ai_edge_quantizer/algorithms/utils/common_utils.py CHANGED
@@ -51,8 +51,9 @@ def check_subchannel_config(
   """Checks the op quantization config for subchannel quantization."""
   if (
       op_quant_config.weight_tensor_config is not None
-      and
-
+      and uniform_quantize_tensor.is_blockwise(
+          op_quant_config.weight_tensor_config.granularity
+      )
   ):
     if op_name not in _SUPPORTED_SUBCHANNEL_OPS:
       raise ValueError(f"Unsupported op for blockwise quantization: {op_name}.")
@@ -66,10 +67,6 @@ def check_subchannel_config(
           "Blockwise quantization does not support for asymmetric weight"
          " quantization."
      )
-    if op_quant_config.weight_tensor_config.block_size <= 0:
-      raise ValueError(
-          "Blockwise quantization must have a non-zero block size."
-      )
 
 
 def check_if_valid_op_config(
@@ -369,11 +366,28 @@ def _materialize_standard_op_with_same_as_input_scale(
 
   # Change output qsv to be the same as input qsv. This is safe since TFL
   # subgraph is acyclic.
-  input_tensor_qsv = tensor_name_to_qsv
-
-
-
+  input_tensor_qsv = tensor_name_to_qsv.get(
+      input_tensor_params.tensor_name, None
+  )
+  if input_tensor_qsv is None:
+    input_tensor_data = tfl_flatbuffer_utils.get_tensor_data(
+        input_tensors[0], graph_info.buffers
    )
+    # If the input tensor is a constant tensor without qsv, compute qsv from
+    # its quant params.
+    if input_tensor_data is None:
+      # If the only input to an op that needs to match input to
+      # output has no qsv and is not a constant tensor, then this is an error.
+      raise ValueError(
+          "Input tensor qsv is None for tensor"
+          f" {input_tensor_params.tensor_name}."
+      )
+    min_val, max_val = _get_min_max_from_quant_params(input_quant_params)
+    input_tensor_qsv = {"min": min_val, "max": max_val}
+  for output_tensor in output_tensors:
+    tensor_name_to_qsv[
+        tfl_flatbuffer_utils.get_tensor_name(output_tensor)
+    ] = input_tensor_qsv
 
   return op_tensor_params
 
@@ -697,6 +711,26 @@ def _add_non_match_tensors_to_ignored_lists(
   return inputs_to_ignore, outputs_to_ignore
 
 
+def _get_min_max_from_quant_params(
+    quant_params: qtyping.UniformQuantParams,
+) -> tuple[np.ndarray, np.ndarray]:
+  """Recalculate min/max from tensor quantization params."""
+  q_min, q_max = uniform_quantize_tensor.get_quantized_range(
+      _IntType(quant_params.num_bits, True)
+  )
+  float_min = uniform_quantize_tensor.uniform_dequantize(
+      np.array(q_min), quant_params
+  )
+  float_max = uniform_quantize_tensor.uniform_dequantize(
+      np.array(q_max), quant_params
+  )
+  # We use qmax values to compute scale for symmetric quantization (see
+  # uniform_quantize_tensor.tensor_zp_scale_from_min_max).
+  if quant_params.symmetric:
+    float_min = -float_max
+  return float_min, float_max
+
+
 def materialize_standard_op(
     op_info: qtyping.OpInfo,
     graph_info: qtyping.GraphInfo,
@@ -863,8 +897,6 @@ def materialize_op_with_output_activation_constraint(
   output_tensor_params.producer = op_tensor_params
   # Update the tensor_name_to_qsv map using the output activation constraints.
   min_val, max_val = _get_min_max_from_quant_params(
-      activation_num_bits,
-      activation_tensor_config.symmetric,
       fixed_quant_params,
   )
   tensor_name_to_qsv[output_tensor_params.tensor_name]["min"] = min_val
@@ -993,7 +1025,7 @@ def get_weight_quantized_dim(
     quantized_dim = tfl_flatbuffer_utils.TFL_OP_TO_WEIGHT_QUANTIZED_DIM.get(
        op_info.op_name, None
    )
-  elif granularity
+  elif uniform_quantize_tensor.is_blockwise(granularity):
    quantized_dim = (
        tfl_flatbuffer_utils.TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM[
            op_info.op_name
@@ -1027,23 +1059,4 @@ def get_bmm_weight_quantized_dim(
   return rank - 1
 
 
-
-    num_bits: int,
-    symmetric: bool,
-    tensor_params: qtyping.UniformQuantParams,
-) -> tuple[float, float]:
-  """Recalculate min/max from tensor quantization params."""
-  q_min, q_max = uniform_quantize_tensor.get_quantized_range(
-      _IntType(num_bits, True)
-  )
-  float_min = uniform_quantize_tensor.uniform_dequantize(
-      np.array(q_min), tensor_params
-  )
-  float_max = uniform_quantize_tensor.uniform_dequantize(
-      np.array(q_max), tensor_params
-  )
-  # We use qmax values to compute scale for symmetric quantization (see
-  # uniform_quantize_tensor.tensor_zp_scale_from_min_max).
-  if symmetric:
-    float_min = -float_max
-  return (float_min, float_max)
+
ai_edge_quantizer/calibrator.py CHANGED
@@ -98,9 +98,7 @@ class Calibrator:
       qsv_update_func: The function to update the QSVs.
     """
     op_codes = self._flatbuffer_model.operatorCodes
-    if
-      self._initialize_model_qsvs(model_recipe_manager)
-    else:
+    if self._model_qsvs:
      logging.warning(
          "Calibrator contains non-empty model qsvs, and the current"
          " calibration process will start on top of this state (i.e., update"
@@ -263,50 +261,3 @@ class Calibrator:
       output_tensor = subgraph_tensors[output_tensor_idx]
       scope += tfl_flatbuffer_utils.get_tensor_name(output_tensor)
     return scope
-
-  # TODO: b/354224138 - Remove code duplication between calibrate and
-  # _initialize_model_qsvs.
-  def _initialize_model_qsvs(
-      self, model_recipe_manager: recipe_manager.RecipeManager
-  ) -> None:
-    """Initialize the model qsvs.
-
-    Args:
-      model_recipe_manager: A RecipeManager object that contains the
-        quantization recipe.
-    """
-    op_codes = self._flatbuffer_model.operatorCodes
-    for subgraph in self._flatbuffer_model.subgraphs:
-      graph_info = qtyping.GraphInfo(
-          subgraph.tensors, self._flatbuffer_model.buffers
-      )
-      for subgraph_op_id, op in enumerate(subgraph.operators):
-        op_code = op_codes[op.opcodeIndex].builtinCode
-        if op_code not in tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME:
-          continue
-        op_key = tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME[op_code]
-        # Step1: query the quantization_recipe to get op quantization
-        # settings.
-        op_scope = self._get_op_scope(op, subgraph.tensors)
-        algorithm_name, op_quant_config = (
-            model_recipe_manager.get_quantization_configs(op_key, op_scope)
-        )
-        if algorithm_name == algorithm_manager.AlgorithmName.NO_QUANTIZE:
-          continue
-        # Step2: query algorithm_manager to get/call the related qsv init
-        # function.
-        qsv_init_func = algorithm_manager.get_init_qsv_func(
-            algorithm_name, op_key
-        )
-        op_info = qtyping.OpInfo(op, op_key, subgraph_op_id, op_quant_config)
-        # Ignore the input tensors where any dimension of the shape is 0.
-        inputs_to_ignore = [
-            opr_idx
-            for opr_idx, tensor_idx in enumerate(op.inputs)
-            if not np.all(graph_info.subgraph_tensors[tensor_idx].shape)
-        ]
-        op_qsvs = qsv_init_func(op_info, graph_info, inputs_to_ignore)
-        # Step3: initialize tensor qsvs.
-        for tensor_name, qsv in op_qsvs.items():
-          if tensor_name not in self._model_qsvs:
-            self._model_qsvs[tensor_name] = qsv
ai_edge_quantizer/calibrator_test.py CHANGED
@@ -103,58 +103,6 @@ class CalibratorTest(googletest.TestCase):
     model_tensor_qsvs = self._calibrator.get_model_qsvs()
     self.assertEmpty(model_tensor_qsvs)
 
-  def test_calibrator_initialize_qsv(self):
-    _add_default_int8xint8_integer_recipe(self._recipe_manager)
-    # Overwrite the single op to fc
-    self._recipe_manager.add_quantization_config(
-        regex=".*Stateful.*",
-        operation_name=qtyping.TFLOperationName.FULLY_CONNECTED,
-        algorithm_key=_AlgorithmName.MIN_MAX_UNIFORM_QUANT,
-        op_config=qtyping.OpQuantizationConfig(
-            weight_tensor_config=_TENSOR_QUANT_CONFIG(
-                num_bits=4,
-                granularity=qtyping.QuantGranularity.CHANNELWISE,
-            ),
-            compute_precision=_ComputePrecision.INTEGER,
-        ),
-    )
-    self._calibrator._initialize_model_qsvs(self._recipe_manager)
-    model_tensor_qsvs = self._calibrator.get_model_qsvs()
-
-    self.assertLen(model_tensor_qsvs, 4)
-    self.assertIn("serving_default_input_1:0", model_tensor_qsvs)  # input
-    input_qsv = model_tensor_qsvs["serving_default_input_1:0"]
-    self.assertEmpty(input_qsv)
-
-    self.assertIn("sequential/dense/MatMul", model_tensor_qsvs)  # weight
-    weight_tensor_qsv = model_tensor_qsvs["sequential/dense/MatMul"]
-    mins_maxs_shape = (16, 1)
-    self.assertTupleEqual(weight_tensor_qsv["min"].shape, mins_maxs_shape)
-    self.assertAlmostEqual(weight_tensor_qsv["min"][0][0], -0.40436327)
-    self.assertTupleEqual(weight_tensor_qsv["max"].shape, mins_maxs_shape)
-    self.assertAlmostEqual(weight_tensor_qsv["max"][0][0], 0.46138108)
-
-    self.assertIn(
-        "sequential/dense/BiasAdd/ReadVariableOp", model_tensor_qsvs
-    )  # bias
-    bias_tensor_qsv = model_tensor_qsvs[
-        "sequential/dense/BiasAdd/ReadVariableOp"
-    ]
-    mins_maxs_shape = (16,)
-    self.assertTupleEqual(bias_tensor_qsv["min"].shape, mins_maxs_shape)
-    self.assertAlmostEqual(bias_tensor_qsv["min"][0], -0.26978338)
-    self.assertTupleEqual(bias_tensor_qsv["max"].shape, mins_maxs_shape)
-    # Here bias min/max will be the same as each element is a scalar
-    # Bias will be quantized with input_scale * weight_scale.
-    self.assertSequenceEqual(
-        list(bias_tensor_qsv["max"].flatten()),
-        list(bias_tensor_qsv["min"].flatten()),
-    )
-
-    self.assertIn("StatefulPartitionedCall:0", model_tensor_qsvs)  # output
-    output_qsv = model_tensor_qsvs["StatefulPartitionedCall:0"]
-    self.assertEmpty(output_qsv)
-
   def test_calibrate_single_fc_success(self):
     _add_default_int8xint8_integer_recipe(self._recipe_manager)
     self._calibrator.calibrate(
@@ -162,7 +110,7 @@ class CalibratorTest(googletest.TestCase):
     )
     model_tensor_qsvs = self._calibrator.get_model_qsvs()
 
-    self.assertLen(model_tensor_qsvs,
+    self.assertLen(model_tensor_qsvs, 2)
     self.assertIn("serving_default_input_1:0", model_tensor_qsvs)  # input
     input_qsv = model_tensor_qsvs["serving_default_input_1:0"]
     self.assertSequenceAlmostEqual(
@@ -171,19 +119,6 @@ class CalibratorTest(googletest.TestCase):
     self.assertSequenceAlmostEqual(
         input_qsv["max"].flatten(), [TEST_MAX_VAL], delta=1e-5
     )
-
-    self.assertIn("sequential/dense/MatMul", model_tensor_qsvs)  # weight
-    weight_qsv = model_tensor_qsvs["sequential/dense/MatMul"]
-    self.assertSequenceAlmostEqual(weight_qsv["min"].flatten(), [-0.49114203])
-    self.assertSequenceAlmostEqual(weight_qsv["max"].flatten(), [0.4903704])
-
-    self.assertIn(
-        "sequential/dense/BiasAdd/ReadVariableOp", model_tensor_qsvs
-    )  # bias
-    bias_qsv = model_tensor_qsvs["sequential/dense/BiasAdd/ReadVariableOp"]
-    self.assertSequenceAlmostEqual(bias_qsv["min"].flatten(), [-0.38401994])
-    self.assertSequenceAlmostEqual(bias_qsv["max"].flatten(), [0.31727126])
-
     self.assertIn("StatefulPartitionedCall:0", model_tensor_qsvs)  # output
     output_qsv = model_tensor_qsvs["StatefulPartitionedCall:0"]
     # Relu, only check the min
@@ -302,7 +237,7 @@ class CalibratorToyGemma2Test(googletest.TestCase):
         self._toy_gemma2_calibration_dataset,
         model_recipe_manager=recipe_mngr,
     )
-    self.assertLen(calib.get_model_qsvs(),
+    self.assertLen(calib.get_model_qsvs(), 202)
 
 
 if __name__ == "__main__":
ai_edge_quantizer/default_policy.py CHANGED
@@ -61,9 +61,8 @@ DEFAULT_JSON_POLICY = """
       "weight_tensor_config": {
         "num_bits": 4,
         "symmetric": [true],
-        "granularity": ["
-        "dtype": "INT"
-        "block_size": [32, 64, 96, 128, 256]
+        "granularity": ["BLOCKWISE_32", "BLOCKWISE_64", "BLOCKWISE_128", "BLOCKWISE_256"],
+        "dtype": "INT"
       },
       "explicit_dequantize": false,
       "compute_precision": "INTEGER"
@@ -320,16 +319,9 @@ def _unroll_json_config(
           "granularity": granularity,
          "dtype": json_config["weight_tensor_config"]["dtype"],
      }
-
-
-
-      weight_configs.append(
-          qtyping.TensorQuantizationConfig.from_dict(tensor_config)
-      )
-    else:
-      weight_configs.append(
-          qtyping.TensorQuantizationConfig.from_dict(tensor_config)
-      )
+      weight_configs.append(
+          qtyping.TensorQuantizationConfig.from_dict(tensor_config)
+      )
 
   if activation_configs:
     for activation_config in activation_configs:
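Under the new policy format, a weight_tensor_config entry carries the block size inside the granularity string itself; a hypothetical minimal entry for illustration only (not taken from the package):

  import json

  weight_tensor_config = {
      "num_bits": 4,
      "symmetric": True,
      "granularity": "BLOCKWISE_32",  # block size is encoded in the enum name
      "dtype": "INT",
      # No separate "block_size" key anymore.
  }
  print(json.dumps(weight_tensor_config, indent=2))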
ai_edge_quantizer/qtyping.py CHANGED
@@ -112,7 +112,11 @@ class TensorDataType(str, enum.Enum):
 class QuantGranularity(str, enum.Enum):
   TENSORWISE = 'TENSORWISE'
   CHANNELWISE = 'CHANNELWISE'
-
+  # Blockwise quantization with various block sizes.
+  BLOCKWISE_32 = 'BLOCKWISE_32'
+  BLOCKWISE_64 = 'BLOCKWISE_64'
+  BLOCKWISE_128 = 'BLOCKWISE_128'
+  BLOCKWISE_256 = 'BLOCKWISE_256'
 
 
 class QuantTransformation(enum.Enum):
@@ -310,7 +314,6 @@ class TensorQuantizationConfig:
     granularity: Whether to perform per-tensor, per-channel or per-block
       quantization.
     dtype: The data type of the tensor.
-    block_size: The block size for blockwise quantization, ignored otherwise.
     algorithm_key: The algorithm key to use for quantization.
   """
 
@@ -318,7 +321,6 @@ class TensorQuantizationConfig:
   symmetric: bool = True
   granularity: QuantGranularity = QuantGranularity.TENSORWISE
   dtype: TensorDataType = TensorDataType.INT
-  block_size: int = 0
 
   def to_dict(self) -> dict[str, Any]:
     """Converts ActivationQuantizationConfig to dict."""
@@ -336,9 +338,28 @@ class TensorQuantizationConfig:
   def from_dict(cls, params: dict[str, Any]) -> 'TensorQuantizationConfig':
     """Converts a given dict to TensorQuantizationConfig."""
     params_copy = copy.deepcopy(params)
+    # Process block_size config from legacy recipe.
+    params_copy = _process_block_size(params_copy)
     return cls(**params_copy)
 
 
+def _process_block_size(params: dict[str, Any]) -> dict[str, Any]:
+  """Processes block size in the params."""
+  block_size = params.pop('block_size', 0)
+  if block_size > 0:
+    if block_size == 32:
+      params['granularity'] = QuantGranularity.BLOCKWISE_32
+    elif block_size == 64:
+      params['granularity'] = QuantGranularity.BLOCKWISE_64
+    elif block_size == 128:
+      params['granularity'] = QuantGranularity.BLOCKWISE_128
+    elif block_size == 256:
+      params['granularity'] = QuantGranularity.BLOCKWISE_256
+    else:
+      raise ValueError(f'Unsupported block size: {block_size}')
+  return params
+
+
 @dataclasses.dataclass(frozen=True)
 class OpQuantizationConfig:
   """Configuration class to control the quantization process behavior.
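A sketch of the legacy-recipe path through from_dict() (assuming the public qtyping module; the mapping follows the _process_block_size() logic added above):

  from ai_edge_quantizer import qtyping

  # A legacy dict still carries an explicit block_size; from_dict() now pops
  # the key and maps it onto the matching BLOCKWISE_* granularity.
  legacy = {"num_bits": 4, "symmetric": True, "block_size": 64}
  cfg = qtyping.TensorQuantizationConfig.from_dict(legacy)
  assert cfg.granularity == qtyping.QuantGranularity.BLOCKWISE_64
  assert not hasattr(cfg, "block_size")  # the field is gone from the dataclass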
ai_edge_quantizer/quantizer_test.py CHANGED
@@ -212,7 +212,7 @@ class QuantizerTest(parameterized.TestCase):
     # Calibrate with empty state.
     calib_data = _get_calibration_data()
     calibration_result = self._quantizer.calibrate(calib_data)
-    self.assertLen(calibration_result,
+    self.assertLen(calibration_result, 7)
 
   @parameterized.parameters(
       'recipes/default_a8w8_recipe.json',
@@ -227,7 +227,7 @@ class QuantizerTest(parameterized.TestCase):
     updated_calibration_result = self._quantizer.calibrate(
         calib_data, previous_calibration_result=calibration_result
     )
-    self.assertLen(updated_calibration_result,
+    self.assertLen(updated_calibration_result, 7)
     self.assertNotEqual(
         calibration_result['StatefulPartitionedCall:0'],
         updated_calibration_result['StatefulPartitionedCall:0'],
@@ -309,6 +309,44 @@ class QuantizerTest(parameterized.TestCase):
       saved_recipe = json.load(json_file)
     self.assertEqual(saved_recipe, self._test_recipe)
 
+  def test_saved_legacy_recipe_lacks_block_size(self):
+    model_name = 'test_model'
+    legacy_recipe_path = os.path.join(
+        TEST_DATA_PREFIX_PATH,
+        'recipes/dynamic_legacy_wi8_afp32_recipe.json',
+    )
+    self._quantizer.load_quantization_recipe(legacy_recipe_path)
+    result = self._quantizer.quantize()
+    result.save(self._tmp_save_path, model_name)
+    saved_recipe_path = os.path.join(
+        self._tmp_save_path, model_name + '_recipe.json'
+    )
+    with open(saved_recipe_path) as json_file:
+      saved_recipe = json.load(json_file)
+    with open(legacy_recipe_path) as json_file:
+      legacy_recipe = json.load(json_file)
+
+    self.assertNotEqual(saved_recipe, legacy_recipe)
+
+    # Verify that the default test recipe contains 'block_size'.
+    has_block_size = False
+    for config in legacy_recipe:
+      op_config = config.get('op_config')
+      if op_config:
+        weight_config = op_config.get('weight_tensor_config')
+        if weight_config and 'block_size' in weight_config:
+          has_block_size = True
+          break
+    self.assertTrue(has_block_size)
+
+    # Verify that the saved recipe does not have 'block_size'.
+    for config in saved_recipe:
+      op_config = config.get('op_config')
+      if op_config:
+        weight_config = op_config.get('weight_tensor_config')
+        if weight_config:
+          self.assertNotIn('block_size', weight_config)
+
   def test_save_no_quantize_raise_error(self):
     error_message = 'No quantized model to save.'
     with self.assertRaisesWithPredicateMatch(
@@ -535,14 +573,12 @@ class QuantizerMultiSignatureModelTest(parameterized.TestCase):
             'symmetric': False,
             'granularity': 'TENSORWISE',
             'dtype': 'INT',
-            'block_size': 0,
         },
         'weight_tensor_config': {
             'num_bits': 8,
             'symmetric': True,
             'granularity': 'CHANNELWISE',
             'dtype': 'INT',
-            'block_size': 0,
         },
         'compute_precision': 'INTEGER',
         'explicit_dequantize': False,
@@ -563,8 +599,7 @@ class QuantizerMultiSignatureModelTest(parameterized.TestCase):
 
     # Quantize and expect an error about missing signature in calibration data.
     error_message = (
-        '
-        " 'multiply'."
+        'MUL(index: 0) not found in tensor_name_to_qsv'
     )
     with self.assertRaisesWithPredicateMatch(
         ValueError, lambda err: error_message in str(err)
ai_edge_quantizer/recipe_manager_test.py CHANGED
@@ -569,14 +569,12 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
             'symmetric': False,
             'granularity': _QuantGranularity.TENSORWISE,
             'dtype': 'INT',
-            'block_size': 0,
         },
         'weight_tensor_config': {
             'num_bits': 8,
             'symmetric': True,
             'granularity': _QuantGranularity.TENSORWISE,
             'dtype': 'INT',
-            'block_size': 0,
         },
         # WEIGHT_ONLY.
         'compute_precision': _ComputePrecision.INTEGER,
@@ -595,7 +593,6 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
             'num_bits': 8,
             'symmetric': True,
             'granularity': _QuantGranularity.TENSORWISE,
-            'block_size': 0,
         },
         # WEIGHT_ONLY.
         'compute_precision': _ComputePrecision.FLOAT,
@@ -614,7 +611,6 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
             'num_bits': 4,
             'symmetric': True,
             'granularity': _QuantGranularity.TENSORWISE,
-            'block_size': 0,
         },
         # WEIGHT_ONLY.
         'compute_precision': _ComputePrecision.FLOAT,
@@ -633,7 +629,6 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
             'num_bits': 6,
             'symmetric': True,
             'granularity': _QuantGranularity.TENSORWISE,
-            'block_size': 0,
         },
         # WEIGHT_ONLY.
         'compute_precision': _ComputePrecision.FLOAT,
@@ -652,7 +647,6 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
             'num_bits': 3,
             'symmetric': True,
             'granularity': _QuantGranularity.TENSORWISE,
-            'block_size': 0,
         },
         # WEIGHT_ONLY.
         'compute_precision': _ComputePrecision.FLOAT,
ai_edge_quantizer/utils/validation_utils.py CHANGED
@@ -32,7 +32,7 @@ def get_validation_func(
     a validation function
 
   Raises:
-
+    ValueError: if the function name is not supported
   """
   if func_name == "mse":
     return mean_squared_difference
@@ -42,6 +42,8 @@ def get_validation_func(
     return cosine_similarity
   elif func_name == "kl_divergence":
     return kl_divergence
+  elif func_name == "snr":
+    return signal_to_noise_ratio
   else:
     raise ValueError(f"Validation function {func_name} not supported")
 
@@ -62,7 +64,7 @@ def mean_squared_difference(
     a float value representing the MSD between data1 & 2
 
   Raises:
-
+    ValueError: if the two inputs don't have the same number of elements
   """
   data1, data2 = _preprocess_same_size_arrays(data1, data2)
   # special handling for tensor of size 0
@@ -91,7 +93,7 @@ def median_diff_ratio(
     a float value representing the median diff ratio between data1 & 2
 
   Raises:
-
+    ValueError: if the two inputs don't have the same number of elements
   """
   data1, data2 = _preprocess_same_size_arrays(data1, data2)
   # special handling for tensor of size 0
@@ -120,7 +122,7 @@ def cosine_similarity(
     a float value representing the cosine similarity between data1 & 2
 
   Raises:
-
+    ValueError: if the two inputs don't have the same number of elements
   """
   data1, data2 = _preprocess_same_size_arrays(data1, data2)
   # special handling for tensor of size 0
@@ -152,15 +154,15 @@ def kl_divergence(
 
   Args:
     data1: input data to be used for comparison (distribution Q)
-    data2: input data to be used for comparison (distribution P),
-
+    data2: input data to be used for comparison (distribution P), data1 & 2 must
+      be of the same shape
     epsilon: small value to avoid log(0) and division by zero.
 
   Returns:
     A float value representing the KL divergence between data1 & 2.
 
   Raises:
-
+    ValueError: if the two inputs don't have the same number of elements.
   """
   data1, data2 = _preprocess_same_size_arrays(data1, data2)
   # special handling for tensor of size 0
@@ -173,6 +175,40 @@ def kl_divergence(
   return float(np.sum(p * np.log((p + epsilon) / (q + epsilon))))
 
 
+def signal_to_noise_ratio(
+    noisy_signal: np._typing.ArrayLike,
+    signal: np._typing.ArrayLike,
+    epsilon: float = 1e-9,
+) -> float:
+  """Calculates the signal to noise ratio between noisy_signal & signal.
+
+  SNR = P_signal / P_noise, where signal is treated as the clean signal and
+  noisy_signal-signal is treated as the noise samples.
+  P_signal = mean(signal^2)
+  P_noise = mean((noisy_signal-signal)^2) = mse(noisy_signal, signal)
+
+  Args:
+    noisy_signal: Input data to be used for comparison (e.g. noisy signal).
+    signal: Input data to be used for comparison (e.g. clean signal),
+      noisy_signal & signal must be of the same shape.
+    epsilon: Small value to avoid division by zero.
+
+  Returns:
+    A float value representing the SNR between noisy_signal & signal.
+
+  Raises:
+    ValueError: If the two inputs don't have the same number of elements.
+  """
+  noisy_signal, signal = _preprocess_same_size_arrays(noisy_signal, signal)
+  if signal.size == 0:
+    return float(0)
+
+  mse = mean_squared_difference(noisy_signal, signal)
+  signal_power = float(np.square(signal).mean())
+  snr = signal_power / (mse + epsilon)
+  return snr
+
+
 def _preprocess_same_size_arrays(
     data1: np._typing.ArrayLike, data2: np._typing.ArrayLike
 ) -> Tuple[np.ndarray, np.ndarray]:
@@ -187,7 +223,7 @@ def _preprocess_same_size_arrays(
     a tuple of the preprocessed data1 & 2
 
   Raises:
-
+    ValueError: if the two inputs don't have the same number of elements
   """
   data1 = np.array(data1, dtype=np.float32).flatten()
   data2 = np.array(data2, dtype=np.float32).flatten()
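A worked check of the new metric against the test expectations below (assuming validation_utils is importable from the installed wheel):

  import numpy as np
  from ai_edge_quantizer.utils import validation_utils

  noisy, clean = [2, 3, 4], [1, 2, 3]
  # P_noise = mean((noisy - clean)^2) = 1 and P_signal = mean([1, 4, 9]) = 14/3,
  # so SNR = (14/3) / 1 = 4.666...
  snr = validation_utils.signal_to_noise_ratio(noisy, clean)
  assert np.isclose(snr, 14 / 3, atol=1e-5)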
ai_edge_quantizer/utils/validation_utils_test.py CHANGED
@@ -134,6 +134,34 @@ class ValidationUtilTest(googletest.TestCase):
     func = validation_utils.get_validation_func("kl_divergence")
     self.assertEqual(func, validation_utils.kl_divergence)
 
+  def test_signal_to_noise_ratio_0d(self):
+    data1 = []
+    data2 = []
+    result = validation_utils.signal_to_noise_ratio(data1, data2)
+    self.assertEqual(result, 0)
+
+  def test_signal_to_noise_ratio_identical(self):
+    data1 = [1, 2, 3]
+    data2 = [1, 2, 3]
+    result = validation_utils.signal_to_noise_ratio(data1, data2)
+    self.assertGreater(result, 1e8)  # mse=0, so snr should be large
+
+  def test_signal_to_noise_ratio_with_noise(self):
+    data1 = [2, 3, 4]
+    data2 = [1, 2, 3]
+    result = validation_utils.signal_to_noise_ratio(data1, data2)
+    self.assertAlmostEqual(result, 14 / 3, places=5)
+
+  def test_signal_to_noise_ratio_simple(self):
+    data1 = [1, 1]
+    data2 = [1, 0]
+    result = validation_utils.signal_to_noise_ratio(data1, data2)
+    self.assertAlmostEqual(result, 1.0, places=5)
+
+  def test_get_validation_func_snr(self):
+    func = validation_utils.get_validation_func("snr")
+    self.assertEqual(func, validation_utils.signal_to_noise_ratio)
+
 
 if __name__ == "__main__":
   googletest.main()
{ai_edge_quantizer_nightly-0.4.0.dev20251102.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20251130.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-edge-quantizer-nightly
-Version: 0.4.0.dev20251102
+Version: 0.5.0.dev20251130
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
{ai_edge_quantizer_nightly-0.4.0.dev20251102.dist-info → ai_edge_quantizer_nightly-0.5.0.dev20251130.dist-info}/RECORD CHANGED
@@ -2,22 +2,22 @@ ai_edge_quantizer/__init__.py,sha256=4pFSkukSwahYyzwqia0yPRyz8TnFQfGRthVJhYpMWas
 ai_edge_quantizer/algorithm_manager.py,sha256=0jSNITKl0Ge1XeYKueOUj9brlS4B5ZcdcVQ1kZS3JKg,16518
 ai_edge_quantizer/algorithm_manager_api.py,sha256=u903TG0s1uIDhJqfeJne3CFl8A93phZrwgV2-hwdcXU,9247
 ai_edge_quantizer/algorithm_manager_api_test.py,sha256=w6bSONvXkX6bzXAGc0-7b6gNDt9oz9ieq97KP8Sg_JU,7666
-ai_edge_quantizer/calibrator.py,sha256=
-ai_edge_quantizer/calibrator_test.py,sha256=
+ai_edge_quantizer/calibrator.py,sha256=brB6ENjZFQnIzlshr0zAFo0g-XjwvD-Wsy5VasJspRU,9986
+ai_edge_quantizer/calibrator_test.py,sha256=VKK6p9M3EwSq4D7Sna2v1EFeop2zfL-Af-YiusIuyb8,8957
 ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
-ai_edge_quantizer/default_policy.py,sha256=
+ai_edge_quantizer/default_policy.py,sha256=YcwwtVzoWUhjYgMtJ7b9f647740lURKteDOeJvwe17o,11384
 ai_edge_quantizer/model_modifier.py,sha256=U70JByv6CItP8tg4bdyMfX-R3UlwylAGSviZkF_FSAM,10468
 ai_edge_quantizer/model_modifier_test.py,sha256=CV4pgMEQkBJr_qbYR720TO8HBCutbEYLHptDHgdQMUE,7274
 ai_edge_quantizer/model_validator.py,sha256=Hj0_5o-Oa3dSlJ3ryVjRhvsyelHNyek1GrtG9buMczg,13153
 ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
 ai_edge_quantizer/params_generator.py,sha256=0w-sDGk84sVNkXoduon1wDqq30sGOHVgBVbdg44QVF4,20153
 ai_edge_quantizer/params_generator_test.py,sha256=RDYoRZDJfEZRtjlTAU2kZ_4t3JHOqEHxfJX9V4ETAhg,40597
-ai_edge_quantizer/qtyping.py,sha256=
+ai_edge_quantizer/qtyping.py,sha256=y9KretGzUGztyLdmto2XV6U0cxrSrfLWP1UOVcwR4dY,18011
 ai_edge_quantizer/quantizer.py,sha256=teYeONdIS31IAY6ubLujCRi1t6lYAd0LkC8dRPxQdbw,18919
-ai_edge_quantizer/quantizer_test.py,sha256=
+ai_edge_quantizer/quantizer_test.py,sha256=pavS0mezRdBAGD4lqBUhj53pXJDFH3U02ldxxh5RDYQ,28407
 ai_edge_quantizer/recipe.py,sha256=MEkfQ2Sg3KAE9LAORHWcbjYNPg06EUbwc1d-VspQA2U,6461
 ai_edge_quantizer/recipe_manager.py,sha256=6l2uq8KL23KLu9OQDmPGkxrFiwHrdDB9xnn-ni8WdEM,15036
-ai_edge_quantizer/recipe_manager_test.py,sha256=
+ai_edge_quantizer/recipe_manager_test.py,sha256=gYK3haUJ8-AISQvTI6tD-E-drJXQPSXPqBZdgpc5QTo,36595
 ai_edge_quantizer/recipe_test.py,sha256=QisyaTol8JRZFcGOGyee7QRCvqj5VbF4guKWdIoMUOE,6213
 ai_edge_quantizer/transformation_instruction_generator.py,sha256=O0U2aZcB8aXQgOV8r9g1rGNzDUiuI5Ta53XnxZbVffE,31576
 ai_edge_quantizer/transformation_instruction_generator_test.py,sha256=KW5-WoTTo9IqLEVnWxVC8ut8eWLi_91xfKgGqVQ9QDk,54635
@@ -28,22 +28,22 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCP
 ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
 ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=EqIHGEZ1LgUrTN7zf880RuAzEv3Qy7kgh5ivObJGHSo,22646
 ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
-ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=
+ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=wrp4F2Wo9ammz_6VXFjXu04RMJV4_MxGfp4XyFMhZHc,39904
 ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=GGf_n3wIeg3GB_eGsmyNJ0fTcxgpeMMbugTMRONK6TQ,3553
-ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=
+ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=VjBDxGxjITHJc7xJABqBbZt6_qhobtZAl2gnVQrYJgc,8652
 ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=sT5eX5TLZEHTtPfnSkCPDlS0sQxlTFWbCsbvOuj--yY,8889
 ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py,sha256=qxt9CPDcidVWIxp5nSWPN2hKKj1XZcsOOLBd2SYIvW0,14572
-ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py,sha256=
-ai_edge_quantizer/algorithms/uniform_quantize/mse.py,sha256=
-ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py,sha256=-
-ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=
-ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=
-ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256
-ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=
-ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=
-ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=
+ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py,sha256=1ejj5WS3GZwFk3qpsPiPS8jcmVS1-e7zRmvj2Nj8fKw,15440
+ai_edge_quantizer/algorithms/uniform_quantize/mse.py,sha256=EP5yPw6khAhTo6VNTPXEE2aGKLfNnqz8COeJnTKaGWs,4641
+ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py,sha256=-E1LIlxadckspltdgBWTiUzsiwbawSubndavHhWLt1g,7145
+ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=3HldmkAZv1EN0GeUWr574L9brknb569KB8i1iIGgcx0,8334
+ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=Eqa4OUqoCGywbHz-HxJ9dWRj9BKlVzJPuIhVzvrpdLM,8925
+ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=-n-QZyp9y8WCy5FPSpXZXHfOA-p-RLvfSaCzAfhHiHI,7040
+ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=6m2U-9JdNei0XzOORg2gt87TJdD0XHZ-z5h9c4g_TB4,9120
+ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=qZxTj3B-tqNTLCViwuJj285YncvwjWeay2QKWd8nr6A,20420
+ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=eTrrc8AGaSf1Ytp5gsRONAZ94PHFJUTd4dGi5ZnKZjU,16038
 ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
-ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=
+ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=M3VZsdLC4jCPfSI_aGAY4XjiHvoXtR-UyPZdZdz8GD0,38082
 ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
 ai_edge_quantizer/transformations/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
 ai_edge_quantizer/transformations/dequant_insert.py,sha256=sL1LHFVzBDSd9jgrzlHz38LWU0bwmVX7iBkaNcui0ts,3566
@@ -72,10 +72,10 @@ ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=42OWzQsRTXq3XQYmoxlz177_d
 ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py,sha256=K1SbK8q92qYVtiVj0I0GtugsPTkpIpEKv9zakvFV_Sc,8555
 ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOihexmizeJqt4SQcET9aA,14925
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
-ai_edge_quantizer/utils/validation_utils.py,sha256=
-ai_edge_quantizer/utils/validation_utils_test.py,sha256=
-ai_edge_quantizer_nightly-0.
-ai_edge_quantizer_nightly-0.
-ai_edge_quantizer_nightly-0.
-ai_edge_quantizer_nightly-0.
-ai_edge_quantizer_nightly-0.
+ai_edge_quantizer/utils/validation_utils.py,sha256=Mr0D6X-pTDLODFAnCX3IlqdV1OL02tlq0ZjHbqx8nzg,7439
+ai_edge_quantizer/utils/validation_utils_test.py,sha256=T8K5mCWeMcihND2KS_dHvCJUU9lEdG2sD95EgPkaX3w,5584
+ai_edge_quantizer_nightly-0.5.0.dev20251130.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.5.0.dev20251130.dist-info/METADATA,sha256=bkOsJLKqRynroS7W0blZKvvFESlG3DZh4xHs_Vx9Y1A,1707
+ai_edge_quantizer_nightly-0.5.0.dev20251130.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
+ai_edge_quantizer_nightly-0.5.0.dev20251130.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.5.0.dev20251130.dist-info/RECORD,,