ai-edge-quantizer-nightly 0.4.0.dev20250919__py3-none-any.whl → 0.4.0.dev20250921__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py +2 -16
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py +84 -3
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +5 -0
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +22 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20250919.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250921.dist-info}/METADATA +1 -1
- {ai_edge_quantizer_nightly-0.4.0.dev20250919.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250921.dist-info}/RECORD +9 -9
- {ai_edge_quantizer_nightly-0.4.0.dev20250919.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250921.dist-info}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20250919.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250921.dist-info}/WHEEL +0 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20250919.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250921.dist-info}/top_level.txt +0 -0
|
@@ -127,23 +127,9 @@ def get_tensor_quant_params(
|
|
|
127
127
|
"Hadamard rotation is only supported for tensors with rank >= 2."
|
|
128
128
|
)
|
|
129
129
|
|
|
130
|
-
if tensor_quant_config.granularity != qtyping.QuantGranularity.CHANNELWISE:
|
|
131
|
-
raise ValueError(
|
|
132
|
-
"Hadamard rotation is not supported for"
|
|
133
|
-
f" {tensor_quant_config.granularity} granularity."
|
|
134
|
-
)
|
|
135
|
-
|
|
136
|
-
quantized_dim = common_utils.get_weight_quantized_dim(
|
|
137
|
-
op_info, tensor_content, tensor_quant_config.granularity
|
|
138
|
-
)
|
|
139
|
-
if quantized_dim != 0:
|
|
140
|
-
raise ValueError(
|
|
141
|
-
f"Unsupported quantized dimension: {quantized_dim}. Only 0 is"
|
|
142
|
-
" supported."
|
|
143
|
-
)
|
|
144
|
-
|
|
145
130
|
# Reduction axis is the last non-quantized dimension. Since we only support
|
|
146
|
-
# quantized_dim of 0, the reduction axis is the last
|
|
131
|
+
# quantized_dim of 0 (or 1 for blockwise), the reduction axis is the last
|
|
132
|
+
# axis.
|
|
147
133
|
reduce_axis = tensor_content.ndim - 1
|
|
148
134
|
|
|
149
135
|
# Rotate the tensor with a Hadamard matrix.
|
|
@@ -46,13 +46,13 @@ class HadamardRotationFullyConnectedTest(parameterized.TestCase):
|
|
|
46
46
|
)
|
|
47
47
|
self._tensor_name_to_qsv = None
|
|
48
48
|
self._subgraph = self._test_model.subgraphs[0]
|
|
49
|
-
|
|
50
|
-
self._fc_op = self._subgraph.operators[3]
|
|
49
|
+
self._fc_subgraph_op_index = 3
|
|
50
|
+
self._fc_op = self._subgraph.operators[self._fc_subgraph_op_index]
|
|
51
51
|
self._fc_buffer_id = self._subgraph.tensors[self._fc_op.inputs[1]].buffer
|
|
52
52
|
self._op_info = qtyping.OpInfo(
|
|
53
53
|
op=self._fc_op,
|
|
54
54
|
op_name=_TFLOpName.FULLY_CONNECTED,
|
|
55
|
-
subgraph_op_index=3,
|
|
55
|
+
subgraph_op_index=self._fc_subgraph_op_index,
|
|
56
56
|
op_quant_config=qtyping.OpQuantizationConfig(
|
|
57
57
|
weight_tensor_config=_TensorQuantConfig(
|
|
58
58
|
num_bits=8,
|
|
@@ -98,6 +98,87 @@ class HadamardRotationFullyConnectedTest(parameterized.TestCase):
|
|
|
98
98
|
[qtyping.QuantTransformation.NO_QUANTIZE],
|
|
99
99
|
)
|
|
100
100
|
|
|
101
|
+
def test_fully_connected_tensorwise_supported(self):
|
|
102
|
+
self._op_info = qtyping.OpInfo(
|
|
103
|
+
op=self._fc_op,
|
|
104
|
+
op_name=_TFLOpName.FULLY_CONNECTED,
|
|
105
|
+
subgraph_op_index=self._fc_subgraph_op_index,
|
|
106
|
+
op_quant_config=qtyping.OpQuantizationConfig(
|
|
107
|
+
weight_tensor_config=_TensorQuantConfig(
|
|
108
|
+
num_bits=8,
|
|
109
|
+
symmetric=True,
|
|
110
|
+
granularity=qtyping.QuantGranularity.TENSORWISE,
|
|
111
|
+
),
|
|
112
|
+
),
|
|
113
|
+
)
|
|
114
|
+
params = hadamard_rotation.materialize_fully_connected(
|
|
115
|
+
self._op_info, self._graph_info, self._tensor_name_to_qsv
|
|
116
|
+
)
|
|
117
|
+
self.assertLen(params, 4)
|
|
118
|
+
fc_input = params[0]
|
|
119
|
+
self.assertIsNotNone(fc_input)
|
|
120
|
+
self.assertIsNotNone(fc_input.consumers)
|
|
121
|
+
self.assertIsNotNone(fc_input.consumers[0].parameters)
|
|
122
|
+
self.assertIsInstance(
|
|
123
|
+
fc_input.consumers[0].parameters, qtyping.UniformQuantParams
|
|
124
|
+
)
|
|
125
|
+
if isinstance(
|
|
126
|
+
fc_input.consumers[0].parameters, qtyping.UniformQuantParams
|
|
127
|
+
):
|
|
128
|
+
self.assertIsNone(fc_input.consumers[0].parameters.quantized_dimension)
|
|
129
|
+
weight = params[1]
|
|
130
|
+
self.assertIsNotNone(weight)
|
|
131
|
+
self.assertIsNotNone(weight.consumers)
|
|
132
|
+
self.assertIsNotNone(weight.consumers[0].parameters)
|
|
133
|
+
self.assertIsInstance(
|
|
134
|
+
weight.consumers[0].parameters, qtyping.UniformQuantParams
|
|
135
|
+
)
|
|
136
|
+
if isinstance(
|
|
137
|
+
weight.consumers[0].parameters, qtyping.UniformQuantParams
|
|
138
|
+
):
|
|
139
|
+
self.assertIsNone(weight.consumers[0].parameters.quantized_dimension)
|
|
140
|
+
|
|
141
|
+
def test_fully_connected_blockwise_supported(self):
|
|
142
|
+
self._op_info = qtyping.OpInfo(
|
|
143
|
+
op=self._fc_op,
|
|
144
|
+
op_name=_TFLOpName.FULLY_CONNECTED,
|
|
145
|
+
subgraph_op_index=self._fc_subgraph_op_index,
|
|
146
|
+
op_quant_config=qtyping.OpQuantizationConfig(
|
|
147
|
+
weight_tensor_config=_TensorQuantConfig(
|
|
148
|
+
num_bits=8,
|
|
149
|
+
symmetric=True,
|
|
150
|
+
granularity=qtyping.QuantGranularity.BLOCKWISE,
|
|
151
|
+
block_size=32,
|
|
152
|
+
),
|
|
153
|
+
),
|
|
154
|
+
)
|
|
155
|
+
params = hadamard_rotation.materialize_fully_connected(
|
|
156
|
+
self._op_info, self._graph_info, self._tensor_name_to_qsv
|
|
157
|
+
)
|
|
158
|
+
self.assertLen(params, 4)
|
|
159
|
+
fc_input = params[0]
|
|
160
|
+
self.assertIsNotNone(fc_input)
|
|
161
|
+
self.assertIsNotNone(fc_input.consumers)
|
|
162
|
+
self.assertIsNotNone(fc_input.consumers[0].parameters)
|
|
163
|
+
self.assertIsInstance(
|
|
164
|
+
fc_input.consumers[0].parameters, qtyping.UniformQuantParams
|
|
165
|
+
)
|
|
166
|
+
if isinstance(
|
|
167
|
+
fc_input.consumers[0].parameters, qtyping.UniformQuantParams
|
|
168
|
+
):
|
|
169
|
+
self.assertEqual(fc_input.consumers[0].parameters.quantized_dimension, 1)
|
|
170
|
+
weight = params[1]
|
|
171
|
+
self.assertIsNotNone(weight)
|
|
172
|
+
self.assertIsNotNone(weight.consumers)
|
|
173
|
+
self.assertIsNotNone(weight.consumers[0].parameters)
|
|
174
|
+
self.assertIsInstance(
|
|
175
|
+
weight.consumers[0].parameters, qtyping.UniformQuantParams
|
|
176
|
+
)
|
|
177
|
+
if isinstance(
|
|
178
|
+
weight.consumers[0].parameters, qtyping.UniformQuantParams
|
|
179
|
+
):
|
|
180
|
+
self.assertEqual(weight.consumers[0].parameters.quantized_dimension, 1)
|
|
181
|
+
|
|
101
182
|
def test_get_tensor_quant_params_basic(self):
|
|
102
183
|
input_tensor = self._subgraph.tensors[self._fc_op.inputs[1]]
|
|
103
184
|
buffer = self._graph_info.buffers[self._fc_buffer_id]
|
|
@@ -143,6 +143,11 @@ def _get_tensor_shape_for_blockwise(
|
|
|
143
143
|
new_shape = []
|
|
144
144
|
for index, val in enumerate(tensor_shape):
|
|
145
145
|
if index == quantized_dim:
|
|
146
|
+
if val % block_size != 0:
|
|
147
|
+
raise ValueError(
|
|
148
|
+
f"Quantized dimension {val} in tensor shape {tensor_shape} is not"
|
|
149
|
+
f" divisible by block size {block_size}."
|
|
150
|
+
)
|
|
146
151
|
new_shape.append(int(val / block_size))
|
|
147
152
|
new_shape.append(block_size)
|
|
148
153
|
else:
|
|
@@ -203,6 +203,28 @@ class TensorUtilsTest(parameterized.TestCase):
|
|
|
203
203
|
),
|
|
204
204
|
)
|
|
205
205
|
|
|
206
|
+
def test_uniform_quantize_quant_dim_not_divisible_by_block_size_raise(self):
|
|
207
|
+
tensor = np.random.rand(34, 2)
|
|
208
|
+
error_message = (
|
|
209
|
+
"Quantized dimension 34 in tensor shape (34, 2) is not divisible by"
|
|
210
|
+
" block size 32."
|
|
211
|
+
)
|
|
212
|
+
with self.assertRaisesWithPredicateMatch(
|
|
213
|
+
ValueError, lambda err: error_message in str(err)
|
|
214
|
+
):
|
|
215
|
+
uniform_quantize_tensor.uniform_quantize(
|
|
216
|
+
np.array(tensor),
|
|
217
|
+
qtyping.UniformQuantParams(
|
|
218
|
+
quantized_dimension=0,
|
|
219
|
+
block_size=32,
|
|
220
|
+
num_bits=4,
|
|
221
|
+
scale=np.array([1.2666667]),
|
|
222
|
+
zero_point=np.array([-6]),
|
|
223
|
+
symmetric=True,
|
|
224
|
+
),
|
|
225
|
+
is_blockwise=True,
|
|
226
|
+
)
|
|
227
|
+
|
|
206
228
|
@parameterized.parameters(
|
|
207
229
|
(
|
|
208
230
|
8,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ai-edge-quantizer-nightly
|
|
3
|
-
Version: 0.4.0.dev20250919
|
|
3
|
+
Version: 0.4.0.dev20250921
|
|
4
4
|
Summary: A quantizer for advanced developers to quantize converted AI Edge models.
|
|
5
5
|
Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
|
|
6
6
|
Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
|
|
@@ -32,14 +32,14 @@ ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=E17cSR-M
|
|
|
32
32
|
ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=GGf_n3wIeg3GB_eGsmyNJ0fTcxgpeMMbugTMRONK6TQ,3553
|
|
33
33
|
ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=BDdn_uBZakfHyzdMJPKadsOqxqyC-s6W2ZzFH99L4fE,8652
|
|
34
34
|
ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=sT5eX5TLZEHTtPfnSkCPDlS0sQxlTFWbCsbvOuj--yY,8889
|
|
35
|
-
ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py,sha256=
|
|
36
|
-
ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py,sha256=
|
|
35
|
+
ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py,sha256=otKRiZn_C0QH0891pxLsIPIBT1mLDwbKYYP7bI-MXAA,12279
|
|
36
|
+
ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py,sha256=_SpP12aDLujv_7tWf_mCt89WknNXTSGE-JpZWO1bYSE,13238
|
|
37
37
|
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1sB2j1vlvvWDKyjcGvA_JLCpN2KbCmMslGCBUc4--V4,8461
|
|
38
38
|
ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
|
|
39
39
|
ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
|
|
40
40
|
ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
|
|
41
|
-
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=
|
|
42
|
-
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=
|
|
41
|
+
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=so5pMeoclPdXV_5dDiqWaA_cZ0Ud-OWnXxRbzNh9x1E,18576
|
|
42
|
+
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=eBTi2I12e65_lxVZjGaN2TAiMzvsyyXAhWmEpKEmkLA,15126
|
|
43
43
|
ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
|
44
44
|
ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=4eAlGph6DDW18bUdoY0XcUoOXEr3P_3_W1ptidD8qK4,37611
|
|
45
45
|
ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
|
|
@@ -70,8 +70,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
|
|
|
70
70
|
ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
|
|
71
71
|
ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
|
|
72
72
|
ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
|
|
73
|
-
ai_edge_quantizer_nightly-0.4.0.
|
|
74
|
-
ai_edge_quantizer_nightly-0.4.0.
|
|
75
|
-
ai_edge_quantizer_nightly-0.4.0.
|
|
76
|
-
ai_edge_quantizer_nightly-0.4.0.
|
|
77
|
-
ai_edge_quantizer_nightly-0.4.0.
|
|
73
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250921.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
74
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250921.dist-info/METADATA,sha256=XYKz5uSw06qVDCUQf-DqGJdgaT-Y5jxREjbdp_g42LY,1508
|
|
75
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250921.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
|
76
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250921.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
|
77
|
+
ai_edge_quantizer_nightly-0.4.0.dev20250921.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|