ai-edge-quantizer-nightly 0.4.0.dev20250919__py3-none-any.whl → 0.4.0.dev20250921__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -127,23 +127,9 @@ def get_tensor_quant_params(
127
127
  "Hadamard rotation is only supported for tensors with rank >= 2."
128
128
  )
129
129
 
130
- if tensor_quant_config.granularity != qtyping.QuantGranularity.CHANNELWISE:
131
- raise ValueError(
132
- "Hadamard rotation is not supported for"
133
- f" {tensor_quant_config.granularity} granularity."
134
- )
135
-
136
- quantized_dim = common_utils.get_weight_quantized_dim(
137
- op_info, tensor_content, tensor_quant_config.granularity
138
- )
139
- if quantized_dim != 0:
140
- raise ValueError(
141
- f"Unsupported quantized dimension: {quantized_dim}. Only 0 is"
142
- " supported."
143
- )
144
-
145
130
  # Reduction axis is the last non-quantized dimension. Since we only support
146
- # quantized_dim of 0, the reduction axis is the last axis.
131
+ # quantized_dim of 0 (or 1 for blockwise), the reduction axis is the last
132
+ # axis.
147
133
  reduce_axis = tensor_content.ndim - 1
148
134
 
149
135
  # Rotate the tensor with a Hadamard matrix.
@@ -46,13 +46,13 @@ class HadamardRotationFullyConnectedTest(parameterized.TestCase):
46
46
  )
47
47
  self._tensor_name_to_qsv = None
48
48
  self._subgraph = self._test_model.subgraphs[0]
49
- fc_subgraph_op_index = 3
50
- self._fc_op = self._subgraph.operators[fc_subgraph_op_index]
49
+ self._fc_subgraph_op_index = 3
50
+ self._fc_op = self._subgraph.operators[self._fc_subgraph_op_index]
51
51
  self._fc_buffer_id = self._subgraph.tensors[self._fc_op.inputs[1]].buffer
52
52
  self._op_info = qtyping.OpInfo(
53
53
  op=self._fc_op,
54
54
  op_name=_TFLOpName.FULLY_CONNECTED,
55
- subgraph_op_index=fc_subgraph_op_index,
55
+ subgraph_op_index=self._fc_subgraph_op_index,
56
56
  op_quant_config=qtyping.OpQuantizationConfig(
57
57
  weight_tensor_config=_TensorQuantConfig(
58
58
  num_bits=8,
@@ -98,6 +98,87 @@ class HadamardRotationFullyConnectedTest(parameterized.TestCase):
98
98
  [qtyping.QuantTransformation.NO_QUANTIZE],
99
99
  )
100
100
 
101
+ def test_fully_connected_tensorwise_supported(self):
102
+ self._op_info = qtyping.OpInfo(
103
+ op=self._fc_op,
104
+ op_name=_TFLOpName.FULLY_CONNECTED,
105
+ subgraph_op_index=self._fc_subgraph_op_index,
106
+ op_quant_config=qtyping.OpQuantizationConfig(
107
+ weight_tensor_config=_TensorQuantConfig(
108
+ num_bits=8,
109
+ symmetric=True,
110
+ granularity=qtyping.QuantGranularity.TENSORWISE,
111
+ ),
112
+ ),
113
+ )
114
+ params = hadamard_rotation.materialize_fully_connected(
115
+ self._op_info, self._graph_info, self._tensor_name_to_qsv
116
+ )
117
+ self.assertLen(params, 4)
118
+ fc_input = params[0]
119
+ self.assertIsNotNone(fc_input)
120
+ self.assertIsNotNone(fc_input.consumers)
121
+ self.assertIsNotNone(fc_input.consumers[0].parameters)
122
+ self.assertIsInstance(
123
+ fc_input.consumers[0].parameters, qtyping.UniformQuantParams
124
+ )
125
+ if isinstance(
126
+ fc_input.consumers[0].parameters, qtyping.UniformQuantParams
127
+ ):
128
+ self.assertIsNone(fc_input.consumers[0].parameters.quantized_dimension)
129
+ weight = params[1]
130
+ self.assertIsNotNone(weight)
131
+ self.assertIsNotNone(weight.consumers)
132
+ self.assertIsNotNone(weight.consumers[0].parameters)
133
+ self.assertIsInstance(
134
+ weight.consumers[0].parameters, qtyping.UniformQuantParams
135
+ )
136
+ if isinstance(
137
+ weight.consumers[0].parameters, qtyping.UniformQuantParams
138
+ ):
139
+ self.assertIsNone(weight.consumers[0].parameters.quantized_dimension)
140
+
141
+ def test_fully_connected_blockwise_supported(self):
142
+ self._op_info = qtyping.OpInfo(
143
+ op=self._fc_op,
144
+ op_name=_TFLOpName.FULLY_CONNECTED,
145
+ subgraph_op_index=self._fc_subgraph_op_index,
146
+ op_quant_config=qtyping.OpQuantizationConfig(
147
+ weight_tensor_config=_TensorQuantConfig(
148
+ num_bits=8,
149
+ symmetric=True,
150
+ granularity=qtyping.QuantGranularity.BLOCKWISE,
151
+ block_size=32,
152
+ ),
153
+ ),
154
+ )
155
+ params = hadamard_rotation.materialize_fully_connected(
156
+ self._op_info, self._graph_info, self._tensor_name_to_qsv
157
+ )
158
+ self.assertLen(params, 4)
159
+ fc_input = params[0]
160
+ self.assertIsNotNone(fc_input)
161
+ self.assertIsNotNone(fc_input.consumers)
162
+ self.assertIsNotNone(fc_input.consumers[0].parameters)
163
+ self.assertIsInstance(
164
+ fc_input.consumers[0].parameters, qtyping.UniformQuantParams
165
+ )
166
+ if isinstance(
167
+ fc_input.consumers[0].parameters, qtyping.UniformQuantParams
168
+ ):
169
+ self.assertEqual(fc_input.consumers[0].parameters.quantized_dimension, 1)
170
+ weight = params[1]
171
+ self.assertIsNotNone(weight)
172
+ self.assertIsNotNone(weight.consumers)
173
+ self.assertIsNotNone(weight.consumers[0].parameters)
174
+ self.assertIsInstance(
175
+ weight.consumers[0].parameters, qtyping.UniformQuantParams
176
+ )
177
+ if isinstance(
178
+ weight.consumers[0].parameters, qtyping.UniformQuantParams
179
+ ):
180
+ self.assertEqual(weight.consumers[0].parameters.quantized_dimension, 1)
181
+
101
182
  def test_get_tensor_quant_params_basic(self):
102
183
  input_tensor = self._subgraph.tensors[self._fc_op.inputs[1]]
103
184
  buffer = self._graph_info.buffers[self._fc_buffer_id]
@@ -143,6 +143,11 @@ def _get_tensor_shape_for_blockwise(
143
143
  new_shape = []
144
144
  for index, val in enumerate(tensor_shape):
145
145
  if index == quantized_dim:
146
+ if val % block_size != 0:
147
+ raise ValueError(
148
+ f"Quantized dimension {val} in tensor shape {tensor_shape} is not"
149
+ f" divisible by block size {block_size}."
150
+ )
146
151
  new_shape.append(int(val / block_size))
147
152
  new_shape.append(block_size)
148
153
  else:
@@ -203,6 +203,28 @@ class TensorUtilsTest(parameterized.TestCase):
203
203
  ),
204
204
  )
205
205
 
206
+ def test_uniform_quantize_quant_dim_not_divisible_by_block_size_raise(self):
207
+ tensor = np.random.rand(34, 2)
208
+ error_message = (
209
+ "Quantized dimension 34 in tensor shape (34, 2) is not divisible by"
210
+ " block size 32."
211
+ )
212
+ with self.assertRaisesWithPredicateMatch(
213
+ ValueError, lambda err: error_message in str(err)
214
+ ):
215
+ uniform_quantize_tensor.uniform_quantize(
216
+ np.array(tensor),
217
+ qtyping.UniformQuantParams(
218
+ quantized_dimension=0,
219
+ block_size=32,
220
+ num_bits=4,
221
+ scale=np.array([1.2666667]),
222
+ zero_point=np.array([-6]),
223
+ symmetric=True,
224
+ ),
225
+ is_blockwise=True,
226
+ )
227
+
206
228
  @parameterized.parameters(
207
229
  (
208
230
  8,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ai-edge-quantizer-nightly
3
- Version: 0.4.0.dev20250919
3
+ Version: 0.4.0.dev20250921
4
4
  Summary: A quantizer for advanced developers to quantize converted AI Edge models.
5
5
  Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
6
6
  Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -32,14 +32,14 @@ ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=E17cSR-M
32
32
  ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=GGf_n3wIeg3GB_eGsmyNJ0fTcxgpeMMbugTMRONK6TQ,3553
33
33
  ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=BDdn_uBZakfHyzdMJPKadsOqxqyC-s6W2ZzFH99L4fE,8652
34
34
  ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=sT5eX5TLZEHTtPfnSkCPDlS0sQxlTFWbCsbvOuj--yY,8889
35
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py,sha256=U3h5scCHSOdqHA-pb1C3pNgwumT4ydGbtkCSM0ORhrs,12740
36
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py,sha256=5VUxlaKP1jz4HV-LcKxXMMtmb6eWamq0A6qWJd63cR4,10179
35
+ ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py,sha256=otKRiZn_C0QH0891pxLsIPIBT1mLDwbKYYP7bI-MXAA,12279
36
+ ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py,sha256=_SpP12aDLujv_7tWf_mCt89WknNXTSGE-JpZWO1bYSE,13238
37
37
  ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1sB2j1vlvvWDKyjcGvA_JLCpN2KbCmMslGCBUc4--V4,8461
38
38
  ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
39
39
  ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
40
40
  ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
41
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=vsvBGEGFEEUP4kXRUh9hMpVXjsMBpfs6UDk8m4BNGTs,18375
42
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=Ympigz0BGcaO5x3OozxNxrRAGiF0to6V_HXAcxNNEpI,14399
41
+ ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=so5pMeoclPdXV_5dDiqWaA_cZ0Ud-OWnXxRbzNh9x1E,18576
42
+ ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=eBTi2I12e65_lxVZjGaN2TAiMzvsyyXAhWmEpKEmkLA,15126
43
43
  ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
44
44
  ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=4eAlGph6DDW18bUdoY0XcUoOXEr3P_3_W1ptidD8qK4,37611
45
45
  ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
@@ -70,8 +70,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
70
70
  ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
71
71
  ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
72
72
  ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
73
- ai_edge_quantizer_nightly-0.4.0.dev20250919.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
74
- ai_edge_quantizer_nightly-0.4.0.dev20250919.dist-info/METADATA,sha256=xhmp6LEFOIxQNTiacffhdU7GGs_8YE2MfdxUGWXqzEo,1508
75
- ai_edge_quantizer_nightly-0.4.0.dev20250919.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
76
- ai_edge_quantizer_nightly-0.4.0.dev20250919.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
77
- ai_edge_quantizer_nightly-0.4.0.dev20250919.dist-info/RECORD,,
73
+ ai_edge_quantizer_nightly-0.4.0.dev20250921.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
74
+ ai_edge_quantizer_nightly-0.4.0.dev20250921.dist-info/METADATA,sha256=XYKz5uSw06qVDCUQf-DqGJdgaT-Y5jxREjbdp_g42LY,1508
75
+ ai_edge_quantizer_nightly-0.4.0.dev20250921.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
76
+ ai_edge_quantizer_nightly-0.4.0.dev20250921.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
77
+ ai_edge_quantizer_nightly-0.4.0.dev20250921.dist-info/RECORD,,