ai-edge-quantizer-nightly 0.1.0.dev20250319__py3-none-any.whl → 0.1.0.dev20250321__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -790,8 +790,6 @@ def init_tensor_min_max(
790
790
  quantized_dim = None
791
791
  if weight_tensor_config is not None and (
792
792
  weight_tensor_config.granularity == qtyping.QuantGranularity.CHANNELWISE
793
- or weight_tensor_config.granularity
794
- == qtyping.QuantGranularity.BLOCKWISE
795
793
  ):
796
794
  quantized_dim = common_utils.get_weight_quantized_dim(
797
795
  op_info, tensor_data
@@ -801,6 +799,11 @@ def init_tensor_min_max(
801
799
  and weight_tensor_config.granularity
802
800
  == qtyping.QuantGranularity.BLOCKWISE
803
801
  ):
802
+ quantized_dim = (
803
+ tfl_flatbuffer_utils.TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM[
804
+ op_info.op_name
805
+ ]
806
+ )
804
807
  reshaped_data, reduce_dims = _reshape_data_for_blockwise(
805
808
  tensor_data,
806
809
  quantized_dim,
@@ -80,13 +80,16 @@ def get_tensor_quant_params(
80
80
  tensor_quant_config.symmetric,
81
81
  )
82
82
  quantized_dim = None
83
- if (
84
- tensor_quant_config.granularity == qtyping.QuantGranularity.CHANNELWISE
85
- or tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE
86
- ):
83
+ if tensor_quant_config.granularity == qtyping.QuantGranularity.CHANNELWISE:
87
84
  quantized_dim = common_utils.get_weight_quantized_dim(
88
85
  op_info, tensor_content
89
86
  )
87
+ elif tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
88
+ quantized_dim = (
89
+ tfl_flatbuffer_utils.TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM[
90
+ op_info.op_name
91
+ ]
92
+ )
90
93
  quant_params = qtyping.UniformQuantParams(
91
94
  scale=scale,
92
95
  zero_point=zp,
@@ -187,8 +187,8 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
187
187
  zp = quant_params.zero_point
188
188
  expected_zp, expected_scale = (
189
189
  uniform_quantize_tensor.tensor_zp_scale_from_min_max(
190
- min_value=np.array([[-7, 4], [-4, -4]]),
191
- max_value=np.array([[4, 7], [7, 7]]),
190
+ min_value=np.array([[-7], [-4], [-4], [7]]),
191
+ max_value=np.array([[7], [4], [4], [7]]),
192
192
  num_bits=4,
193
193
  symmetric=True,
194
194
  )
@@ -200,7 +200,7 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
200
200
  cast(np.ndarray, quant_params.quantized_data).shape, test_data.shape
201
201
  )
202
202
  self.assertEqual(quant_params.block_size, 2)
203
- self.assertEqual(quant_params.quantized_dimension, 0)
203
+ self.assertEqual(quant_params.quantized_dimension, 1)
204
204
 
205
205
 
206
206
  if __name__ == "__main__":
@@ -143,24 +143,15 @@ def _perform_blockwise_quantization(
143
143
  tensor = transformation_input.subgraph.tensors[transformation_input.tensor_id]
144
144
  blockwise_details = schema_py_generated.BlockwiseQuantizationT()
145
145
  scale_tensor_id = transformation_utils.add_new_constant_tensor(
146
- tensor.name + b"_scale",
147
- transformation_input.quant_params.scale,
146
+ tensor.name + b"_scales",
147
+ transformation_input.quant_params.scale.astype(np.float16),
148
148
  schema_py_generated.TensorType.FLOAT16,
149
149
  transformation_input.subgraph,
150
150
  transformation_input.buffers,
151
151
  )
152
152
  blockwise_details.scales = scale_tensor_id
153
153
  blockwise_details.blockSize = transformation_input.quant_params.block_size
154
- # blockwise quantization allows optional zero point.
155
- if transformation_input.quant_params.zero_point is not None:
156
- zero_point_tensor_id = transformation_utils.add_new_constant_tensor(
157
- tensor.name + b"_zero_point",
158
- transformation_input.quant_params.zero_point,
159
- schema_py_generated.TensorType.INT32,
160
- transformation_input.subgraph,
161
- transformation_input.buffers,
162
- )
163
- blockwise_details.zeroPoints = zero_point_tensor_id
154
+ # TODO: b/404909258 - Add optional zero point to blockwise quantization.
164
155
  flatbuffer_quantization.details = blockwise_details
165
156
  return flatbuffer_quantization
166
157
 
@@ -169,7 +169,8 @@ class QuantizeTensorTest(parameterized.TestCase):
169
169
  self.assertEqual(quant_param.details.blockSize, 32)
170
170
  # Check if the scale and zero point tensors are inserted correctly.
171
171
  self.assertEqual(quant_param.details.scales, 9)
172
- self.assertEqual(quant_param.details.zeroPoints, 10)
172
+ # So far we don't have zero point in blockwise quantization.
173
+ self.assertEqual(quant_param.details.zeroPoints, 0)
173
174
 
174
175
  def test_int4_constant_packed_correctly(self):
175
176
  subgraph = self._model.subgraphs[0]
@@ -72,6 +72,11 @@ TFL_OP_TO_WEIGHT_QUANTIZED_DIM = immutabledict.immutabledict({
72
72
  _TFLOpName.CONV_2D_TRANSPOSE: 0,
73
73
  })
74
74
 
75
+ TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM = immutabledict.immutabledict({
76
+ _TFLOpName.FULLY_CONNECTED: 1,
77
+ _TFLOpName.EMBEDDING_LOOKUP: 1,
78
+ })
79
+
75
80
  NUM_TFL_DATATYPES = 18
76
81
  TENSOR_CODE_TO_TYPE = {}
77
82
  for dtype_code in range(NUM_TFL_DATATYPES):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ai-edge-quantizer-nightly
3
- Version: 0.1.0.dev20250319
3
+ Version: 0.1.0.dev20250321
4
4
  Summary: A quantizer for advanced developers to quantize converted AI Edge models.
5
5
  Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
6
6
  Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -28,12 +28,12 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCP
28
28
  ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
29
29
  ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=s64eDDH9bmRWy6Bl1peHnhGewLnFJjvnhYOdjo1zYOA,22625
30
30
  ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
31
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=LnItMEsR47qe8T5pg9UI5NGfhi4cOxt0vAU35IkWnaY,27163
31
+ ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=SVu1RSX5xOWhuNEi9hHqgIDGe_ywyHBZAczp7KAcl3k,27220
32
32
  ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=qMmKbWqxrCoVKbLKHn9WuCrGKPfHkEyU0Nmhokh8Qeo,2597
33
33
  ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=OTXjEZ3Ctq3ffYzisX-6HwgK_DuA7uos_aap5PiIUPE,8686
34
34
  ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=y7BK11fkF63Ex_Jzg3fbIdy0D_Ca6HuvChVZR7Uwggc,8073
35
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=aWHU4rneBv7ErufEWKQGAWTK-pgfn-rG9mAkC0d9V6Q,7871
36
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=Hok09dloSyBfD0oDM5VABdSZjM9JWSQhm_hDHNbFujA,7640
35
+ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=cbyyYAoQnEraOYSV00wZ557ElBndHduVGeHikYUEFCE,7995
36
+ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=B30SEISYZ9DPs3suKeG2elgXylR98pCEMWSEGgZo20o,7648
37
37
  ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=e5wYtki-vl739gSVAZHAKcs2hA87GvFUjVoSUPlnkyM,6433
38
38
  ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=IcTOaJ1pxtqsitqxOEP9LROVEP_19VFutHalqNied4I,6940
39
39
  ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=WmZzKQlzfu9gFr9SbUDoPY3rFqTl363om8-0rTLwotw,11629
@@ -50,22 +50,22 @@ ai_edge_quantizer/transformations/emulated_subchannel.py,sha256=HVaRxoC8PCAvy3xe
50
50
  ai_edge_quantizer/transformations/emulated_subchannel_test.py,sha256=gZP6u9NdPXl7s19qB_Un8evou9ZZV6I9Gy0E1rdobHM,7722
51
51
  ai_edge_quantizer/transformations/quant_insert.py,sha256=jn6HsJaV-sqBiFPY-Aqbd64t8zgcYVkEkZI375x_FWY,3958
52
52
  ai_edge_quantizer/transformations/quant_insert_test.py,sha256=X9ptPDvJCFkR5tejKnD1SlHFGPazQTW-wNNMV9MEAuw,10107
53
- ai_edge_quantizer/transformations/quantize_tensor.py,sha256=9YaaWR6osxZoyUM8DUPJr_AjWO0QuNhFc65OFnSGzY4,7866
54
- ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=XZOollD1jnpCb78gMZx7yocF7RDBSf9HIf-XdG-y_io,9052
53
+ ai_edge_quantizer/transformations/quantize_tensor.py,sha256=vzKtrXILqVsr1NGlribhdtKEIsXA93o37embLRe9TwQ,7493
54
+ ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=mHLO3_MRt36A8-ZN8ADn5tBBJlqjTWa7ZUN8Mmu5Rcw,9116
55
55
  ai_edge_quantizer/transformations/transformation_utils.py,sha256=R42OIbzwQ7JYJ-Qt46jsqwb6u4MfDGiIPCRZCUGLVCw,4664
56
56
  ai_edge_quantizer/transformations/transformation_utils_test.py,sha256=xH64SF3UHDh84vYbt-WvmXNjM-Jg-mefES1ACO1tkqw,6269
57
57
  ai_edge_quantizer/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
58
58
  ai_edge_quantizer/utils/calibration_utils.py,sha256=1Fj9MIO6aLZIRgyd4axvZN4S_O64nB_-Miu1WP664js,2536
59
59
  ai_edge_quantizer/utils/calibration_utils_test.py,sha256=Z-AcdTieesWFKyKBb08ZXm4Mgu6cvJ4bg2-MJ7hLD10,2856
60
60
  ai_edge_quantizer/utils/test_utils.py,sha256=HwZCIpO9fJRAhuN6t6voXKOYQtcioFtt_tpkAlDsAYk,6205
61
- ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=_A-h_MqwElzjgkLDmXTZ1iAIWtTRcLjSFGfjNT8fuHU,10480
61
+ ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=51GRkwj7PK0XvAqohdv6mAepOWRk1AnW2y-9ne6LzWo,10628
62
62
  ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py,sha256=AbyDxoM62k4ojD8gPdkWo--xe5hlX3t0kobQSA80kuk,7740
63
63
  ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=x2xA2CFPpe_2trcV8v5xGaBETvVCfwAcJuq6yieGJ0Y,12687
64
64
  ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=Op3JxtOqlrjzmYF18jnnstL1k9xiY9kKJ8S2vklKGkc,11327
65
65
  ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
66
66
  ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
67
- ai_edge_quantizer_nightly-0.1.0.dev20250319.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
68
- ai_edge_quantizer_nightly-0.1.0.dev20250319.dist-info/METADATA,sha256=WTz-_FHdUgNLhVPcpu4VW9rw2drBw92tUqa35_OsDWg,1527
69
- ai_edge_quantizer_nightly-0.1.0.dev20250319.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
70
- ai_edge_quantizer_nightly-0.1.0.dev20250319.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
71
- ai_edge_quantizer_nightly-0.1.0.dev20250319.dist-info/RECORD,,
67
+ ai_edge_quantizer_nightly-0.1.0.dev20250321.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
68
+ ai_edge_quantizer_nightly-0.1.0.dev20250321.dist-info/METADATA,sha256=2IxDz8eWG3N0LQ7Qm9Y3_WH7hckqK_IwwzVc60-rkW4,1527
69
+ ai_edge_quantizer_nightly-0.1.0.dev20250321.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
70
+ ai_edge_quantizer_nightly-0.1.0.dev20250321.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
71
+ ai_edge_quantizer_nightly-0.1.0.dev20250321.dist-info/RECORD,,