ai-edge-quantizer-nightly 0.1.0.dev20250327__py3-none-any.whl → 0.1.0.dev20250328__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -394,14 +394,6 @@ def _compatible_tensor_params(
394
394
  ]
395
395
  if _same_tensor_params_except_id(params1, params2):
396
396
  return True
397
- if (
398
- params1.transformations[0] != _QuantTrans.NO_QUANTIZE
399
- and params2.transformations[0] != _QuantTrans.NO_QUANTIZE
400
- ):
401
- # NO_QUANTIZE has no parameters. So only if both params aren't NO_QUANTIZE
402
- # do we expect the parameters to be the same.
403
- if params1.parameters != params2.parameters:
404
- return False
405
397
  # We only need to check the first transformation because transformations are
406
398
  # applied in order, and as long as the one that's immediately after the tensor
407
399
  # is the same, it's compatible.
@@ -413,6 +405,7 @@ def _compatible_tensor_params(
413
405
  if (
414
406
  params1.transformations[0] in quantized_source_transformations
415
407
  and params2.transformations[0] in quantized_source_transformations
408
+ and params1.parameters == params2.parameters
416
409
  ):
417
410
  return True
418
411
  return False
@@ -914,6 +914,58 @@ class ParamsGeneratorTest(parameterized.TestCase):
914
914
  ),
915
915
  expected=True,
916
916
  ),
917
+ dict(
918
+ testcase_name='compatible_no_numeric_check',
919
+ param1=qtyping.TensorTransformationParams(
920
+ tensor_name='tfl.quantize',
921
+ producer=None,
922
+ consumers=[
923
+ qtyping.OpToTensorParams(
924
+ subgraph_op_id=4,
925
+ transformations=[
926
+ qtyping.QuantTransformation.ADD_QUANTIZE,
927
+ ],
928
+ parameters=qtyping.UniformQuantParams(
929
+ 8, None, np.array([0.00028806]), np.array([0])
930
+ ),
931
+ ),
932
+ qtyping.OpToTensorParams(
933
+ subgraph_op_id=5,
934
+ transformations=[
935
+ qtyping.QuantTransformation.ADD_QUANTIZE,
936
+ ],
937
+ parameters=qtyping.UniformQuantParams(
938
+ 8, None, np.array([0.00027501]), np.array([0])
939
+ ),
940
+ ),
941
+ ],
942
+ ),
943
+ param2=qtyping.TensorTransformationParams(
944
+ tensor_name='tfl.quantize',
945
+ producer=None,
946
+ consumers=[
947
+ qtyping.OpToTensorParams(
948
+ subgraph_op_id=4,
949
+ transformations=[
950
+ qtyping.QuantTransformation.ADD_QUANTIZE,
951
+ ],
952
+ parameters=qtyping.UniformQuantParams(
953
+ 8, None, np.array([0.00028806]), np.array([0])
954
+ ),
955
+ ),
956
+ qtyping.OpToTensorParams(
957
+ subgraph_op_id=5,
958
+ transformations=[
959
+ qtyping.QuantTransformation.ADD_QUANTIZE,
960
+ ],
961
+ parameters=qtyping.UniformQuantParams(
962
+ 8, None, np.array([0.00027501]), np.array([0])
963
+ ),
964
+ ),
965
+ ],
966
+ ),
967
+ expected=True,
968
+ ),
917
969
  )
918
970
  def test_params_compatible(self, param1, param2, expected):
919
971
  # adding a test to make production coverage happy.
@@ -121,6 +121,26 @@ def _perform_channelwise_quantization(
121
121
  return flatbuffer_quantization
122
122
 
123
123
 
124
+ def _downcast_and_truncate_scale(input_scale: np.ndarray) -> np.ndarray:
125
+ """Given a fp32 scale, downcast it to fp16 and truncate mantissa to 7 bits.
126
+
127
+ CPU kernel can only utilize 7 bits of mantissa for fp16, so we want to produce
128
+ scale this way to unify behaviours across different platforms.
129
+
130
+ Args:
131
+ input_scale: The input scale in fp32.
132
+
133
+ Returns:
134
+ The downcasted & truncated scale in fp16.
135
+ """
136
+
137
+ # A regular fp16 has 10 bits of mantissa, so we need to zero out the 3 least
138
+ # significant bits.
139
+ return (
140
+ input_scale.astype(np.float16).view(dtype=np.uint16) & np.uint16(0xFFF8)
141
+ ).view(dtype=np.float16)
142
+
143
+
124
144
  def _perform_blockwise_quantization(
125
145
  transformation_input: transformation_utils.TransformationInput,
126
146
  ) -> schema_py_generated.QuantizationParametersT():
@@ -142,9 +162,13 @@ def _perform_blockwise_quantization(
142
162
  )
143
163
  tensor = transformation_input.subgraph.tensors[transformation_input.tensor_id]
144
164
  blockwise_details = schema_py_generated.BlockwiseQuantizationT()
165
+ # Downcast and truncate the scale to fp16.
166
+ downcasted_scale = _downcast_and_truncate_scale(
167
+ transformation_input.quant_params.scale
168
+ )
145
169
  scale_tensor_id = transformation_utils.add_new_constant_tensor(
146
170
  tensor.name + b"_scales",
147
- transformation_input.quant_params.scale.astype(np.float16),
171
+ downcasted_scale,
148
172
  schema_py_generated.TensorType.FLOAT16,
149
173
  transformation_input.subgraph,
150
174
  transformation_input.buffers,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ai-edge-quantizer-nightly
3
- Version: 0.1.0.dev20250327
3
+ Version: 0.1.0.dev20250328
4
4
  Summary: A quantizer for advanced developers to quantize converted AI Edge models.
5
5
  Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
6
6
  Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -10,8 +10,8 @@ ai_edge_quantizer/model_modifier.py,sha256=SPt9X-xBzRvcd4xIS24zLHt3aUS2QwsNDqweF
10
10
  ai_edge_quantizer/model_modifier_test.py,sha256=cJd04SLOG-fQZZNZPcisoBLx3cLtWEwGqUBbLb-pif4,4751
11
11
  ai_edge_quantizer/model_validator.py,sha256=fRNz0jO54cthPTibsCuViUXUuFRHl_fbvEiCukIVy20,13030
12
12
  ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
13
- ai_edge_quantizer/params_generator.py,sha256=f-KhJMFdRv2oHxfM8tAANPOtfBMw8vD7Vjv0rYQbnF4,16062
14
- ai_edge_quantizer/params_generator_test.py,sha256=zmDS6jG5zKhHL_hzJw2wlMTx1LLcNCK6S5WlwogWF-A,41122
13
+ ai_edge_quantizer/params_generator.py,sha256=46XDjnP4R3m4xsoXNp7brv0sNQPdQMg217_CbEl-Wgg,15780
14
+ ai_edge_quantizer/params_generator_test.py,sha256=9WTUl87XqbM4NruX5ypLuVRtuhcw-CmxndsMOUzZ92Q,43171
15
15
  ai_edge_quantizer/qtyping.py,sha256=UBZ3HgO8IDLY6VJmO05rGtFv_idMD3Os3WWsnriA0NA,15235
16
16
  ai_edge_quantizer/quantizer.py,sha256=g3DMqFMrMpt9jQttCE0WcdNbMtk0JZnmN5MmCHrNdyM,13202
17
17
  ai_edge_quantizer/quantizer_test.py,sha256=K_HBA56JkFI3HL8VLWCqGEfC0ISh5ldMKoNyBdGRAJg,20368
@@ -50,7 +50,7 @@ ai_edge_quantizer/transformations/emulated_subchannel.py,sha256=HVaRxoC8PCAvy3xe
50
50
  ai_edge_quantizer/transformations/emulated_subchannel_test.py,sha256=gZP6u9NdPXl7s19qB_Un8evou9ZZV6I9Gy0E1rdobHM,7722
51
51
  ai_edge_quantizer/transformations/quant_insert.py,sha256=jn6HsJaV-sqBiFPY-Aqbd64t8zgcYVkEkZI375x_FWY,3958
52
52
  ai_edge_quantizer/transformations/quant_insert_test.py,sha256=X9ptPDvJCFkR5tejKnD1SlHFGPazQTW-wNNMV9MEAuw,10107
53
- ai_edge_quantizer/transformations/quantize_tensor.py,sha256=vzKtrXILqVsr1NGlribhdtKEIsXA93o37embLRe9TwQ,7493
53
+ ai_edge_quantizer/transformations/quantize_tensor.py,sha256=y6As38mTzhva50YvNQ7p0SFpuWet3LPqFwE3qIO0gEQ,8231
54
54
  ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=mHLO3_MRt36A8-ZN8ADn5tBBJlqjTWa7ZUN8Mmu5Rcw,9116
55
55
  ai_edge_quantizer/transformations/transformation_utils.py,sha256=R42OIbzwQ7JYJ-Qt46jsqwb6u4MfDGiIPCRZCUGLVCw,4664
56
56
  ai_edge_quantizer/transformations/transformation_utils_test.py,sha256=xH64SF3UHDh84vYbt-WvmXNjM-Jg-mefES1ACO1tkqw,6269
@@ -64,8 +64,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=x2xA2CFPpe_2trcV8v5xGaBE
64
64
  ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=Op3JxtOqlrjzmYF18jnnstL1k9xiY9kKJ8S2vklKGkc,11327
65
65
  ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
66
66
  ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
67
- ai_edge_quantizer_nightly-0.1.0.dev20250327.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
68
- ai_edge_quantizer_nightly-0.1.0.dev20250327.dist-info/METADATA,sha256=KES2W7tXAwTOpymOVvoQXovvZ2eaObKUUUT5L06a2gw,1527
69
- ai_edge_quantizer_nightly-0.1.0.dev20250327.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
70
- ai_edge_quantizer_nightly-0.1.0.dev20250327.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
71
- ai_edge_quantizer_nightly-0.1.0.dev20250327.dist-info/RECORD,,
67
+ ai_edge_quantizer_nightly-0.1.0.dev20250328.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
68
+ ai_edge_quantizer_nightly-0.1.0.dev20250328.dist-info/METADATA,sha256=ajE8NjYUjSmr3VZQWBzARMc9MLqLkeHooAmK-fCg7Ms,1527
69
+ ai_edge_quantizer_nightly-0.1.0.dev20250328.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
70
+ ai_edge_quantizer_nightly-0.1.0.dev20250328.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
71
+ ai_edge_quantizer_nightly-0.1.0.dev20250328.dist-info/RECORD,,