ai-edge-quantizer-nightly 0.3.0.dev20250611__py3-none-any.whl → 0.3.0.dev20250613__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -435,7 +435,8 @@ def _is_valid_quantization_params(
435
435
  """Checks if the quantization parameters are valid.
436
436
 
437
437
  Valid quantization params require:
438
- 1. scale and zero point have the same shape (TFL Runtime requirement).
438
+ 1. scale and zero point either have the same shape or the zero point is a
439
+ scalar.
439
440
  2. scale and zero point have the same rank as the tensor content (avoid
440
441
  ambiguous broadcasting).
441
442
 
@@ -446,17 +447,20 @@ def _is_valid_quantization_params(
446
447
  Returns:
447
448
  True if the quantization parameters are valid.
448
449
  """
449
- if quantization_params.scale.shape != quantization_params.zero_point.shape:
450
+ if (
451
+ quantization_params.scale.shape != quantization_params.zero_point.shape
452
+ and quantization_params.zero_point.size != 1
453
+ ):
450
454
  raise ValueError(
451
- "scale and zero_point must have the same shape. Got"
452
- f" {quantization_params.scale.shape} and"
455
+ "scale and zero_point must have the same shape or zero_point must have"
456
+ f" only one element. Got {quantization_params.scale.shape} and"
453
457
  f" {quantization_params.zero_point.shape}"
454
458
  )
455
459
 
456
460
  tensor_rank = tensor_data.ndim
457
461
  scale_rank = quantization_params.scale.ndim
458
462
  zero_point_rank = quantization_params.zero_point.ndim
459
- if (tensor_rank != scale_rank) or (tensor_rank != zero_point_rank):
463
+ if tensor_rank != scale_rank or (tensor_rank != zero_point_rank):
460
464
  raise ValueError(
461
465
  f"Ranks of scales ({scale_rank}) and zps"
462
466
  f" ({zero_point_rank}) must be the same as the tensor rank"
@@ -160,7 +160,9 @@ class TensorUtilsTest(parameterized.TestCase):
160
160
  def test_uniform_quantize_wrong_shape(self):
161
161
  tensor = [-3.0, 1.3, 2.4, 16.0]
162
162
 
163
- error_message = "scale and zero_point must have the same shape."
163
+ error_message = (
164
+ "Ranks of scales (3) and zps (2) must be the same as the tensor rank"
165
+ )
164
166
  with self.assertRaisesWithPredicateMatch(
165
167
  ValueError, lambda err: error_message in str(err)
166
168
  ):
@@ -233,7 +235,9 @@ class TensorUtilsTest(parameterized.TestCase):
233
235
  def test_uniform_dequantize_wrong_shape(self):
234
236
  tensor = [-3.0, 1.3, 2.4, 16.0]
235
237
 
236
- error_message = "scale and zero_point must have the same shape."
238
+ error_message = (
239
+ "Ranks of scales (3) and zps (2) must be the same as the tensor rank"
240
+ )
237
241
  with self.assertRaisesWithPredicateMatch(
238
242
  ValueError, lambda err: error_message in str(err)
239
243
  ):
@@ -165,7 +165,7 @@ class Calibrator:
165
165
  )
166
166
  if algorithm_name == algorithm_manager.AlgorithmName.NO_QUANTIZE:
167
167
  continue
168
- if policy.is_conditionally_unquantized(op):
168
+ if policy.is_non_quantizable_composite_op(op):
169
169
  continue
170
170
 
171
171
  # Step2.2: query algorithm_manager to get/call the related
@@ -245,6 +245,7 @@ DEFAULT_JSON_POLICY = """
245
245
  }
246
246
  }
247
247
  """
248
+ QUANTIZABLE_COMPOSITES = ["od" + "ml.npu_call", "od" + "ml.rms_norm"]
248
249
 
249
250
 
250
251
  def _unroll_json_config(
@@ -322,10 +323,10 @@ def _unroll_json_config(
322
323
 
323
324
 
324
325
  # TODO: b/401024954 - Have a better way to specify recipes based on op options.
325
- def is_conditionally_unquantized(
326
+ def is_non_quantizable_composite_op(
326
327
  op: Union[schema.Operator, schema.OperatorT],
327
328
  ) -> bool:
328
- """Checks if the operator is conditionally unquantized.
329
+ """Checks if the operator is a non-quantizable composite op.
329
330
 
330
331
  We may want to quantize an op only when it has certain options.
331
332
  Policies/recipes
@@ -340,10 +341,9 @@ def is_conditionally_unquantized(
340
341
  if opts := flatbuffer_utils.get_options_as(
341
342
  op, schema.StableHLOCompositeOptionsT
342
343
  ):
343
- name: bytes = opts.name
344
- # Non npu_call composites may have a kernel and as such will not be
345
- # quantized.
346
- return ("od" + "ml.npu_call") not in name.decode("utf-8")
344
+ name = opts.name.decode("utf-8")
345
+ if name not in QUANTIZABLE_COMPOSITES:
346
+ return True
347
347
 
348
348
  return False
349
349
 
@@ -109,7 +109,7 @@ class ParamsGenerator:
109
109
  algorithm_name, op_quant_config = (
110
110
  model_recipe_manager.get_quantization_configs(op_key, op_scope)
111
111
  )
112
- if policy.is_conditionally_unquantized(op):
112
+ if policy.is_non_quantizable_composite_op(op):
113
113
  algorithm_name = algorithm_manager.AlgorithmName.NO_QUANTIZE
114
114
 
115
115
  if algorithm_name == algorithm_manager.AlgorithmName.NO_QUANTIZE:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ai-edge-quantizer-nightly
3
- Version: 0.3.0.dev20250611
3
+ Version: 0.3.0.dev20250613
4
4
  Summary: A quantizer for advanced developers to quantize converted AI Edge models.
5
5
  Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
6
6
  Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -2,15 +2,15 @@ ai_edge_quantizer/__init__.py,sha256=4pFSkukSwahYyzwqia0yPRyz8TnFQfGRthVJhYpMWas
2
2
  ai_edge_quantizer/algorithm_manager.py,sha256=lfCazb2b0Q4L3of0cTWkF5lMr3AD6LWW1ekmFoEGB_4,12062
3
3
  ai_edge_quantizer/algorithm_manager_api.py,sha256=u903TG0s1uIDhJqfeJne3CFl8A93phZrwgV2-hwdcXU,9247
4
4
  ai_edge_quantizer/algorithm_manager_api_test.py,sha256=w6bSONvXkX6bzXAGc0-7b6gNDt9oz9ieq97KP8Sg_JU,7666
5
- ai_edge_quantizer/calibrator.py,sha256=-_jX_KkfIepkQAwxxDrZjvPO1JsoSjHXVy1DPc1iFjM,12068
5
+ ai_edge_quantizer/calibrator.py,sha256=Sms7_AIHPH9G5xFaz5Ef3a5gPhxuIWQI8d2LUM8C96I,12071
6
6
  ai_edge_quantizer/calibrator_test.py,sha256=C_oWOaRugPKYX74jF-eRFH-k6nGOdA8I9_uPiocaOuE,11900
7
7
  ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
8
- ai_edge_quantizer/default_policy.py,sha256=nKtghUjTQ8QS9CgLRwQb3iB2eZOyQv0FqyISlcgzSH4,11195
8
+ ai_edge_quantizer/default_policy.py,sha256=9CNd5zIk_BA560kOLkoXD5mapDBWj0yXzSFDedLhzYw,11192
9
9
  ai_edge_quantizer/model_modifier.py,sha256=teGa8I6kGvn6TQY6Xv53YFIc_pQEhNvM9Zb4bvhezyw,7110
10
10
  ai_edge_quantizer/model_modifier_test.py,sha256=cJd04SLOG-fQZZNZPcisoBLx3cLtWEwGqUBbLb-pif4,4751
11
11
  ai_edge_quantizer/model_validator.py,sha256=Hj0_5o-Oa3dSlJ3ryVjRhvsyelHNyek1GrtG9buMczg,13153
12
12
  ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
13
- ai_edge_quantizer/params_generator.py,sha256=j1BV2cGFLlQmUY6aoW5uglYqf77b9ytN8oZ1gh6o0mM,20096
13
+ ai_edge_quantizer/params_generator.py,sha256=gC7G6Ne4Fumc8RSmIAbx96ZBhszZlHqBKSmE9p6RPTo,20099
14
14
  ai_edge_quantizer/params_generator_test.py,sha256=RDYoRZDJfEZRtjlTAU2kZ_4t3JHOqEHxfJX9V4ETAhg,40597
15
15
  ai_edge_quantizer/qtyping.py,sha256=0Dwz6LHQG8LhZMhVAo_h6ieZ_gcfkJl2yJcsGf17YYs,16527
16
16
  ai_edge_quantizer/quantizer.py,sha256=g3DMqFMrMpt9jQttCE0WcdNbMtk0JZnmN5MmCHrNdyM,13202
@@ -38,8 +38,8 @@ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=8
38
38
  ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=zoF_EHjYqsKkuev8wfuutIITEmp_maa70IpJI_Df3ck,7431
39
39
  ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
40
40
  ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
41
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=W2QbXP96xeleAmA7qFwco1iq_bOtArGDK6Qj_g6kNl8,15986
42
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=MgG7Qh2_z4I6InBqEEDSVlaR0q48aMz4xqAlxeG2EMk,12436
41
+ ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=3zq2AO_PRYKHuNvHzwg0pVDZT7kcpaMgXx6OEyEl6co,16103
42
+ ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=JlX3fLHiknGH1osu6gwWEGUizLrEsE6d8iRpzDODmXo,12510
43
43
  ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
44
44
  ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=UoZxeAQmZk3b3hK51KFwq6XfdbeduXVjdYIxAxlAzB8,34982
45
45
  ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
@@ -70,8 +70,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EtOv6cpKM_F0uv2bWuSXylYm
70
70
  ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
71
71
  ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
72
72
  ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
73
- ai_edge_quantizer_nightly-0.3.0.dev20250611.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
74
- ai_edge_quantizer_nightly-0.3.0.dev20250611.dist-info/METADATA,sha256=FPK-WqVTMEz-w5yycBejT4oRBxMY4fiYH-AAL6Pf4-w,1528
75
- ai_edge_quantizer_nightly-0.3.0.dev20250611.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
76
- ai_edge_quantizer_nightly-0.3.0.dev20250611.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
77
- ai_edge_quantizer_nightly-0.3.0.dev20250611.dist-info/RECORD,,
73
+ ai_edge_quantizer_nightly-0.3.0.dev20250613.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
74
+ ai_edge_quantizer_nightly-0.3.0.dev20250613.dist-info/METADATA,sha256=YePqVOXDrP8t8VahK71fzW8xL0X14-coPWeR7eq45Jg,1528
75
+ ai_edge_quantizer_nightly-0.3.0.dev20250613.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
76
+ ai_edge_quantizer_nightly-0.3.0.dev20250613.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
77
+ ai_edge_quantizer_nightly-0.3.0.dev20250613.dist-info/RECORD,,