ai-edge-quantizer-nightly 0.4.0.dev20250926__py3-none-any.whl → 0.4.0.dev20250928__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -82,7 +82,6 @@ class RecipeManager:
82
82
  str, list[OpQuantizationRecipe]
83
83
  ] = collections.OrderedDict()
84
84
 
85
- # TODO: b/335254997 - Check if an op quantization config is supported.
86
85
  def add_quantization_config(
87
86
  self,
88
87
  regex: str,
@@ -272,7 +271,8 @@ class RecipeManager:
272
271
  """
273
272
  weight_config = qtyping.TensorQuantizationConfig(
274
273
  num_bits=num_bits,
275
- symmetric=True, # LiteRT kernels only support symmetric quantized weights.
274
+ symmetric=True, # LiteRT kernels only support symmetric quantized
275
+ # weights.
276
276
  granularity=granularity,
277
277
  )
278
278
  self.add_quantization_config(
@@ -316,10 +316,18 @@ class RecipeManager:
316
316
  granularity: Granularity of quantization.
317
317
  algorithm_key: Algorithm key to be applied.
318
318
  """
319
+ # Default to integer quantization but allow float quantization for
320
+ # FLOAT_CASTING algorithm. This is to support weight-only quantization with
321
+ # fp16 weights.
322
+ weight_dtype = qtyping.TensorDataType.INT
323
+ if algorithm_key == AlgorithmName.FLOAT_CASTING:
324
+ weight_dtype = qtyping.TensorDataType.FLOAT
325
+
319
326
  weight_config = qtyping.TensorQuantizationConfig(
320
327
  num_bits=num_bits,
321
328
  symmetric=True, # TFL kernels only support symmetric quantized weights.
322
329
  granularity=granularity,
330
+ dtype=weight_dtype,
323
331
  )
324
332
  self.add_quantization_config(
325
333
  regex,
@@ -365,7 +373,8 @@ class RecipeManager:
365
373
  raise ValueError(
366
374
  'Activation quantization is only supported for 16 or 8 bits.'
367
375
  )
368
- # INT16 is symmetric and INT8 is asymmetric due to LiteRT kernel limitations.
376
+ # INT16 is symmetric and INT8 is asymmetric due to LiteRT kernel
377
+ # limitations.
369
378
  activation_symmetric = activation_num_bits == 16
370
379
  activation_config = qtyping.TensorQuantizationConfig(
371
380
  num_bits=activation_num_bits, symmetric=activation_symmetric
@@ -315,11 +315,12 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
315
315
  _QuantGranularity.CHANNELWISE,
316
316
  )
317
317
 
318
- def test_add_weight_only_config(self):
318
+ @parameterized.parameters(4, 8)
319
+ def test_add_weight_only_config_int(self, num_bits):
319
320
  self._recipe_manager.add_weight_only_config(
320
321
  regex='.*/Dense/.*',
321
322
  operation_name=_TFLOpName.FULLY_CONNECTED,
322
- num_bits=4,
323
+ num_bits=num_bits,
323
324
  )
324
325
  alg_key, op_config = self._recipe_manager.get_quantization_configs(
325
326
  _TFLOpName.FULLY_CONNECTED, 'model/Dense/op'
@@ -330,6 +331,72 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
330
331
  self.assertIsNone(op_config.activation_tensor_config)
331
332
  weight_tensor_config = op_config.weight_tensor_config
332
333
  self.assertIsNotNone(weight_tensor_config)
334
+ self.assertEqual(weight_tensor_config.num_bits, num_bits)
335
+ self.assertTrue(weight_tensor_config.symmetric)
336
+ self.assertEqual(
337
+ weight_tensor_config.granularity,
338
+ _QuantGranularity.CHANNELWISE,
339
+ )
340
+ self.assertEqual(weight_tensor_config.dtype, _TensorDataType.INT)
341
+
342
+ def test_add_weight_only_config_fp16(self):
343
+ self._recipe_manager.add_weight_only_config(
344
+ regex='.*/Dense2/.*',
345
+ operation_name=_TFLOpName.FULLY_CONNECTED,
346
+ num_bits=16,
347
+ algorithm_key=_AlgorithmName.FLOAT_CASTING,
348
+ )
349
+ alg_key, op_config = self._recipe_manager.get_quantization_configs(
350
+ _TFLOpName.FULLY_CONNECTED, 'model/Dense2/op'
351
+ )
352
+ self.assertEqual(alg_key, _AlgorithmName.FLOAT_CASTING)
353
+ self.assertEqual(op_config.compute_precision, _ComputePrecision.FLOAT)
354
+ self.assertTrue(op_config.explicit_dequantize)
355
+ self.assertIsNone(op_config.activation_tensor_config)
356
+ weight_tensor_config = op_config.weight_tensor_config
357
+ self.assertIsNotNone(weight_tensor_config)
358
+ self.assertEqual(weight_tensor_config.num_bits, 16)
359
+ self.assertTrue(weight_tensor_config.symmetric)
360
+ self.assertEqual(
361
+ weight_tensor_config.granularity,
362
+ _QuantGranularity.CHANNELWISE,
363
+ )
364
+ self.assertEqual(weight_tensor_config.dtype, _TensorDataType.FLOAT)
365
+
366
+ def test_add_weight_only_config_fp8_raise_error(self):
367
+ error_message = (
368
+ 'float casting quantization config requires number of bits to be set'
369
+ ' as 16'
370
+ )
371
+ with self.assertRaisesWithPredicateMatch(
372
+ ValueError, lambda err: error_message in str(err)
373
+ ):
374
+ self._recipe_manager.add_weight_only_config(
375
+ regex='.*/Dense2/.*',
376
+ operation_name=_TFLOpName.FULLY_CONNECTED,
377
+ num_bits=8,
378
+ algorithm_key=_AlgorithmName.FLOAT_CASTING,
379
+ )
380
+
381
+ def test_add_static_config(self):
382
+ self._recipe_manager.add_static_config(
383
+ regex='.*/Dense/.*',
384
+ operation_name=_TFLOpName.FULLY_CONNECTED,
385
+ activation_num_bits=8,
386
+ weight_num_bits=4,
387
+ )
388
+ alg_key, op_config = self._recipe_manager.get_quantization_configs(
389
+ _TFLOpName.FULLY_CONNECTED, 'model/Dense/op'
390
+ )
391
+ self.assertEqual(alg_key, _AlgorithmName.MIN_MAX_UNIFORM_QUANT)
392
+ self.assertEqual(op_config.compute_precision, _ComputePrecision.INTEGER)
393
+ self.assertFalse(op_config.explicit_dequantize)
394
+ activation_tensor_config = op_config.activation_tensor_config
395
+ self.assertIsNotNone(activation_tensor_config)
396
+ self.assertEqual(activation_tensor_config.num_bits, 8)
397
+ self.assertFalse(activation_tensor_config.symmetric)
398
+ weight_tensor_config = op_config.weight_tensor_config
399
+ self.assertIsNotNone(weight_tensor_config)
333
400
  self.assertEqual(weight_tensor_config.num_bits, 4)
334
401
  self.assertTrue(weight_tensor_config.symmetric)
335
402
  self.assertEqual(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ai-edge-quantizer-nightly
3
- Version: 0.4.0.dev20250926
3
+ Version: 0.4.0.dev20250928
4
4
  Summary: A quantizer for advanced developers to quantize converted AI Edge models.
5
5
  Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
6
6
  Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -16,8 +16,8 @@ ai_edge_quantizer/qtyping.py,sha256=tfrPip-uzJuF_PASgUExx5Oy9gghWUbQaApR0XaBpNw,
16
16
  ai_edge_quantizer/quantizer.py,sha256=ckAEOnnBxuCKZuvlzdChevCKPuE-IeDPHCNtFTWr250,17857
17
17
  ai_edge_quantizer/quantizer_test.py,sha256=m6f4ayyaF3yQb9i4V0aFAbmGw0OKZ2Zam1RoTPh-u24,22917
18
18
  ai_edge_quantizer/recipe.py,sha256=MEkfQ2Sg3KAE9LAORHWcbjYNPg06EUbwc1d-VspQA2U,6461
19
- ai_edge_quantizer/recipe_manager.py,sha256=6dgbE-IZfEetzXH3p3Qm_9eQutNDOpZnMpiaLTbP-ZQ,14744
20
- ai_edge_quantizer/recipe_manager_test.py,sha256=H-B75vwPN5ND-nUa3pOXizeHTv4mufPiC5cL_OlDIYU,34040
19
+ ai_edge_quantizer/recipe_manager.py,sha256=6l2uq8KL23KLu9OQDmPGkxrFiwHrdDB9xnn-ni8WdEM,15036
20
+ ai_edge_quantizer/recipe_manager_test.py,sha256=qjgGUF-wggXnSXqZ5khmqrDMIQI5CShk52IVWTahq6s,36817
21
21
  ai_edge_quantizer/recipe_test.py,sha256=QisyaTol8JRZFcGOGyee7QRCvqj5VbF4guKWdIoMUOE,6213
22
22
  ai_edge_quantizer/transformation_instruction_generator.py,sha256=O0U2aZcB8aXQgOV8r9g1rGNzDUiuI5Ta53XnxZbVffE,31576
23
23
  ai_edge_quantizer/transformation_instruction_generator_test.py,sha256=KW5-WoTTo9IqLEVnWxVC8ut8eWLi_91xfKgGqVQ9QDk,54635
@@ -72,8 +72,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
72
72
  ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
73
73
  ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
74
74
  ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
75
- ai_edge_quantizer_nightly-0.4.0.dev20250926.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
76
- ai_edge_quantizer_nightly-0.4.0.dev20250926.dist-info/METADATA,sha256=6ymhTobT9E998G5IZCfmysJbNYMcQr_vrngEtQf5VsE,1508
77
- ai_edge_quantizer_nightly-0.4.0.dev20250926.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
78
- ai_edge_quantizer_nightly-0.4.0.dev20250926.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
79
- ai_edge_quantizer_nightly-0.4.0.dev20250926.dist-info/RECORD,,
75
+ ai_edge_quantizer_nightly-0.4.0.dev20250928.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
76
+ ai_edge_quantizer_nightly-0.4.0.dev20250928.dist-info/METADATA,sha256=Vi0g_M6-Fk7O8Xk5h5AtzbeTEFzse_8cYiBZ1S-H330,1508
77
+ ai_edge_quantizer_nightly-0.4.0.dev20250928.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
78
+ ai_edge_quantizer_nightly-0.4.0.dev20250928.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
79
+ ai_edge_quantizer_nightly-0.4.0.dev20250928.dist-info/RECORD,,