ai-edge-quantizer-nightly 0.4.0.dev20250926__py3-none-any.whl → 0.4.0.dev20250928__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.
- ai_edge_quantizer/recipe_manager.py +12 -3
- ai_edge_quantizer/recipe_manager_test.py +69 -2
- {ai_edge_quantizer_nightly-0.4.0.dev20250926.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250928.dist-info}/METADATA +1 -1
- {ai_edge_quantizer_nightly-0.4.0.dev20250926.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250928.dist-info}/RECORD +7 -7
- {ai_edge_quantizer_nightly-0.4.0.dev20250926.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250928.dist-info}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20250926.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250928.dist-info}/WHEEL +0 -0
- {ai_edge_quantizer_nightly-0.4.0.dev20250926.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250928.dist-info}/top_level.txt +0 -0
ai_edge_quantizer/recipe_manager.py

@@ -82,7 +82,6 @@ class RecipeManager:
         str, list[OpQuantizationRecipe]
     ] = collections.OrderedDict()
 
-  # TODO: b/335254997 - Check if an op quantization config is supported.
   def add_quantization_config(
       self,
       regex: str,

@@ -272,7 +271,8 @@ class RecipeManager:
     """
    weight_config = qtyping.TensorQuantizationConfig(
        num_bits=num_bits,
-        symmetric=True,  # LiteRT kernels only support symmetric quantized
+        symmetric=True,  # LiteRT kernels only support symmetric quantized
+        # weights.
        granularity=granularity,
    )
    self.add_quantization_config(

@@ -316,10 +316,18 @@ class RecipeManager:
      granularity: Granularity of quantization.
      algorithm_key: Algorithm key to be applied.
    """
+    # Default to integer quantization but allow float quantization for
+    # FLOAT_CASTING algorithm. This is to support weight-only quantization with
+    # fp16 weights.
+    weight_dtype = qtyping.TensorDataType.INT
+    if algorithm_key == AlgorithmName.FLOAT_CASTING:
+      weight_dtype = qtyping.TensorDataType.FLOAT
+
    weight_config = qtyping.TensorQuantizationConfig(
        num_bits=num_bits,
        symmetric=True,  # TFL kernels only support symmetric quantized weights.
        granularity=granularity,
+        dtype=weight_dtype,
    )
    self.add_quantization_config(
        regex,

@@ -365,7 +373,8 @@ class RecipeManager:
      raise ValueError(
          'Activation quantization is only supported for 16 or 8 bits.'
      )
-    # INT16 is symmetric and INT8 is asymmetric due to LiteRT kernel
+    # INT16 is symmetric and INT8 is asymmetric due to LiteRT kernel
+    # limitations.
    activation_symmetric = activation_num_bits == 16
    activation_config = qtyping.TensorQuantizationConfig(
        num_bits=activation_num_bits, symmetric=activation_symmetric
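With the change above, float16 weight-only recipes flow through the same entry point as integer ones: add_weight_only_config picks TensorDataType.FLOAT only when the FLOAT_CASTING algorithm is requested, and otherwise keeps TensorDataType.INT. Below is a minimal usage sketch assembled from the names visible in this diff; the import paths and the locations of TFLOperationName and AlgorithmName are assumptions, not verified API, so treat it as illustrative only.

    # Illustrative sketch only. Module paths and enum locations are assumptions
    # inferred from this diff, not a verified public API.
    from ai_edge_quantizer import qtyping          # assumed import path
    from ai_edge_quantizer import recipe_manager   # assumed import path

    rm = recipe_manager.RecipeManager()

    # fp16 weight-only: FLOAT_CASTING switches the weight dtype to FLOAT.
    rm.add_weight_only_config(
        regex='.*/Dense2/.*',
        operation_name=qtyping.TFLOperationName.FULLY_CONNECTED,  # assumed: enum the test aliases as _TFLOpName
        num_bits=16,
        algorithm_key=recipe_manager.AlgorithmName.FLOAT_CASTING,  # assumed enum location
    )

    # int4 weight-only: the default path keeps the weight dtype as INT.
    rm.add_weight_only_config(
        regex='.*/Dense/.*',
        operation_name=qtyping.TFLOperationName.FULLY_CONNECTED,
        num_bits=4,
    )

    _, op_config = rm.get_quantization_configs(
        qtyping.TFLOperationName.FULLY_CONNECTED, 'model/Dense2/op'
    )
    assert op_config.weight_tensor_config.dtype == qtyping.TensorDataType.FLOAT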
ai_edge_quantizer/recipe_manager_test.py

@@ -315,11 +315,12 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
         _QuantGranularity.CHANNELWISE,
     )
 
-
+  @parameterized.parameters(4, 8)
+  def test_add_weight_only_config_int(self, num_bits):
     self._recipe_manager.add_weight_only_config(
         regex='.*/Dense/.*',
         operation_name=_TFLOpName.FULLY_CONNECTED,
-        num_bits=
+        num_bits=num_bits,
     )
     alg_key, op_config = self._recipe_manager.get_quantization_configs(
         _TFLOpName.FULLY_CONNECTED, 'model/Dense/op'

@@ -330,6 +331,72 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
     self.assertIsNone(op_config.activation_tensor_config)
     weight_tensor_config = op_config.weight_tensor_config
     self.assertIsNotNone(weight_tensor_config)
+    self.assertEqual(weight_tensor_config.num_bits, num_bits)
+    self.assertTrue(weight_tensor_config.symmetric)
+    self.assertEqual(
+        weight_tensor_config.granularity,
+        _QuantGranularity.CHANNELWISE,
+    )
+    self.assertEqual(weight_tensor_config.dtype, _TensorDataType.INT)
+
+  def test_add_weight_only_config_fp16(self):
+    self._recipe_manager.add_weight_only_config(
+        regex='.*/Dense2/.*',
+        operation_name=_TFLOpName.FULLY_CONNECTED,
+        num_bits=16,
+        algorithm_key=_AlgorithmName.FLOAT_CASTING,
+    )
+    alg_key, op_config = self._recipe_manager.get_quantization_configs(
+        _TFLOpName.FULLY_CONNECTED, 'model/Dense2/op'
+    )
+    self.assertEqual(alg_key, _AlgorithmName.FLOAT_CASTING)
+    self.assertEqual(op_config.compute_precision, _ComputePrecision.FLOAT)
+    self.assertTrue(op_config.explicit_dequantize)
+    self.assertIsNone(op_config.activation_tensor_config)
+    weight_tensor_config = op_config.weight_tensor_config
+    self.assertIsNotNone(weight_tensor_config)
+    self.assertEqual(weight_tensor_config.num_bits, 16)
+    self.assertTrue(weight_tensor_config.symmetric)
+    self.assertEqual(
+        weight_tensor_config.granularity,
+        _QuantGranularity.CHANNELWISE,
+    )
+    self.assertEqual(weight_tensor_config.dtype, _TensorDataType.FLOAT)
+
+  def test_add_weight_only_config_fp8_raise_error(self):
+    error_message = (
+        'float casting quantization config requires number of bits to be set'
+        ' as 16'
+    )
+    with self.assertRaisesWithPredicateMatch(
+        ValueError, lambda err: error_message in str(err)
+    ):
+      self._recipe_manager.add_weight_only_config(
+          regex='.*/Dense2/.*',
+          operation_name=_TFLOpName.FULLY_CONNECTED,
+          num_bits=8,
+          algorithm_key=_AlgorithmName.FLOAT_CASTING,
+      )
+
+  def test_add_static_config(self):
+    self._recipe_manager.add_static_config(
+        regex='.*/Dense/.*',
+        operation_name=_TFLOpName.FULLY_CONNECTED,
+        activation_num_bits=8,
+        weight_num_bits=4,
+    )
+    alg_key, op_config = self._recipe_manager.get_quantization_configs(
+        _TFLOpName.FULLY_CONNECTED, 'model/Dense/op'
+    )
+    self.assertEqual(alg_key, _AlgorithmName.MIN_MAX_UNIFORM_QUANT)
+    self.assertEqual(op_config.compute_precision, _ComputePrecision.INTEGER)
+    self.assertFalse(op_config.explicit_dequantize)
+    activation_tensor_config = op_config.activation_tensor_config
+    self.assertIsNotNone(activation_tensor_config)
+    self.assertEqual(activation_tensor_config.num_bits, 8)
+    self.assertFalse(activation_tensor_config.symmetric)
+    weight_tensor_config = op_config.weight_tensor_config
+    self.assertIsNotNone(weight_tensor_config)
     self.assertEqual(weight_tensor_config.num_bits, 4)
     self.assertTrue(weight_tensor_config.symmetric)
     self.assertEqual(
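The test_add_static_config case above exercises the activation symmetry rule restated earlier in the recipe_manager.py hunk: 16-bit activations are quantized symmetrically and 8-bit activations asymmetrically, matching LiteRT kernel support. As a standalone illustration, the rule reduces to the hypothetical helper below; this is a restatement of the logic shown in the diff, not part of the package API.

    def choose_activation_symmetry(activation_num_bits: int) -> bool:
      """Hypothetical restatement of the symmetry rule shown in the diff above."""
      if activation_num_bits not in (8, 16):
        raise ValueError(
            'Activation quantization is only supported for 16 or 8 bits.'
        )
      # INT16 activations are symmetric; INT8 activations are asymmetric,
      # due to LiteRT kernel limitations.
      return activation_num_bits == 16

    # Matches what test_add_static_config asserts: 8-bit activations are asymmetric.
    assert choose_activation_symmetry(16) is True
    assert choose_activation_symmetry(8) is False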
{ai_edge_quantizer_nightly-0.4.0.dev20250926.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250928.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-quantizer-nightly
-Version: 0.4.0.dev20250926
+Version: 0.4.0.dev20250928
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
{ai_edge_quantizer_nightly-0.4.0.dev20250926.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20250928.dist-info}/RECORD

@@ -16,8 +16,8 @@ ai_edge_quantizer/qtyping.py,sha256=tfrPip-uzJuF_PASgUExx5Oy9gghWUbQaApR0XaBpNw,
 ai_edge_quantizer/quantizer.py,sha256=ckAEOnnBxuCKZuvlzdChevCKPuE-IeDPHCNtFTWr250,17857
 ai_edge_quantizer/quantizer_test.py,sha256=m6f4ayyaF3yQb9i4V0aFAbmGw0OKZ2Zam1RoTPh-u24,22917
 ai_edge_quantizer/recipe.py,sha256=MEkfQ2Sg3KAE9LAORHWcbjYNPg06EUbwc1d-VspQA2U,6461
-ai_edge_quantizer/recipe_manager.py,sha256=
-ai_edge_quantizer/recipe_manager_test.py,sha256=
+ai_edge_quantizer/recipe_manager.py,sha256=6l2uq8KL23KLu9OQDmPGkxrFiwHrdDB9xnn-ni8WdEM,15036
+ai_edge_quantizer/recipe_manager_test.py,sha256=qjgGUF-wggXnSXqZ5khmqrDMIQI5CShk52IVWTahq6s,36817
 ai_edge_quantizer/recipe_test.py,sha256=QisyaTol8JRZFcGOGyee7QRCvqj5VbF4guKWdIoMUOE,6213
 ai_edge_quantizer/transformation_instruction_generator.py,sha256=O0U2aZcB8aXQgOV8r9g1rGNzDUiuI5Ta53XnxZbVffE,31576
 ai_edge_quantizer/transformation_instruction_generator_test.py,sha256=KW5-WoTTo9IqLEVnWxVC8ut8eWLi_91xfKgGqVQ9QDk,54635

@@ -72,8 +72,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
 ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
 ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
-ai_edge_quantizer_nightly-0.4.0.
-ai_edge_quantizer_nightly-0.4.0.
-ai_edge_quantizer_nightly-0.4.0.
-ai_edge_quantizer_nightly-0.4.0.
-ai_edge_quantizer_nightly-0.4.0.
+ai_edge_quantizer_nightly-0.4.0.dev20250928.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.4.0.dev20250928.dist-info/METADATA,sha256=Vi0g_M6-Fk7O8Xk5h5AtzbeTEFzse_8cYiBZ1S-H330,1508
+ai_edge_quantizer_nightly-0.4.0.dev20250928.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_quantizer_nightly-0.4.0.dev20250928.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.4.0.dev20250928.dist-info/RECORD,,