ai-edge-quantizer-nightly 0.3.0.dev20250814__py3-none-any.whl → 0.3.0.dev20250816__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/ai_edge_quantizer/algorithm_manager.py
+++ b/ai_edge_quantizer/algorithm_manager.py
@@ -102,6 +102,7 @@ MIN_MAX_OP_NAME_MATERIALIZE_FUNC_DICT = {
     _TFLOpName.LOGISTIC: common_quantize.materialize_softmax_and_logistic,
     _TFLOpName.SLICE: common_quantize.materialize_slice,
     _TFLOpName.SUM: common_quantize.materialize_sum,
+    _TFLOpName.SELECT: common_quantize.materialize_select,
     _TFLOpName.SELECT_V2: common_quantize.materialize_select_v2,
     _TFLOpName.DYNAMIC_UPDATE_SLICE: (
         common_quantize.materialize_dynamic_update_slice
@@ -250,6 +251,7 @@ _OCTAV_OP_NAME_MATERIALIZE_FUNC_DICT = immutabledict({
     _TFLOpName.LOGISTIC: common_quantize.materialize_softmax_and_logistic,
     _TFLOpName.SLICE: common_quantize.materialize_slice,
     _TFLOpName.SUM: common_quantize.materialize_sum,
+    _TFLOpName.SELECT: common_quantize.materialize_select,
     _TFLOpName.SELECT_V2: common_quantize.materialize_select_v2,
     _TFLOpName.DYNAMIC_UPDATE_SLICE: (
         common_quantize.materialize_dynamic_update_slice
--- a/ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py
+++ b/ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py
@@ -371,6 +371,25 @@ def materialize_slice(
   )


+def materialize_select(
+    get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
+    op_info: qtyping.OpInfo,
+    graph_info: qtyping.GraphInfo,
+    tensor_name_to_qsv: dict[str, Any],
+) -> list[qtyping.TensorTransformationParams]:
+  """Materialize tensors in tfl.select."""
+  return common_utils.materialize_standard_op(
+      op_info,
+      graph_info,
+      tensor_name_to_qsv,
+      get_tensor_quant_params_fn,
+      constraint=_OpQuantConstraint.SAME_AS_OUTPUT_SCALE,
+      inputs_to_ignore=[
+          0,
+      ],  # Condition tensor does not need to be quantized.
+  )
+
+
 def materialize_select_v2(
     get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
     op_info: qtyping.OpInfo,
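The SAME_AS_OUTPUT_SCALE constraint reflects how tfl.select works: every output element is drawn from one of the two value inputs, so those inputs must share the output's quantization parameters, while the boolean condition (input 0) carries no quantization parameters at all. A rough numpy analogy of the data flow (illustrative only, not package code):

import numpy as np

# tfl.select(condition, x, y) behaves like np.where: the output is copied
# element-wise from x or y, so x, y, and the output must share one
# scale/zero point; the boolean condition never needs to be quantized.
condition = np.array([True, False, True])
x = np.array([0.5, 1.0, 1.5], dtype=np.float32)
y = np.array([-0.5, -1.0, -1.5], dtype=np.float32)
output = np.where(condition, x, y)  # [0.5, -1.0, 1.5]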
--- a/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py
+++ b/ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py
@@ -387,22 +387,37 @@ def tensor_zp_scale_from_min_max(
   )
   qmin, qmax = get_quantized_range(qtype)
   min_bound = 1e-4  # 1e-6 precision for int8 and 1e-8 for int16.
+  pos_clipping_values = None if clipping_values is None else clipping_values
+  neg_clipping_values = None if clipping_values is None else -clipping_values

   if granularity == qtyping.QuantGranularity.BLOCKWISE:
-    # Blockwise quantization uses float16 scale, with 7 bit mantissa,
-    # so the maximum representable value is 65280.
-    float16_max = np.broadcast_to(np.array(65280), min_value.shape)
-    clipping_values = (
+    # Blockwise quantization uses float16 scale,
+    # with 7 bit mantissa, so the maximum scale value is 65280 and maximum
+    # representable range is [-65280 * (2 ** num_bits),
+    # 65280 * (2 ** num_bits - 1)].
+    # Note that we have one extra value on the negative side.
+    float16_max = np.broadcast_to(
+        np.array(65280) * (2**num_bits - 1), max_value.shape
+    )
+    float16_min = np.broadcast_to(
+        np.array(-65280) * (2**num_bits), min_value.shape
+    )
+    pos_clipping_values = (
         float16_max
-        if clipping_values is None
-        else np.minimum(clipping_values, float16_max)
+        if pos_clipping_values is None
+        else np.minimum(pos_clipping_values, float16_max)
+    )
+    neg_clipping_values = (
+        float16_min
+        if neg_clipping_values is None
+        else np.maximum(neg_clipping_values, float16_min)
     )

   if symmetric:
     bound = np.maximum(np.abs(min_value), np.abs(max_value))
     bound = np.maximum(bound, min_bound)
     if clipping_values is not None:
-      bound = np.clip(bound, -clipping_values, clipping_values)
+      bound = np.clip(bound, neg_clipping_values, pos_clipping_values)
     if not qtype.signed:
       half_q = (qmax - 1) / 2
       scale = bound / half_q
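As a quick sanity check of the clipping bounds introduced above (a sketch, not package code), plugging in the 65280 maximum float16 scale cited in the comment gives an asymmetric range with one extra step on the negative side:

# Illustrative arithmetic only: blockwise clipping bounds for two common
# weight widths, using the 65280 maximum scale from the comment above.
for num_bits in (4, 8):
    pos_bound = 65280 * (2**num_bits - 1)
    neg_bound = -65280 * (2**num_bits)
    print(num_bits, pos_bound, neg_bound)
# 4   979200    -1044480
# 8   16646400  -16711680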
--- a/ai_edge_quantizer/calibrator_test.py
+++ b/ai_edge_quantizer/calibrator_test.py
@@ -302,7 +302,7 @@ class CalibratorToyGemma2Test(googletest.TestCase):
         self._toy_gemma2_calibration_dataset,
         model_recipe_manager=recipe_mngr,
     )
-    self.assertLen(calib.get_model_qsvs(), 288)
+    self.assertLen(calib.get_model_qsvs(), 290)


 if __name__ == "__main__":
--- a/ai_edge_quantizer/default_policy.py
+++ b/ai_edge_quantizer/default_policy.py
@@ -180,6 +180,7 @@ DEFAULT_JSON_POLICY = """
       "SLICE",
       "EMBEDDING_LOOKUP",
       "SUM",
+      "SELECT",
       "SELECT_V2",
       "DYNAMIC_UPDATE_SLICE",
       "SELECT_V2",
@@ -222,6 +223,7 @@ DEFAULT_JSON_POLICY = """
       "SLICE",
       "EMBEDDING_LOOKUP",
       "SUM",
+      "SELECT",
      "SELECT_V2",
       "DYNAMIC_UPDATE_SLICE",
       "SELECT_V2",
--- a/ai_edge_quantizer/qtyping.py
+++ b/ai_edge_quantizer/qtyping.py
@@ -59,6 +59,7 @@ class TFLOperationName(str, enum.Enum):
   LOGISTIC = 'LOGISTIC'
   SLICE = 'SLICE'
   SUM = 'SUM'
+  SELECT = 'SELECT'
   SELECT_V2 = 'SELECT_V2'
   DYNAMIC_UPDATE_SLICE = 'DYNAMIC_UPDATE_SLICE'
   STABLEHLO_COMPOSITE = 'STABLEHLO_COMPOSITE'
--- a/ai_edge_quantizer/quantizer.py
+++ b/ai_edge_quantizer/quantizer.py
@@ -35,6 +35,7 @@ from ai_edge_quantizer.utils import tfl_interpreter_utils
 from ai_edge_quantizer.utils import validation_utils
 from tensorflow.python.platform import gfile  # pylint: disable=g-direct-tensorflow-import

+
 # Expose algorithm names to users.
 AlgorithmName = algorithm_manager.AlgorithmName

@@ -220,6 +221,109 @@ class Quantizer:
         regex, operation_name, op_config, algorithm_key
     )

+  def add_dynamic_config(
+      self,
+      regex: str,
+      operation_name: _TFLOpName,
+      num_bits: int,
+      granularity: qtyping.QuantGranularity = qtyping.QuantGranularity.CHANNELWISE,
+      algorithm_key: str = algorithm_manager.AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+  ):
+    """Adds a dynamic quantization configuration to the recipe.
+
+    During dynamic quantization, activations are not processed by AEQ and
+    remain in float format. The runtime kernel is expected to quantize these
+    activations on the fly, as indicated by compute_precision=Integer and
+    explicit_dequantize=False.
+
+    Model quality may suffer due to the on-the-fly quantization. If quality is
+    a concern, consider using weight-only quantization.
+
+    Args:
+      regex: Regular expression for layer name (op's output tensor name)
+        matching.
+      operation_name: Target TFLite operation.
+      num_bits: Number of bits for quantization.
+      granularity: Granularity of quantization.
+      algorithm_key: Algorithm key to be applied.
+    """
+    self._recipe_manager.add_dynamic_config(
+        regex, operation_name, num_bits, granularity, algorithm_key
+    )
+
+  def add_weight_only_config(
+      self,
+      regex: str,
+      operation_name: _TFLOpName,
+      num_bits: int,
+      granularity: qtyping.QuantGranularity = qtyping.QuantGranularity.CHANNELWISE,
+      algorithm_key: str = algorithm_manager.AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+  ):
+    """Adds a weight-only quantization configuration to the recipe.
+
+    In weight-only quantization, weights are quantized but the actual op
+    computation remains in float. The quantized weight is explicitly
+    dequantized before being fed into the op, achieved by inserting a
+    dequantize op between the quantized weight and the consuming op. To enable
+    this, compute_precision is set to Float and explicit_dequantize to True.
+
+    Weight-only quantization is useful for reducing model size but may not
+    decrease latency due to the float computation. However, the quantized
+    model generally has better quality than with other quantization options
+    (e.g., dynamic range quantization) because activations lose no precision.
+    If latency is a concern, consider using dynamic quantization.
+
+    Args:
+      regex: Regular expression for layer name matching.
+      operation_name: Target TFLite operation.
+      num_bits: Number of bits for quantization.
+      granularity: Granularity of quantization.
+      algorithm_key: Algorithm key to be applied.
+    """
+    self._recipe_manager.add_weight_only_config(
+        regex, operation_name, num_bits, granularity, algorithm_key
+    )
+
+  def add_static_config(
+      self,
+      regex: str,
+      operation_name: _TFLOpName,
+      activation_num_bits: int,
+      weight_num_bits: int,
+      weight_granularity: qtyping.QuantGranularity = qtyping.QuantGranularity.CHANNELWISE,
+      algorithm_key: str = algorithm_manager.AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+  ):
+    """Adds a static quantization configuration to the recipe.
+
+    In static quantization, both weights and activations are quantized. This
+    requires a calibration step to determine the quantization parameters
+    (e.g., min/max ranges) for activations. The quantized model uses integer
+    arithmetic for computations, which can lead to significant latency
+    reductions.
+
+    However, calibration requires sample data and may lead to quality loss. If
+    there is no hardware requirement for full integer quantization, consider
+    using dynamic quantization for simplicity.
+
+    Args:
+      regex: Regular expression for layer name matching.
+      operation_name: Target TFLite operation.
+      activation_num_bits: Number of bits for activation quantization.
+      weight_num_bits: Number of bits for weight quantization.
+      weight_granularity: Granularity of weight quantization.
+      algorithm_key: Algorithm key to be applied.
+    """
+    self._recipe_manager.add_static_config(
+        regex,
+        operation_name,
+        activation_num_bits,
+        weight_num_bits,
+        weight_granularity,
+        algorithm_key,
+    )
+
   @property
   def need_calibration(self) -> bool:
     """Checks if the current recipe needs calibration."""
--- a/ai_edge_quantizer/quantizer_test.py
+++ b/ai_edge_quantizer/quantizer_test.py
@@ -92,6 +92,76 @@ class QuantizerTest(parameterized.TestCase):
         new_op_config.compute_precision,
     )

+  def test_add_dynamic_config_succeeds(self):
+    self._quantizer.load_quantization_recipe(self._test_recipe_path)
+    scope_regex = '.*/Dense/.*'
+    self._quantizer.add_dynamic_config(
+        regex=scope_regex,
+        operation_name=qtyping.TFLOperationName.FULLY_CONNECTED,
+        num_bits=8,
+    )
+    updated_recipe = self._quantizer.get_quantization_recipe()
+    self.assertLen(updated_recipe, 2)
+
+    added_config = updated_recipe[-1]
+    self.assertEqual(added_config['regex'], scope_regex)
+    self.assertEqual(
+        added_config['op_config']['compute_precision'],
+        qtyping.ComputePrecision.INTEGER,
+    )
+    self.assertFalse(added_config['op_config']['explicit_dequantize'])
+    self.assertEqual(
+        added_config['op_config']['weight_tensor_config']['num_bits'], 8
+    )
+
+  def test_add_weight_only_config_succeeds(self):
+    self._quantizer.load_quantization_recipe(self._test_recipe_path)
+    scope_regex = '.*/Dense/.*'
+    self._quantizer.add_weight_only_config(
+        regex=scope_regex,
+        operation_name=qtyping.TFLOperationName.FULLY_CONNECTED,
+        num_bits=4,
+    )
+    updated_recipe = self._quantizer.get_quantization_recipe()
+    self.assertLen(updated_recipe, 2)
+
+    added_config = updated_recipe[-1]
+    self.assertEqual(added_config['regex'], scope_regex)
+    self.assertEqual(
+        added_config['op_config']['compute_precision'],
+        qtyping.ComputePrecision.FLOAT,
+    )
+    self.assertTrue(added_config['op_config']['explicit_dequantize'])
+    self.assertEqual(
+        added_config['op_config']['weight_tensor_config']['num_bits'], 4
+    )
+
+  def test_add_static_config_succeeds(self):
+    self._quantizer.load_quantization_recipe(self._test_recipe_path)
+    scope_regex = '.*/Dense/.*'
+    self._quantizer.add_static_config(
+        regex=scope_regex,
+        operation_name=qtyping.TFLOperationName.FULLY_CONNECTED,
+        activation_num_bits=8,
+        weight_num_bits=4,
+    )
+    updated_recipe = self._quantizer.get_quantization_recipe()
+    self.assertLen(updated_recipe, 2)
+
+    added_config = updated_recipe[-1]
+    self.assertEqual(added_config['regex'], scope_regex)
+    self.assertEqual(
+        added_config['op_config']['compute_precision'],
+        qtyping.ComputePrecision.INTEGER,
+    )
+    self.assertFalse(added_config['op_config']['explicit_dequantize'])
+    self.assertEqual(
+        added_config['op_config']['activation_tensor_config']['num_bits'], 8
+    )
+    self.assertEqual(
+        added_config['op_config']['weight_tensor_config']['num_bits'], 4
+    )
+
   def test_load_quantization_recipe_succeeds(self):
     qt = quantizer.Quantizer(self._test_model_path, None)
     qt.load_quantization_recipe(self._test_recipe_path)
--- a/ai_edge_quantizer/recipe.py
+++ b/ai_edge_quantizer/recipe.py
@@ -15,51 +15,163 @@

 """Quantization recipe module."""

+from ai_edge_quantizer import algorithm_manager
+from ai_edge_quantizer import qtyping
+from ai_edge_quantizer import recipe_manager

-def dynamic_wi8_afp32():
-  """Returns a dynamic quantization recipe with int8 weights and float32 activation."""
-  return [
-      dict({
-          'regex': '.*',
-          'operation': '*',
-          'algorithm_key': 'min_max_uniform_quantize',
-          'op_config': {
-              'weight_tensor_config': {
-                  'num_bits': 8,
-                  'symmetric': True,
-                  'granularity': 'CHANNELWISE',
-                  'dtype': 'INT',
-                  'block_size': 0,
-              },
-              'compute_precision': 'INTEGER',
-              'explicit_dequantize': False,
-              'skip_checks': False,
-          },
-      })
-  ]
+AlgorithmName = algorithm_manager.AlgorithmName


-def dynamic_wi4_afp32():
-  """Returns a dynamic quantization recipe with int4 weights and float32 activation."""
-  return [
-      dict({
-          'regex': '.*',
-          'operation': '*',
-          'algorithm_key': 'min_max_uniform_quantize',
-          'op_config': {
-              'weight_tensor_config': {
-                  'num_bits': 4,
-                  'symmetric': True,
-                  'granularity': 'CHANNELWISE',
-                  'dtype': 'INT',
-                  'block_size': 0,
-              },
-              'compute_precision': 'INTEGER',
-              'explicit_dequantize': False,
-              'skip_checks': False,
-          },
-      })
-  ]
+def dynamic_wi8_afp32(
+    algorithm_key: AlgorithmName = AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+):
+  """Returns a dynamic quantization recipe with int8 weights and float32 activations.
+
+  All supported ops will be quantized with int8 weights and float32
+  activations, which will be dynamically quantized to int8 during inference
+  to enable int8 compute. Model quality may suffer due to the on-the-fly
+  quantization; if quality is a concern, consider weight-only quantization.
+
+  Args:
+    algorithm_key: The algorithm to use for quantization.
+
+  Returns:
+    A dynamic quantization recipe.
+  """
+  rp_manager = recipe_manager.RecipeManager()
+  rp_manager.add_dynamic_config(
+      regex='.*',
+      operation_name=qtyping.TFLOperationName.ALL_SUPPORTED,
+      num_bits=8,
+      algorithm_key=algorithm_key,
+  )
+  return rp_manager.get_quantization_recipe()
+
+
+def dynamic_wi4_afp32(
+    algorithm_key: AlgorithmName = AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+):
+  """Returns a dynamic quantization recipe with int4 weights and float32 activations.
+
+  All supported ops will be quantized with int4 weights and float32
+  activations, which will be dynamically quantized to int4 during inference
+  to enable int4 compute.
+
+  Args:
+    algorithm_key: The algorithm to use for quantization.
+
+  Returns:
+    A dynamic quantization recipe.
+  """
+  rp_manager = recipe_manager.RecipeManager()
+  rp_manager.add_dynamic_config(
+      regex='.*',
+      operation_name=qtyping.TFLOperationName.ALL_SUPPORTED,
+      num_bits=4,
+      algorithm_key=algorithm_key,
+  )
+  return rp_manager.get_quantization_recipe()
+
+
+def weight_only_wi8_afp32(
+    algorithm_key: AlgorithmName = AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+):
+  """Returns a weight-only quantization recipe with int8 weights and float32 activations.
+
+  All supported ops will be quantized with int8 weights and float32
+  activations. The weights will be explicitly dequantized before being fed
+  into the op to enable float compute and thus retain model quality. If
+  latency is a concern, consider using dynamic range quantization.
+
+  Args:
+    algorithm_key: The algorithm to use for quantization.
+
+  Returns:
+    A weight-only quantization recipe.
+  """
+  rp_manager = recipe_manager.RecipeManager()
+  rp_manager.add_weight_only_config(
+      regex='.*',
+      operation_name=qtyping.TFLOperationName.ALL_SUPPORTED,
+      num_bits=8,
+      algorithm_key=algorithm_key,
+  )
+  return rp_manager.get_quantization_recipe()
+
+
+def weight_only_wi4_afp32(
+    algorithm_key: AlgorithmName = AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+):
+  """Returns a weight-only quantization recipe with int4 weights and float32 activations.
+
+  All supported ops will be quantized with int4 weights and float32
+  activations. The weights will be explicitly dequantized before being fed
+  into the op to enable float compute and thus retain model quality.
+
+  Args:
+    algorithm_key: The algorithm to use for quantization.
+
+  Returns:
+    A weight-only quantization recipe.
+  """
+  rp_manager = recipe_manager.RecipeManager()
+  rp_manager.add_weight_only_config(
+      regex='.*',
+      operation_name=qtyping.TFLOperationName.ALL_SUPPORTED,
+      num_bits=4,
+      algorithm_key=algorithm_key,
+  )
+  return rp_manager.get_quantization_recipe()
+
+
+def static_wi8_ai8(
+    algorithm_key: AlgorithmName = AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+):
+  """Returns a static quantization recipe with int8 weights and int8 activations.
+
+  All supported ops will be quantized with int8 weights and int8 activations.
+  Calibration is needed to use this recipe.
+
+  Args:
+    algorithm_key: The algorithm to use for quantization.
+
+  Returns:
+    A static quantization recipe.
+  """
+  rp_manager = recipe_manager.RecipeManager()
+  rp_manager.add_static_config(
+      regex='.*',
+      operation_name=qtyping.TFLOperationName.ALL_SUPPORTED,
+      activation_num_bits=8,
+      weight_num_bits=8,
+      algorithm_key=algorithm_key,
+  )
+  return rp_manager.get_quantization_recipe()
+
+
+def static_wi8_ai16(
+    algorithm_key: AlgorithmName = AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+):
+  """Returns a static quantization recipe with int8 weights and int16 activations.
+
+  All supported ops will be quantized with int8 weights and int16 activations.
+  Calibration is needed to use this recipe.
+
+  Args:
+    algorithm_key: The algorithm to use for quantization.
+
+  Returns:
+    A static quantization recipe.
+  """
+  rp_manager = recipe_manager.RecipeManager()
+  rp_manager.add_static_config(
+      regex='.*',
+      operation_name=qtyping.TFLOperationName.ALL_SUPPORTED,
+      activation_num_bits=16,
+      weight_num_bits=8,
+      algorithm_key=algorithm_key,
+  )
+  return rp_manager.get_quantization_recipe()


 def dynamic_legacy_wi8_afp32():
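A brief sketch of how these recipe helpers are meant to be consumed (the model path is hypothetical); only the static recipes need calibration, mirroring the test changes later in this diff:

from ai_edge_quantizer import quantizer
from ai_edge_quantizer import recipe
from ai_edge_quantizer.utils import tfl_interpreter_utils

# Hypothetical model path, for illustration only.
qt = quantizer.Quantizer('model.tflite')
qt.load_quantization_recipe(recipe.static_wi8_ai8())
if qt.need_calibration:
  # Static recipes require calibration data; random data is enough for a
  # smoke test, as in recipe_test.py below.
  calibration_data = tfl_interpreter_utils.create_random_normal_input_data(
      qt.float_model, num_samples=1
  )
  result = qt.quantize(qt.calibrate(calibration_data))
else:
  result = qt.quantize()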
--- a/ai_edge_quantizer/recipe_manager.py
+++ b/ai_edge_quantizer/recipe_manager.py
@@ -243,3 +243,146 @@ class RecipeManager:
     ):
       return True
     return False
+
+  def add_dynamic_config(
+      self,
+      regex: str,
+      operation_name: _TFLOpName,
+      num_bits: int,
+      granularity: qtyping.QuantGranularity = qtyping.QuantGranularity.CHANNELWISE,
+      algorithm_key: str = algorithm_manager.AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+  ):
+    """Adds a dynamic quantization configuration to the recipe.
+
+    During dynamic quantization, activations are not processed by AEQ and
+    remain in float format. The runtime kernel is expected to quantize these
+    activations on the fly, as indicated by compute_precision=Integer and
+    explicit_dequantize=False.
+
+    Model quality may suffer due to the on-the-fly quantization. If quality is
+    a concern, consider using weight-only quantization.
+
+    Args:
+      regex: Regular expression for layer name matching.
+      operation_name: Target TFLite operation.
+      num_bits: Number of bits for quantization.
+      granularity: Granularity of quantization.
+      algorithm_key: Algorithm key to be applied.
+    """
+    weight_config = qtyping.TensorQuantizationConfig(
+        num_bits=num_bits,
+        symmetric=True,  # LiteRT kernels only support symmetric quantized weights.
+        granularity=granularity,
+    )
+    self.add_quantization_config(
+        regex,
+        operation_name,
+        op_config=_OpQuantizationConfig(
+            weight_tensor_config=weight_config,
+            compute_precision=qtyping.ComputePrecision.INTEGER,
+            explicit_dequantize=False,
+        ),
+        algorithm_key=algorithm_key,
+    )
+
+  def add_weight_only_config(
+      self,
+      regex: str,
+      operation_name: _TFLOpName,
+      num_bits: int,
+      granularity: qtyping.QuantGranularity = qtyping.QuantGranularity.CHANNELWISE,
+      algorithm_key: str = algorithm_manager.AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+  ):
+    """Adds a weight-only quantization configuration to the recipe.
+
+    In weight-only quantization, weights are quantized but the actual op
+    computation remains in float. The quantized weight is explicitly
+    dequantized before being fed into the op, achieved by inserting a
+    dequantize op between the quantized weight and the consuming op. To enable
+    this, compute_precision is set to Float and explicit_dequantize to True.
+
+    Weight-only quantization is useful for reducing model size but may not
+    decrease latency due to the float computation. However, the quantized
+    model generally has better quality than with other quantization options
+    (e.g., dynamic range quantization) because activations lose no precision.
+    If latency is a concern, consider using dynamic quantization.
+
+    Args:
+      regex: Regular expression for layer name matching.
+      operation_name: Target TFLite operation.
+      num_bits: Number of bits for quantization.
+      granularity: Granularity of quantization.
+      algorithm_key: Algorithm key to be applied.
+    """
+    weight_config = qtyping.TensorQuantizationConfig(
+        num_bits=num_bits,
+        symmetric=True,  # TFL kernels only support symmetric quantized weights.
+        granularity=granularity,
+    )
+    self.add_quantization_config(
+        regex,
+        operation_name,
+        op_config=_OpQuantizationConfig(
+            weight_tensor_config=weight_config,
+            compute_precision=qtyping.ComputePrecision.FLOAT,
+            explicit_dequantize=True,
+        ),
+        algorithm_key=algorithm_key,
+    )
+
+  def add_static_config(
+      self,
+      regex: str,
+      operation_name: _TFLOpName,
+      activation_num_bits: int,
+      weight_num_bits: int,
+      weight_granularity: qtyping.QuantGranularity = qtyping.QuantGranularity.CHANNELWISE,
+      algorithm_key: str = algorithm_manager.AlgorithmName.MIN_MAX_UNIFORM_QUANT,
+  ):
+    """Adds a static range quantization configuration to the recipe.
+
+    In static quantization, both weights and activations are quantized. This
+    requires a calibration step to determine the quantization parameters
+    (e.g., min/max ranges) for activations. The quantized model uses integer
+    arithmetic for computations, which can lead to significant latency
+    reductions.
+
+    However, calibration requires sample data and may lead to quality loss. If
+    there is no hardware requirement for full integer quantization, consider
+    using dynamic quantization for simplicity.
+
+    Args:
+      regex: Regular expression for layer name matching.
+      operation_name: Target TFLite operation.
+      activation_num_bits: Number of bits for activation quantization.
+      weight_num_bits: Number of bits for weight quantization.
+      weight_granularity: Granularity of weight quantization.
+      algorithm_key: Algorithm key to be applied.
+    """
+    if activation_num_bits not in [16, 8]:
+      raise ValueError(
+          'Activation quantization is only supported for 16 or 8 bits.'
+      )
+    # INT16 is symmetric and INT8 is asymmetric due to LiteRT kernel
+    # limitations.
+    activation_symmetric = activation_num_bits == 16
+    activation_config = qtyping.TensorQuantizationConfig(
+        num_bits=activation_num_bits, symmetric=activation_symmetric
+    )
+    weight_config = qtyping.TensorQuantizationConfig(
+        num_bits=weight_num_bits,
+        symmetric=True,  # TFL kernels only support symmetric quantized weights.
+        granularity=weight_granularity,
+    )
+    self.add_quantization_config(
+        regex,
+        operation_name,
+        op_config=_OpQuantizationConfig(
+            activation_tensor_config=activation_config,
+            weight_tensor_config=weight_config,
+            compute_precision=qtyping.ComputePrecision.INTEGER,
+            explicit_dequantize=False,
+        ),
+        algorithm_key=algorithm_key,
+    )
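As a quick check of the constraints encoded in add_static_config (a sketch assuming the RecipeManager API shown in this diff): int16 activations are configured symmetric, int8 activations asymmetric, and any other activation width is rejected.

from ai_edge_quantizer import qtyping
from ai_edge_quantizer import recipe_manager

manager = recipe_manager.RecipeManager()
# 16-bit activations: accepted, configured as symmetric per the comment above.
manager.add_static_config(
    regex='.*',
    operation_name=qtyping.TFLOperationName.ALL_SUPPORTED,
    activation_num_bits=16,
    weight_num_bits=8,
)
# Anything other than 8 or 16 activation bits raises ValueError.
try:
  manager.add_static_config(
      regex='.*',
      operation_name=qtyping.TFLOperationName.ALL_SUPPORTED,
      activation_num_bits=4,
      weight_num_bits=4,
  )
except ValueError as err:
  print(err)  # Activation quantization is only supported for 16 or 8 bits.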
--- a/ai_edge_quantizer/recipe_manager_test.py
+++ b/ai_edge_quantizer/recipe_manager_test.py
@@ -293,6 +293,50 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
     # DRQ check.
     self.assertEqual(op_config.compute_precision, _ComputePrecision.INTEGER)

+  def test_add_dynamic_config(self):
+    self._recipe_manager.add_dynamic_config(
+        regex='.*/Dense/.*',
+        operation_name=_TFLOpName.FULLY_CONNECTED,
+        num_bits=8,
+    )
+    alg_key, op_config = self._recipe_manager.get_quantization_configs(
+        _TFLOpName.FULLY_CONNECTED, 'model/Dense/op'
+    )
+    self.assertEqual(alg_key, _AlgorithmName.MIN_MAX_UNIFORM_QUANT)
+    self.assertEqual(op_config.compute_precision, _ComputePrecision.INTEGER)
+    self.assertFalse(op_config.explicit_dequantize)
+    self.assertIsNone(op_config.activation_tensor_config)
+    weight_tensor_config = op_config.weight_tensor_config
+    self.assertIsNotNone(weight_tensor_config)
+    self.assertEqual(weight_tensor_config.num_bits, 8)
+    self.assertTrue(weight_tensor_config.symmetric)
+    self.assertEqual(
+        weight_tensor_config.granularity,
+        _QuantGranularity.CHANNELWISE,
+    )
+
+  def test_add_weight_only_config(self):
+    self._recipe_manager.add_weight_only_config(
+        regex='.*/Dense/.*',
+        operation_name=_TFLOpName.FULLY_CONNECTED,
+        num_bits=4,
+    )
+    alg_key, op_config = self._recipe_manager.get_quantization_configs(
+        _TFLOpName.FULLY_CONNECTED, 'model/Dense/op'
+    )
+    self.assertEqual(alg_key, _AlgorithmName.MIN_MAX_UNIFORM_QUANT)
+    self.assertEqual(op_config.compute_precision, _ComputePrecision.FLOAT)
+    self.assertTrue(op_config.explicit_dequantize)
+    self.assertIsNone(op_config.activation_tensor_config)
+    weight_tensor_config = op_config.weight_tensor_config
+    self.assertIsNotNone(weight_tensor_config)
+    self.assertEqual(weight_tensor_config.num_bits, 4)
+    self.assertTrue(weight_tensor_config.symmetric)
+    self.assertEqual(
+        weight_tensor_config.granularity,
+        _QuantGranularity.CHANNELWISE,
+    )
+
   def test_set_full_integer_quantization_config(self):
     _add_default_int8xint8_integer_recipe(self._recipe_manager)
     # Full integer setting is global
--- a/ai_edge_quantizer/recipe_test.py
+++ b/ai_edge_quantizer/recipe_test.py
@@ -21,6 +21,7 @@ from tensorflow.python.platform import googletest
 from ai_edge_quantizer import quantizer
 from ai_edge_quantizer import recipe
 from ai_edge_quantizer.utils import test_utils
+from ai_edge_quantizer.utils import tfl_interpreter_utils


 _TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile('')
@@ -30,21 +31,63 @@ class RecipeTest(parameterized.TestCase):

   def setUp(self):
     super().setUp()
-    self._test_model_path = os.path.join(
+    # Weights have < 1024 elements, so the legacy recipe will not quantize it.
+    self._small_model_path = os.path.join(
         _TEST_DATA_PREFIX_PATH,
         'tests/models/single_conv2d_transpose_bias.tflite',
     )
+    self._test_model_path = os.path.join(
+        _TEST_DATA_PREFIX_PATH,
+        'tests/models/conv_fc_mnist.tflite',
+    )

-  def _quantize_with_recipe_func(self, recipe_func):
-    qt = quantizer.Quantizer(self._test_model_path)
+  def _quantize_with_recipe_func(self, recipe_func, test_model_path):
+    qt = quantizer.Quantizer(test_model_path)
     qt.load_quantization_recipe(recipe_func())
     self.assertIsNone(qt._result.quantized_model)
-    quant_result = qt.quantize()
-    self.assertIsNotNone(quant_result.quantized_model)
-    return quant_result
+    if qt.need_calibration:
+      calibration_data = tfl_interpreter_utils.create_random_normal_input_data(
+          qt.float_model,
+          num_samples=1,
+      )
+      calibration_result = qt.calibrate(calibration_data)
+      quantization_result = qt.quantize(calibration_result)
+    else:
+      quantization_result = qt.quantize()
+    self.assertIsNotNone(quantization_result.quantized_model)
+    return quantization_result

   def test_quantization_from_dynamic_wi8_afp32_func_succeeds(self):
-    quant_result = self._quantize_with_recipe_func(recipe.dynamic_wi8_afp32)
+    quant_result = self._quantize_with_recipe_func(
+        recipe.dynamic_wi8_afp32, self._test_model_path
+    )
+    self.assertLess(
+        len(quant_result.quantized_model),
+        os.path.getsize(self._test_model_path),
+    )
+
+  def test_quantization_from_dynamic_wi4_afp32_func_succeeds(self):
+    quant_result = self._quantize_with_recipe_func(
+        recipe.dynamic_wi4_afp32, self._test_model_path
+    )
+    self.assertLess(
+        len(quant_result.quantized_model),
+        os.path.getsize(self._test_model_path),
+    )
+
+  def test_quantization_from_weight_only_wi8_afp32_func_succeeds(self):
+    quant_result = self._quantize_with_recipe_func(
+        recipe.weight_only_wi8_afp32, self._test_model_path
+    )
+    self.assertLess(
+        len(quant_result.quantized_model),
+        os.path.getsize(self._test_model_path),
+    )
+
+  def test_quantization_from_weight_only_wi4_afp32_func_succeeds(self):
+    quant_result = self._quantize_with_recipe_func(
+        recipe.weight_only_wi4_afp32, self._test_model_path
+    )
     self.assertLess(
         len(quant_result.quantized_model),
         os.path.getsize(self._test_model_path),
@@ -52,11 +95,12 @@ class RecipeTest(parameterized.TestCase):

   def test_quantization_from_dynamic_legacy_wi8_afp32_func_succeeds(self):
     quant_result = self._quantize_with_recipe_func(
-        recipe.dynamic_legacy_wi8_afp32
+        recipe.dynamic_legacy_wi8_afp32,
+        self._small_model_path,
     )
     self.assertLen(
         quant_result.quantized_model,
-        os.path.getsize(self._test_model_path),
+        os.path.getsize(self._small_model_path),
     )

   @parameterized.named_parameters(
@@ -65,28 +109,54 @@ class RecipeTest(parameterized.TestCase):
           recipe_json_path='recipes/dynamic_wi8_afp32_recipe.json',
           recipe_func=recipe.dynamic_wi8_afp32,
       ),
+      dict(
+          testcase_name='weight_only_wi8_afp32',
+          recipe_json_path='recipes/default_af32w8float_recipe.json',
+          recipe_func=recipe.weight_only_wi8_afp32,
+      ),
+      dict(
+          testcase_name='weight_only_wi4_afp32',
+          recipe_json_path='recipes/default_af32w4float_recipe.json',
+          recipe_func=recipe.weight_only_wi4_afp32,
+      ),
       dict(
           testcase_name='dynamic_legacy_wi8_afp32',
           recipe_json_path='recipes/dynamic_legacy_wi8_afp32_recipe.json',
           recipe_func=recipe.dynamic_legacy_wi8_afp32,
       ),
+      dict(
+          testcase_name='a8w8',
+          recipe_json_path='recipes/default_a8w8_recipe.json',
+          recipe_func=recipe.static_wi8_ai8,
+      ),
+      dict(
+          testcase_name='a16w8',
+          recipe_json_path='recipes/default_a16w8_recipe.json',
+          recipe_func=recipe.static_wi8_ai16,
+      ),
   )
   def test_recipe_func_and_json_matches(self, recipe_json_path, recipe_func):
     # Quantize with recipe from function in recipe module.
-    quant_result_from_func = self._quantize_with_recipe_func(recipe_func)
+    quant_result_from_func = self._quantize_with_recipe_func(
+        recipe_func, self._test_model_path
+    )

     # Quantize with recipe from json file.
     qt_json = quantizer.Quantizer(self._test_model_path)
     json_recipe_path = os.path.join(_TEST_DATA_PREFIX_PATH, recipe_json_path)
     qt_json.load_quantization_recipe(json_recipe_path)
-    quant_result_from_json = qt_json.quantize()
+    if qt_json.need_calibration:
+      calibration_data = tfl_interpreter_utils.create_random_normal_input_data(
+          qt_json.float_model,
+          num_samples=1,
+      )
+      calibration_result = qt_json.calibrate(calibration_data)
+      quant_result_from_json = qt_json.quantize(calibration_result)
+    else:
+      quant_result_from_json = qt_json.quantize()
     self.assertIsNotNone(quant_result_from_json.quantized_model)

-    # Check if the recipes and quantized models match.
-    self.assertEqual(
-        quant_result_from_func.recipe,
-        quant_result_from_json.recipe,
-    )
+    # Check if the quantized models match.
     self.assertEqual(
         len(quant_result_from_func.quantized_model),
         len(quant_result_from_json.quantized_model),
--- a/ai_edge_quantizer/utils/tfl_flatbuffer_utils.py
+++ b/ai_edge_quantizer/utils/tfl_flatbuffer_utils.py
@@ -51,6 +51,7 @@ TFL_OP_NAME_TO_CODE = immutabledict.immutabledict({
     _TFLOpName.LOGISTIC: schema.BuiltinOperator.LOGISTIC,
     _TFLOpName.SLICE: schema.BuiltinOperator.SLICE,
     _TFLOpName.SUM: schema.BuiltinOperator.SUM,
+    _TFLOpName.SELECT: schema.BuiltinOperator.SELECT,
     _TFLOpName.SELECT_V2: schema.BuiltinOperator.SELECT_V2,
     _TFLOpName.STABLEHLO_COMPOSITE: schema.BuiltinOperator.STABLEHLO_COMPOSITE,
     _TFLOpName.DYNAMIC_UPDATE_SLICE: (
--- a/ai_edge_quantizer_nightly-0.3.0.dev20250814.dist-info/METADATA
+++ b/ai_edge_quantizer_nightly-0.3.0.dev20250816.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-quantizer-nightly
-Version: 0.3.0.dev20250814
+Version: 0.3.0.dev20250816
 Summary: A quantizer for advanced developers to quantize converted AI Edge models.
 Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
 Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
--- a/ai_edge_quantizer_nightly-0.3.0.dev20250814.dist-info/RECORD
+++ b/ai_edge_quantizer_nightly-0.3.0.dev20250816.dist-info/RECORD
@@ -1,24 +1,24 @@
 ai_edge_quantizer/__init__.py,sha256=4pFSkukSwahYyzwqia0yPRyz8TnFQfGRthVJhYpMWas,793
-ai_edge_quantizer/algorithm_manager.py,sha256=wiX9MTXUEMyUnhQv06EVFtLgFgM6gWCezGUXKotnPPU,13306
+ai_edge_quantizer/algorithm_manager.py,sha256=O_psY-4R0ARmgTQHwfH2px81AJY8PmfamHtE7xJDRjQ,13424
 ai_edge_quantizer/algorithm_manager_api.py,sha256=u903TG0s1uIDhJqfeJne3CFl8A93phZrwgV2-hwdcXU,9247
 ai_edge_quantizer/algorithm_manager_api_test.py,sha256=w6bSONvXkX6bzXAGc0-7b6gNDt9oz9ieq97KP8Sg_JU,7666
 ai_edge_quantizer/calibrator.py,sha256=Sms7_AIHPH9G5xFaz5Ef3a5gPhxuIWQI8d2LUM8C96I,12071
-ai_edge_quantizer/calibrator_test.py,sha256=ejKc5YC7id8J1Ll9HAYCzMnKzxd0FUENSD06zkSSV0c,11900
+ai_edge_quantizer/calibrator_test.py,sha256=ZLzIMWB2FSFU4TOatDioYuwp_kLh8iSCefZ5_Q9FU7s,11900
 ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
-ai_edge_quantizer/default_policy.py,sha256=OyWc3fm5A7L28VjMag50yNbQXWJ7GX17JMpwn_pz1GA,11524
+ai_edge_quantizer/default_policy.py,sha256=LXEdwdr0SiCfWo6ZwbHQ8ykoqA40GV6fGAT1aofry3o,11556
 ai_edge_quantizer/model_modifier.py,sha256=teGa8I6kGvn6TQY6Xv53YFIc_pQEhNvM9Zb4bvhezyw,7110
 ai_edge_quantizer/model_modifier_test.py,sha256=cJd04SLOG-fQZZNZPcisoBLx3cLtWEwGqUBbLb-pif4,4751
 ai_edge_quantizer/model_validator.py,sha256=Hj0_5o-Oa3dSlJ3ryVjRhvsyelHNyek1GrtG9buMczg,13153
 ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
 ai_edge_quantizer/params_generator.py,sha256=hcgMHJlERZERUyIAEi6AHJcLJ8gsKIBAEojzFFz-tqk,20098
 ai_edge_quantizer/params_generator_test.py,sha256=RDYoRZDJfEZRtjlTAU2kZ_4t3JHOqEHxfJX9V4ETAhg,40597
-ai_edge_quantizer/qtyping.py,sha256=62OjOcIPV477bICisjU822L6EtnFoJV0kmWgVSrJxXg,16772
-ai_edge_quantizer/quantizer.py,sha256=WeKwhh8cYZ07DUwvS0S1EdNzEZSfPODlynqIBvJ-Br4,13624
-ai_edge_quantizer/quantizer_test.py,sha256=K_HBA56JkFI3HL8VLWCqGEfC0ISh5ldMKoNyBdGRAJg,20368
-ai_edge_quantizer/recipe.py,sha256=FR0uJceumZrnle2VRSOQZ1uXup4S1cTYKRH-N53mWRo,2919
-ai_edge_quantizer/recipe_manager.py,sha256=qcGUD7e7BISKdsY9WH2rdaRR3acmzSA5qMezGNbzlpo,8931
-ai_edge_quantizer/recipe_manager_test.py,sha256=GVOfGFZPRciUb4EF4GkSi6d96LdjS6PbUkAJ0ayy0k8,32243
-ai_edge_quantizer/recipe_test.py,sha256=Fg_sfxovI2fRjk5qdu18ghOvXdUvhDR1TxbE0GHDczc,3381
+ai_edge_quantizer/qtyping.py,sha256=zXXmLBZUT-cfjnQrqDkytDZaGg3z_yy1wWhKr34_XVg,16792
+ai_edge_quantizer/quantizer.py,sha256=ckAEOnnBxuCKZuvlzdChevCKPuE-IeDPHCNtFTWr250,17857
+ai_edge_quantizer/quantizer_test.py,sha256=m6f4ayyaF3yQb9i4V0aFAbmGw0OKZ2Zam1RoTPh-u24,22917
+ai_edge_quantizer/recipe.py,sha256=MEkfQ2Sg3KAE9LAORHWcbjYNPg06EUbwc1d-VspQA2U,6461
+ai_edge_quantizer/recipe_manager.py,sha256=6dgbE-IZfEetzXH3p3Qm_9eQutNDOpZnMpiaLTbP-ZQ,14744
+ai_edge_quantizer/recipe_manager_test.py,sha256=H-B75vwPN5ND-nUa3pOXizeHTv4mufPiC5cL_OlDIYU,34040
+ai_edge_quantizer/recipe_test.py,sha256=GKuo6N65wKLS2xwSpjd-BWWeVRpF1zc7Yt7phSMYSxA,5905
 ai_edge_quantizer/transformation_instruction_generator.py,sha256=iMGXy7_ufqgQRzu4drAfO31VGdze35peEFh1BMZlVHk,27714
 ai_edge_quantizer/transformation_instruction_generator_test.py,sha256=Zw3EOSnvzjuB4NWeo129eJZxK_EHno9oF9OtEQ-0dnM,48905
 ai_edge_quantizer/transformation_performer.py,sha256=o4J6OUbI0dLoobVYjkOFw5Po3yH0gZJXrfuTIYais4o,13029
@@ -28,7 +28,7 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCP
 ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
 ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=EqIHGEZ1LgUrTN7zf880RuAzEv3Qy7kgh5ivObJGHSo,22646
 ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
-ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=7H_QHhFKqVxGjhYp-_MXMAUR_VBk2kpATbta9EdhId0,34749
+ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=rkf7jLPVDKpx2ju1LyyP7bxc6n34cLD2E3w2mxLd6qE,35344
 ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=GGf_n3wIeg3GB_eGsmyNJ0fTcxgpeMMbugTMRONK6TQ,3553
 ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=BDdn_uBZakfHyzdMJPKadsOqxqyC-s6W2ZzFH99L4fE,8652
 ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=sT5eX5TLZEHTtPfnSkCPDlS0sQxlTFWbCsbvOuj--yY,8889
@@ -38,7 +38,7 @@ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1
 ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
 ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
 ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
-ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=0d5XdNbjR2pVsAc-gWX3ik_pAIL-bZ-zemEz_jS2d0c,16531
+ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=uCREMXi0U2ckhXXfgGVzwSgjFZc0IbtnFU-OjlG9IO8,17146
 ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=7kHluzpteMv36hFD6LD_qnwwMoE1GKUP4bGmGMFbOdA,12755
 ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
 ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=QrEeCuvA7gY_vK1nbKtqassNDClyAjN1ClZIiw63k5U,35895
@@ -62,14 +62,14 @@ ai_edge_quantizer/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V
 ai_edge_quantizer/utils/calibration_utils.py,sha256=e3dG7Nm94Ix0hkTWTWPUhEG6a8QR_cAM3PSwblfJV5g,15106
 ai_edge_quantizer/utils/calibration_utils_test.py,sha256=4BlksXl7b4yptL8xPR67hmJCnjhN9V10a2PunzfHrUE,9372
 ai_edge_quantizer/utils/test_utils.py,sha256=a4Nk-wbeB09dFjTDZiA0K67d26j5DD0UDH_GIVmVG_4,8685
-ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=5QcNk7VTpVJzHhj6wuU4kN-nO609oFvRWVD-9Jbdky0,11330
+ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=RL6oq6FzZj-xV0Zgh0UBn7-fOQaRXSxZ-PPG_LmtyUY,11384
 ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py,sha256=K1SbK8q92qYVtiVj0I0GtugsPTkpIpEKv9zakvFV_Sc,8555
 ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOihexmizeJqt4SQcET9aA,14925
 ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
 ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
 ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
-ai_edge_quantizer_nightly-0.3.0.dev20250814.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ai_edge_quantizer_nightly-0.3.0.dev20250814.dist-info/METADATA,sha256=t8-2KqfypcSgLS580360EqY75j4Pw0r9ykk1Q7nyQyo,1528
-ai_edge_quantizer_nightly-0.3.0.dev20250814.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_quantizer_nightly-0.3.0.dev20250814.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
-ai_edge_quantizer_nightly-0.3.0.dev20250814.dist-info/RECORD,,
+ai_edge_quantizer_nightly-0.3.0.dev20250816.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ai_edge_quantizer_nightly-0.3.0.dev20250816.dist-info/METADATA,sha256=WJS_fEwcsNxU--TBLTULHimxyPiJHLhkDrG5b7MWwrg,1528
+ai_edge_quantizer_nightly-0.3.0.dev20250816.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_quantizer_nightly-0.3.0.dev20250816.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
+ai_edge_quantizer_nightly-0.3.0.dev20250816.dist-info/RECORD,,