ai-edge-quantizer-nightly 0.4.0.dev20251027__py3-none-any.whl → 0.4.0.dev20251029__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +22 -25
  2. ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py +1 -1
  3. ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py +1 -2
  4. ai_edge_quantizer/algorithms/uniform_quantize/mse.py +5 -3
  5. ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py +1 -1
  6. ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py +6 -11
  7. ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py +18 -14
  8. ai_edge_quantizer/algorithms/uniform_quantize/octav.py +9 -5
  9. ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py +1 -2
  10. ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py +39 -12
  11. ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py +5 -2
  12. ai_edge_quantizer/algorithms/utils/common_utils.py +4 -7
  13. ai_edge_quantizer/default_policy.py +5 -13
  14. ai_edge_quantizer/qtyping.py +24 -3
  15. ai_edge_quantizer/quantizer_test.py +38 -2
  16. ai_edge_quantizer/recipe_manager_test.py +0 -6
  17. {ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info}/METADATA +1 -1
  18. {ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info}/RECORD +21 -21
  19. {ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info}/LICENSE +0 -0
  20. {ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info}/WHEEL +0 -0
  21. {ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info → ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info}/top_level.txt +0 -0
@@ -1165,39 +1165,36 @@ def init_tensor_min_max(
1165
1165
  A dictionary containing the min/max values for the tensor, or an empty
1166
1166
  dictionary if the tensor data is None.
1167
1167
  """
1168
- if tensor_data is None:
1168
+ weight_tensor_config = op_info.op_quant_config.weight_tensor_config
1169
+ if tensor_data is None or weight_tensor_config is None:
1169
1170
  return {}
1170
1171
  else:
1171
- weight_tensor_config = op_info.op_quant_config.weight_tensor_config
1172
- quantized_dim = None
1173
- if weight_tensor_config is not None and (
1174
- weight_tensor_config.granularity == qtyping.QuantGranularity.CHANNELWISE
1175
- ):
1172
+ # Get reduce dimension for min/max calculation based on quantization
1173
+ # granularity.
1174
+ granularity = weight_tensor_config.granularity
1175
+ if granularity == qtyping.QuantGranularity.TENSORWISE:
1176
+ reduce_dims = None
1177
+ keep_dims = True
1178
+ elif granularity == qtyping.QuantGranularity.CHANNELWISE:
1176
1179
  quantized_dim = common_utils.get_weight_quantized_dim(
1177
1180
  op_info, tensor_data, weight_tensor_config.granularity
1178
1181
  )
1179
- if (
1180
- weight_tensor_config is not None
1181
- and weight_tensor_config.granularity
1182
- == qtyping.QuantGranularity.BLOCKWISE
1183
- ):
1184
- reshaped_data, reduce_dims = (
1182
+ reduce_dims = common_utils.get_reduce_dims(
1183
+ quantized_dim, tensor_data.shape
1184
+ )
1185
+ keep_dims = True
1186
+ elif uniform_quantize_tensor.is_blockwise(granularity):
1187
+ tensor_data, reduce_dims = (
1185
1188
  uniform_quantize_tensor.reshape_data_for_blockwise(
1186
1189
  tensor_data,
1187
1190
  op_info.op_name,
1188
- weight_tensor_config.block_size,
1191
+ granularity,
1189
1192
  )
1190
1193
  )
1191
- return {
1192
- "min": np.min(reshaped_data, axis=reduce_dims, keepdims=False),
1193
- "max": np.max(reshaped_data, axis=reduce_dims, keepdims=False),
1194
- }
1195
-
1194
+ keep_dims = False
1196
1195
  else:
1197
- reduce_dims = common_utils.get_reduce_dims(
1198
- quantized_dim, tensor_data.shape
1199
- )
1200
- return {
1201
- "min": np.min(tensor_data, axis=reduce_dims, keepdims=True),
1202
- "max": np.max(tensor_data, axis=reduce_dims, keepdims=True),
1203
- }
1196
+ raise ValueError(f"Unsupported granularity: {granularity}")
1197
+ return {
1198
+ "min": np.min(tensor_data, axis=reduce_dims, keepdims=keep_dims),
1199
+ "max": np.max(tensor_data, axis=reduce_dims, keepdims=keep_dims),
1200
+ }
@@ -158,7 +158,7 @@ def get_tensor_quant_params(
158
158
  op_info, tensor_quant_config, tensor_content, tensor_qsv
159
159
  )
160
160
 
161
- if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
161
+ if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
162
162
  raise ValueError(
163
163
  "Blockwise quantization is not supported for dequantized weight"
164
164
  " recovery."
@@ -147,8 +147,7 @@ class HadamardRotationFullyConnectedTest(parameterized.TestCase):
147
147
  weight_tensor_config=_TensorQuantConfig(
148
148
  num_bits=8,
149
149
  symmetric=True,
150
- granularity=qtyping.QuantGranularity.BLOCKWISE,
151
- block_size=32,
150
+ granularity=qtyping.QuantGranularity.BLOCKWISE_32,
152
151
  ),
153
152
  ),
154
153
  )
@@ -55,7 +55,7 @@ def get_tensor_quant_params(
55
55
  ValueError: `tensor_qsv` must contain min/max values, or `tensor_content`
56
56
  must be provided so that they can be inferred.
57
57
  """
58
- if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
58
+ if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
59
59
  raise ValueError(
60
60
  "Blockwise quantization is not supported for MSE quantization."
61
61
  )
@@ -113,13 +113,15 @@ def get_tensor_quant_params(
113
113
  num_bits=tensor_quant_config.num_bits,
114
114
  symmetric=tensor_quant_config.symmetric,
115
115
  quantized_dimension=quantized_dim,
116
- block_size=tensor_quant_config.block_size,
116
+ block_size=uniform_quantize_tensor.extract_block_size_from_granularity(
117
+ tensor_quant_config.granularity
118
+ ),
117
119
  )
118
120
 
119
121
  quantized_vars = uniform_quantize_tensor.uniform_quantize(
120
122
  tensor_content,
121
123
  quant_params,
122
- tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE,
124
+ uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity),
123
125
  )
124
126
 
125
127
  return dataclasses.replace(quant_params, quantized_data=quantized_vars)
@@ -84,7 +84,7 @@ class MseQuantizeTest(parameterized.TestCase):
84
84
  tensor_quant_config=qtyping.TensorQuantizationConfig(
85
85
  num_bits=4,
86
86
  symmetric=True,
87
- granularity=qtyping.QuantGranularity.BLOCKWISE,
87
+ granularity=qtyping.QuantGranularity.BLOCKWISE_32,
88
88
  ),
89
89
  tensor_content=test_data,
90
90
  )
@@ -15,6 +15,7 @@
15
15
 
16
16
  """Performs naive min/max uniform quantization."""
17
17
 
18
+ import dataclasses
18
19
  from typing import Any, Optional
19
20
  import numpy as np
20
21
  from ai_edge_quantizer import qtyping
@@ -91,7 +92,9 @@ def get_tensor_quant_params(
91
92
  num_bits=tensor_quant_config.num_bits,
92
93
  symmetric=tensor_quant_config.symmetric,
93
94
  quantized_dimension=quantized_dim,
94
- block_size=tensor_quant_config.block_size,
95
+ block_size=uniform_quantize_tensor.extract_block_size_from_granularity(
96
+ tensor_quant_config.granularity
97
+ ),
95
98
  )
96
99
  if tensor_content is None:
97
100
  return quant_params
@@ -99,18 +102,10 @@ def get_tensor_quant_params(
99
102
  quantized_vars = uniform_quantize_tensor.uniform_quantize(
100
103
  tensor_content,
101
104
  quant_params,
102
- tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE,
105
+ uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity),
103
106
  )
104
107
  # Update with quantized values.
105
- return qtyping.UniformQuantParams(
106
- scale=scale,
107
- zero_point=zp,
108
- num_bits=tensor_quant_config.num_bits,
109
- symmetric=tensor_quant_config.symmetric,
110
- quantized_dimension=quantized_dim,
111
- quantized_data=quantized_vars,
112
- block_size=tensor_quant_config.block_size,
113
- )
108
+ return dataclasses.replace(quant_params, quantized_data=quantized_vars)
114
109
 
115
110
 
116
111
  # TODO: b/333731147 - Use named tuple to store min/max.
@@ -17,6 +17,7 @@ import os
17
17
  from typing import cast
18
18
 
19
19
  from absl.testing import parameterized
20
+ import ml_dtypes
20
21
  import numpy as np
21
22
 
22
23
  from tensorflow.python.platform import googletest
@@ -165,8 +166,7 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
165
166
  weight_tensor_config = _TensorQuantConfig(
166
167
  num_bits=4,
167
168
  symmetric=True,
168
- granularity=qtyping.QuantGranularity.BLOCKWISE,
169
- block_size=2,
169
+ granularity=qtyping.QuantGranularity.BLOCKWISE_32,
170
170
  )
171
171
  op_info = qtyping.OpInfo(
172
172
  op=fc_op,
@@ -176,28 +176,32 @@ class NaiveMinMaxQuantizeTest(parameterized.TestCase):
176
176
  weight_tensor_config=weight_tensor_config,
177
177
  ),
178
178
  )
179
- test_data = np.array([[-7, 7], [4, -4], [4, -4], [7, 7]])
179
+ test_data = np.random.uniform(low=-10, high=10, size=(4, 32)).astype(
180
+ np.float32
181
+ )
180
182
  quant_params = naive_min_max_quantize.get_tensor_quant_params(
181
183
  op_info=op_info,
182
184
  tensor_quant_config=weight_tensor_config,
183
185
  tensor_content=test_data,
184
186
  )
185
- scale = quant_params.scale
186
187
  zp = quant_params.zero_point
187
- expected_scale = np.array([
188
- [1],
189
- [0.5703125],
190
- [0.5703125],
191
- [1],
192
- ])
193
- expected_zp = np.zeros([4, 1])
194
- self.assertTrue(np.array_equal(zp, expected_zp))
195
- self.assertTrue(np.array_equal(scale, expected_scale))
188
+ self.assertEqual(zp.shape, (4, 1))
189
+ self.assertTrue(np.array_equal(zp, np.zeros([4, 1])))
190
+
191
+ self.assertEqual(quant_params.scale.shape, (4, 1))
192
+ expected_scales = np.max(np.abs(test_data), axis=1, keepdims=True) / 7.0
193
+ expected_scales = (
194
+ expected_scales.astype(ml_dtypes.bfloat16)
195
+ .astype(np.float16)
196
+ .astype(np.float32)
197
+ )
198
+ self.assertTrue(np.allclose(quant_params.scale, expected_scales, atol=1e-5))
199
+
196
200
  self.assertIsNotNone(quant_params.quantized_data)
197
201
  self.assertTupleEqual(
198
202
  cast(np.ndarray, quant_params.quantized_data).shape, test_data.shape
199
203
  )
200
- self.assertEqual(quant_params.block_size, 2)
204
+ self.assertEqual(quant_params.block_size, 32)
201
205
  self.assertEqual(quant_params.quantized_dimension, 1)
202
206
 
203
207
  def test_calibrate_ignores_inf_min_max(self):
@@ -131,12 +131,12 @@ def get_tensor_quant_params(
131
131
  quantized_dim = common_utils.get_weight_quantized_dim(
132
132
  op_info, tensor_content, tensor_quant_config.granularity
133
133
  )
134
- if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
134
+ if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
135
135
  reshaped_data, reduce_dims = (
136
136
  uniform_quantize_tensor.reshape_data_for_blockwise(
137
137
  tensor_content,
138
138
  op_info.op_name,
139
- tensor_quant_config.block_size,
139
+ tensor_quant_config.granularity,
140
140
  )
141
141
  )
142
142
  else:
@@ -154,7 +154,7 @@ def get_tensor_quant_params(
154
154
  # We created a new dimension in order to reduce properly for blockwise
155
155
  # quantization, so we need to reshape the clipping constants back to the
156
156
  # min/max shape for the next step.
157
- if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
157
+ if uniform_quantize_tensor.is_blockwise(tensor_quant_config.granularity):
158
158
  clipping_constants = clipping_constants.reshape(tensor_min_max["min"].shape)
159
159
 
160
160
  zp, scale = uniform_quantize_tensor.tensor_zp_scale_from_min_max(
@@ -172,13 +172,17 @@ def get_tensor_quant_params(
172
172
  num_bits=tensor_quant_config.num_bits,
173
173
  symmetric=tensor_quant_config.symmetric,
174
174
  quantized_dimension=quantized_dim,
175
- block_size=tensor_quant_config.block_size,
175
+ block_size=uniform_quantize_tensor.extract_block_size_from_granularity(
176
+ tensor_quant_config.granularity
177
+ ),
176
178
  )
177
179
 
178
180
  quantized_vars = uniform_quantize_tensor.uniform_quantize(
179
181
  tensor_content,
180
182
  quant_params,
181
- tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE,
183
+ is_blockwise_quant=uniform_quantize_tensor.is_blockwise(
184
+ tensor_quant_config.granularity
185
+ ),
182
186
  )
183
187
 
184
188
  return dataclasses.replace(quant_params, quantized_data=quantized_vars)
@@ -196,8 +196,7 @@ class OctavQuantizeTest(parameterized.TestCase):
196
196
  tensor_config = qtyping.TensorQuantizationConfig(
197
197
  num_bits=4,
198
198
  symmetric=True,
199
- granularity=qtyping.QuantGranularity.BLOCKWISE,
200
- block_size=32,
199
+ granularity=qtyping.QuantGranularity.BLOCKWISE_32,
201
200
  )
202
201
  fc_op_info = qtyping.OpInfo(
203
202
  op=self._fc_op,
@@ -29,6 +29,11 @@ class IntType:
29
29
  signed: bool
30
30
 
31
31
 
32
+ def is_blockwise(granularity: qtyping.QuantGranularity) -> bool:
33
+ """Checks if the quantization granularity is blockwise."""
34
+ return "BLOCKWISE" in str(granularity)
35
+
36
+
32
37
  def get_quantized_range(qtype: IntType) -> tuple[float, float]:
33
38
  """Calculates range of the quantized type."""
34
39
  if qtype.signed:
@@ -40,6 +45,22 @@ def get_quantized_range(qtype: IntType) -> tuple[float, float]:
40
45
  return float(qmin), float(qmax)
41
46
 
42
47
 
48
+ def extract_block_size_from_granularity(
49
+ granularity: qtyping.QuantGranularity,
50
+ ) -> int:
51
+ """Get the block size for blockwise quantization."""
52
+ if granularity == qtyping.QuantGranularity.BLOCKWISE_32:
53
+ return 32
54
+ elif granularity == qtyping.QuantGranularity.BLOCKWISE_64:
55
+ return 64
56
+ elif granularity == qtyping.QuantGranularity.BLOCKWISE_128:
57
+ return 128
58
+ elif granularity == qtyping.QuantGranularity.BLOCKWISE_256:
59
+ return 256
60
+ else:
61
+ return 0
62
+
63
+
43
64
  def _round_and_clip(
44
65
  tensor: np.ndarray, qtype: IntType, narrow: bool
45
66
  ) -> np.ndarray:
@@ -157,14 +178,16 @@ def _get_tensor_shape_for_blockwise(
157
178
 
158
179
 
159
180
  def reshape_data_for_blockwise(
160
- tensor_data: np.ndarray, op_name: qtyping.TFLOperationName, block_size: int
181
+ tensor_data: np.ndarray,
182
+ op_name: qtyping.TFLOperationName,
183
+ granularity: qtyping.QuantGranularity,
161
184
  ) -> tuple[np.ndarray, int]:
162
185
  """Reshapes data for blockwise quantization.
163
186
 
164
187
  Args:
165
188
  tensor_data: The original tensor data.
166
189
  op_name: The name of the TFL op.
167
- block_size: The size of the block.
190
+ granularity: The quantization granularity for the tensor.
168
191
 
169
192
  Returns:
170
193
  A tuple containing the reshaped tensor data and the new reduce dimension.
@@ -172,11 +195,11 @@ def reshape_data_for_blockwise(
172
195
  quantized_dim = tfl_flatbuffer_utils.TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM[
173
196
  op_name
174
197
  ]
198
+ block_size = extract_block_size_from_granularity(granularity)
175
199
  new_shape = _get_tensor_shape_for_blockwise(
176
200
  tensor_data.shape, quantized_dim, block_size
177
201
  )
178
- reshaped_data = tensor_data.reshape(new_shape)
179
- return reshaped_data, quantized_dim + 1
202
+ return tensor_data.reshape(new_shape), quantized_dim + 1
180
203
 
181
204
 
182
205
  def _broadcast_scale_zp_for_blockwise(
@@ -233,21 +256,21 @@ def _broadcast_scale_zp_for_blockwise(
233
256
  def uniform_quantize(
234
257
  tensor_data: np.ndarray,
235
258
  quantization_params: qtyping.UniformQuantParams,
236
- is_blockwise: bool = False,
259
+ is_blockwise_quant: bool = False,
237
260
  ):
238
261
  """Uniform quantize a tensor.
239
262
 
240
263
  Args:
241
264
  tensor_data: The tensor to be quantized.
242
265
  quantization_params: The quantization parameters.
243
- is_blockwise: Whether the tensor is blockwise quantized.
266
+ is_blockwise_quant: Whether the tensor is blockwise quantized.
244
267
 
245
268
  Returns:
246
269
  The quantized tensor.
247
270
  """
248
271
  # The reshaping for blockwise quantization is unique hence we do this here
249
272
  # to avoid unexpected broadcast behavior downstream.
250
- if is_blockwise:
273
+ if is_blockwise_quant:
251
274
  quantization_params = _broadcast_scale_zp_for_blockwise(
252
275
  tensor_data, quantization_params
253
276
  )
@@ -381,10 +404,13 @@ def symmetric_quantize_bias_tensor(
381
404
  quantized_vars = uniform_quantize(bias_content, bias_quant_params)
382
405
  if check_error:
383
406
  dequantized_bias = uniform_dequantize(quantized_vars, bias_quant_params)
384
- quantization_error = np.abs(dequantized_bias - bias_content)
385
- if np.any(quantization_error > effective_output_scale):
407
+ max_quant_error = np.max(np.abs(dequantized_bias - bias_content))
408
+ error_tolerance = np.maximum(1e-6, np.max(effective_output_scale))
409
+ if max_quant_error > error_tolerance:
386
410
  raise ValueError(
387
- "Quantization error is too large for bias tensor quantization."
411
+ "Quantization error is too large for bias tensor quantization. Max"
412
+ f" quantization error is {max_quant_error}, which exceed"
413
+ f" the threshold {error_tolerance}"
388
414
  )
389
415
 
390
416
  # Save the int32 quantized bias as int64 if the input tensor is quantized to
@@ -432,6 +458,7 @@ def tensor_zp_scale_from_min_max(
432
458
  Returns:
433
459
  The zero point and scale of the tensor.
434
460
  """
461
+
435
462
  # TODO: b/332574603 - support unsigned data type.
436
463
  qtype = IntType(
437
464
  num_bits,
@@ -442,7 +469,7 @@ def tensor_zp_scale_from_min_max(
442
469
  pos_clipping_values = None if clipping_values is None else clipping_values
443
470
  neg_clipping_values = None if clipping_values is None else -clipping_values
444
471
 
445
- if granularity == qtyping.QuantGranularity.BLOCKWISE:
472
+ if is_blockwise(granularity):
446
473
  # Blockwise quantization uses float16 scale,
447
474
  # with 7 bit mantissa, so the maximum scale value is 65280 and maximum
448
475
  # representable range is [-65280 * (2 ** num_bits),
@@ -490,7 +517,7 @@ def tensor_zp_scale_from_min_max(
490
517
  zp = qmin - bound_min / scale
491
518
  zp = np.rint(zp)
492
519
 
493
- if granularity == qtyping.QuantGranularity.BLOCKWISE:
520
+ if is_blockwise(granularity):
494
521
  # Round the scale values to 7 bit mantissa.
495
522
  scale = (
496
523
  scale.astype(ml_dtypes.bfloat16).astype(np.float16).astype(np.float32)
@@ -222,7 +222,7 @@ class TensorUtilsTest(parameterized.TestCase):
222
222
  zero_point=np.array([-6]),
223
223
  symmetric=True,
224
224
  ),
225
- is_blockwise=True,
225
+ is_blockwise_quant=True,
226
226
  )
227
227
 
228
228
  @parameterized.parameters(
@@ -431,7 +431,10 @@ class TensorUtilsTest(parameterized.TestCase):
431
431
  )
432
432
  # This will result in quantized bias of 3e9, which is larger than int32 max.
433
433
  bias_tensor_data = np.array([3e7])
434
- with self.assertRaises(ValueError):
434
+ with self.assertRaisesRegex(
435
+ ValueError,
436
+ "Quantization error is too large for bias tensor quantization.",
437
+ ):
435
438
  uniform_quantize_tensor.symmetric_quantize_bias_tensor(
436
439
  bias_tensor_data,
437
440
  input_quant_config,
@@ -51,8 +51,9 @@ def check_subchannel_config(
51
51
  """Checks the op quantization config for subchannel quantization."""
52
52
  if (
53
53
  op_quant_config.weight_tensor_config is not None
54
- and op_quant_config.weight_tensor_config.granularity
55
- == qtyping.QuantGranularity.BLOCKWISE
54
+ and uniform_quantize_tensor.is_blockwise(
55
+ op_quant_config.weight_tensor_config.granularity
56
+ )
56
57
  ):
57
58
  if op_name not in _SUPPORTED_SUBCHANNEL_OPS:
58
59
  raise ValueError(f"Unsupported op for blockwise quantization: {op_name}.")
@@ -66,10 +67,6 @@ def check_subchannel_config(
66
67
  "Blockwise quantization does not support for asymmetric weight"
67
68
  " quantization."
68
69
  )
69
- if op_quant_config.weight_tensor_config.block_size <= 0:
70
- raise ValueError(
71
- "Blockwise quantization must have a non-zero block size."
72
- )
73
70
 
74
71
 
75
72
  def check_if_valid_op_config(
@@ -993,7 +990,7 @@ def get_weight_quantized_dim(
993
990
  quantized_dim = tfl_flatbuffer_utils.TFL_OP_TO_WEIGHT_QUANTIZED_DIM.get(
994
991
  op_info.op_name, None
995
992
  )
996
- elif granularity == qtyping.QuantGranularity.BLOCKWISE:
993
+ elif uniform_quantize_tensor.is_blockwise(granularity):
997
994
  quantized_dim = (
998
995
  tfl_flatbuffer_utils.TFL_OP_TO_BLOCKWISE_WEIGHT_QUANTIZED_DIM[
999
996
  op_info.op_name
@@ -61,9 +61,8 @@ DEFAULT_JSON_POLICY = """
61
61
  "weight_tensor_config": {
62
62
  "num_bits": 4,
63
63
  "symmetric": [true],
64
- "granularity": ["BLOCKWISE"],
65
- "dtype": "INT",
66
- "block_size": [32, 64, 96, 128, 256]
64
+ "granularity": ["BLOCKWISE_32", "BLOCKWISE_64", "BLOCKWISE_128", "BLOCKWISE_256"],
65
+ "dtype": "INT"
67
66
  },
68
67
  "explicit_dequantize": false,
69
68
  "compute_precision": "INTEGER"
@@ -320,16 +319,9 @@ def _unroll_json_config(
320
319
  "granularity": granularity,
321
320
  "dtype": json_config["weight_tensor_config"]["dtype"],
322
321
  }
323
- if "block_size" in json_config["weight_tensor_config"]:
324
- for block_size in json_config["weight_tensor_config"]["block_size"]:
325
- tensor_config["block_size"] = block_size
326
- weight_configs.append(
327
- qtyping.TensorQuantizationConfig.from_dict(tensor_config)
328
- )
329
- else:
330
- weight_configs.append(
331
- qtyping.TensorQuantizationConfig.from_dict(tensor_config)
332
- )
322
+ weight_configs.append(
323
+ qtyping.TensorQuantizationConfig.from_dict(tensor_config)
324
+ )
333
325
 
334
326
  if activation_configs:
335
327
  for activation_config in activation_configs:
@@ -112,7 +112,11 @@ class TensorDataType(str, enum.Enum):
112
112
  class QuantGranularity(str, enum.Enum):
113
113
  TENSORWISE = 'TENSORWISE'
114
114
  CHANNELWISE = 'CHANNELWISE'
115
- BLOCKWISE = 'BLOCKWISE'
115
+ # Blockwise quantization with various block sizes.
116
+ BLOCKWISE_32 = 'BLOCKWISE_32'
117
+ BLOCKWISE_64 = 'BLOCKWISE_64'
118
+ BLOCKWISE_128 = 'BLOCKWISE_128'
119
+ BLOCKWISE_256 = 'BLOCKWISE_256'
116
120
 
117
121
 
118
122
  class QuantTransformation(enum.Enum):
@@ -310,7 +314,6 @@ class TensorQuantizationConfig:
310
314
  granularity: Whether to perform per-tensor, per-channel or per-block
311
315
  quantization.
312
316
  dtype: The data type of the tensor.
313
- block_size: The block size for blockwise quantization, ignored otherwise.
314
317
  algorithm_key: The algorithm key to use for quantization.
315
318
  """
316
319
 
@@ -318,7 +321,6 @@ class TensorQuantizationConfig:
318
321
  symmetric: bool = True
319
322
  granularity: QuantGranularity = QuantGranularity.TENSORWISE
320
323
  dtype: TensorDataType = TensorDataType.INT
321
- block_size: int = 0
322
324
 
323
325
  def to_dict(self) -> dict[str, Any]:
324
326
  """Converts ActivationQuantizationConfig to dict."""
@@ -336,9 +338,28 @@ class TensorQuantizationConfig:
336
338
  def from_dict(cls, params: dict[str, Any]) -> 'TensorQuantizationConfig':
337
339
  """Converts a given dict to TensorQuantizationConfig."""
338
340
  params_copy = copy.deepcopy(params)
341
+ # Process block_size config from legacy recipe.
342
+ params_copy = _process_block_size(params_copy)
339
343
  return cls(**params_copy)
340
344
 
341
345
 
346
+ def _process_block_size(params: dict[str, Any]) -> dict[str, Any]:
347
+ """Processes block size in the params."""
348
+ block_size = params.pop('block_size', 0)
349
+ if block_size > 0:
350
+ if block_size == 32:
351
+ params['granularity'] = QuantGranularity.BLOCKWISE_32
352
+ elif block_size == 64:
353
+ params['granularity'] = QuantGranularity.BLOCKWISE_64
354
+ elif block_size == 128:
355
+ params['granularity'] = QuantGranularity.BLOCKWISE_128
356
+ elif block_size == 256:
357
+ params['granularity'] = QuantGranularity.BLOCKWISE_256
358
+ else:
359
+ raise ValueError(f'Unsupported block size: {block_size}')
360
+ return params
361
+
362
+
342
363
  @dataclasses.dataclass(frozen=True)
343
364
  class OpQuantizationConfig:
344
365
  """Configuration class to control the quantization process behavior.
@@ -309,6 +309,44 @@ class QuantizerTest(parameterized.TestCase):
309
309
  saved_recipe = json.load(json_file)
310
310
  self.assertEqual(saved_recipe, self._test_recipe)
311
311
 
312
+ def test_saved_legacy_recipe_lacks_block_size(self):
313
+ model_name = 'test_model'
314
+ legacy_recipe_path = os.path.join(
315
+ TEST_DATA_PREFIX_PATH,
316
+ 'recipes/dynamic_legacy_wi8_afp32_recipe.json',
317
+ )
318
+ self._quantizer.load_quantization_recipe(legacy_recipe_path)
319
+ result = self._quantizer.quantize()
320
+ result.save(self._tmp_save_path, model_name)
321
+ saved_recipe_path = os.path.join(
322
+ self._tmp_save_path, model_name + '_recipe.json'
323
+ )
324
+ with open(saved_recipe_path) as json_file:
325
+ saved_recipe = json.load(json_file)
326
+ with open(legacy_recipe_path) as json_file:
327
+ legacy_recipe = json.load(json_file)
328
+
329
+ self.assertNotEqual(saved_recipe, legacy_recipe)
330
+
331
+ # Verify that the default test recipe contains 'block_size'.
332
+ has_block_size = False
333
+ for config in legacy_recipe:
334
+ op_config = config.get('op_config')
335
+ if op_config:
336
+ weight_config = op_config.get('weight_tensor_config')
337
+ if weight_config and 'block_size' in weight_config:
338
+ has_block_size = True
339
+ break
340
+ self.assertTrue(has_block_size)
341
+
342
+ # Verify that the saved recipe does not have 'block_size'.
343
+ for config in saved_recipe:
344
+ op_config = config.get('op_config')
345
+ if op_config:
346
+ weight_config = op_config.get('weight_tensor_config')
347
+ if weight_config:
348
+ self.assertNotIn('block_size', weight_config)
349
+
312
350
  def test_save_no_quantize_raise_error(self):
313
351
  error_message = 'No quantized model to save.'
314
352
  with self.assertRaisesWithPredicateMatch(
@@ -535,14 +573,12 @@ class QuantizerMultiSignatureModelTest(parameterized.TestCase):
535
573
  'symmetric': False,
536
574
  'granularity': 'TENSORWISE',
537
575
  'dtype': 'INT',
538
- 'block_size': 0,
539
576
  },
540
577
  'weight_tensor_config': {
541
578
  'num_bits': 8,
542
579
  'symmetric': True,
543
580
  'granularity': 'CHANNELWISE',
544
581
  'dtype': 'INT',
545
- 'block_size': 0,
546
582
  },
547
583
  'compute_precision': 'INTEGER',
548
584
  'explicit_dequantize': False,
@@ -569,14 +569,12 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
569
569
  'symmetric': False,
570
570
  'granularity': _QuantGranularity.TENSORWISE,
571
571
  'dtype': 'INT',
572
- 'block_size': 0,
573
572
  },
574
573
  'weight_tensor_config': {
575
574
  'num_bits': 8,
576
575
  'symmetric': True,
577
576
  'granularity': _QuantGranularity.TENSORWISE,
578
577
  'dtype': 'INT',
579
- 'block_size': 0,
580
578
  },
581
579
  # WEIGHT_ONLY.
582
580
  'compute_precision': _ComputePrecision.INTEGER,
@@ -595,7 +593,6 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
595
593
  'num_bits': 8,
596
594
  'symmetric': True,
597
595
  'granularity': _QuantGranularity.TENSORWISE,
598
- 'block_size': 0,
599
596
  },
600
597
  # WEIGHT_ONLY.
601
598
  'compute_precision': _ComputePrecision.FLOAT,
@@ -614,7 +611,6 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
614
611
  'num_bits': 4,
615
612
  'symmetric': True,
616
613
  'granularity': _QuantGranularity.TENSORWISE,
617
- 'block_size': 0,
618
614
  },
619
615
  # WEIGHT_ONLY.
620
616
  'compute_precision': _ComputePrecision.FLOAT,
@@ -633,7 +629,6 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
633
629
  'num_bits': 6,
634
630
  'symmetric': True,
635
631
  'granularity': _QuantGranularity.TENSORWISE,
636
- 'block_size': 0,
637
632
  },
638
633
  # WEIGHT_ONLY.
639
634
  'compute_precision': _ComputePrecision.FLOAT,
@@ -652,7 +647,6 @@ class ConfiguratorTest(parameterized.TestCase, googletest.TestCase):
652
647
  'num_bits': 3,
653
648
  'symmetric': True,
654
649
  'granularity': _QuantGranularity.TENSORWISE,
655
- 'block_size': 0,
656
650
  },
657
651
  # WEIGHT_ONLY.
658
652
  'compute_precision': _ComputePrecision.FLOAT,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ai-edge-quantizer-nightly
3
- Version: 0.4.0.dev20251027
3
+ Version: 0.4.0.dev20251029
4
4
  Summary: A quantizer for advanced developers to quantize converted AI Edge models.
5
5
  Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
6
6
  Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -5,19 +5,19 @@ ai_edge_quantizer/algorithm_manager_api_test.py,sha256=w6bSONvXkX6bzXAGc0-7b6gND
5
5
  ai_edge_quantizer/calibrator.py,sha256=Sms7_AIHPH9G5xFaz5Ef3a5gPhxuIWQI8d2LUM8C96I,12071
6
6
  ai_edge_quantizer/calibrator_test.py,sha256=ZLzIMWB2FSFU4TOatDioYuwp_kLh8iSCefZ5_Q9FU7s,11900
7
7
  ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
8
- ai_edge_quantizer/default_policy.py,sha256=i_AcnIIElHqoJNc2jyJFEC2tYqfQ4Nvn4mQlTvormzk,11702
8
+ ai_edge_quantizer/default_policy.py,sha256=YcwwtVzoWUhjYgMtJ7b9f647740lURKteDOeJvwe17o,11384
9
9
  ai_edge_quantizer/model_modifier.py,sha256=U70JByv6CItP8tg4bdyMfX-R3UlwylAGSviZkF_FSAM,10468
10
10
  ai_edge_quantizer/model_modifier_test.py,sha256=CV4pgMEQkBJr_qbYR720TO8HBCutbEYLHptDHgdQMUE,7274
11
11
  ai_edge_quantizer/model_validator.py,sha256=Hj0_5o-Oa3dSlJ3ryVjRhvsyelHNyek1GrtG9buMczg,13153
12
12
  ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
13
13
  ai_edge_quantizer/params_generator.py,sha256=0w-sDGk84sVNkXoduon1wDqq30sGOHVgBVbdg44QVF4,20153
14
14
  ai_edge_quantizer/params_generator_test.py,sha256=RDYoRZDJfEZRtjlTAU2kZ_4t3JHOqEHxfJX9V4ETAhg,40597
15
- ai_edge_quantizer/qtyping.py,sha256=f2NRz4xqM-7gMe0QFpR4x2m5lzTJI3tmsT0cehO5Vsg,17232
15
+ ai_edge_quantizer/qtyping.py,sha256=y9KretGzUGztyLdmto2XV6U0cxrSrfLWP1UOVcwR4dY,18011
16
16
  ai_edge_quantizer/quantizer.py,sha256=teYeONdIS31IAY6ubLujCRi1t6lYAd0LkC8dRPxQdbw,18919
17
- ai_edge_quantizer/quantizer_test.py,sha256=9BVwt7oyM8IsSC7jN73nI0O-4MikBkymm_FigJnSeCM,27117
17
+ ai_edge_quantizer/quantizer_test.py,sha256=CqAT83gLWGIUacN7cAKxrefQ77-9MME4HzBKa421zdg,28446
18
18
  ai_edge_quantizer/recipe.py,sha256=MEkfQ2Sg3KAE9LAORHWcbjYNPg06EUbwc1d-VspQA2U,6461
19
19
  ai_edge_quantizer/recipe_manager.py,sha256=6l2uq8KL23KLu9OQDmPGkxrFiwHrdDB9xnn-ni8WdEM,15036
20
- ai_edge_quantizer/recipe_manager_test.py,sha256=qjgGUF-wggXnSXqZ5khmqrDMIQI5CShk52IVWTahq6s,36817
20
+ ai_edge_quantizer/recipe_manager_test.py,sha256=gYK3haUJ8-AISQvTI6tD-E-drJXQPSXPqBZdgpc5QTo,36595
21
21
  ai_edge_quantizer/recipe_test.py,sha256=QisyaTol8JRZFcGOGyee7QRCvqj5VbF4guKWdIoMUOE,6213
22
22
  ai_edge_quantizer/transformation_instruction_generator.py,sha256=O0U2aZcB8aXQgOV8r9g1rGNzDUiuI5Ta53XnxZbVffE,31576
23
23
  ai_edge_quantizer/transformation_instruction_generator_test.py,sha256=KW5-WoTTo9IqLEVnWxVC8ut8eWLi_91xfKgGqVQ9QDk,54635
@@ -28,22 +28,22 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCP
28
28
  ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
29
29
  ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=EqIHGEZ1LgUrTN7zf880RuAzEv3Qy7kgh5ivObJGHSo,22646
30
30
  ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
31
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=HF7aNccdDmCbZGZ21UxeO5UpSpQOLr3TiOEyLwWOVPQ,39888
31
+ ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=wrp4F2Wo9ammz_6VXFjXu04RMJV4_MxGfp4XyFMhZHc,39904
32
32
  ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=GGf_n3wIeg3GB_eGsmyNJ0fTcxgpeMMbugTMRONK6TQ,3553
33
- ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=BDdn_uBZakfHyzdMJPKadsOqxqyC-s6W2ZzFH99L4fE,8652
33
+ ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=VjBDxGxjITHJc7xJABqBbZt6_qhobtZAl2gnVQrYJgc,8652
34
34
  ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=sT5eX5TLZEHTtPfnSkCPDlS0sQxlTFWbCsbvOuj--yY,8889
35
35
  ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation.py,sha256=qxt9CPDcidVWIxp5nSWPN2hKKj1XZcsOOLBd2SYIvW0,14572
36
- ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py,sha256=mgv6aGIqQouxfA8_GacuGdOftvL75XBF1_h5tlCCYJQ,15468
37
- ai_edge_quantizer/algorithms/uniform_quantize/mse.py,sha256=qiIyzogATGVxjYwxzH0cZvgwPSPBJv_3y8NSumHZXTk,4561
38
- ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py,sha256=-_P4jQJ7gVo0FNSapP3sIGcnhwfjQHW1AKLfoiAlS_s,7142
39
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=1sB2j1vlvvWDKyjcGvA_JLCpN2KbCmMslGCBUc4--V4,8461
40
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=nscKDvNb14ErZdAfG0aXRWyRs6bTvhMqMjKx2vxvUK0,8725
41
- ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=Umxh4kJyeHddZf-Wd4aXE5MTI1XWFa5KRuM17uYU714,6922
42
- ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=sha1d99Xk87bI87tgz0g5LeDC-EeE4WMfM5rRC98-m4,9140
43
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=0bLDAjCm5wxasGXKT3XiS4quk-zXlWK6JKb-14FQAd4,19570
44
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=0xOdoIWuEo9JlXvZdX_Gbq5lfwCwEcG7RwOxhXAFOOY,15939
36
+ ai_edge_quantizer/algorithms/uniform_quantize/hadamard_rotation_test.py,sha256=1ejj5WS3GZwFk3qpsPiPS8jcmVS1-e7zRmvj2Nj8fKw,15440
37
+ ai_edge_quantizer/algorithms/uniform_quantize/mse.py,sha256=EP5yPw6khAhTo6VNTPXEE2aGKLfNnqz8COeJnTKaGWs,4641
38
+ ai_edge_quantizer/algorithms/uniform_quantize/mse_test.py,sha256=-E1LIlxadckspltdgBWTiUzsiwbawSubndavHhWLt1g,7145
39
+ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=3HldmkAZv1EN0GeUWr574L9brknb569KB8i1iIGgcx0,8334
40
+ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=Eqa4OUqoCGywbHz-HxJ9dWRj9BKlVzJPuIhVzvrpdLM,8925
41
+ ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=-n-QZyp9y8WCy5FPSpXZXHfOA-p-RLvfSaCzAfhHiHI,7040
42
+ ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=6m2U-9JdNei0XzOORg2gt87TJdD0XHZ-z5h9c4g_TB4,9120
43
+ ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=ZU7QWZeN1KjdprJWWvfmSikz8ebhSH1aS1Cl7g1Qp0Q,20446
44
+ ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=eTrrc8AGaSf1Ytp5gsRONAZ94PHFJUTd4dGi5ZnKZjU,16038
45
45
  ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
46
- ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=4eAlGph6DDW18bUdoY0XcUoOXEr3P_3_W1ptidD8qK4,37611
46
+ ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=Q6BoDDR1flnmxLL2NZ1YrNCaL3uUbt76htW8aHO6ukE,37462
47
47
  ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
48
48
  ai_edge_quantizer/transformations/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
49
49
  ai_edge_quantizer/transformations/dequant_insert.py,sha256=sL1LHFVzBDSd9jgrzlHz38LWU0bwmVX7iBkaNcui0ts,3566
@@ -74,8 +74,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=EoVjI_hplX_Rml3hfRsGmQOi
74
74
  ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=6fjkM-rycZ95L4yfvlr0TN6RlrhfPzxNUYrZaYO_F0A,12013
75
75
  ai_edge_quantizer/utils/validation_utils.py,sha256=QTYyQ_HDVrFTGPIsrA240Lv8tUw1fwWp2fu9kTVISkE,6224
76
76
  ai_edge_quantizer/utils/validation_utils_test.py,sha256=lO51rGskhzpXePRdZMU87u_YO35_sDp9_eQ85CmupL4,4600
77
- ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
78
- ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info/METADATA,sha256=3tcuuP3R_QQoBgH0QbodInLmEq-GyrIQeBYOlVo4DrM,1508
79
- ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
80
- ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
81
- ai_edge_quantizer_nightly-0.4.0.dev20251027.dist-info/RECORD,,
77
+ ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
78
+ ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info/METADATA,sha256=WZuAdLSBsGAybfUkGXKpnL9dO6CbiueTPfDRvLJ0A7w,1508
79
+ ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
80
+ ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
81
+ ai_edge_quantizer_nightly-0.4.0.dev20251029.dist-info/RECORD,,