PyPI - ai-edge-torch-nightly - Versions diffs - 0.2.0.dev20240610__py3-none-any.whl → 0.2.0.dev20240611__py3-none-any.whl - Mend

ai-edge-torch-nightly 0.2.0.dev20240610__py3-none-any.whl → 0.2.0.dev20240611__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ai-edge-torch-nightly might be problematic. Click here for more details.

Files changed (16)

ai_edge_torch/convert/conversion_utils.py CHANGED Viewed

@@ -24,6 +24,7 @@ from typing import Any, Dict, Optional, Tuple, Union
 import torch
 from torch_xla import stablehlo
+from ai_edge_torch.generative.quantize.ai_edge_quantizer_glue import translate_recipe  # NOQA
 from ai_edge_torch.quantize import quant_config as qcfg
 try:
@@ -249,11 +250,6 @@ def _set_tfl_converter_quant_flags(
       converter._experimental_qdq_conversion_mode = "DYNAMIC"
     elif quantizer_mode == qcfg.QuantConfig._QuantizerMode.PT2E_STATIC:
       converter._experimental_qdq_conversion_mode = "STATIC"
-    elif quantizer_mode == qcfg.QuantConfig._QuantizerMode.TFLITE_DYNAMIC:
-      converter.optimizations = [tf.lite.Optimize.DEFAULT]
-    elif quantizer_mode == qcfg.QuantConfig._QuantizerMode.TFLITE_FP16:
-      converter.optimizations = [tf.lite.Optimize.DEFAULT]
-      converter.target_spec.supported_types = [tf.float16]
 def convert_stablehlo_to_tflite(
@@ -323,8 +319,24 @@ def convert_stablehlo_to_tflite(
     converter._experimental_enable_composite_direct_lowering = True
     _set_tfl_converter_quant_flags(converter, quant_config)
+    if (
+        quant_config is not None
+        and quant_config._quantizer_mode
+        == quant_config._QuantizerMode.AI_EDGE_QUANTIZER
+    ):
+      translated_recipe = translate_recipe.translate_to_ai_edge_recipe(
+          quant_config.generative_recipe
+      )
     _apply_tfl_backdoor_flags(converter, _tfl_converter_flags)
     tflite_model = converter.convert()
+    if (
+        quant_config is not None
+        and quant_config._quantizer_mode
+        == quant_config._QuantizerMode.AI_EDGE_QUANTIZER
+    ):
+      tflite_model = translate_recipe.quantize_model(tflite_model, translated_recipe)
   return tflite_model

ai_edge_torch/generative/layers/model_config.py CHANGED Viewed

@@ -27,6 +27,7 @@ class ActivationType(enum.Enum):
   SILU = enum.auto()
   GELU = enum.auto()
   GELU_TANH = enum.auto()
+  GELU_QUICK = enum.auto()
   GE_GLU = enum.auto()
   RELU = enum.auto()

ai_edge_torch/generative/quantize/ai_edge_quantizer_glue/__init__.py ADDED Viewed

File without changes

ai_edge_torch/generative/quantize/ai_edge_quantizer_glue/translate_recipe.py ADDED Viewed

@@ -0,0 +1,164 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import json
+from ai_edge_quantizer import quantizer
+from ai_edge_torch.generative.quantize import quant_attrs
+from ai_edge_torch.generative.quantize import quant_recipe
+_OpExecutionMode = quantizer.qtyping.OpExecutionMode
+_OpName = quantizer.qtyping.TFLOperationName
+_TensorQuantConfig = quantizer.qtyping.TensorQuantizationConfig
+_OpQuantConfig = quantizer.qtyping.OpQuantizationConfig
+_DEFAULT_REGEX_STR = '.*'
+_ATTENTION_IDX_REGEX_STR = (
+    'transformer_blocks\[{}\]/ai_edge_torch.generative.layers.attention'
+)
+_FEEDFORWARD_IDX_REGEX_STR = (
+    'transformer_blocks\[{}\]/ai_edge_torch.generative.layers.feed_forward'
+)
+_EMBEDDING_REGEX_STR = 'Embedding_tok_embedding'
+_ANY_TWO_DIGITS_REGEX_STR = '\d{1,2}'
+def _get_nbits_from_dtype(dtype: quant_attrs.Dtype) -> int:
+  if dtype == quant_attrs.Dtype.FP32:
+    return 32
+  elif dtype == quant_attrs.Dtype.FP16:
+    return 16
+  elif dtype == quant_attrs.Dtype.INT8:
+    return 8
+  raise ValueError('Unimplemented number of bits')
+def _get_dtype_from_dtype(dtype: quant_attrs.Dtype) -> quantizer.qtyping.TensorDataType:
+  if dtype == quant_attrs.Dtype.FP32 or dtype == quant_attrs.Dtype.FP16:
+    return quantizer.qtyping.TensorDataType.FLOAT
+  else:
+    return quantizer.qtyping.TensorDataType.INT
+def _get_execution_mode_from_mode(mode: quant_attrs.Mode) -> _OpExecutionMode:
+  if mode == quant_attrs.Mode.DYNAMIC_RANGE:
+    return _OpExecutionMode.DRQ
+  elif mode == quant_attrs.Mode.WEIGHT_ONLY:
+    return _OpExecutionMode.WEIGHT_ONLY
+  raise ValueError('Unimplemented execution mode')
+def _get_channelwise_from_granularity(granularity: quant_attrs.Granularity) -> bool:
+  if granularity == quant_attrs.Granularity.CHANNELWISE:
+    return True
+  elif granularity == quant_attrs.Granularity.NONE:
+    return False
+  raise ValueError('Unimplemented granularity')
+def _get_algorithm_key_from_algorithm(algo: quant_attrs.Algorithm) -> str:
+  if algo == quant_attrs.Algorithm.MIN_MAX:
+    return quantizer.algorithm_manager.AlgorithmName.MIN_MAX_UNIFORM_QUANT
+  elif algo == quant_attrs.Algorithm.FLOAT_CAST:
+    return quantizer.algorithm_manager.AlgorithmName.FLOAT_CASTING
+  raise ValueError('Unimplemented algorithm')
+def _set_quant_config(
+    rm: quantizer.recipe_manager.RecipeManager,
+    layer_recipe: quant_recipe.LayerQuantRecipe,
+    regex: str,
+):
+  support_op_list = [_OpName.FULLY_CONNECTED, _OpName.CONV_2D]
+  if layer_recipe.algorithm == quant_attrs.Algorithm.MIN_MAX:
+    support_op_list += [_OpName.BATCH_MATMUL, _OpName.EMBEDDING_LOOKUP]
+  for op_name in support_op_list:
+    rm.add_quantization_config(
+        regex=regex,
+        operation_name=op_name,
+        op_config=_OpQuantConfig(
+            weight_tensor_config=_TensorQuantConfig(
+                num_bits=_get_nbits_from_dtype(layer_recipe.weight_dtype),
+                symmetric=True,
+                channel_wise=_get_channelwise_from_granularity(
+                    layer_recipe.granularity
+                ),
+                dtype=_get_dtype_from_dtype(layer_recipe.weight_dtype),
+            ),
+            execution_mode=_get_execution_mode_from_mode(layer_recipe.mode),
+        ),
+        algorithm_key=_get_algorithm_key_from_algorithm(layer_recipe.algorithm),
+        override_algorithm=True,
+    )
+def translate_to_ai_edge_recipe(
+    recipe: quant_recipe.GenerativeQuantRecipe,
+) -> quantizer.recipe_manager.ModelQuantizationRecipe:
+  rm = quantizer.recipe_manager.RecipeManager()
+  if recipe.default is not None:
+    _set_quant_config(rm, recipe.default, _DEFAULT_REGEX_STR)
+  if recipe.embedding is not None:
+    _set_quant_config(rm, recipe.embedding, _EMBEDDING_REGEX_STR)
+  if recipe.attention is not None:
+    if isinstance(recipe.attention, dict):
+      for idx, layer in recipe.attention.items():
+        _set_quant_config(rm, layer, _ATTENTION_IDX_REGEX_STR.format(idx))
+    else:
+      _set_quant_config(
+          rm,
+          recipe.attention,
+          _ATTENTION_IDX_REGEX_STR.format(_ANY_TWO_DIGITS_REGEX_STR),
+      )
+  if recipe.feedforward is not None:
+    if isinstance(recipe.feedforward, dict):
+      for idx, layer in recipe.feedforward.items():
+        _set_quant_config(rm, layer, _FEEDFORWARD_IDX_REGEX_STR.format(idx))
+    else:
+      _set_quant_config(
+          rm,
+          recipe.feedforward,
+          _FEEDFORWARD_IDX_REGEX_STR.format(_ANY_TWO_DIGITS_REGEX_STR),
+      )
+  return rm.get_quantization_recipe()
+def quantize_model(
+    model: bytearray, recipe: quantizer.recipe_manager.ModelQuantizationRecipe
+) -> bytearray:
+  # TODO(b/336599483): Remove tempfile and use bytearray instead
+  tmp_model_path = '/tmp/tmp.tflite'
+  tmp_recipe_path = '/tmp/recipe.json'
+  with open(tmp_model_path, 'wb') as fp:
+    fp.write(model)
+  with open(tmp_recipe_path, 'w') as rp:
+    rp.write(json.dumps(recipe))
+  qt = quantizer.Quantizer(tmp_model_path, tmp_recipe_path)
+  result = qt.quantize()
+  # TODO(b/336599483): Remove tempfile and use bytearray instead
+  import os
+  os.remove(tmp_model_path)
+  os.remove(tmp_recipe_path)
+  return result.quantized_model

ai_edge_torch/generative/quantize/quant_attrs.py CHANGED Viewed

@@ -32,9 +32,11 @@ class Algorithm(enum.Enum):
   Attributes:
     MIN_MAX: Maps the min/max of floating point space to the min/max of
       quantized space and quantize uniformly.
+    FLOAT_CAST: Casts a float to another float of a different type.
   """
   MIN_MAX = enum.auto()
+  FLOAT_CAST = enum.auto()
 @enum.unique

ai_edge_torch/generative/quantize/quant_recipe.py CHANGED Viewed

@@ -14,8 +14,7 @@
 # ==============================================================================
 from dataclasses import dataclass
-import enum
-from typing import Optional
+from typing import Optional, Union
 from ai_edge_torch.generative.quantize import quant_attrs
 from ai_edge_torch.generative.quantize import supported_schemes
@@ -80,18 +79,50 @@ class LayerQuantRecipe:
 @dataclass
-class TransformerQuantRecipe:
+class GenerativeQuantRecipe:
   """Quantization recipe for a model composed of the Edge Generative API layers.
+  Some layers can be specified with different `LayerQuantRecipe` for each block by
+  providing a dictionary keyed by the TransformerBlock index, e.g. attention
+  and feedforward. For example,
+  ```
+  default = LayerQuantRecipeA
+  attention = { 2: LayerQuantRecipeB }
+  feedforward = { 3: LayerQuantRecipeC }
+  ```
+  will apply LayerQuantRecipeA to the entire model, overriden by
+  LayerQuantRecipeB for the TransformerBlock[2].attention layer and
+  LayerQuantRecipeC for the TransformerBlock[3].feedforward layer. Any config
+  with invalid indices will be ignored.
   Attributes:
     default: The quantization recipe for global scope of the model.
+    embedding: Recipe for the embedding table.
+    attention: Recipe for the attention blocks. This could be specified with
+      different LayerQuantRecipe for each block by providing a dictionary
+      keyed by the TransformerBlock index.
+    feedforward: Recipe for the feedforward layers. This could be specified with
+      different LayerQuantRecipe for each block by providing a dictionary
+      keyed by the TransformerBlock index.
   """
   default: Optional[LayerQuantRecipe] = None
+  embedding: Optional[LayerQuantRecipe] = None
+  attention: Union[
+      Optional[LayerQuantRecipe], Optional[dict[int, LayerQuantRecipe]]
+  ] = None
+  feedforward: Union[
+      Optional[LayerQuantRecipe], Optional[dict[int, LayerQuantRecipe]]
+  ] = None
   def __str__(self):
-    return f"""TransformerQuantRecipe(
+    return f"""GenerativeQuantRecipe(
   Default: {self.default}
+  Embedding: {self.embedding}
+  Attention: {self.attention}
+  Feedforward: {self.feedforward}
 )"""
   __repr__ = __str__
@@ -104,3 +135,17 @@ class TransformerQuantRecipe:
     """
     if self.default is not None:
       self.default.verify()
+    if self.embedding is not None:
+      self.embedding.verify()
+    if self.attention is not None:
+      if isinstance(self.attention, dict):
+        for recipe in self.attention.values():
+          recipe.verify()
+      else:
+        self.attention.verify()
+    if self.feedforward is not None:
+      if isinstance(self.feedforward, dict):
+        for recipe in self.feedforward.values():
+          recipe.verify()
+      else:
+        self.feedforward.verify()

ai_edge_torch/generative/quantize/quant_recipe_utils.py CHANGED Viewed

@@ -22,7 +22,7 @@ Typical usage example:
 1. Applying a single layer recipe to the entire model
-  quant_recipe.TransformerQuantRecipe(
+  quant_recipe.GenerativeQuantRecipe(
     default=quant_recipe_utils.create_layer_quant_int8_dynamic()
   )
 """
@@ -46,6 +46,6 @@ def create_layer_quant_fp16() -> quant_recipe.LayerQuantRecipe:
       activation_dtype=quant_attrs.Dtype.FP32,
       weight_dtype=quant_attrs.Dtype.FP16,
       mode=quant_attrs.Mode.WEIGHT_ONLY,
-      algorithm=quant_attrs.Algorithm.MIN_MAX,
+      algorithm=quant_attrs.Algorithm.FLOAT_CAST,
       granularity=quant_attrs.Granularity.NONE,
   )

ai_edge_torch/generative/quantize/quant_recipes.py CHANGED Viewed

@@ -34,15 +34,15 @@ from ai_edge_torch.quantize import quant_config
 def full_linear_int8_dynamic_recipe() -> quant_config.QuantConfig:
   return quant_config.QuantConfig(
-      transformer_recipe=quant_recipe.TransformerQuantRecipe(
-          default=quant_recipe_utils.create_layer_quant_int8_dynamic()
+      generative_recipe=quant_recipe.GenerativeQuantRecipe(
+          default=quant_recipe_utils.create_layer_quant_int8_dynamic(),
       )
   )
 def full_fp16_recipe() -> quant_config.QuantConfig:
   return quant_config.QuantConfig(
-      transformer_recipe=quant_recipe.TransformerQuantRecipe(
+      generative_recipe=quant_recipe.GenerativeQuantRecipe(
           default=quant_recipe_utils.create_layer_quant_fp16()
       )
   )

ai_edge_torch/generative/quantize/supported_schemes.py CHANGED Viewed

@@ -27,5 +27,6 @@ def get_supported_layer_schemes():
   return [
       (_t.FP32, _t.INT8, _m.DYNAMIC_RANGE, _a.MIN_MAX, _g.CHANNELWISE),
-      (_t.FP32, _t.FP16, _m.WEIGHT_ONLY, _a.MIN_MAX, _g.NONE),
+      (_t.FP32, _t.INT8, _m.WEIGHT_ONLY, _a.MIN_MAX, _g.CHANNELWISE),
+      (_t.FP32, _t.FP16, _m.WEIGHT_ONLY, _a.FLOAT_CAST, _g.NONE),
   ]

ai_edge_torch/generative/test/test_quantize.py CHANGED Viewed

@@ -21,11 +21,13 @@ import torch
 import ai_edge_torch
 from ai_edge_torch.generative.examples.test_models import toy_model_with_kv_cache  # NOQA
 from ai_edge_torch.generative.quantize import quant_recipe
+from ai_edge_torch.generative.quantize import quant_recipe_utils
 from ai_edge_torch.generative.quantize import quant_recipes
 from ai_edge_torch.generative.quantize.quant_attrs import Algorithm
 from ai_edge_torch.generative.quantize.quant_attrs import Dtype
 from ai_edge_torch.generative.quantize.quant_attrs import Granularity
 from ai_edge_torch.generative.quantize.quant_attrs import Mode
+from ai_edge_torch.quantize import quant_config
 from ai_edge_torch.testing import model_coverage
@@ -34,34 +36,47 @@ class TestVerifyRecipes(unittest.TestCase):
   @parameterized.expand(
       [
-          (Dtype.FP32, Dtype.FP32, Mode.DYNAMIC_RANGE),
-          (Dtype.INT8, Dtype.INT8, Mode.DYNAMIC_RANGE),
-          (Dtype.INT8, Dtype.FP16, Mode.DYNAMIC_RANGE),
-          (Dtype.FP16, Dtype.INT8, Mode.DYNAMIC_RANGE),
-          (Dtype.FP32, Dtype.FP32, Mode.WEIGHT_ONLY),
-          (Dtype.INT8, Dtype.INT8, Mode.WEIGHT_ONLY),
-          (Dtype.FP16, Dtype.INT8, Mode.WEIGHT_ONLY),
-          (Dtype.INT8, Dtype.FP16, Mode.WEIGHT_ONLY),
-          (Dtype.FP16, Dtype.FP16, Mode.WEIGHT_ONLY),
+          (Dtype.FP32, Dtype.FP32),
+          (Dtype.INT8, Dtype.INT8),
+          (Dtype.INT8, Dtype.FP16),
+          (Dtype.FP16, Dtype.INT8),
+          (Dtype.FP16, Dtype.FP16),
       ]
   )
   def test_verify_invalid_recipes(
       self,
       activation,
       weight,
-      mode,
-      algo=Algorithm.MIN_MAX,
-      granularity=Granularity.CHANNELWISE,
   ):
-    with self.assertRaises(ValueError):
-      quant_recipe.LayerQuantRecipe(
-          activation, weight, mode, algo, granularity
-      ).verify()
+    for m in Mode:
+      for a in Algorithm:
+        for g in Granularity:
+          with self.assertRaises(ValueError):
+            quant_recipe.LayerQuantRecipe(activation, weight, m, a, g).verify()
   @parameterized.expand(
       [
-          (Dtype.FP32, Dtype.INT8, Mode.DYNAMIC_RANGE, Granularity.CHANNELWISE),
-          (Dtype.FP32, Dtype.FP16, Mode.WEIGHT_ONLY, Granularity.NONE),
+          (
+              Dtype.FP32,
+              Dtype.INT8,
+              Mode.DYNAMIC_RANGE,
+              Algorithm.MIN_MAX,
+              Granularity.CHANNELWISE,
+          ),
+          (
+              Dtype.FP32,
+              Dtype.INT8,
+              Mode.WEIGHT_ONLY,
+              Algorithm.MIN_MAX,
+              Granularity.CHANNELWISE,
+          ),
+          (
+              Dtype.FP32,
+              Dtype.FP16,
+              Mode.WEIGHT_ONLY,
+              Algorithm.FLOAT_CAST,
+              Granularity.NONE,
+          ),
       ]
   )
   def test_verify_valid_recipes(
@@ -69,8 +84,8 @@ class TestVerifyRecipes(unittest.TestCase):
       activation,
       weight,
       mode,
+      algo,
       granularity,
-      algo=Algorithm.MIN_MAX,
   ):
     quant_recipe.LayerQuantRecipe(activation, weight, mode, algo, granularity).verify()
@@ -78,7 +93,46 @@ class TestVerifyRecipes(unittest.TestCase):
 class TestQuantizeConvert(unittest.TestCase):
   """Test conversion with quantization."""
-  def test_quantize_convert_toy(self):
+  def _attention_1_int8_dynamic_recipe() -> quant_config.QuantConfig:
+    return quant_config.QuantConfig(
+        generative_recipe=quant_recipe.GenerativeQuantRecipe(
+            attention={1: quant_recipe_utils.create_layer_quant_int8_dynamic()},
+        )
+    )
+  def _feedforward_0_int8_dynamic_recipe() -> quant_config.QuantConfig:
+    return quant_config.QuantConfig(
+        generative_recipe=quant_recipe.GenerativeQuantRecipe(
+            feedforward={0: quant_recipe_utils.create_layer_quant_int8_dynamic()},
+        )
+    )
+  @parameterized.expand(
+      [
+          (quant_recipes.full_fp16_recipe(), 0.75),
+          (quant_recipes.full_linear_int8_dynamic_recipe(), 0.64),
+          (_attention_1_int8_dynamic_recipe(), 0.95),
+          (_feedforward_0_int8_dynamic_recipe(), 0.87),
+      ]
+  )
+  def test_quantize_convert_toy_sizes(self, quant_config, expected_compression):
+    config = toy_model_with_kv_cache.get_model_config()
+    pytorch_model = toy_model_with_kv_cache.ToyModelWithKV(config)
+    idx, input_pos = torch.tensor([[1]], dtype=torch.long), torch.tensor(
+        [10], dtype=torch.int64
+    )
+    quantized_model = ai_edge_torch.convert(
+        pytorch_model, (idx, input_pos), quant_config=quant_config
+    )
+    float_model = ai_edge_torch.convert(pytorch_model, (idx, input_pos))
+    self.assertAlmostEqual(
+        len(quantized_model._tflite_model) / len(float_model._tflite_model),
+        expected_compression,
+        delta=0.01,
+    )
+  def test_quantize_convert_compare_toy(self):
     self.skipTest("b/338288901")
     config = toy_model_with_kv_cache.get_model_config()
     pytorch_model = toy_model_with_kv_cache.ToyModelWithKV(config)

ai_edge_torch/quantize/quant_config.py CHANGED Viewed

@@ -32,27 +32,26 @@ class QuantConfig:
     pt2e_quantizer: The instance of PT2EQuantizer used to quantize the model
       with PT2E quantization. This method of quantization is not applicable to
       models created with the Edge Generative API.
-    transformer_recipe: Quantization recipe to be applied on a model created
+    generative_recipe: Quantization recipe to be applied on a model created
       with the Edge Generative API.
   """
   pt2e_quantizer: pt2eq.PT2EQuantizer = None
-  transformer_recipe: quant_recipe.TransformerQuantRecipe = None
+  generative_recipe: quant_recipe.GenerativeQuantRecipe = None
   @enum.unique
   class _QuantizerMode(enum.Enum):
     NONE = enum.auto()
     PT2E_DYNAMIC = enum.auto()
     PT2E_STATIC = enum.auto()
-    TFLITE_DYNAMIC = enum.auto()
-    TFLITE_FP16 = enum.auto()
+    AI_EDGE_QUANTIZER = enum.auto()
   _quantizer_mode: _QuantizerMode = _QuantizerMode.NONE
   def __init__(
       self,
       pt2e_quantizer: Optional[pt2eq.PT2EQuantizer] = None,
-      transformer_recipe: Optional[quant_recipe.TransformerQuantRecipe] = None,
+      generative_recipe: Optional[quant_recipe.GenerativeQuantRecipe] = None,
   ):
     """Initializes some internal states based on selected quantization method.
@@ -61,8 +60,8 @@ class QuantConfig:
     is properly setup. Additionally sets up an utility enum _quantizer_mode to
     guide certain conversion processes.
     """
-    if pt2e_quantizer is not None and transformer_recipe is not None:
-      raise ValueError('Cannot set both pt2e_quantizer and transformer_recipe.')
+    if pt2e_quantizer is not None and generative_recipe is not None:
+      raise ValueError('Cannot set both pt2e_quantizer and generative_recipe.')
     elif pt2e_quantizer is not None:
       object.__setattr__(self, 'pt2e_quantizer', pt2e_quantizer)
       object.__setattr__(
@@ -74,12 +73,9 @@ class QuantConfig:
               else self._QuantizerMode.PT2E_STATIC
           ),
       )
-    elif transformer_recipe is not None:
-      transformer_recipe.verify()
-      object.__setattr__(self, 'transformer_recipe', transformer_recipe)
-      if self.transformer_recipe.default.mode == quant_attrs.Mode.DYNAMIC_RANGE:
-        object.__setattr__(self, '_quantizer_mode', self._QuantizerMode.TFLITE_DYNAMIC)
-      elif self.transformer_recipe.default.weight_dtype == quant_attrs.Dtype.FP16:
-        object.__setattr__(self, '_quantizer_mode', self._QuantizerMode.TFLITE_FP16)
+    elif generative_recipe is not None:
+      generative_recipe.verify()
+      object.__setattr__(self, 'generative_recipe', generative_recipe)
+      object.__setattr__(self, '_quantizer_mode', self._QuantizerMode.AI_EDGE_QUANTIZER)
     else:
-      raise ValueError('Either pt2e_quantizer or transformer_recipe must be set.')
+      raise ValueError('Either pt2e_quantizer or generative_recipe must be set.')

{ai_edge_torch_nightly-0.2.0.dev20240610.dist-info → ai_edge_torch_nightly-0.2.0.dev20240611.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-torch-nightly
-Version: 0.2.0.dev20240610
+Version: 0.2.0.dev20240611
 Summary: Supporting PyTorch models with the Google AI Edge TFLite runtime.
 Home-page: https://github.com/google-ai-edge/ai-edge-torch
 Keywords: On-Device ML,AI,Google,TFLite,PyTorch,LLMs,GenAI

{ai_edge_torch_nightly-0.2.0.dev20240610.dist-info → ai_edge_torch_nightly-0.2.0.dev20240611.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ ai_edge_torch/__init__.py,sha256=FPMmuFU3pyMREtjB_san1fy_0PFtAsgA0VZfOYvDrb4,100
 ai_edge_torch/model.py,sha256=kmcgELjsYl8YzF8nUF6P7q4i8MWS-pLGpfsy-yTUXmE,4243
 ai_edge_torch/convert/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/convert/conversion.py,sha256=GN2Js232u_5Y118wg3qIfEoYewxbxLl3TpSnO6osi8c,4029
-ai_edge_torch/convert/conversion_utils.py,sha256=NpVm3Ms81_cIW5IYgGsr0BVganJJgBKWVBDe5h_ZaGE,11021
+ai_edge_torch/convert/conversion_utils.py,sha256=9BqCL38DErv1vEVGtT3BIJVhdwZjw2EQ-_m5UpvVVYE,11294
 ai_edge_torch/convert/converter.py,sha256=bjj5TV5_g4sGyuSh8ThEDydlNMqhkGSY4SzXK6vwhqI,6927
 ai_edge_torch/convert/fx_passes/__init__.py,sha256=EPs4PSIDLuRH5EBETi6deaOvaaf_Q4xD3_9NVcR7x8o,2810
 ai_edge_torch/convert/fx_passes/_pass_base.py,sha256=ijVyDclPnd6a0DWWUJkwR4igj6f82S-cE1-83QGPvgw,1652
@@ -70,7 +70,7 @@ ai_edge_torch/generative/layers/attention_utils.py,sha256=hXhuyKblPPxKIRzlAf1YNl
 ai_edge_torch/generative/layers/builder.py,sha256=jAyrR5hsSI0aimKZumyvxdJ1GovERIfsK0g-dezX2gs,4163
 ai_edge_torch/generative/layers/feed_forward.py,sha256=4j2QaSCw59Jkk_ixKDpKEj7FLRauzuExTiSNRzAjAhE,2820
 ai_edge_torch/generative/layers/kv_cache.py,sha256=4uiZLO3om5G3--kT04Jt0esEYznbkJ7QLzSHfb8mjc4,3090
-ai_edge_torch/generative/layers/model_config.py,sha256=g_XJXcQOCkE-mt58fSH4-T4GY_uLeMilg6mxwDMCfz4,4557
+ai_edge_torch/generative/layers/model_config.py,sha256=toWECENDWgay9hsZcy4C89qph0KI3CpaeFqFc8Fr-Xk,4584
 ai_edge_torch/generative/layers/normalization.py,sha256=M27eW3TcNK20oaXClXtfnu0lLWrAGrSKSsbegRWnj3c,1867
 ai_edge_torch/generative/layers/rotary_position_embedding.py,sha256=12SsCuoRuLNCwnFGe_pHDOZEBwBcqXs87Aj0PaWWw4E,1383
 ai_edge_torch/generative/layers/scaled_dot_product_attention.py,sha256=dYafGC205QE5CLIbBTCI-7eVvEGZEHzs1toPEhemeDs,3391
@@ -80,15 +80,17 @@ ai_edge_torch/generative/layers/unet/builder.py,sha256=iH0_nuY9TF2ap5h1JbGNCOonP
 ai_edge_torch/generative/layers/unet/model_config.py,sha256=sbtbDEHmMV9GLKngwjsNvqm8wovLxnlidkQbXdXkXKs,4060
 ai_edge_torch/generative/quantize/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/quantize/example.py,sha256=t-YwyKSPAG-OZC1DfH-0vfie2RHHpTSQjxUY-tmhu5g,1543
-ai_edge_torch/generative/quantize/quant_attrs.py,sha256=ffBALrrbrfiG_mrOr-f3B1Gc6PlAma9gtvVnfP7SDzI,1862
-ai_edge_torch/generative/quantize/quant_recipe.py,sha256=BOk4E0FW-_YD8Y-oPVmIDsgXx_bPtvzsP_V1av5DvgU,3327
-ai_edge_torch/generative/quantize/quant_recipe_utils.py,sha256=9ktL7fT8C5j1dnY_7fkiFL4oWNLVs1dMWXkS_EuyA3Y,1913
-ai_edge_torch/generative/quantize/quant_recipes.py,sha256=CRA2ENevS-3usHqidWDe2wrf_epILE_7Hx-XfZQ9buk,1798
-ai_edge_torch/generative/quantize/supported_schemes.py,sha256=OQ4ghQXknA1PPjuY-xBgAmOpaIBgYFM8F2YAIot06hE,1345
+ai_edge_torch/generative/quantize/quant_attrs.py,sha256=n1Fm8BFC8gJa_oiwwAOOghJyHtOXYZ4q-5ZRy4pHrIw,1957
+ai_edge_torch/generative/quantize/quant_recipe.py,sha256=Y8zahKw7b_h7ajPaJZVef4jG-MoqImRCpVSbFtV_i24,5139
+ai_edge_torch/generative/quantize/quant_recipe_utils.py,sha256=-vd6Qp0BdXJVKg4f0_hhwbKOi3QPIAPVqyXnJ-ZnISQ,1915
+ai_edge_torch/generative/quantize/quant_recipes.py,sha256=9ItD70jQRXMEhWod-nUfEeoWGJUUu6V9YOffF07VU9g,1795
+ai_edge_torch/generative/quantize/supported_schemes.py,sha256=FjdycEOvxRgBmQdZVufetPvkDoD7rUowIOSKV9oV5Kk,1418
+ai_edge_torch/generative/quantize/ai_edge_quantizer_glue/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ai_edge_torch/generative/quantize/ai_edge_quantizer_glue/translate_recipe.py,sha256=qUB4f2DoB14dLkNPWf6TZodpT81mfAJeWM-lCAmkuHY,5735
 ai_edge_torch/generative/test/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/test/loader_test.py,sha256=N88CbrLW7Q2x1EyurwdXQ6YjsA-ySQcPxpZH3QOGp-M,3317
 ai_edge_torch/generative/test/test_model_conversion.py,sha256=i_SAW-hD8SaHuopMZI9IuXXDFn5uSTJa1nKZhaC3dAQ,6811
-ai_edge_torch/generative/test/test_quantize.py,sha256=f70sH1ZFzdCwYj0MG-eg54WOC4LasR0D8CTUYpjxZYM,3728
+ai_edge_torch/generative/test/test_quantize.py,sha256=NVlMixAxVpDUabEvp6zTHHgIDgHFsMRwlf5MuyDwrPg,5355
 ai_edge_torch/generative/utilities/__init__.py,sha256=-_jxnnFnCgnTU4oTm4MnRsvL5lqhomBNdFBbqfmfHPo,720
 ai_edge_torch/generative/utilities/autoencoder_loader.py,sha256=G2Nosy33JzkjGALPR4JjvffdFX1JWOj2zjbbuaDJEgg,10065
 ai_edge_torch/generative/utilities/loader.py,sha256=Hs92478j1g4jQGvbdP1aWvOy907HjwqQZE-NFy6HELo,11326
@@ -103,12 +105,12 @@ ai_edge_torch/hlfb/test/test_stablehlo_composite_builder.py,sha256=aUAPKnH4_Jxpp
 ai_edge_torch/quantize/__init__.py,sha256=aB5dXot04bqyUhpsDFvxt9CIi15QAC4euvqOndJ0XLU,714
 ai_edge_torch/quantize/pt2e_quantizer.py,sha256=ye1f5vAZ0Vr4RWAtfrgU1o3JLs03Sa4inHRq3YxJDGo,15602
 ai_edge_torch/quantize/pt2e_quantizer_utils.py,sha256=yjzKoptnfEeW_sN7sODUfj3nCtUMXVzq3vHKxblsd5Y,36046
-ai_edge_torch/quantize/quant_config.py,sha256=ExThdTXqnWmGC3-F6sdXbXr8nYzkEe_qCziCfhsoMPA,3435
+ai_edge_torch/quantize/quant_config.py,sha256=eO9Ra160ITjQSyRBEGy6nNIVH3gYacSWDdN5XtvHwjc,3148
 ai_edge_torch/testing/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/testing/model_coverage/__init__.py,sha256=5P8J6Zk5YYtDvTBucFvB9NGSRI7Gw_24WnrbhXgycEE,765
 ai_edge_torch/testing/model_coverage/model_coverage.py,sha256=EIyKz-HY70DguWuSrJal8LpYXQ5ZSEUf3ZrVl7jikFM,4286
-ai_edge_torch_nightly-0.2.0.dev20240610.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
-ai_edge_torch_nightly-0.2.0.dev20240610.dist-info/METADATA,sha256=6hL5PV3S56VU2l6xqS-YrmzMZeajtXsikIdR7kDYcWE,1748
-ai_edge_torch_nightly-0.2.0.dev20240610.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-ai_edge_torch_nightly-0.2.0.dev20240610.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
-ai_edge_torch_nightly-0.2.0.dev20240610.dist-info/RECORD,,
+ai_edge_torch_nightly-0.2.0.dev20240611.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ai_edge_torch_nightly-0.2.0.dev20240611.dist-info/METADATA,sha256=WPGu2pq6N57fBtpunyFhunPe73UK_SVbqlZQsZwjWGo,1748
+ai_edge_torch_nightly-0.2.0.dev20240611.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+ai_edge_torch_nightly-0.2.0.dev20240611.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
+ai_edge_torch_nightly-0.2.0.dev20240611.dist-info/RECORD,,

{ai_edge_torch_nightly-0.2.0.dev20240610.dist-info → ai_edge_torch_nightly-0.2.0.dev20240611.dist-info}/LICENSE RENAMED Viewed

File without changes

{ai_edge_torch_nightly-0.2.0.dev20240610.dist-info → ai_edge_torch_nightly-0.2.0.dev20240611.dist-info}/WHEEL RENAMED Viewed

File without changes

{ai_edge_torch_nightly-0.2.0.dev20240610.dist-info → ai_edge_torch_nightly-0.2.0.dev20240611.dist-info}/top_level.txt RENAMED Viewed

File without changes

ai-edge-torch-nightly 0.2.0.dev20240610__py3-none-any.whl → 0.2.0.dev20240611__py3-none-any.whl

Potentially problematic release.

ai-edge-torch-nightly 0.2.0.dev20240610__py3-none-any.whl → 0.2.0.dev20240611__py3-none-any.whl