ai-edge-torch-nightly 0.3.0.dev20241117__py3-none-any.whl → 0.3.0.dev20241120__py3-none-any.whl
- ai_edge_torch/_convert/fx_passes/build_interpolate_composite_pass.py +2 -2
- ai_edge_torch/_convert/test/test_convert_composites.py +1 -0
- ai_edge_torch/generative/examples/gemma/convert_gemma2_multi_prefills.py +6 -20
- ai_edge_torch/generative/examples/paligemma/convert_to_tflite.py +80 -0
- ai_edge_torch/generative/examples/paligemma/image_encoder.py +3 -1
- ai_edge_torch/generative/examples/paligemma/paligemma.py +25 -9
- ai_edge_torch/generative/layers/model_config.py +23 -20
- ai_edge_torch/generative/test/test_model_conversion.py +1 -1
- ai_edge_torch/generative/test/test_model_conversion_large.py +50 -1
- ai_edge_torch/generative/test/utils.py +6 -3
- ai_edge_torch/generative/utilities/converter.py +71 -87
- ai_edge_torch/version.py +1 -1
- {ai_edge_torch_nightly-0.3.0.dev20241117.dist-info → ai_edge_torch_nightly-0.3.0.dev20241120.dist-info}/METADATA +1 -1
- {ai_edge_torch_nightly-0.3.0.dev20241117.dist-info → ai_edge_torch_nightly-0.3.0.dev20241120.dist-info}/RECORD +17 -16
- {ai_edge_torch_nightly-0.3.0.dev20241117.dist-info → ai_edge_torch_nightly-0.3.0.dev20241120.dist-info}/LICENSE +0 -0
- {ai_edge_torch_nightly-0.3.0.dev20241117.dist-info → ai_edge_torch_nightly-0.3.0.dev20241120.dist-info}/WHEEL +0 -0
- {ai_edge_torch_nightly-0.3.0.dev20241117.dist-info → ai_edge_torch_nightly-0.3.0.dev20241120.dist-info}/top_level.txt +0 -0
@@ -49,7 +49,7 @@ def _get_upsample_bilinear2d_pattern():
   output = internal_match.returning_nodes[0]
   output_h, output_w = output.meta["val"].shape[-2:]
   return {
-      "
+      "size": (int(output_h), int(output_w)),
       "align_corners": False,
       "is_nchw_op": True,
   }
@@ -73,7 +73,7 @@ def _get_upsample_bilinear2d_align_corners_pattern():
   output = internal_match.returning_nodes[0]
   output_h, output_w = output.meta["val"].shape[-2:]
   return {
-      "
+      "size": (int(output_h), int(output_w)),
       "align_corners": True,
       "is_nchw_op": True,
   }
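Both hunks cast the matched output height and width to plain Python ints before recording them as composite attributes. A minimal, hedged sketch of the resulting attribute dictionary (a standalone helper written for illustration, not the pass itself):

```python
import torch

def upsample_composite_attrs(val: torch.Tensor, align_corners: bool) -> dict:
  # Hypothetical helper mirroring the changed lines: shape entries can be
  # symbolic under export, so int() pins them to plain Python ints before
  # they are stored as composite attributes.
  output_h, output_w = val.shape[-2:]
  return {
      "size": (int(output_h), int(output_w)),
      "align_corners": align_corners,
      "is_nchw_op": True,
  }

print(upsample_composite_attrs(torch.zeros(1, 3, 16, 16), align_corners=False))
# {'size': (16, 16), 'align_corners': False, 'is_nchw_op': True}
```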
@@ -39,6 +39,7 @@ def _func_to_torch_module(func: Callable[..., torch.Tensor]):
   return TestModule(func).eval()


+@googletest.skip('Temporary outage due to changes for b/377531086')
 class TestConvertComposites(googletest.TestCase):
   """Tests conversion modules that are meant to be wrapped as composites."""

@@ -13,9 +13,8 @@
 # limitations under the License.
 # ==============================================================================

-"""Example
+"""Example to convert a Gemma2 model to multiple prefill length tflite model."""

-import logging
 import os
 import pathlib

@@ -35,9 +34,9 @@ _TFLITE_PATH = flags.DEFINE_string(
     'The tflite file path to export.',
 )
 _PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
-    '
+    'prefill_seq_lens',
     (8, 64, 128, 256, 512, 1024),
-    '
+    'List of the maximum sizes of prefill input tensors.',
 )
 _KV_CACHE_MAX_LEN = flags.DEFINE_integer(
     'kv_cache_max_len',
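For context on the flag type: `flags.DEFINE_multi_integer` collects repeated occurrences of the flag into a list, which is what lets a single invocation request several prefill lengths. A small, self-contained sketch (default list shortened for illustration):

```python
from absl import app
from absl import flags

_PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
    'prefill_seq_lens',
    (8, 64),  # shortened default, for illustration only
    'List of the maximum sizes of prefill input tensors.',
)

def main(_):
  # `--prefill_seq_lens=256 --prefill_seq_lens=1024` would print [256, 1024].
  print(_PREFILL_SEQ_LENS.value)

if __name__ == '__main__':
  app.run(main)
```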
@@ -51,32 +50,19 @@ _QUANTIZE = flags.DEFINE_bool(
 )


-
-# now. The main purpose for this function is to allow you export a tflite model
-# with multiple prefill signatures for different prefill lengths for faster
-# inference.
-def convert_to_tflite_multi_prefill_lens():
+def main(_):
   pytorch_model = gemma2.build_2b_model(
       _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
   )
   quant_suffix = 'q8' if _QUANTIZE.value else 'f32'
   output_filename = f'gemma2_{quant_suffix}_multi-prefill-seq_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
-  converter.
+  converter.convert_to_tflite(
       pytorch_model,
       tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
-
+      prefill_seq_len=_PREFILL_SEQ_LENS.value,
       quantize=_QUANTIZE.value,
   )


-def main(_):
-  if len(_PREFILL_SEQ_LENS.value) > 1:
-    # If multiple prefill lengths are provided, export a model with multiple
-    # prefill signatures each for a different prefill length.
-    convert_to_tflite_multi_prefill_lens()
-  else:
-    logging.warning('Need more than one prefill lengths to be specified.')
-
-
 if __name__ == '__main__':
   app.run(main)
@@ -0,0 +1,80 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Example of converting a PaliGemma model to multi-signature tflite model.
+
+DISCLAIMER: It works only with ODML Torch conversion backend. Refer to
+https://github.com/google-ai-edge/ai-edge-torch/blob/main/docs/pytorch_converter/README.md#use-odml-torch-conversion-backend-experimental.
+"""
+
+import os
+import pathlib
+
+from absl import app
+from absl import flags
+from ai_edge_torch.generative.examples.paligemma import paligemma
+from ai_edge_torch.generative.utilities import converter
+import torch
+
+_CHECKPOINT_PATH = flags.DEFINE_string(
+    'checkpoint_path',
+    os.path.join(pathlib.Path.home(), 'Downloads/llm_data/paligemma-3b-224'),
+    'The path to the model checkpoint, or directory holding the checkpoint.',
+)
+_TFLITE_PATH = flags.DEFINE_string(
+    'tflite_path',
+    '/tmp/',
+    'The tflite file path to export.',
+)
+_PREFILL_SEQ_LEN = flags.DEFINE_integer(
+    'prefill_seq_len',
+    1024,
+    'The maximum size of prefill input tensor.',
+)
+_KV_CACHE_MAX_LEN = flags.DEFINE_integer(
+    'kv_cache_max_len',
+    1280,
+    'The maximum size of KV cache buffer, including both prefill and decode.',
+)
+_PIXEL_VALUES_SIZE = flags.DEFINE_multi_integer(
+    'pixel_values_size',
+    [3, 224, 224],
+    'The size of prefill pixel values except the batch dimension.',
+)
+_QUANTIZE = flags.DEFINE_bool(
+    'quantize',
+    True,
+    'Whether the model should be quantized.',
+)
+
+
+def main(_):
+  pytorch_model = paligemma.build_model(
+      _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
+  )
+  quant_suffix = 'q8' if _QUANTIZE.value else 'f32'
+  output_filename = f'paligemma_{quant_suffix}_seq{_PREFILL_SEQ_LEN.value}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+  converter.convert_to_tflite(
+      pytorch_model,
+      tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
+      prefill_seq_len=_PREFILL_SEQ_LEN.value,
+      pixel_values_size=torch.Size(_PIXEL_VALUES_SIZE.value),
+      quantize=_QUANTIZE.value,
+      config=pytorch_model.config.decoder_config,
+  )
+
+
+if __name__ == '__main__':
+  app.run(main)
@@ -59,7 +59,7 @@ class SiglipVisionEncoder(nn.Module):
         out_channels=config.embedding_dim,
         kernel_size=config.image_embedding.patch_size,
         stride=config.image_embedding.patch_size,
-        padding=
+        padding=0,
     )
     num_patches = (
         config.image_embedding.image_size // config.image_embedding.patch_size
@@ -144,6 +144,8 @@ def get_fake_image_encoder_config() -> cfg.ModelConfig:
   config = get_image_encoder_config()
   # PaliGemma image encoder has only one block config.
   config.block_config(0).ff_config.intermediate_size = 128
+  config.image_embedding.image_size = 8
+  config.image_embedding.patch_size = 2
   config.num_layers = 2
   return config

@@ -54,6 +54,10 @@ class PaliGemma(nn.Module):
         bias=config.image_projection_use_bias,
     )
     self.decoder = decoder.Decoder(config.decoder_config)
+    image_embedding_config = config.image_encoder_config.image_embedding
+    self.num_patches = (
+        image_embedding_config.image_size // image_embedding_config.patch_size
+    ) ** 2
     self.config = config

   @torch.inference_mode
@@ -74,10 +78,22 @@ class PaliGemma(nn.Module):
     if self.config.decoder_config.embedding_scale is not None:
       image_embeds = image_embeds / self.config.decoder_config.embedding_scale

-    #
-
-
-
+    # Merging image_embeds into text_embeds as PaliGemmaForConditionalGeneration
+    # can be done like:
+    #
+    # image_mask = tokens == self.config.image_token_id
+    # image_mask = image_mask.unsqueeze(-1).expand_as(input_embeds)
+    # input_embeds = input_embeds.masked_scatter(image_mask, image_embeds)
+    #
+    # Unfortunately, torch.Tensor.masked_scatter can't be lowered on CPU.
+    # Since PaliGemma token embedder reserves the first [num_patches] tokens
+    # for image tokens, we can use this property to merge image_embeds into
+    # input_embeds by concatenating them.
+    assert image_embeds.shape[1] == self.num_patches
+    assert input_embeds.shape[1] >= self.num_patches
+    input_embeds = torch.cat(
+        (image_embeds, input_embeds[:, self.num_patches:, :]), dim=1
+    )

     return self.decoder(
         tokens=None,
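A hedged, self-contained illustration of the merge described in the comment above, using made-up shapes: because the first `num_patches` positions of the text embeddings are reserved for image tokens, replacing that prefix by concatenation matches the `masked_scatter`-based merge without the op that fails to lower on CPU.

```python
import torch

batch, num_patches, seq_len, dim = 1, 4, 12, 8  # illustrative sizes only
image_embeds = torch.randn(batch, num_patches, dim)
input_embeds = torch.randn(batch, seq_len, dim)

# Drop the reserved image-token prefix of the text embeddings and put the
# image embeddings in its place, as the new forward() does.
merged = torch.cat((image_embeds, input_embeds[:, num_patches:, :]), dim=1)
assert merged.shape == (batch, seq_len, dim)
```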
@@ -87,7 +103,7 @@ class PaliGemma(nn.Module):
     )


-def get_model_config() -> PaliGemmaConfig:
+def get_model_config(**kwargs) -> PaliGemmaConfig:
   """Returns the model config for a PaliGemma 3B-224 model.

   Returns:
@@ -95,13 +111,13 @@ def get_model_config() -> PaliGemmaConfig:
   """
   return PaliGemmaConfig(
       image_encoder_config=image_encoder.get_image_encoder_config(),
-      decoder_config=decoder.get_decoder_config(),
+      decoder_config=decoder.get_decoder_config(**kwargs),
       image_projection_use_bias=True,
       image_token_id=257152,
   )


-def
+def get_fake_model_config() -> PaliGemmaConfig:
   return PaliGemmaConfig(
       image_encoder_config=image_encoder.get_fake_image_encoder_config(),
       decoder_config=decoder.get_fake_decoder_config(),
@@ -110,8 +126,8 @@ def get_fake_image_encoder_config() -> PaliGemmaConfig:
   )


-def build_model(checkpoint_path: str) -> PaliGemma:
-  config = get_model_config()
+def build_model(checkpoint_path: str, **kwargs) -> PaliGemma:
+  config = get_model_config(**kwargs)
   model = PaliGemma(config)
   # Load the parameters of image encoder.
   loader = loading_utils.ModelLoader(
@@ -12,9 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
-
-
+
+"""Model configuration class."""
+
+import dataclasses
 import enum
 from typing import Optional, Sequence, Union

@@ -35,7 +36,7 @@ class ActivationType(enum.Enum):

 @enum.unique
 class NormalizationType(enum.Enum):
-  """Different normalization functions"""
+  """Different normalization functions."""

   # No normalization is applied.
   NONE = enum.auto()
@@ -59,7 +60,7 @@ class AttentionType(enum.Enum):
   LOCAL_SLIDING = enum.auto()


-@dataclass
+@dataclasses.dataclass
 class NormalizationConfig:
   """Normalizater parameters."""

@@ -71,7 +72,7 @@ class NormalizationConfig:
   group_num: Optional[float] = None


-@dataclass
+@dataclasses.dataclass
 class AttentionConfig:
   """Attention model's parameters."""

@@ -90,18 +91,20 @@ class AttentionConfig:
   # Whether to use bias with Query, Key, and Value projection.
   qkv_use_bias: bool = False
   # Whether the fused q, k, v projection weights interleaves q, k, v heads.
-  # If True, the projection weights are in format
-  #
+  # If True, the projection weights are in format:
+  # `[q_head_0, k_head_0, v_head_0, q_head_1, k_head_1, v_head_1, ...]`
+  # If False, the projection weights are in format:
+  # `[q_head_0, q_head_1, ..., k_head_0, k_head_1, ... v_head_0, v_head_1, ...]`
   qkv_fused_interleaved: bool = True
   # Whether to use bias with attention output projection.
   output_proj_use_bias: bool = False
   enable_kv_cache: bool = True
   # The normalization applied to query projection's output.
-  query_norm_config: NormalizationConfig = field(
+  query_norm_config: NormalizationConfig = dataclasses.field(
       default_factory=NormalizationConfig
   )
   # The normalization applied to key projection's output.
-  key_norm_config: NormalizationConfig = field(
+  key_norm_config: NormalizationConfig = dataclasses.field(
       default_factory=NormalizationConfig
   )
   relative_attention_num_buckets: int = 0
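The expanded comment spells out the two fused-QKV weight layouts, and the same hunk moves the nested defaults to `dataclasses.field(default_factory=...)` to match the new module-level `import dataclasses`. A small hedged sketch of the layout difference, with made-up head weights standing in for the real projection rows:

```python
import torch

# Stand-ins for two heads' worth of q/k/v projection rows (made-up sizes).
rows, head_dim = 4, 2
q0, k0, v0 = (torch.full((rows, head_dim), float(i)) for i in range(3))
q1, k1, v1 = (torch.full((rows, head_dim), float(i)) for i in range(3, 6))

# qkv_fused_interleaved=True: q, k, v blocks alternate per head.
interleaved = torch.cat([q0, k0, v0, q1, k1, v1], dim=0)
# qkv_fused_interleaved=False: all q heads, then all k heads, then all v heads.
grouped = torch.cat([q0, q1, k0, k1, v0, v1], dim=0)

assert interleaved.shape == grouped.shape == (6 * rows, head_dim)
```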
@@ -114,7 +117,7 @@ class AttentionConfig:
   sliding_window_size: Optional[int] = None


-@dataclass
+@dataclasses.dataclass
 class ActivationConfig:
   type: ActivationType = ActivationType.LINEAR
   # Dimension of input and output, used in GeGLU.
@@ -122,7 +125,7 @@ class ActivationConfig:
   dim_out: Optional[int] = None


-@dataclass
+@dataclasses.dataclass
 class FeedForwardConfig:
   """FeedForward module's parameters."""

@@ -131,27 +134,27 @@ class FeedForwardConfig:
   intermediate_size: int
   use_bias: bool = False
   # The normalization applied to feed forward's input.
-  pre_ff_norm_config: NormalizationConfig = field(
+  pre_ff_norm_config: NormalizationConfig = dataclasses.field(
       default_factory=NormalizationConfig
   )
   # The normalization applied to feed forward's output.
-  post_ff_norm_config: NormalizationConfig = field(
+  post_ff_norm_config: NormalizationConfig = dataclasses.field(
       default_factory=NormalizationConfig
   )


-@dataclass
+@dataclasses.dataclass
 class TransformerBlockConfig:
   """TransformerBlock module's parameters."""

   attn_config: AttentionConfig
   ff_config: FeedForwardConfig
   # The normalization applied to attention's input.
-  pre_attention_norm_config: NormalizationConfig = field(
+  pre_attention_norm_config: NormalizationConfig = dataclasses.field(
       default_factory=NormalizationConfig
   )
   # The normalization applied to attentions's output.
-  post_attention_norm_config: NormalizationConfig = field(
+  post_attention_norm_config: NormalizationConfig = dataclasses.field(
       default_factory=NormalizationConfig
   )
   # If set to True, only attn_config.pre_attention_norm is applied to the input
@@ -163,7 +166,7 @@ class TransformerBlockConfig:
   relative_attention: bool = False


-@dataclass
+@dataclasses.dataclass
 class ImageEmbeddingConfig:
   """Image embedding parameters."""

@@ -173,7 +176,7 @@ class ImageEmbeddingConfig:
   patch_size: int


-@dataclass
+@dataclasses.dataclass
 class ModelConfig:
   """Base configurations for building a transformer architecture."""

@@ -187,7 +190,7 @@ class ModelConfig:
   block_configs: Union[TransformerBlockConfig, Sequence[TransformerBlockConfig]]

   # The normalization applied before LM head.
-  final_norm_config: NormalizationConfig = field(
+  final_norm_config: NormalizationConfig = dataclasses.field(
       default_factory=NormalizationConfig
   )

@@ -117,7 +117,7 @@ class TestModelConversion(googletest.TestCase):
   def _test_multisig_model(self, config, pytorch_model, atol, rtol):
     # prefill
     seq_len = 10
-    prefill_tokens = torch.
+    prefill_tokens = torch.zeros((1, seq_len), dtype=torch.int, device="cpu")
     prompt_token = torch.from_numpy(np.array([1, 2, 3, 4]))
     prefill_tokens[0, : len(prompt_token)] = prompt_token
     prefill_input_pos = torch.arange(0, seq_len, dtype=torch.int)
@@ -22,6 +22,7 @@ from ai_edge_torch.generative.examples.gemma import gemma1
 from ai_edge_torch.generative.examples.gemma import gemma2
 from ai_edge_torch.generative.examples.llama import llama
 from ai_edge_torch.generative.examples.openelm import openelm
+from ai_edge_torch.generative.examples.paligemma import paligemma
 from ai_edge_torch.generative.examples.phi import phi2
 from ai_edge_torch.generative.examples.phi import phi3
 from ai_edge_torch.generative.examples.qwen import qwen
@@ -55,7 +56,7 @@ class TestModelConversion(googletest.TestCase):

   def _test_model(self, config, model, signature_name, atol, rtol):
     idx = torch.from_numpy(np.array([[1, 2, 3, 4]]))
-    tokens = torch.
+    tokens = torch.zeros((1, 10), dtype=torch.int, device="cpu")
     tokens[0, :4] = idx
     input_pos = torch.arange(0, 10, dtype=torch.int)
     kv = kv_cache.KVCache.from_model_config(config)
@@ -171,6 +172,54 @@ class TestModelConversion(googletest.TestCase):
     pytorch_model = model_builder.DecoderOnlyModel(config).eval()
     self._test_model(config, pytorch_model, "prefill", atol=1e-3, rtol=1e-5)

+  @googletest.skipIf(
+      ai_edge_config.Config.use_torch_xla,
+      reason="tests with custom ops are not supported on oss",
+  )
+  def test_paligemma(self):
+    config = paligemma.get_fake_model_config()
+    pytorch_model = paligemma.PaliGemma(config).eval()
+    idx = torch.from_numpy(np.array([[1, 2, 3, 4]]))
+    image_embedding_config = config.image_encoder_config.image_embedding
+    num_patches = (
+        image_embedding_config.image_size // image_embedding_config.patch_size
+    ) ** 2
+    # Make sure the token size is longer than the number of image patches.
+    tokens_len = num_patches + 10
+    tokens = torch.zeros((1, tokens_len), dtype=torch.int, device="cpu")
+    tokens[0, :4] = idx
+    input_pos = torch.arange(0, tokens_len, dtype=torch.int)
+    kv = kv_cache.KVCache.from_model_config(config.decoder_config)
+    pixel_values = torch.zeros((1, 3, 8, 8), dtype=torch.float32, device="cpu")
+
+    edge_model = ai_edge_torch.signature(
+        "prefill_pixel",
+        pytorch_model,
+        sample_kwargs={
+            "tokens": tokens,
+            "input_pos": input_pos,
+            "kv_cache": kv,
+            "pixel_values": pixel_values,
+        },
+    ).convert()
+    edge_model.set_interpreter_builder(
+        self._interpreter_builder(edge_model.tflite_model())
+    )
+
+    self.assertTrue(
+        test_utils.compare_tflite_torch(
+            edge_model,
+            pytorch_model,
+            tokens,
+            input_pos,
+            kv,
+            pixel_values=pixel_values,
+            signature_name="prefill_pixel",
+            atol=1e-3,
+            rtol=1e-5,
+        )
+    )
+
   @googletest.skipIf(
       ai_edge_config.Config.use_torch_xla,
       reason="tests with custom ops are not supported on oss",
@@ -32,18 +32,21 @@ def compare_tflite_torch(
     signature_name: str,
     atol: float = 1e-5,
     rtol: float = 1e-5,
+    **kwargs,
 ):
   """Compares torch models and TFLite models."""
   values, spec = pytree.tree_flatten({"kv_cache": kv_cache})
   flat_names = common_utils.flat_dict_names(spec.children_specs, spec.context)
-  torch_output = torch_model(tokens, input_pos, kv_cache)
+  torch_output = torch_model(tokens, input_pos, kv_cache, **kwargs)

-
+  if "pixel_values" in kwargs:
+    kwargs["pixel_values"] = kwargs["pixel_values"].numpy()
+  kwargs.update({k: v.numpy() for k, v in zip(flat_names, values)})
   edge_output = edge_model(
       signature_name=signature_name,
       tokens=tokens.numpy(),
       input_pos=input_pos.numpy(),
-      **
+      **kwargs,
   )

   return np.allclose(
@@ -15,9 +15,11 @@

 """Common utility functions for model conversion."""

-import
+from typing import Union
+
 from ai_edge_torch._convert import converter as converter_utils
-
+import ai_edge_torch.generative.layers.kv_cache as kv_utils
+import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.quantize import quant_recipes
 import torch

@@ -25,109 +27,74 @@ import torch
 def convert_to_tflite(
     pytorch_model: torch.nn.Module,
     tflite_path: str,
-    prefill_seq_len: int
+    prefill_seq_len: Union[int, list[int]],
+    pixel_values_size: torch.Size = None,
     quantize: bool = True,
+    config: cfg.ModelConfig = None,
 ):
   """Converts a nn.Module model to multi-signature tflite model.

-  A PyTorch model will be converted to a tflite model with
-
-
-
-
-
-
-  "
-
-
+  A PyTorch model will be converted to a tflite model with several signatures:
+    * "prefill_[prefill_seq_len]" (or "prefill" if only one prefill_seq_len is
+      passed),
+    * "prefill_[preill_seq_len]_pixel" (or "prefill_pixel" if only one
+      prefill_seq_len is passed) if num_pixel_values > 0, and
+    * "decode".
+
+  "prefill_[prefill_seq_len]" (or "prefill" if only one prefill_seq_len is
+  passed) signature takes as a sample input:
+    * a tensor of shape [1, prefill_seq_len] of token sequence,
+    * a tensor of shape [1, prefill_seq_len] of token positions, and
+    * an external KV cache.
+
+  If num_pixel_values > 0, "prefill_[prefill_seq_len]_pixel" (or "prefill_pixel"
+  if only one prefill_seq_len is passed) signature takes as a sample input:
+    * a tensor of shape [1, prefill_seq_len] of token sequence,
+    * a tensor of shape [1, prefill_seq_len] of token positions,
+    * an external KV cache, and
+    * a tensor of shape [1, num_pixel_values] of pixel values.
+
+  "decode" signature takes as a sample input:
+    * a tensor of shape [1, 1] of token sequence,
+    * a tensor of shape [1, 1] of the token position, and
+    * an external KV cache.

   The final tflite model will be exported to tflite_path.

   Args:
     pytorch_model (torch.nn.Module): PyTorch model to convert to tflite.
     tflite_path (str): The tflite file path to export.
-    prefill_seq_len (int,
-
+    prefill_seq_len (Union[int, list[int]]): A list of prefill lengths to
+      export.
+    pixel_values_size (torch.Size, optional): The size of pixel values to pass
+      to the model. If None, the model is not expected to take pixel values.
     quantize (bool, optional): Whether the model should be quanized. Defaults
       to True.
+    config (cfg.ModelConfig, optional): The model config used to configure KV
+      cache. If None, it uses the config of the pytorch_model.
   """
-
-
-  prefill_input_pos = torch.arange(0, prefill_seq_len, dtype=torch.int)
-  decode_token = torch.tensor([[0]], dtype=torch.int)
-  decode_input_pos = torch.tensor([0], dtype=torch.int)
-  kv = kv_utils.KVCache.from_model_config(pytorch_model.config)
-
-  quant_config = quant_recipes.full_int8_dynamic_recipe() if quantize else None
-  edge_model = (
-      ai_edge_torch.signature(
-          'prefill',
-          pytorch_model,
-          sample_kwargs={
-              'tokens': prefill_tokens,
-              'input_pos': prefill_input_pos,
-              'kv_cache': kv,
-          },
-      )
-      .signature(
-          'decode',
-          pytorch_model,
-          sample_kwargs={
-              'tokens': decode_token,
-              'input_pos': decode_input_pos,
-              'kv_cache': kv,
-          },
-      )
-      .convert(quant_config=quant_config)
+  prefill_seq_lens = (
+      [prefill_seq_len] if isinstance(prefill_seq_len, int) else prefill_seq_len
   )
-  edge_model.export(tflite_path)
-
-
-def convert_to_tflite_multi_prefill_lens(
-    pytorch_model: torch.nn.Module,
-    tflite_path: str,
-    prefill_seq_lens: list[int],
-    quantize: bool = True,
-):
-  """Converts a nn.Module model to multi-signature tflite model with different
-
-  prefill lengths.
-
-  A PyTorch model will be converted to a tflite model with several signatures:
-  "prefill_[prefill_seq_len]" and "decode".
-
-  "prefill_[prefill_seq_len]" signature takes a tensor of shape [1,
-  prefill_seq_len] of token
-  sequence, a tensor of shape [1, prefill_seq_len] of token positions, and an
-  external KV cache as a sample input.
-
-  "decode" signature takes a tensor of shape [1, 1] of token sequence, a tensor
-  of shape [1, 1] of the token position, and an external KV cache as a sample
-  input.
-
-  The final tflite model will be exported to tflite_path.

-  Args:
-    pytorch_model (torch.nn.Module): PyTorch model to convert to tflite.
-    tflite_path (str): The tflite file path to export.
-    prefill_seq_lens (list[int]): A list of prefill lengths to export.
-    quantize (bool, optional): Whether the model should be quanized. Defaults
-      to True.
-  """
   # Tensors used to trace the model graph during conversion.
   prefill_tokens_list = []
   prefill_input_pos_list = []
-  for
-  prefill_tokens_list.append(
-
-
-
-
-
+  for seq_len in prefill_seq_lens:
+    prefill_tokens_list.append(torch.full((1, seq_len), 0, dtype=torch.int))
+    prefill_input_pos_list.append(torch.arange(0, seq_len, dtype=torch.int))
+
+  prefill_pixel_values = (
+      torch.full((1,) + pixel_values_size, 0, dtype=torch.float32)
+      if pixel_values_size
+      else None
+  )

   decode_token = torch.tensor([[0]], dtype=torch.int)
   decode_input_pos = torch.tensor([0], dtype=torch.int)
-  kv = kv_utils.KVCache.from_model_config(
+  kv = kv_utils.KVCache.from_model_config(
+      config if config else pytorch_model.config
+  )

   quant_config = quant_recipes.full_int8_dynamic_recipe() if quantize else None
   converter = converter_utils.Converter()
@@ -135,8 +102,12 @@ def convert_to_tflite_multi_prefill_lens(
     prefill_seq_len = prefill_seq_lens[i]
     prefill_tokens = prefill_tokens_list[i]
     prefill_input_pos = prefill_input_pos_list[i]
+    if i == 0 and len(prefill_seq_lens) == 1:
+      prefill_signature_name = 'prefill'
+    else:
+      prefill_signature_name = f'prefill_{prefill_seq_len}'
     converter.add_signature(
-
+        prefill_signature_name,
         pytorch_model,
         sample_kwargs={
             'tokens': prefill_tokens,
@@ -144,8 +115,19 @@ def convert_to_tflite_multi_prefill_lens(
            'input_pos': prefill_input_pos,
            'kv_cache': kv,
         },
     )
+    if prefill_pixel_values is not None:
+      converter.add_signature(
+          prefill_signature_name + '_pixel',
+          pytorch_model,
+          sample_kwargs={
+              'tokens': prefill_tokens,
+              'input_pos': prefill_input_pos,
+              'kv_cache': kv,
+              'pixel_values': prefill_pixel_values,
+          },
+      )

-
+  converter.add_signature(
       'decode',
       pytorch_model,
       sample_kwargs={
@@ -153,5 +135,7 @@ def convert_to_tflite_multi_prefill_lens(
           'input_pos': decode_input_pos,
           'kv_cache': kv,
       },
-  )
+  )
+
+  edge_model = converter.convert(quant_config=quant_config)
   edge_model.export(tflite_path)
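Taken together, the reworked `convert_to_tflite` above folds the old single-prefill and multi-prefill entry points into one function. A hedged usage sketch based on the new signature and the example converters in this release (checkpoint and output paths are placeholders):

```python
from ai_edge_torch.generative.examples.gemma import gemma2
from ai_edge_torch.generative.utilities import converter

# Placeholder checkpoint path; kv_cache_max_len mirrors the example converters.
pytorch_model = gemma2.build_2b_model(
    '/path/to/gemma2_checkpoint', kv_cache_max_len=1280
)

# A list of prefill lengths yields prefill_256/prefill_512/prefill_1024 plus
# "decode" signatures; a single int yields just "prefill" and "decode".
converter.convert_to_tflite(
    pytorch_model,
    tflite_path='/tmp/gemma2_multi_prefill.tflite',
    prefill_seq_len=[256, 512, 1024],
    quantize=True,
)
```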
ai_edge_torch/version.py CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-torch-nightly
-Version: 0.3.0.
+Version: 0.3.0.dev20241120
 Summary: Supporting PyTorch models with the Google AI Edge TFLite runtime.
 Home-page: https://github.com/google-ai-edge/ai-edge-torch
 Keywords: On-Device ML,AI,Google,TFLite,PyTorch,LLMs,GenAI
@@ -3,7 +3,7 @@ ai_edge_torch/config.py,sha256=FMWeCH2b7HYILBvaI1iZNnYCO4WAhDOwBZBmIE-xrF0,909
 ai_edge_torch/conftest.py,sha256=r0GTrhMRhlmOGrrkvumHN8hkmyug6WvF60vWq8wRIBI,758
 ai_edge_torch/fx_pass_base.py,sha256=518ziQ0TUxqum2qZXqlD8qr65pHPh8ZNLnwFC6zvK3k,4253
 ai_edge_torch/model.py,sha256=N-pNpTxzhaFGhWhnSGd70lBzb9VlEhTOq5mddU7bvvI,5542
-ai_edge_torch/version.py,sha256=
+ai_edge_torch/version.py,sha256=52sF7t2CBQE8RcB2Hcmo-f6_BLyCW9NzWZ-wTKM9ho4,706
 ai_edge_torch/_convert/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/_convert/conversion.py,sha256=HwzfRx_DX5TLtPqwEH1_NOm38_INvHzHl4_mX67KOdQ,5448
 ai_edge_torch/_convert/conversion_utils.py,sha256=Sr8qXVcTwc-ZnZmK7yxVrIOOp1S_vNrwzC0zUvLTI2o,2160
@@ -12,7 +12,7 @@ ai_edge_torch/_convert/signature.py,sha256=rGpBNss3Y9FCCCcdBwDo16KqavJi8N5P0M_6W
 ai_edge_torch/_convert/to_channel_last_io.py,sha256=_31phf7TYgZY2ftpNbrdlB1RhDium1lz_BXEQ6IsMFc,2893
 ai_edge_torch/_convert/fx_passes/__init__.py,sha256=NVe-eGcm7j8jZpP2pcMhC8j5dVjgR1pPzyXhHdvKH4E,1267
 ai_edge_torch/_convert/fx_passes/build_aten_composite_pass.py,sha256=doaww8KqrgRTD5LotBVAIRFsEqzPn9R5lcGehBJOczA,9098
-ai_edge_torch/_convert/fx_passes/build_interpolate_composite_pass.py,sha256=
+ai_edge_torch/_convert/fx_passes/build_interpolate_composite_pass.py,sha256=qb4JBDi4Xca14JJUIcaaZQIJiyqKyHJF49jsRCIFCVA,4335
 ai_edge_torch/_convert/fx_passes/inject_mlir_debuginfo_pass.py,sha256=IlZuK42kfVcRqAWZp4j2k_81T2uWo9T2558U_GPJAlU,2327
 ai_edge_torch/_convert/fx_passes/remove_non_user_outputs_pass.py,sha256=f1IUVWyhioOClsMiZzLyynoW2R17U83vA-7Q-3pGPM4,2126
 ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/__init__.py,sha256=lxnoH-WGLeiQIF8XjMGodjiZEFTxucl7g05N7MR9OPk,796
@@ -27,7 +27,7 @@ ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_partitio
 ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_partitioners/min_cut.py,sha256=mzfL9cf0qBnpmxM_OlMQFvQsEZV2B_Mia9yEJV4J7rI,7135
 ai_edge_torch/_convert/test/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/_convert/test/test_convert.py,sha256=yXfeWDw9u_rTS3B6kvvFPo5E4XNT3zKTSLFSBSAI9Fc,15502
-ai_edge_torch/_convert/test/test_convert_composites.py,sha256=
+ai_edge_torch/_convert/test/test_convert_composites.py,sha256=ELwHxTdTTCJm30aWg_PZXxg9HvDM4Hnf9lT0wwOWT6s,8060
 ai_edge_torch/_convert/test/test_convert_multisig.py,sha256=6_C2R9--KyNR7_oezZIAfyTSR97tOeEWy4XGcbSxBDE,5778
 ai_edge_torch/_convert/test/test_to_channel_last_io.py,sha256=1o-gUiwzIuO67FNAJ8DeyKv8fVUeZVNNNwofNVDjYeU,3024
 ai_edge_torch/debug/__init__.py,sha256=N05Mmvi41KgSuK0JhuMejERESgP8QekiGdp9_PEyuKU,742
@@ -45,7 +45,7 @@ ai_edge_torch/generative/examples/amd_llama_135m/convert_to_tflite.py,sha256=-n7
 ai_edge_torch/generative/examples/amd_llama_135m/verify.py,sha256=-9Nb9D818YSJR3olVtBwoLNeMMD5qE58YBnsA67hlHg,2421
 ai_edge_torch/generative/examples/gemma/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py,sha256=evmUj_4yygQthSRU-ke-Xn1qFNDCZKbegqINWfruKwU,2184
-ai_edge_torch/generative/examples/gemma/convert_gemma2_multi_prefills.py,sha256=
+ai_edge_torch/generative/examples/gemma/convert_gemma2_multi_prefills.py,sha256=6d9wG5MnStEys34_gFXwKTMRXUBFLTW1jEzCoWkAtwM,2224
 ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py,sha256=RZDs6oY-NLYrPNtfuJDweIHzGUL2kzpIc3AW_1p8gGg,2186
 ai_edge_torch/generative/examples/gemma/gemma1.py,sha256=oSbysiPvwp5efMbNYZop3HrxDMGiD15Tmz-HiQuTr2E,3315
 ai_edge_torch/generative/examples/gemma/gemma2.py,sha256=RQFQDMEnIVp8PefcCTr7P0CvllKI7FVoIJLXbPLLIsc,9056
@@ -61,9 +61,10 @@ ai_edge_torch/generative/examples/openelm/convert_to_tflite.py,sha256=85FVEt6cKF
 ai_edge_torch/generative/examples/openelm/openelm.py,sha256=sFakstoPDcOHSak0IGFEEq_HQMBBSMcx-WVCDZqcVDo,4411
 ai_edge_torch/generative/examples/openelm/verify.py,sha256=VkigoqhAr8ew95neb3TifYv-SLOSheaWKv2AH0iKDrc,2441
 ai_edge_torch/generative/examples/paligemma/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
+ai_edge_torch/generative/examples/paligemma/convert_to_tflite.py,sha256=dT7dnx1dzGzFiH5gQJ4M6zcTLSRFvSDpi3IuZ9_vd78,2706
 ai_edge_torch/generative/examples/paligemma/decoder.py,sha256=XMeznGBbjRJidv725L6_7XzkYskS2cDjf8NGB18FNhg,4944
-ai_edge_torch/generative/examples/paligemma/image_encoder.py,sha256=
-ai_edge_torch/generative/examples/paligemma/paligemma.py,sha256=
+ai_edge_torch/generative/examples/paligemma/image_encoder.py,sha256=yKPWG8aBp-GuzeyQntlzwTTcGBBjvUywVGRjnlNprmo,5574
+ai_edge_torch/generative/examples/paligemma/paligemma.py,sha256=pIjsS-IUFevRjFA9153YT1vtWXATGWHsgVQQX_nWaZQ,5280
 ai_edge_torch/generative/examples/paligemma/verify.py,sha256=Bkbgy-GFjnMNYjduWUM7YLWarPTwmj1v38eHY-PdBlM,4874
 ai_edge_torch/generative/examples/paligemma/verify_decoder.py,sha256=al5wMPWri4IRVWrLmCplPi6uoCzwh0vBHMGnCt-XUqo,2690
 ai_edge_torch/generative/examples/paligemma/verify_image_encoder.py,sha256=pSekf1BybhieQz3cQx_llbRQHxczXbTqool8fOyGj_0,3114
@@ -117,7 +118,7 @@ ai_edge_torch/generative/layers/attention_utils.py,sha256=zBVwlBUTs-nStIKCZG0ks5
 ai_edge_torch/generative/layers/builder.py,sha256=Z5LyzCEThgnYZeyViakaE3yJVzTGHtw13acHsAQR15U,5050
 ai_edge_torch/generative/layers/feed_forward.py,sha256=hdICat-8gW7-vxDAevJQ8NQ-mynllPiqLdXQMF6JMnc,4189
 ai_edge_torch/generative/layers/kv_cache.py,sha256=lbm-yJ1jGPtcgWS4C3FmSnB1IlxqDE7g0BLRh3PN4N4,6324
-ai_edge_torch/generative/layers/model_config.py,sha256=
+ai_edge_torch/generative/layers/model_config.py,sha256=viX51T_naJ9sPpPxPoMnSueBPYE2zxWNOD0xn0f-_bM,7510
 ai_edge_torch/generative/layers/normalization.py,sha256=eKAGst9rPuyRFExMcQFJO7R3iHdCtlmjeF_lITjLhwE,6498
 ai_edge_torch/generative/layers/rotary_position_embedding.py,sha256=CZqOoibLcHvUgrgaIIWAlmk3XgE2inzx340MN-npLoU,1347
 ai_edge_torch/generative/layers/scaled_dot_product_attention.py,sha256=gXxh3papKy4FBpGEX7VyZ7rZ1Js6aHK70Q6DKrVSckY,4154
@@ -135,12 +136,12 @@ ai_edge_torch/generative/quantize/supported_schemes.py,sha256=FjdycEOvxRgBmQdZVu
 ai_edge_torch/generative/test/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/test/test_kv_cache.py,sha256=W6Bh0gYDzmwb0j9HdD5_D7Z7FPToP2HSyFrmwIXuFqo,3793
 ai_edge_torch/generative/test/test_loader.py,sha256=9mQUeeZKOVApOWSWl2cN9c10axZjMKM1-0Zd823CCS4,3449
-ai_edge_torch/generative/test/test_model_conversion.py,sha256=
-ai_edge_torch/generative/test/test_model_conversion_large.py,sha256=
+ai_edge_torch/generative/test/test_model_conversion.py,sha256=aZFaheg2sq7rEccch1TZM6W4BSfpJZjrM9Gyp4hVGYs,6351
+ai_edge_torch/generative/test/test_model_conversion_large.py,sha256=xWV9O2wuRHc4VNBWuWipiuqXa3AJhiV1nmjewAZHHWM,11177
 ai_edge_torch/generative/test/test_quantize.py,sha256=8geJhKwYBU20m0mdGPD1BUFwQ0lZKNtCB04SOLO18y4,5980
-ai_edge_torch/generative/test/utils.py,sha256=
+ai_edge_torch/generative/test/utils.py,sha256=eQ-hjd1eXuHJF3SJK6_CrjgOZVzmG_4VEdH7Z1gH_lA,1897
 ai_edge_torch/generative/utilities/__init__.py,sha256=-_jxnnFnCgnTU4oTm4MnRsvL5lqhomBNdFBbqfmfHPo,720
-ai_edge_torch/generative/utilities/converter.py,sha256=
+ai_edge_torch/generative/utilities/converter.py,sha256=S14STbyxV6A9HKy1BdUo49f2jS6Ij0RL9mVAFUMWYV8,5291
 ai_edge_torch/generative/utilities/loader.py,sha256=A3SOjPXp--AsvoP1hqj5QKWE4sgxoFc3H5EBUz_Eogc,13531
 ai_edge_torch/generative/utilities/model_builder.py,sha256=OcHJhEqc3LjI3STli6cyn71m1mdzr7QbzF9fqSNCXrg,5730
 ai_edge_torch/generative/utilities/stable_diffusion_loader.py,sha256=dqPD9qRXEWtU3ombslOC-BE2l_dMwHoCNu7NsIJhsso,36158
@@ -193,8 +194,8 @@ ai_edge_torch/quantize/quant_config.py,sha256=U0KisSW-uZkoMJcy-ZP9W57p3tsa594fr9
 ai_edge_torch/testing/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/testing/model_coverage/__init__.py,sha256=5P8J6Zk5YYtDvTBucFvB9NGSRI7Gw_24WnrbhXgycEE,765
 ai_edge_torch/testing/model_coverage/model_coverage.py,sha256=UPB448aMDUyC0HNYVqio2rcJPnDN0tBQMP08J6vPYew,4718
-ai_edge_torch_nightly-0.3.0.
-ai_edge_torch_nightly-0.3.0.
-ai_edge_torch_nightly-0.3.0.
-ai_edge_torch_nightly-0.3.0.
-ai_edge_torch_nightly-0.3.0.
+ai_edge_torch_nightly-0.3.0.dev20241120.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ai_edge_torch_nightly-0.3.0.dev20241120.dist-info/METADATA,sha256=1Nv_QeerPRw888sOTf4jHx5Ihu-PJD9rL8GOpRHSTa4,1897
+ai_edge_torch_nightly-0.3.0.dev20241120.dist-info/WHEEL,sha256=bFJAMchF8aTQGUgMZzHJyDDMPTO3ToJ7x23SLJa1SVo,92
+ai_edge_torch_nightly-0.3.0.dev20241120.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
+ai_edge_torch_nightly-0.3.0.dev20241120.dist-info/RECORD,,