ai-edge-torch-nightly 0.3.0.dev20250108__py3-none-any.whl → 0.3.0.dev20250110__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- ai_edge_torch/generative/examples/gemma/gemma2.py +54 -24
- ai_edge_torch/generative/examples/llama/llama.py +29 -25
- ai_edge_torch/generative/examples/paligemma/decoder.py +4 -2
- ai_edge_torch/generative/examples/paligemma/decoder2.py +16 -11
- ai_edge_torch/generative/examples/paligemma/paligemma.py +3 -0
- ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py +1 -1
- ai_edge_torch/generative/examples/phi/phi3.py +26 -23
- ai_edge_torch/generative/examples/smollm/convert_v2_to_tflite.py +71 -0
- ai_edge_torch/generative/examples/smollm/smollm.py +38 -0
- ai_edge_torch/generative/examples/smollm/verify.py +18 -2
- ai_edge_torch/generative/examples/test_models/toy_model.py +16 -5
- ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py +7 -5
- ai_edge_torch/generative/layers/attention.py +4 -29
- ai_edge_torch/generative/layers/model_config.py +6 -2
- ai_edge_torch/generative/layers/rotary_position_embedding.py +34 -28
- ai_edge_torch/generative/test/test_model_conversion_large.py +10 -0
- ai_edge_torch/generative/utilities/model_builder.py +20 -14
- ai_edge_torch/hlfb/mark_pattern/__init__.py +19 -7
- ai_edge_torch/hlfb/mark_pattern/{passes.py → fx_utils.py} +9 -2
- ai_edge_torch/hlfb/mark_pattern/pattern.py +9 -8
- ai_edge_torch/hlfb/test/test_mark_pattern.py +26 -0
- ai_edge_torch/version.py +1 -1
- {ai_edge_torch_nightly-0.3.0.dev20250108.dist-info → ai_edge_torch_nightly-0.3.0.dev20250110.dist-info}/METADATA +1 -1
- {ai_edge_torch_nightly-0.3.0.dev20250108.dist-info → ai_edge_torch_nightly-0.3.0.dev20250110.dist-info}/RECORD +27 -26
- {ai_edge_torch_nightly-0.3.0.dev20250108.dist-info → ai_edge_torch_nightly-0.3.0.dev20250110.dist-info}/LICENSE +0 -0
- {ai_edge_torch_nightly-0.3.0.dev20250108.dist-info → ai_edge_torch_nightly-0.3.0.dev20250110.dist-info}/WHEEL +0 -0
- {ai_edge_torch_nightly-0.3.0.dev20250108.dist-info → ai_edge_torch_nightly-0.3.0.dev20250110.dist-info}/top_level.txt +0 -0
ai_edge_torch/generative/examples/gemma/gemma2.py CHANGED
@@ -15,13 +15,14 @@
 
 """Example of building a Gemma2 model."""
 
-from typing import Optional, Tuple
+from typing import List, Optional, Tuple
 
 from ai_edge_torch.generative.layers import attention
 from ai_edge_torch.generative.layers import builder
 from ai_edge_torch.generative.layers import kv_cache as kv_utils
 import ai_edge_torch.generative.layers.attention_utils as attn_utils
 import ai_edge_torch.generative.layers.model_config as cfg
+import ai_edge_torch.generative.layers.rotary_position_embedding as rotary_pos_emb
 from ai_edge_torch.generative.utilities import model_builder
 import ai_edge_torch.generative.utilities.loader as loading_utils
 import torch
@@ -103,17 +104,12 @@ class Gemma2(nn.Module):
         config.embedding_dim,
         config.final_norm_config,
     )
-    # Gemma2 has same hyper parameters for each layer except for attention
-    # types. Use the first layer.
-    attn_config = config.block_config(0).attn_config
-    self.rope_cache = attn_utils.build_rope_cache(
-        size=config.kv_cache_max,
-        dim=int(attn_config.rotary_percentage * attn_config.head_dim),
-        base=attn_config.rotary_base,
-    )
     self.mask_cache = attn_utils.build_causal_mask_cache(
         size=config.kv_cache_max,
     )
+    # Gemma2 has same hyper parameters for each layer except for attention
+    # types. Use the first layer.
+    attn_config = config.block_config(0).attn_config
     self.sliding_window_mask_cache = attn_utils.build_sliding_window_mask_cache(
         size=config.kv_cache_max,
         window_size=attn_config.sliding_window_size,
@@ -133,6 +129,7 @@ class Gemma2(nn.Module):
       tokens: torch.Tensor,
       input_pos: torch.Tensor,
       kv_cache: kv_utils.KVCache,
+      mask: Optional[torch.Tensor] = None,
       export_config: Optional[model_builder.ExportConfig] = None,
   ) -> dict[torch.Tensor, kv_utils.KVCache]:
     _, seq_len = tokens.size()
@@ -140,29 +137,59 @@ class Gemma2(nn.Module):
         f"Cannot forward sequence of length {seq_len}, max seq length is only"
         f" {self.config.max_seq_len}"
     )
+
+    # token embeddings of shape (b, t, n_embd)
+    input_embeds = self.tok_embedding(tokens)
+    # RoPE parameters are the same for all blocks. Use the first layer.
+    attn_config = self.config.block_config(0).attn_config
+    n_elem = int(attn_config.rotary_percentage * attn_config.head_dim)
+    rope = rotary_pos_emb.build_rope(
+        input_pos, n_elem, attn_config.head_dim, attn_config.rotary_base
+    )
+    mask = [
+        self.get_attention_mask(
+            self.config.block_config(i).attn_config.attn_type, input_pos
+        )
+        for i in range(self.config.num_layers)
+    ]
+
+    return self._forward_with_embeds(
+        input_embeds, rope, mask, input_pos, kv_cache, export_config
+    )
+
+  def _forward_with_embeds(
+      self,
+      input_embeds: torch.Tensor,
+      rope: Tuple[torch.Tensor, torch.Tensor],
+      mask: List[torch.Tensor],
+      input_pos: torch.Tensor,
+      kv_cache: kv_utils.KVCache,
+      export_config: Optional[model_builder.ExportConfig] = None,
+  ) -> dict[torch.Tensor, kv_utils.KVCache]:
+    """Forwards the model with input embeddings."""
     assert len(self.transformer_blocks) == len(kv_cache.caches), (
         "The number of transformer blocks and the number of KV cache entries"
         " must be the same."
     )
 
-    cos, sin = self.rope_cache
-    cos = cos.index_select(0, input_pos)
-    sin = sin.index_select(0, input_pos)
-
-    # token embeddings of shape (b, t, n_embd)
-    x = self.tok_embedding(tokens)
-    x = x * (self.config.embedding_dim**0.5)
-
-    updated_kv_entires = []
+    if self.config.embedding_scale is not None:
+      input_embeds = input_embeds * self.config.embedding_scale
+    x = input_embeds
+    updated_kv_entries = []
+    mask_input = mask is not None
     for i, block in enumerate(self.transformer_blocks):
-      mask = self.get_attention_mask(
-          block.config.attn_config.attn_type, input_pos
+      mask = (
+          mask
+          if mask_input
+          else self.get_attention_mask(
+              block.config.attn_config.attn_type, input_pos
+          )
       )
       kv_entry = kv_cache.caches[i] if kv_cache else None
-      x, kv_entry = block(x, (cos, sin), mask, input_pos, kv_entry)
+      x, kv_entry = block(x, rope, mask[i], input_pos, kv_entry)
       if kv_entry:
-        updated_kv_entires.append(kv_entry)
-    updated_kv_cache = kv_utils.KVCache(tuple(updated_kv_entires))
+        updated_kv_entries.append(kv_entry)
+    updated_kv_cache = kv_utils.KVCache(tuple(updated_kv_entries))
 
     if export_config is not None:
       if (
@@ -228,11 +255,13 @@ def get_model_config_2b(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
   )
 
   num_layers = 26
+  embedding_dim = 2304
   config = cfg.ModelConfig(
       vocab_size=256000,
       num_layers=num_layers,
       max_seq_len=8192,
-      embedding_dim=2304,
+      embedding_dim=embedding_dim,
+      embedding_scale=embedding_dim**0.5,
       kv_cache_max_len=kv_cache_max_len,
       block_configs=[get_block_config(i) for i in range(num_layers)],
      final_norm_config=norm_config,
@@ -249,6 +278,7 @@ def get_fake_model_config(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
   config.num_layers = 2
   config.max_seq_len = 2 * kv_cache_max_len
   config.embedding_dim = 128
+  config.embedding_scale = config.embedding_dim**0.5
   config.block_configs = config.block_configs[: config.num_layers]
   for block_config in config.block_configs:
     block_config.attn_config.num_heads = 4
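
The gemma2.py changes above replace a RoPE cache precomputed once in `__init__` with cos/sin values computed per forward call from the current `input_pos`. A minimal standalone sketch of that pattern, with illustrative names and shapes (this is not the library implementation):

```python
import torch

def rope_from_positions(input_pos: torch.Tensor, head_dim: int, base: int = 10_000):
  # One frequency per pair of head-dimension channels.
  freq_exponents = (2.0 / head_dim) * torch.arange(head_dim // 2, dtype=torch.float32)
  timescale = float(base) ** freq_exponents                  # (head_dim // 2,)
  radians = input_pos.float()[:, None] / timescale[None, :]  # (seq, head_dim // 2)
  return torch.cos(radians), torch.sin(radians)

# Only the positions actually being decoded are materialized, so no
# (max_seq_len, n_elem) buffer has to live in the exported model.
cos, sin = rope_from_positions(torch.arange(8), head_dim=64)
```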
ai_edge_torch/generative/examples/llama/llama.py CHANGED
@@ -15,6 +15,7 @@
 
 """Example of building Llama 3.2 models."""
 
+from functools import partial
 import math
 from typing import Tuple
 
@@ -26,8 +27,8 @@ TENSOR_NAMES = model_builder.TENSOR_NAMES
 
 
 def _build_llama3_rope_cache(
-    size: int,
-    dim: int,
+    input_pos: torch.Tensor,
+    n_elem: int,
     base: int,
     condense_ratio: int,
     dtype: torch.dtype,
@@ -36,8 +37,9 @@ def _build_llama3_rope_cache(
     low_freq_factor: float,
     high_freq_factor: float,
     max_seq_len: int,
+    **kwargs,
 ) -> Tuple[torch.Tensor, torch.Tensor]:
-  """
+  """Computes Rotary Positional Embeddings for Llama 3.2 model.
 
   It's a modified version of attn_utils.build_rope_cache with additional
   arguments for Llama 3.2 model. It precomputes Rotary Positional Embedding Sin
@@ -47,13 +49,12 @@ def _build_llama3_rope_cache(
   https://github.com/huggingface/transformers/blob/main/src/transformers/modeling_rope_utils.py#L307
 
   Args:
-
-
-    base (int
-    condense_ratio (int
-
-
-    device (torch.device, optional): Output tensor's data type.
+    input_pos (torch.Tensor): the given input sequence positions
+    n_elem (int): Each sequence's dimmension.
+    base (int): Rope base value.
+    condense_ratio (int): The ratio by which sequence indicies are condensed.
+    dtype (torch.dtype): Output tensor's data type.
+    device (torch.device): Output tensor's data type.
     factor (float): Factor to scale theta down for tokens in long range in the
       sequence.
     low_freq_factor (float): Factor to determine if tokens are in long range
@@ -66,7 +67,7 @@ def _build_llama3_rope_cache(
   Returns:
     Tuple[torch.Tensor, torch.Tensor]: Rope's Cosine and Sine waves.
   """
-  theta = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
+  theta = 1.0 / (base ** (torch.arange(0, n_elem, 2).float() / n_elem))
   low_freq_wavelen = max_seq_len / low_freq_factor
   high_freq_wavelen = max_seq_len / high_freq_factor
   wavelen = 2 * math.pi / theta
@@ -81,7 +82,7 @@ def _build_llama3_rope_cache(
   is_medium = ~(wavelen < high_freq_wavelen) * ~(wavelen > low_freq_wavelen)
   theta = torch.where(is_medium, smoothed_theta, theta)
 
-  seq_idx = torch.arange(size) / condense_ratio
+  seq_idx = input_pos / condense_ratio
   idx_theta = torch.outer(seq_idx, theta)
   cos = torch.cos(idx_theta).to(dtype=dtype, device=device)
   sin = torch.sin(idx_theta).to(dtype=dtype, device=device)
@@ -97,18 +98,6 @@ class Llama(model_builder.DecoderOnlyModel):
   def __init__(self, config: cfg.ModelConfig):
     super().__init__(config)
     attn_config = self.config.block_config(0).attn_config
-    self.rope_cache = _build_llama3_rope_cache(
-        size=self.config.kv_cache_max,
-        dim=int(attn_config.rotary_percentage * attn_config.head_dim),
-        base=attn_config.rotary_base,
-        condense_ratio=1,
-        dtype=torch.float32,
-        device=torch.device("cpu"),
-        factor=32.0,
-        low_freq_factor=1.0,
-        high_freq_factor=4.0,
-        max_seq_len=self.config.max_seq_len,
-    )
 
 
 def get_1b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
@@ -140,15 +129,30 @@ def get_1b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       pre_attention_norm_config=norm_config,
       post_attention_norm_config=norm_config,
   )
+
+  max_seq_len = 8192
+  # Create the RoPE callable
+  build_rope = partial(
+      _build_llama3_rope_cache,
+      condense_ratio=1,
+      dtype=torch.float32,
+      device=torch.device("cpu"),
+      factor=32.0,
+      low_freq_factor=1.0,
+      high_freq_factor=4.0,
+      max_seq_len=max_seq_len,
+  )
+
   config = cfg.ModelConfig(
       vocab_size=128256,
       num_layers=16,
-      max_seq_len=8192,
+      max_seq_len=max_seq_len,
      embedding_dim=2048,
       kv_cache_max_len=kv_cache_max_len,
       block_configs=block_config,
       final_norm_config=norm_config,
       enable_hlfb=True,
+      build_rope=build_rope,
   )
   return config
 
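
The llama.py rewrite keeps the Llama 3.2 frequency-scaling logic but moves it behind a callable: `functools.partial` pins the model-specific hyperparameters once, and the bound function is stored on the config and later invoked with only the per-call arguments. A toy sketch of the binding pattern (`toy_rope` is a stand-in, not `_build_llama3_rope_cache`, and its scaling is deliberately simplified):

```python
from functools import partial
import torch

def toy_rope(input_pos: torch.Tensor, n_elem: int, base: int, factor: float):
  # Illustrative frequency computation only; the real Llama 3.2 version
  # rescales theta per wavelength band as shown in the diff above.
  theta = 1.0 / (base ** (torch.arange(0, n_elem, 2).float() / n_elem)) / factor
  idx_theta = torch.outer(input_pos.float(), theta)
  return torch.cos(idx_theta), torch.sin(idx_theta)

build_rope = partial(toy_rope, base=500_000, factor=32.0)  # bound once, in the config
cos, sin = build_rope(torch.arange(4), n_elem=64)          # per-call arguments only
```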
ai_edge_torch/generative/examples/paligemma/decoder.py CHANGED
@@ -54,6 +54,7 @@ class Decoder(model_builder.DecoderOnlyModel):
       input_pos: torch.Tensor,
       kv_cache: kv_utils.KVCache,
       input_embeds: torch.Tensor = None,
+      mask: Optional[torch.Tensor] = None,
       export_config: Optional[model_builder.ExportConfig] = None,
       called_by_generate: bool = True,
   ) -> dict[torch.Tensor, kv_utils.KVCache]:
@@ -73,8 +74,9 @@ class Decoder(model_builder.DecoderOnlyModel):
     # The first part of input_embeds are image embeddings. Diagonal causal mask
     # doesn't work here.
     embeds_len = input_embeds.shape[1]
-    mask = torch.zeros(embeds_len, self.config.kv_cache_max)
-    mask[:, embeds_len:] = float("-inf")
+    if mask is None:
+      mask = torch.zeros(embeds_len, self.config.kv_cache_max)
+      mask[:, embeds_len:] = float("-inf")
 
     return self._forward_with_embeds(
         input_embeds, rope, mask, input_pos, kv_cache
ai_edge_torch/generative/examples/paligemma/decoder2.py CHANGED
@@ -57,6 +57,7 @@ class Decoder2(gemma2.Gemma2):
       input_pos: torch.Tensor,
       kv_cache: kv_utils.KVCache,
       input_embeds: torch.Tensor = None,
+      mask: Optional[torch.Tensor] = None,
       export_config: Optional[model_builder.ExportConfig] = None,
       called_by_generate: bool = True,
   ) -> dict[torch.Tensor, kv_utils.KVCache]:
@@ -73,17 +74,21 @@ class Decoder2(gemma2.Gemma2):
         repo_pos, n_elem, attn_config.head_dim, attn_config.rotary_base
     )
 
-    if called_by_generate:
-
-
-
-
-
-
-
-
-
-
+    if mask is None:
+      if called_by_generate:
+        # PaliGemma2 generate() use a diagonal causal mask even with image embeds.
+        mask = [
+            self.get_attention_mask(
+                self.config.block_config(i).attn_config.attn_type, input_pos
+            )
+            for i in range(self.config.num_layers)
+        ]
+      else:
+        # By default, don't mask image embeds with a diagonal causal mask.
+        embeds_len = input_embeds.shape[1]
+        mask = torch.zeros(embeds_len, self.config.kv_cache_max)
+        mask[:, embeds_len:] = float("-inf")
+        mask = [mask] * self.config.num_layers
 
     return self._forward_with_embeds(
         input_embeds, rope, mask, input_pos, kv_cache, export_config
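
For reference, the default mask these decoders build when none is passed in lets every image-prefix position attend to every slot inside the prefix, while all KV slots past the prefix are blocked. A self-contained sketch with illustrative sizes:

```python
import torch

embeds_len, kv_cache_max = 4, 8       # illustrative sizes
mask = torch.zeros(embeds_len, kv_cache_max)
mask[:, embeds_len:] = float("-inf")  # block everything beyond the prefix
# Row i can attend to slots 0..embeds_len-1 and nothing else.
```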
ai_edge_torch/generative/examples/paligemma/paligemma.py CHANGED
@@ -70,6 +70,7 @@ class PaliGemma(nn.Module):
       tokens: torch.Tensor,
       input_pos: torch.Tensor,
       kv_cache: kv_utils.KVCache,
+      mask: Optional[torch.Tensor] = None,
       pixel_values: torch.Tensor = None,
       export_config: Optional[model_builder.ExportConfig] = None,
       called_by_generate: bool = True,
@@ -79,6 +80,7 @@ class PaliGemma(nn.Module):
         tokens=tokens,
         input_pos=input_pos,
         kv_cache=kv_cache,
+        mask=mask,
         input_embeds=None,
         export_config=export_config,
         called_by_generate=called_by_generate,
@@ -111,6 +113,7 @@ class PaliGemma(nn.Module):
         tokens=None,
         input_pos=input_pos,
         kv_cache=kv_cache,
+        mask=mask,
         input_embeds=input_embeds,
         export_config=export_config,
         called_by_generate=called_by_generate,
ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py CHANGED
@@ -26,7 +26,7 @@ from ai_edge_torch.generative.utilities.model_builder import ExportConfig
 
 _CHECKPOINT_PATH = flags.DEFINE_string(
     'checkpoint_path',
-    os.path.join(pathlib.Path.home(), 'Downloads/llm_data/
+    os.path.join(pathlib.Path.home(), 'Downloads/llm_data/phi3'),
     'The path to the model checkpoint, or directory holding the checkpoint.',
 )
 _OUTPUT_PATH = flags.DEFINE_string(
ai_edge_torch/generative/examples/phi/phi3.py CHANGED
@@ -15,6 +15,7 @@
 
 """Example of building a Phi-3.5 model up to 4K tokens, not to 128K tokens."""
 
+from functools import partial
 import math
 from typing import Tuple
 
@@ -93,40 +94,41 @@ ROPE_SHORT_FACTOR = [
 ]
 
 
-def _build_rope_cache(
-    size: int,
-    dim: int,
+def _build_phi3_rope(
+    input_pos: int,
+    n_elem: int,
     base: int,
     condense_ratio: int,
     dtype: torch.dtype,
     device: torch.device,
     theta_factors: torch.Tensor,
     scale: float,
+    **kwargs,
 ) -> Tuple[torch.Tensor, torch.Tensor]:
-  """
+  """Computes Rotary Positional Embeddings for Phi-3.5 model.
 
   It's a modified version of attn_utils.build_rope_cache with additional
   arguments for Phi-3.5 model. It precompute Rotary Positional Embedding Sin and
   Cos values with scaling factors for quick lookup during the inference.
 
   Args:
-
-
+    input_pos (torch.Tensor): the given input sequence positions
+    n_elem (int): Each sequence's dimmension.
     base (int, optional): Rope base value.
     condense_ratio (int, optional): The ratio by which sequence indicies are
       condensed.
     dtype (torch.dtype, optional): Output tensor's data type.
     device (torch.device, optional): Output tensor's data type.
-    theta_factors (torch.Tensor, optional): A tensor of shape (dim,) used to
-      scale the theta values.
+    theta_factors (torch.Tensor, optional): A tensor of shape (n_elem,) used
+      to scale the theta values.
     scale (float, optional): A float used to scale the rope values.
 
   Returns:
     Tuple[torch.Tensor, torch.Tensor]: Rope's Cosine and Sine waves.
   """
-  theta = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
+  theta = 1.0 / (base ** (torch.arange(0, n_elem, 2).float() / n_elem))
   theta = theta / theta_factors
-  seq_idx = torch.arange(size) / condense_ratio
+  seq_idx = input_pos / condense_ratio
   idx_theta = torch.outer(seq_idx, theta)
   cos = torch.cos(idx_theta).to(dtype=dtype, device=device) * scale
   sin = torch.sin(idx_theta).to(dtype=dtype, device=device) * scale
@@ -139,18 +141,6 @@ class Phi3_5Mini(model_builder.DecoderOnlyModel):
   def __init__(self, config: cfg.ModelConfig):
     super().__init__(config)
     attn_config = self.config.block_config(0).attn_config
-    self.rope_cache = _build_rope_cache(
-        size=self.config.kv_cache_max,
-        dim=int(attn_config.rotary_percentage * attn_config.head_dim),
-        base=attn_config.rotary_base,
-        condense_ratio=1,
-        dtype=torch.float32,
-        device=torch.device("cpu"),
-        theta_factors=torch.tensor(ROPE_SHORT_FACTOR),
-        scale=math.sqrt(
-            1 + math.log(ROPE_SCALE_FACTOR) / math.log(config.max_seq_len)
-        ),
-    )
 
 
 def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
@@ -183,16 +173,29 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       pre_attention_norm_config=norm_config,
       post_attention_norm_config=norm_config,
   )
+  max_seq_len = 4096
+  # Create the RoPE callable
+  build_rope = partial(
+      _build_phi3_rope,
+      condense_ratio=1,
+      dtype=torch.float32,
+      device=torch.device("cpu"),
+      theta_factors=torch.tensor(ROPE_SHORT_FACTOR),
+      scale=math.sqrt(1 + math.log(ROPE_SCALE_FACTOR) / math.log(max_seq_len)),
+      max_seq_len=max_seq_len,
+  )
+
   config = cfg.ModelConfig(
       vocab_size=32064,
       num_layers=32,
-      max_seq_len=4096,
+      max_seq_len=max_seq_len,
       kv_cache_max_len=kv_cache_max_len,
       embedding_dim=3072,
       block_configs=block_config,
       final_norm_config=norm_config,
       lm_head_share_weight_with_embedding=False,
       enable_hlfb=True,
+      build_rope=build_rope,
   )
   return config
 
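
Both `_build_llama3_rope_cache` and `_build_phi3_rope` gain a trailing `**kwargs`. That is what lets the shared decoder call every configured RoPE builder with one fixed keyword set (`input_pos`, `n_elem`, `head_dim`, `base`) even when a given builder does not use all of them. A small illustration of the convention (function names here are hypothetical):

```python
def standard_rope(input_pos, n_elem, head_dim, base):
  ...  # uses every argument

def phi3_style_rope(input_pos, n_elem, base, theta_factors=None, **kwargs):
  ...  # `head_dim` simply lands in kwargs and is ignored

for build in (standard_rope, phi3_style_rope):
  build(input_pos=[0, 1], n_elem=64, head_dim=64, base=10_000)  # both accept the call
```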
ai_edge_torch/generative/examples/smollm/convert_v2_to_tflite.py ADDED
@@ -0,0 +1,71 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Example of converting SmolLM2 model to multi-signature tflite model."""
+
+import os
+import pathlib
+
+from absl import app
+from absl import flags
+from ai_edge_torch.generative.examples.smollm import smollm
+from ai_edge_torch.generative.utilities import converter
+from ai_edge_torch.generative.utilities.model_builder import ExportConfig
+
+_CHECKPOINT_PATH = flags.DEFINE_string(
+    'checkpoint_path',
+    os.path.join(pathlib.Path.home(), 'Downloads/llm_data/smollm2'),
+    'The path to the model checkpoint, or directory holding the checkpoint.',
+)
+_TFLITE_PATH = flags.DEFINE_string(
+    'tflite_path',
+    '/tmp/',
+    'The tflite file path to export.',
+)
+_PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
+    'prefill_seq_lens',
+    (8, 64, 128, 256, 512, 1024),
+    'List of the maximum sizes of prefill input tensors.',
+)
+_KV_CACHE_MAX_LEN = flags.DEFINE_integer(
+    'kv_cache_max_len',
+    1280,
+    'The maximum size of KV cache buffer, including both prefill and decode.',
+)
+_QUANTIZE = flags.DEFINE_bool(
+    'quantize',
+    True,
+    'Whether the model should be quantized.',
+)
+
+
+def main(_):
+  pytorch_model = smollm.build_model_v2(
+      _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
+  )
+
+  quant_suffix = 'q8' if _QUANTIZE.value else 'f32'
+  output_filename = f'smollm2_{quant_suffix}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+  converter.convert_to_tflite(
+      pytorch_model,
+      tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
+      prefill_seq_len=_PREFILL_SEQ_LENS.value,
+      quantize=_QUANTIZE.value,
+      export_config=ExportConfig(),
+  )
+
+
+if __name__ == '__main__':
+  app.run(main)
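
A programmatic equivalent of this script's `main()`, useful when driving the conversion from another Python entry point; the checkpoint path is illustrative and must hold SmolLM2 weights:

```python
from ai_edge_torch.generative.examples.smollm import smollm
from ai_edge_torch.generative.utilities import converter
from ai_edge_torch.generative.utilities.model_builder import ExportConfig

model = smollm.build_model_v2("/path/to/smollm2", kv_cache_max_len=1280)
converter.convert_to_tflite(
    model,
    tflite_path="/tmp/smollm2_q8_ekv1280.tflite",
    prefill_seq_len=(8, 64, 128, 256, 512, 1024),
    quantize=True,
    export_config=ExportConfig(),
)
```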
ai_edge_torch/generative/examples/smollm/smollm.py CHANGED
@@ -85,3 +85,41 @@ def build_model(checkpoint_path: str, **kwargs) -> nn.Module:
       tensor_names=TENSOR_NAMES,
       model_class=SmolLM,
   )
+
+
+class SmolLM2(model_builder.DecoderOnlyModel):
+  """A SmolLM2 model built from the Edge Generative API layers."""
+  pass
+
+
+def get_model_config_v2(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
+  """Returns the model config for a SmolLM2 135M model.
+
+  Args:
+    kv_cache_max_len (int): The maximum sequence length of the KV cache. Default
+      is 1024.
+
+  Returns:
+    The model config for a SmolLM2 model.
+  """
+  config = get_model_config(kv_cache_max_len)
+  config.block_config(0).attn_config.rotary_base = 100000
+  return config
+
+
+def get_fake_model_config_v2(**kwargs) -> cfg.ModelConfig:
+  config = get_model_config_v2(**kwargs)
+  config.vocab_size = 128
+  config.num_layers = 2
+  # SmolLM2 has only one block config.
+  config.block_config(0).ff_config.intermediate_size = 64
+  return config
+
+
+def build_model_v2(checkpoint_path: str, **kwargs) -> nn.Module:
+  return model_builder.build_decoder_only_model(
+      checkpoint_path=checkpoint_path,
+      config=get_model_config_v2(**kwargs),
+      tensor_names=TENSOR_NAMES,
+      model_class=SmolLM2,
+  )
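
As the diff shows, SmolLM2 reuses the v1 architecture wholesale; the only config-level difference is the RoPE base. A short usage sketch (checkpoint handling omitted):

```python
from ai_edge_torch.generative.examples.smollm import smollm

v2 = smollm.get_model_config_v2(kv_cache_max_len=1024)
assert v2.block_config(0).attn_config.rotary_base == 100000
# Everything else (layers, dims, norms) is inherited from the v1 config.
```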
ai_edge_torch/generative/examples/smollm/verify.py CHANGED
@@ -36,10 +36,26 @@ _MAX_NEW_TOKENS = flags.DEFINE_integer(
     30,
     "The maximum size of the generated tokens.",
 )
+_MODEL_VERSION = flags.DEFINE_enum(
+    "model_version",
+    "v1",
+    ["v1", "v2"],
+    "The version of SmolLm to verify.",
+)
+_CHECKPOINT = {
+    "v1": "HuggingFaceTB/SmolLM-135M",
+    "v2": "HuggingFaceTB/SmolLM2-135M",
+}
+
+_BUILDER = {
+    "v1": smollm.build_model,
+    "v2": smollm.build_model_v2,
+}
 
 
 def main(_):
-  checkpoint = "HuggingFaceTB/SmolLM-135M"
+  checkpoint = _CHECKPOINT[_MODEL_VERSION.value]
+  builder = _BUILDER[_MODEL_VERSION.value]
   logging.info("Loading the original model from: %s", checkpoint)
   original_model = transformers.AutoModelForCausalLM.from_pretrained(checkpoint)
 
@@ -49,7 +65,7 @@ def main(_):
   )
   reauthored_checkpoint = pathlib.Path(cached_config_file).parent
   logging.info("Building the reauthored model from: %s", reauthored_checkpoint)
-  reauthored_model = smollm.build_model(reauthored_checkpoint)
+  reauthored_model = builder(reauthored_checkpoint)
 
   logging.info("Loading the tokenizer from: %s", checkpoint)
   tokenizer = transformers.AutoTokenizer.from_pretrained(checkpoint)
ai_edge_torch/generative/examples/test_models/toy_model.py CHANGED
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 # A toy example which has a single-layer transformer block.
-from typing import Tuple
+from typing import Optional, Tuple
 
 from ai_edge_torch.generative.layers import builder
 from ai_edge_torch.generative.layers.attention import TransformerBlock
@@ -52,14 +52,20 @@ class ToySingleLayerModel(torch.nn.Module):
     self.config = config
 
   @torch.inference_mode
-  def forward(self, idx: torch.Tensor, input_pos: torch.Tensor) -> torch.Tensor:
+  def forward(
+      self,
+      idx: torch.Tensor,
+      input_pos: torch.Tensor,
+      mask: Optional[torch.Tensor] = None,
+  ) -> torch.Tensor:
     x = self.tok_embedding(idx)
     cos, sin = self.rope_cache
 
     cos = cos.index_select(0, input_pos)
     sin = sin.index_select(0, input_pos)
-    mask = self.mask_cache.index_select(2, input_pos)
-    mask = mask[:, :, :, : self.config.max_seq_len]
+    if mask is None:
+      mask = self.mask_cache.index_select(2, input_pos)
+      mask = mask[:, :, :, : self.config.max_seq_len]
 
     x = self.transformer_block(x, (cos, sin), mask, input_pos)
     x = self.final_norm(x)
@@ -98,7 +104,12 @@ class ToySingleLayerModelWeightSharing(torch.nn.Module):
     self.config = config
 
   @torch.inference_mode
-  def forward(self, idx: torch.Tensor, input_pos: torch.Tensor) -> torch.Tensor:
+  def forward(
+      self,
+      idx: torch.Tensor,
+      input_pos: torch.Tensor,
+      mask: Optional[torch.Tensor] = None,
+  ) -> torch.Tensor:
     x = self.tok_embedding(idx)
     cos, sin = self.rope_cache
 
ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py CHANGED
@@ -63,23 +63,25 @@ class ToyModelWithKVCache(torch.nn.Module):
       tokens: torch.Tensor,
       input_pos: torch.Tensor,
       kv_cache: kv_utils.KVCache,
+      mask: Optional[torch.Tensor] = None,
       export_config: Optional[ExportConfig] = None,
   ) -> Tuple[torch.Tensor, kv_utils.KVCache]:
     x = self.tok_embedding(tokens)
     cos, sin = self.rope_cache
     cos = cos.index_select(0, input_pos)
     sin = sin.index_select(0, input_pos)
-    mask = self.mask_cache.index_select(2, input_pos)
-    mask = mask[:, :, :, : self.config.max_seq_len]
+    if mask is None:
+      mask = self.mask_cache.index_select(2, input_pos)
+      mask = mask[:, :, :, : self.config.max_seq_len]
 
-    updated_kv_entires = []
+    updated_kv_entries = []
     for i, block in enumerate(self.transformer_blocks):
       kv_entry = kv_cache.caches[i] if kv_cache else None
       x, kv_entry = block(x, (cos, sin), mask, input_pos, kv_entry)
       if kv_entry:
-        updated_kv_entires.append(kv_entry)
+        updated_kv_entries.append(kv_entry)
 
-    updated_kv_cache = kv_utils.KVCache(tuple(updated_kv_entires))
+    updated_kv_cache = kv_utils.KVCache(tuple(updated_kv_entries))
 
     if export_config is not None:
       if (
ai_edge_torch/generative/layers/attention.py CHANGED
@@ -27,33 +27,6 @@ import torch
 from torch import nn
 
 
-def _embed_rope(
-    q: torch.Tensor,
-    k: torch.Tensor,
-    n_elem: int,
-    rope: Tuple[torch.Tensor, torch.Tensor],
-) -> Tuple[torch.Tensor, torch.Tensor]:
-  """Embed rotary positional embedding for query and key.
-
-  Args:
-    q (torch.Tensor): query tensor.
-    k (torch.Tensor): key tensor.
-    n_elem (int): number of elements to embed rotarty positional embedding.
-    rope (Tuple[torch.Tensor, torch.Tensor]): the input rope tensor.
-  """
-  if n_elem > 0:
-    cos, sin = rope
-    q_roped = rotary_pos_emb.apply_rope(
-        q[..., :n_elem], cos.repeat(1, 2), sin.repeat(1, 2)
-    )
-    k_roped = rotary_pos_emb.apply_rope(
-        k[..., :n_elem], cos.repeat(1, 2), sin.repeat(1, 2)
-    )
-    q = torch.cat((q_roped, q[..., n_elem:]), dim=-1)
-    k = torch.cat((k_roped, k[..., n_elem:]), dim=-1)
-  return q, k
-
-
 class TransformerBlock(nn.Module):
 
   def __init__(
@@ -252,7 +225,8 @@ class CausalSelfAttention(nn.Module):
     if rope is not None:
       # Compute rotary positional embedding for query and key.
       n_elem = int(self.config.rotary_percentage * self.config.head_dim)
-      q, k = _embed_rope(q, k, n_elem, rope)
+      cos, sin = rope
+      q, k = rotary_pos_emb.apply_rope_inline(q, k, cos, sin)
 
     if kv_cache is not None:
       kv_cache = kv_utils.update(kv_cache, input_pos, k, v)
@@ -404,7 +378,8 @@ class CrossAttention(nn.Module):
     if rope is not None:
       # Compute rotary positional embedding for query and key.
       n_elem = int(self.config.rotary_percentage * self.config.head_dim)
-      q, k = _embed_rope(q, k, n_elem, rope)
+      cos, sin = rope
+      q, k = rotary_pos_emb.apply_rope_inline(q, k, cos, sin)
 
     if kv_cache is not None:
       kv_cache = kv_utils.update(kv_cache, input_pos, k, v)
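
The deleted `_embed_rope` helper existed to support partial rotary embeddings: only the first `n_elem` channels of q/k were rotated when `rotary_percentage < 1.0`, with the rest passed through untouched. A standalone reconstruction of that idea (toy `rotate`, not the library function):

```python
import torch

def rotate(x, cos, sin):
  x1, x2 = torch.split(x, x.size(-1) // 2, dim=-1)
  return torch.cat([x1 * cos - x2 * sin, x2 * cos + x1 * sin], dim=-1)

def partial_rope(q, n_elem, cos, sin):
  q_roped = rotate(q[..., :n_elem], cos, sin)           # rotate first n_elem channels
  return torch.cat((q_roped, q[..., n_elem:]), dim=-1)  # pass the rest through

q = torch.randn(1, 8, 4, 64)  # (B, T, n_heads, head_dim)
cos, sin = torch.ones(8, 1, 16), torch.zeros(8, 1, 16)
assert partial_rope(q, 32, cos, sin).shape == q.shape
```

With the new inline path, `n_elem` is still computed at the call site, but the rotation itself is applied by `rotary_pos_emb.apply_rope_inline` on the full tensors.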
ai_edge_torch/generative/layers/model_config.py CHANGED
@@ -17,8 +17,8 @@
 
 import dataclasses
 import enum
-from typing import Optional, Sequence, Union
-
+from typing import Callable, Optional, Sequence, Union
+from ai_edge_torch.generative.layers import rotary_position_embedding
 
 @enum.unique
 class ActivationType(enum.Enum):
@@ -218,6 +218,10 @@ class ModelConfig:
   # Softcap on the model output logits.
   final_logit_softcap: Optional[float] = None
 
+  # The function to call to create the RoPE sin and cos vectors during the
+  # forward pass. Defaults to a standard implementation.
+  build_rope: Callable = rotary_position_embedding.build_rope
+
   @property
   def kv_cache_max(self) -> int:
     if self.kv_cache_max_len > 0:
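
Because `ModelConfig` now carries a `build_rope` callable, a model can swap in custom RoPE frequency handling without subclassing the decoder. A hedged sketch: `my_build_rope` is hypothetical, and the field defaults to `rotary_position_embedding.build_rope` when left unset.

```python
import ai_edge_torch.generative.layers.rotary_position_embedding as rotary_pos_emb

def my_build_rope(input_pos, n_elem, head_dim, base, **kwargs):
  # A real override could rescale frequencies here before delegating.
  return rotary_pos_emb.build_rope(input_pos, n_elem, head_dim, base)

# config: any cfg.ModelConfig built as in the model examples above.
# config.build_rope = my_build_rope
```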
ai_edge_torch/generative/layers/rotary_position_embedding.py CHANGED
@@ -32,57 +32,63 @@ def apply_rope(
   """
   x = x.transpose(1, 2)
   head_size = x.size(-1)
-  x1 = x[..., : head_size // 2]
-  x2 = x[..., head_size // 2 :]
-  rotated = torch.cat((-x2, x1), dim=-1)
-  roped = (x * cos) + (rotated * sin)
+  x1, x2 = torch.split(x, head_size // 2, dim=-1)
+  left = x1 * cos - x2 * sin
+  right = x2 * cos + x1 * sin
+  roped = torch.cat([left, right], dim=-1)
   return roped.transpose(1, 2).type_as(x)
 
 
-def apply_rope_inline(
-    q: torch.Tensor,
-    k: torch.Tensor,
+def build_rope(
     input_pos: torch.Tensor,
     n_elem: int,
+    head_dim: int,
     base: int = 10_000,
 ) -> Tuple[torch.Tensor, torch.Tensor]:
-  """Computes rotary positional embedding
+  """Computes rotary positional embedding cosine and sine tensors.
 
   Args:
-    q: the query tensor.
-    k: the key tensor.
     input_pos: the sequence indices for the query and key
     n_elem: number of elements of the head dimension for RoPE computation
+    base: the base of the exponentiated value for RoPE.
 
   Returns:
-
+    cos, sin tensors
   """
 
   if n_elem <= 0:
-    return q, k
+    return None, None
 
-  theta = 1.0 / (base ** (torch.arange(0, n_elem, 2).float() / n_elem))
   freq_exponents = (2.0 / n_elem) * torch.arange(
-      q.shape[-1] // 2, dtype=torch.float32
+      head_dim // 2, dtype=torch.float32
   )
   timescale = float(base) ** freq_exponents
   radians = input_pos.clone().unsqueeze(0).unsqueeze(-1) / timescale.unsqueeze(
       0
   ).unsqueeze(0)
-  cos = torch.cos(radians)
-  sin = torch.sin(radians)
+  cos = torch.cos(radians)
+  sin = torch.sin(radians)
+  return cos, sin
+
 
-
-
-
-
-
-
-
-
-
-
+def apply_rope_inline(
+    q: torch.Tensor,
+    k: torch.Tensor,
+    cos: torch.Tensor,
+    sin: torch.Tensor,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+  """Computes rotary positional embedding inline for a query and key.
+
+  Args:
+    q: the query tensor.
+    k: the key tensor.
+    cos: the cosine tensor.
+    sin: the sine tensor.
+
+  Returns:
+    output the RoPE'd query and key.
+  """
 
-  q_roped =
-  k_roped =
+  q_roped = apply_rope(q, cos, sin)
+  k_roped = apply_rope(k, cos, sin)
   return q_roped, k_roped
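
A self-contained mirror of the refactored flow, for intuition (this reimplements the two functions above rather than importing them): `build_rope` derives cos/sin from positions, and the rotation is then applied inline to q and k.

```python
import torch

def build_rope(input_pos, head_dim, base=10_000):
  freq_exponents = (2.0 / head_dim) * torch.arange(head_dim // 2, dtype=torch.float32)
  timescale = float(base) ** freq_exponents
  radians = input_pos[None, :, None].float() / timescale[None, None, :]
  return torch.cos(radians), torch.sin(radians)  # each (1, T, head_dim // 2)

def apply_rope(x, cos, sin):
  x = x.transpose(1, 2)                          # (B, n_heads, T, head_dim)
  x1, x2 = torch.split(x, x.size(-1) // 2, dim=-1)
  roped = torch.cat([x1 * cos - x2 * sin, x2 * cos + x1 * sin], dim=-1)
  return roped.transpose(1, 2)

q = torch.randn(2, 5, 4, 32)                     # (B, T, n_heads, head_dim)
cos, sin = build_rope(torch.arange(5), head_dim=32)
assert apply_rope(q, cos, sin).shape == q.shape
```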
ai_edge_torch/generative/test/test_model_conversion_large.py CHANGED
@@ -150,6 +150,16 @@ class TestModelConversion(googletest.TestCase):
       ai_edge_torch.config.in_oss,
       reason="tests with custom ops are not supported in oss",
   )
+
+  def test_smollm2(self):
+    config = smollm.get_fake_model_config_v2()
+    pytorch_model = smollm.SmolLM2(config).eval()
+    self._test_model(config, pytorch_model, "prefill", atol=1e-4, rtol=1e-5)
+  @googletest.skipIf(
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
+  )
+
   def test_openelm(self):
     config = openelm.get_fake_model_config()
     pytorch_model = openelm.OpenELM(config).eval()
ai_edge_torch/generative/utilities/model_builder.py CHANGED
@@ -25,6 +25,7 @@ from ai_edge_torch.generative.layers import kv_cache as kv_utils
 from ai_edge_torch.generative.layers import lora as lora_utils
 import ai_edge_torch.generative.layers.attention_utils as attn_utils
 import ai_edge_torch.generative.layers.model_config as cfg
+import ai_edge_torch.generative.layers.rotary_position_embedding as rotary_pos_emb
 import ai_edge_torch.generative.utilities.loader as loading_utils
 import torch
 from torch import nn
@@ -87,13 +88,6 @@ class DecoderOnlyModel(nn.Module):
         config.embedding_dim,
         config.final_norm_config,
     )
-    # ROPE parameters for all attn_configs are the same. Take the first one.
-    attn_config = config.block_config(0).attn_config
-    self.rope_cache = attn_utils.build_rope_cache(
-        size=config.kv_cache_max,
-        dim=int(attn_config.rotary_percentage * attn_config.head_dim),
-        base=attn_config.rotary_base,
-    )
     self.mask_cache = attn_utils.build_causal_mask_cache(
         size=config.kv_cache_max,
     )
@@ -105,6 +99,7 @@ class DecoderOnlyModel(nn.Module):
       tokens: torch.Tensor,
       input_pos: torch.Tensor,
       kv_cache: kv_utils.KVCache,
+      mask: Optional[torch.Tensor] = None,
       lora: Optional[lora_utils.LoRA] = None,
       export_config: Optional[ExportConfig] = None,
   ) -> dict[torch.Tensor, kv_utils.KVCache]:
@@ -116,10 +111,21 @@ class DecoderOnlyModel(nn.Module):
 
     # token embeddings of shape (b, t, n_embd)
     input_embeds = self.tok_embedding(tokens)
-    cos, sin = self.rope_cache
-    rope = (cos.index_select(0, input_pos), sin.index_select(0, input_pos))
-    mask = self.mask_cache.index_select(2, input_pos)
-    mask = mask[:, :, :, : self.config.kv_cache_max]
+
+    # ROPE parameters for all attn_configs are the same. Take the first one.
+    attn_config = self.config.block_config(0).attn_config
+    n_elem = int(attn_config.rotary_percentage * attn_config.head_dim)
+    rope = self.config.build_rope(
+        input_pos=input_pos,
+        n_elem=n_elem,
+        base=attn_config.rotary_base,
+        head_dim=attn_config.head_dim,
+        # input_pos=input_pos, n_elem=n_elem, base=attn_config.rotary_base
+    )
+
+    if mask is None:
+      mask = self.mask_cache.index_select(2, input_pos)
+      mask = mask[:, :, :, : self.config.kv_cache_max]
 
     return self.forward_with_embeds(
         input_embeds, rope, mask, input_pos, kv_cache, lora, export_config
@@ -145,14 +151,14 @@ class DecoderOnlyModel(nn.Module):
     if self.config.embedding_scale is not None:
       x = x * self.config.embedding_scale
 
-    updated_kv_entires = []
+    updated_kv_entries = []
     for i, block in enumerate(self.transformer_blocks):
       kv_entry = kv_cache.caches[i] if kv_cache else None
       lora_adapter = lora.adapters[i] if lora else None
       x, kv_entry = block(x, rope, mask, input_pos, kv_entry, lora_adapter)
       if kv_entry:
-        updated_kv_entires.append(kv_entry)
-    updated_kv_cache = kv_utils.KVCache(tuple(updated_kv_entires))
+        updated_kv_entries.append(kv_entry)
+    updated_kv_cache = kv_utils.KVCache(tuple(updated_kv_entries))
 
     if export_config is not None:
       if (
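
Every `forward()` in this release gains an optional `mask`; when the caller passes none, the old behavior is reproduced by slicing the causal mask cache at the current positions. A standalone sketch of what that default computes (the `triu` construction is a stand-in for `attn_utils.build_causal_mask_cache`):

```python
import torch

kv_cache_max = 8
# Causal cache: position i may attend to keys 0..i; future slots get -inf.
mask_cache = torch.triu(
    torch.full((kv_cache_max, kv_cache_max), float("-inf")), diagonal=1
)[None, None]                         # (1, 1, S, S)

input_pos = torch.tensor([3, 4])      # decoding two positions
mask = mask_cache.index_select(2, input_pos)[:, :, :, :kv_cache_max]
print(mask.shape)                     # torch.Size([1, 1, 2, 8])
```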
ai_edge_torch/hlfb/mark_pattern/__init__.py CHANGED
@@ -17,7 +17,7 @@ from typing import Any
 import uuid
 
 from ai_edge_torch import lowertools
-from ai_edge_torch.hlfb.mark_pattern import passes
+from ai_edge_torch.hlfb.mark_pattern import fx_utils
 from ai_edge_torch.hlfb.mark_pattern import pattern as pattern_module
 import torch
 
@@ -87,7 +87,7 @@ def mark_pattern(
     m.meta["ORIGINAL_NODE"] = n
 
   # Sanitize graph_module to match in the same way as pattern's graph_module.
-  graph_module_to_match = passes.remove_clone_ops(graph_module_to_match)
+  graph_module_to_match = fx_utils.remove_clone_ops(graph_module_to_match)
 
   match_with_attrs = pattern.match(graph_module_to_match)
 
@@ -111,13 +111,25 @@ def mark_pattern(
         is_input=True,
     )
 
-    # Only replace input by the marker node for those nodes used in the pattern.
+    # Only replace input by the marker node for those nodes used in the
+    # pattern.
     in_pattern_nodes = set(match.nodes_map.values())
     for user in input_node.users.keys():
-      if user in in_pattern_nodes:
-        user.meta["ORIGINAL_NODE"].replace_input_with(
-            input_node.meta["ORIGINAL_NODE"], new_input_node
-        )
+      if user not in in_pattern_nodes:
+        continue
+
+      user.meta["ORIGINAL_NODE"].replace_input_with(
+          input_node.meta["ORIGINAL_NODE"], new_input_node
+      )
+      # Pattern matching graph sanitization may remove clone ops, which means
+      # the user's input in the original graph may be a clone op. When
+      # replacing the input with the marker node, we need to further try
+      # replacing the input of the clone op that connects to the user.
+      for original_user_input in user.meta["ORIGINAL_NODE"].all_input_nodes:
+        if fx_utils.is_clone_op(original_user_input):
+          original_user_input.replace_input_with(
+              input_node.meta["ORIGINAL_NODE"], new_input_node
+          )
 
   for i, pattern_output_node in enumerate(pattern.output_nodes):
     output_node = match.nodes_map[pattern_output_node]
ai_edge_torch/hlfb/mark_pattern/{passes.py → fx_utils.py} RENAMED
@@ -12,11 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""
+"""FX graph utilities for pattern matching clean ups."""
 
 import torch
 
 
+def is_clone_op(node: torch.fx.Node) -> bool:
+  """Checks if the node is a clone op."""
+  return (
+      node.op == "call_function" and node.target == torch.ops.aten.clone.default
+  )
+
+
 def remove_clone_ops(gm: torch.fx.GraphModule):
   """Removes clone ops from the graph.
 
@@ -32,7 +39,7 @@ def remove_clone_ops(gm: torch.fx.GraphModule):
     The graph module with clone ops removed.
   """
   for node in gm.graph.nodes:
-    if node
+    if is_clone_op(node):
       node.replace_all_uses_with(node.args[0])
       gm.graph.erase_node(node)
 
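
A quick demonstration of what `is_clone_op` matches, mirroring the new test later in this diff: exporting a model that clones an intermediate yields an `aten.clone.default` call_function node in the FX graph.

```python
import torch

class M(torch.nn.Module):
  def forward(self, x):
    return torch.ops.aten.clone.default(x * x) + x

ep = torch.export.export(M().eval(), (torch.rand(2, 2),))
clones = [
    n for n in ep.graph_module.graph.nodes
    if n.op == "call_function" and n.target == torch.ops.aten.clone.default
]
print(len(clones))  # expect 1 before remove_clone_ops, 0 after
```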
ai_edge_torch/hlfb/mark_pattern/pattern.py CHANGED
@@ -18,13 +18,14 @@ import dataclasses
 from typing import Any, Callable, Optional, Union
 
 from ai_edge_torch import fx_pass_base
-from ai_edge_torch.hlfb.mark_pattern import passes
+from ai_edge_torch.hlfb.mark_pattern import fx_utils
 import torch
-
-
-
-
-
+
+Graph = torch.fx.Graph
+GraphModule = torch.fx.GraphModule
+TensorArgument = torch.export.graph_signature.TensorArgument
+InternalMatch = torch.fx.passes.utils.matcher_utils.InternalMatch
+SubgraphMatcher = torch.fx.passes.utils.matcher_utils.SubgraphMatcher
 
 
 def _are_equal(x: Any, y: Any) -> bool:
@@ -219,8 +220,8 @@ class Pattern:
     # Sanitize graph_module for more precise pattern matching.
     # The graph_module to match against this pattern should apply equivalent
     # sanitization.
-    self.graph_module = passes.remove_clone_ops(self.graph_module)
-    self.graph_module = passes.remove_dangling_args(self.graph_module)
+    self.graph_module = fx_utils.remove_clone_ops(self.graph_module)
+    self.graph_module = fx_utils.remove_dangling_args(self.graph_module)
 
     # Builds list of ordered input and output nodes.
     self.graph_nodes_map = {}
ai_edge_torch/hlfb/test/test_mark_pattern.py CHANGED
@@ -58,6 +58,32 @@ class TestMarkPattern(googletest.TestCase):
         {"stablehlo.custom_call @mark_tensor": 6},
     )
 
+  def test_mark_pattern_with_clone_inputs(self):
+
+    class TestModel(torch.nn.Module):
+
+      def forward(self, x):
+        return torch.ops.aten.clone.default(x * x) + x
+
+    pattern = pattern_module.Pattern(
+        "test.add",
+        lambda a, b: a + b,
+        export_args=(torch.rand(2, 2), torch.rand(2, 2)),
+    )
+
+    model = TestModel().eval()
+    args = (torch.rand(20, 20),)
+    exported_program = torch.export.export(model, args)
+    mark_pattern.mark_pattern(exported_program.graph_module, pattern)
+    mlir = _export_stablehlo_mlir(exported_program)
+
+    lowertools.assert_string_count(
+        self,
+        mlir,
+        {'stablehlo.composite "test.add"': 1},
+        {"stablehlo.custom_call @mark_tensor": 3},
+    )
+
   def test_mark_pattern_with_attr_builder(self):
     class TestModel(torch.nn.Module):
 
ai_edge_torch/version.py CHANGED

{ai_edge_torch_nightly-0.3.0.dev20250108.dist-info → ai_edge_torch_nightly-0.3.0.dev20250110.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-torch-nightly
-Version: 0.3.0.dev20250108
+Version: 0.3.0.dev20250110
 Summary: Supporting PyTorch models with the Google AI Edge TFLite runtime.
 Home-page: https://github.com/google-ai-edge/ai-edge-torch
 Keywords: On-Device ML,AI,Google,TFLite,PyTorch,LLMs,GenAI
{ai_edge_torch_nightly-0.3.0.dev20250108.dist-info → ai_edge_torch_nightly-0.3.0.dev20250110.dist-info}/RECORD RENAMED
@@ -3,7 +3,7 @@ ai_edge_torch/_config.py,sha256=PKtOtBOup-cM0wBdQxby6HzuhLhIC3oq-TBG8FF4znE,2161
 ai_edge_torch/conftest.py,sha256=r0GTrhMRhlmOGrrkvumHN8hkmyug6WvF60vWq8wRIBI,758
 ai_edge_torch/fx_pass_base.py,sha256=518ziQ0TUxqum2qZXqlD8qr65pHPh8ZNLnwFC6zvK3k,4253
 ai_edge_torch/model.py,sha256=N-pNpTxzhaFGhWhnSGd70lBzb9VlEhTOq5mddU7bvvI,5542
-ai_edge_torch/version.py,sha256=
+ai_edge_torch/version.py,sha256=VHqAyYw4u6BgyQ6v7Xp08Jqb0cnzIVGsulfnclxgY5c,706
 ai_edge_torch/_convert/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/_convert/conversion.py,sha256=_PoH0E1gbbsWhLGwDRwUtW2G_IgNzNF7pKQbn9ct6-4,5778
 ai_edge_torch/_convert/conversion_utils.py,sha256=Sr8qXVcTwc-ZnZmK7yxVrIOOp1S_vNrwzC0zUvLTI2o,2160
@@ -47,13 +47,13 @@ ai_edge_torch/generative/examples/gemma/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIX
 ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py,sha256=8HJi0cutxPstafVNs2LfBKdUzufVucje1Vrfjw_RS_g,2527
 ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py,sha256=MX8fZhJJPZ5IoMiNHX0tLkRpHYqVuh4qhW0rkeIfmYw,2529
 ai_edge_torch/generative/examples/gemma/gemma1.py,sha256=w8oWYibZzvEvCDyp39EYyAWmjgJljhzdYPyFCfAWxZA,3497
-ai_edge_torch/generative/examples/gemma/gemma2.py,sha256=
+ai_edge_torch/generative/examples/gemma/gemma2.py,sha256=e9HfiHr4FkQZwVBYdDUZGzOjB5TqY2LqtVTHEzwVkQY,10428
 ai_edge_torch/generative/examples/gemma/verify_gemma1.py,sha256=ip-Gmk4CI5f0GWSdAIdrectxQWJ0t328KCsA4nfHuGg,1736
 ai_edge_torch/generative/examples/gemma/verify_gemma2.py,sha256=IoBhEMwH07-tFm5-U6F2hpCsI8xynglhq1x9tIOdaPQ,1322
 ai_edge_torch/generative/examples/gemma/verify_util.py,sha256=tR8RflXocDZqvuStyw9aFlzuiTllEC8rNnjrxms6_Is,5727
 ai_edge_torch/generative/examples/llama/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/llama/convert_to_tflite.py,sha256=tMSsqg7LU3LR-PHtKvlWtLCqlk71mfcO9hANU4vnvDM,2734
-ai_edge_torch/generative/examples/llama/llama.py,sha256=
+ai_edge_torch/generative/examples/llama/llama.py,sha256=kWy6-V4bFtE1yguCROLJS5XB0GOJD1-acJWp2dFjB5Q,6606
 ai_edge_torch/generative/examples/llama/verify.py,sha256=X7oKQi85M789ugBrOlMvzk8eSRR3Kf1Mprfl-U-WIpo,2842
 ai_edge_torch/generative/examples/moonshine/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/moonshine/convert_moonshine_to_tflite.py,sha256=7m3rYRzThRDYb-7pGnpLr3ACi4PWX07Mg20Q98ArPc4,1714
@@ -64,19 +64,19 @@ ai_edge_torch/generative/examples/openelm/openelm.py,sha256=sIJ8Ie1oxFrJM-1jvv2u
 ai_edge_torch/generative/examples/openelm/verify.py,sha256=VkigoqhAr8ew95neb3TifYv-SLOSheaWKv2AH0iKDrc,2441
 ai_edge_torch/generative/examples/paligemma/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/paligemma/convert_to_tflite.py,sha256=scLsguzzuHfKYDWUd2uZkKYVRzdAbQHLd-kPam8QwvM,3004
-ai_edge_torch/generative/examples/paligemma/decoder.py,sha256=
-ai_edge_torch/generative/examples/paligemma/decoder2.py,sha256=
+ai_edge_torch/generative/examples/paligemma/decoder.py,sha256=NJGhfPxVQjHDqea_lYGffjihOBdIYiXftiFTM6ccrwM,5475
+ai_edge_torch/generative/examples/paligemma/decoder2.py,sha256=L6F6KWHqxdnGQTOp9P3c8r_K1Uxet0ZCcbdvmjWtIos,6513
 ai_edge_torch/generative/examples/paligemma/image_encoder.py,sha256=yKPWG8aBp-GuzeyQntlzwTTcGBBjvUywVGRjnlNprmo,5574
-ai_edge_torch/generative/examples/paligemma/paligemma.py,sha256=
+ai_edge_torch/generative/examples/paligemma/paligemma.py,sha256=CEMG9gh51ev1KXPew927a6nfampiXX9bL6m-25tNYN8,6340
 ai_edge_torch/generative/examples/paligemma/verify.py,sha256=KT3Ruy40tSESxQuy-Sw01NAI3zId1BZr6Bp7FZj1wZk,5622
 ai_edge_torch/generative/examples/paligemma/verify_decoder.py,sha256=al5wMPWri4IRVWrLmCplPi6uoCzwh0vBHMGnCt-XUqo,2690
 ai_edge_torch/generative/examples/paligemma/verify_decoder2.py,sha256=tm-UfLr0YeBRVcQsWLBOMWI9JUzHmtPEbYK2vpITpqY,2534
 ai_edge_torch/generative/examples/paligemma/verify_image_encoder.py,sha256=vNm-wTT8BD6zbX6GocfP1QrVoHl0zSvuVxoXN36eeiU,3540
 ai_edge_torch/generative/examples/phi/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
-ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py,sha256=
+ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py,sha256=CaI_-Vtd0j9FoWIDd8q5z4CFsGYUhTwEWGvMGaXICuU,2514
 ai_edge_torch/generative/examples/phi/convert_to_tflite.py,sha256=g-MvEibJT_iIhkec2VGtFFA_iP54VCq9mY4KxwAYF08,2512
 ai_edge_torch/generative/examples/phi/phi2.py,sha256=c6PYCky7yJn6MVIYOCTx8S_CH27kOPmJbRZcI95nbZs,3477
-ai_edge_torch/generative/examples/phi/phi3.py,sha256=
+ai_edge_torch/generative/examples/phi/phi3.py,sha256=SHvJjmi5eIch5cYIWORt6YFmSQx_oCiOk1UbKKGibtk,7119
 ai_edge_torch/generative/examples/phi/verify.py,sha256=YPFCdbnfmvq38fbpBNr0kHPfSZo4p3_6WkLJAW3pLPo,2177
 ai_edge_torch/generative/examples/phi/verify_phi3.py,sha256=kVYaBVvddfQng0IyZGxyTJEzhiPO0G4VFJm2WOc2Q94,2360
 ai_edge_torch/generative/examples/qwen/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
@@ -85,8 +85,9 @@ ai_edge_torch/generative/examples/qwen/qwen.py,sha256=Zi_qiQ1JPokXZ95jgSEnQp3F-L
 ai_edge_torch/generative/examples/qwen/verify.py,sha256=9_AyEJTeUfvhhID64Rto2bflFPyXMFokdQLsseLUMiI,2775
 ai_edge_torch/generative/examples/smollm/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/smollm/convert_to_tflite.py,sha256=megskv1oiPhwHSnguoG7zV-esXp1Ns_FPeMLAYKhDb0,2522
-ai_edge_torch/generative/examples/smollm/smollm.py,sha256=
-ai_edge_torch/generative/examples/smollm/verify.py,sha256=
+ai_edge_torch/generative/examples/smollm/convert_v2_to_tflite.py,sha256=CjY1i0iCYxFSjhCpQZwxkmVxILgeo0zu1m0oBrHqyDU,2311
+ai_edge_torch/generative/examples/smollm/smollm.py,sha256=3uUltb6D3Q1aHpndcYTJrsWM_RBwLAraKDniH8ZZous,3779
+ai_edge_torch/generative/examples/smollm/verify.py,sha256=KpYxVz_lv61YWy6HLfwT68n0owZMvty5Rr3W7ZNWWSw,2702
 ai_edge_torch/generative/examples/stable_diffusion/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/stable_diffusion/attention.py,sha256=kDWG6MlIGa89zC5KSRcJlw2c4ITuw8KcchtfmF55f4g,3545
 ai_edge_torch/generative/examples/stable_diffusion/clip.py,sha256=5M4auM33SgCTODt0VT8TO-EVILruqGDRiNILBPeB83Y,6072
@@ -108,8 +109,8 @@ ai_edge_torch/generative/examples/t5/t5.py,sha256=gFTmPi-xB8pcPRgoF3DJxvH_fT-KWT
 ai_edge_torch/generative/examples/t5/t5_attention.py,sha256=l01oYyJo77INzRwN4xqXquaFQPvCFBFF5zOnmGVb3Hg,8731
 ai_edge_torch/generative/examples/test_models/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/test_models/convert_toy_model.py,sha256=6-WaNHckq_LlXMVTh8x90MGWeWq2bu_T_XQd3w9FnGg,3261
-ai_edge_torch/generative/examples/test_models/toy_model.py,sha256=
-ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py,sha256=
+ai_edge_torch/generative/examples/test_models/toy_model.py,sha256=Crpj-vOwSViHpblXOrRJmsIn4DrHyuB3XZ8kHifb7LA,5203
+ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py,sha256=Ab_N9xc-4DImA-Pvevr-nnnslBXScXVo4Pw7L3_OlhI,4732
 ai_edge_torch/generative/examples/tiny_llama/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py,sha256=VU0c5pgvrUtaTboT1xuDBGjpKOM85aqtaB_hYfSBuEk,2544
 ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py,sha256=mhJ18rb9sxrYRzv1YSzhbNs97oUZck99avZDcUO2oV8,2800
@@ -117,15 +118,15 @@ ai_edge_torch/generative/examples/tiny_llama/verify.py,sha256=7Bk8z033M-BCXJ299f
 ai_edge_torch/generative/fx_passes/__init__.py,sha256=jrzCB3ZyY_t5jJM1e2Czdt3DjAIL43R0_a-T-I7wOzw,1155
 ai_edge_torch/generative/fx_passes/remove_sdpa_zero_mask_pass.py,sha256=hhxSQvkDMv0isZJhmuLiod66ZODaJ8uSPSVTJVHBabQ,1931
 ai_edge_torch/generative/layers/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
-ai_edge_torch/generative/layers/attention.py,sha256=
+ai_edge_torch/generative/layers/attention.py,sha256=GrAy8CT1pEsgRoB8JQP6PlnNYk8kQ4U3YANfSiTJKn8,13776
 ai_edge_torch/generative/layers/attention_utils.py,sha256=zBVwlBUTs-nStIKCZG0ks5ra7tsqc9ShfakFJKH5rds,7344
 ai_edge_torch/generative/layers/builder.py,sha256=LXGuSHIx6QZAzLFm7aJvlzoMPgQwbXLFchGEKYwOOUA,5090
 ai_edge_torch/generative/layers/feed_forward.py,sha256=hdICat-8gW7-vxDAevJQ8NQ-mynllPiqLdXQMF6JMnc,4189
 ai_edge_torch/generative/layers/kv_cache.py,sha256=DhHIggaOQ2IAY4aRuMAuCLWZv1dBz5PYtmOEjkx9EQY,6291
 ai_edge_torch/generative/layers/lora.py,sha256=hsvWLLOnW7HQ0AysOZu30x_cetMquDd1tjfyLz8HCSU,17892
-ai_edge_torch/generative/layers/model_config.py,sha256=
+ai_edge_torch/generative/layers/model_config.py,sha256=9yPEmWNw3-_2wXBmPmZ7RUKcPXHF2ZbJwksyQoXTA6M,7784
 ai_edge_torch/generative/layers/normalization.py,sha256=MbwH-n80Fob5YvjBzdqDjBizMHLzSJGYRDdbD-rL5C0,6174
-ai_edge_torch/generative/layers/rotary_position_embedding.py,sha256=
+ai_edge_torch/generative/layers/rotary_position_embedding.py,sha256=1L1MEGPYbDELi0zy2OKl7yXyk9FXdBjcXwRZbfiJriU,2619
 ai_edge_torch/generative/layers/scaled_dot_product_attention.py,sha256=gXxh3papKy4FBpGEX7VyZ7rZ1Js6aHK70Q6DKrVSckY,4154
 ai_edge_torch/generative/layers/unet/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/layers/unet/blocks_2d.py,sha256=9jKzOfiBQ66bp1ZnVIAoREIifVNFx4aTlQeYMAx2_pA,29062
@@ -144,25 +145,25 @@ ai_edge_torch/generative/test/test_kv_cache.py,sha256=2AulHBS3hC4b_68PNNBkRVOryp
 ai_edge_torch/generative/test/test_loader.py,sha256=9mQUeeZKOVApOWSWl2cN9c10axZjMKM1-0Zd823CCS4,3449
 ai_edge_torch/generative/test/test_lora.py,sha256=6QIM6RLTc2HrodGpp_aS3OxM9Rco2KAzEnYgotkg41M,5310
 ai_edge_torch/generative/test/test_model_conversion.py,sha256=jfqkECCX7XKHeBAuDXrkwQJf0vM72eG3LMc5rluha84,6191
-ai_edge_torch/generative/test/test_model_conversion_large.py,sha256=
+ai_edge_torch/generative/test/test_model_conversion_large.py,sha256=bBcey-aD4L_TwKRrrM81bN2VQoJjPPC84Rv4o3WOc34,12491
 ai_edge_torch/generative/test/test_quantize.py,sha256=bEJMhpQ9bIDUZVBXTW888728FcH-i3SyE4JSZZUgU0A,6071
 ai_edge_torch/generative/test/utils.py,sha256=tF6aCfAGJnc9dmzCnZCEOuKNVimfWOqscv9og0DDLHU,2656
 ai_edge_torch/generative/utilities/__init__.py,sha256=-_jxnnFnCgnTU4oTm4MnRsvL5lqhomBNdFBbqfmfHPo,720
 ai_edge_torch/generative/utilities/converter.py,sha256=MY8BK29yD-W4v45Xdl_ErbNilipsTlD-4-y9MyBxR5g,7620
 ai_edge_torch/generative/utilities/dynamic_update_slice.py,sha256=e2mhx-Vp8sUK4EXoPtpZLSx3TViqLAKs67EhKcXBjAQ,2121
 ai_edge_torch/generative/utilities/loader.py,sha256=A3SOjPXp--AsvoP1hqj5QKWE4sgxoFc3H5EBUz_Eogc,13531
-ai_edge_torch/generative/utilities/model_builder.py,sha256=
+ai_edge_torch/generative/utilities/model_builder.py,sha256=6OBKyOmbg5Sap_np1wnajpCQ1fh8P0eONqNls9eHAX4,6778
 ai_edge_torch/generative/utilities/moonshine_loader.py,sha256=_RpFabSqtGH5PHiP3_1f6QfO14qMADUxr_HGRlVDFB0,4891
 ai_edge_torch/generative/utilities/stable_diffusion_loader.py,sha256=dqPD9qRXEWtU3ombslOC-BE2l_dMwHoCNu7NsIJhsso,36158
 ai_edge_torch/generative/utilities/t5_loader.py,sha256=tEsfy8-ymzbbjOIc-oesXF3yGyyWtJgFXn2s7VOavt8,16961
 ai_edge_torch/generative/utilities/transformers_verifier.py,sha256=8sp9m_FMcXn7nqOrochtu2jIANkJKhnhIBUmH0ZTDR4,1549
 ai_edge_torch/generative/utilities/verifier.py,sha256=6lnBU9Cy5GanB8JWK3-2_VU3PxqunDWGe-SgSLba5Yw,12065
 ai_edge_torch/hlfb/__init__.py,sha256=sH4um75na-O8tzxN6chFyp6Y4xnexsE7kUQpZySv6dE,735
-ai_edge_torch/hlfb/mark_pattern/__init__.py,sha256=
-ai_edge_torch/hlfb/mark_pattern/passes.py,sha256=
-ai_edge_torch/hlfb/mark_pattern/pattern.py,sha256=
+ai_edge_torch/hlfb/mark_pattern/__init__.py,sha256=-BYE7MGMxr-VfBy8tAiiOaCqYv8ytJ0w5l2P8B7h3eM,5387
+ai_edge_torch/hlfb/mark_pattern/fx_utils.py,sha256=taWLpF5IVglxlsF9HM2dIoKDXuQREaCRAXtJeG5gKzs,2073
+ai_edge_torch/hlfb/mark_pattern/pattern.py,sha256=7bv9XqRkm1pjxiVL4Cm1cArExnolId8hQKFHtvlkCI8,10061
 ai_edge_torch/hlfb/test/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
-ai_edge_torch/hlfb/test/test_mark_pattern.py,sha256=
+ai_edge_torch/hlfb/test/test_mark_pattern.py,sha256=-5UqJyk__1YbUNGuxi4b2sn0CED0W-G337AXwxPGdEs,5567
 ai_edge_torch/lowertools/__init__.py,sha256=A8WBXvWtuFYYWtNTqPD7waVntLaSVAnSMwx5ugjZBIw,761
 ai_edge_torch/lowertools/_shim.py,sha256=Mbg16tnCVK0YsHowfbpqpNX1qySuMLvpGI_-I5SIrG0,3276
 ai_edge_torch/lowertools/common_utils.py,sha256=Z7p-ivOHtddktpnHrlDm_dSoTxJOdEjFXIGQbzjgwQo,4504
@@ -205,8 +206,8 @@ ai_edge_torch/quantize/quant_config.py,sha256=U0KisSW-uZkoMJcy-ZP9W57p3tsa594fr9
 ai_edge_torch/testing/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/testing/model_coverage/__init__.py,sha256=5P8J6Zk5YYtDvTBucFvB9NGSRI7Gw_24WnrbhXgycEE,765
 ai_edge_torch/testing/model_coverage/model_coverage.py,sha256=UPB448aMDUyC0HNYVqio2rcJPnDN0tBQMP08J6vPYew,4718
-ai_edge_torch_nightly-0.3.0.dev20250108.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
-ai_edge_torch_nightly-0.3.0.dev20250108.dist-info/METADATA,sha256=
-ai_edge_torch_nightly-0.3.0.dev20250108.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_torch_nightly-0.3.0.dev20250108.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
-ai_edge_torch_nightly-0.3.0.dev20250108.dist-info/RECORD,,
+ai_edge_torch_nightly-0.3.0.dev20250110.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ai_edge_torch_nightly-0.3.0.dev20250110.dist-info/METADATA,sha256=D_Vexo_GTTaYsb6IqB5rLrD-mos2YWze1Xcj3IFDgKE,1966
+ai_edge_torch_nightly-0.3.0.dev20250110.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_torch_nightly-0.3.0.dev20250110.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
+ai_edge_torch_nightly-0.3.0.dev20250110.dist-info/RECORD,,