ai-edge-torch-nightly 0.3.0.dev20240812__py3-none-any.whl → 0.3.0.dev20240814__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ai-edge-torch-nightly might be problematic.
- ai_edge_torch/generative/examples/experimental/gemma/gemma.py +2 -2
- ai_edge_torch/generative/examples/experimental/tiny_llama/tiny_llama.py +2 -2
- ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py +67 -0
- ai_edge_torch/generative/examples/gemma/gemma.py +3 -2
- ai_edge_torch/generative/examples/gemma/gemma2.py +250 -0
- ai_edge_torch/generative/examples/stable_diffusion/clip.py +2 -2
- ai_edge_torch/generative/examples/t5/t5.py +4 -4
- ai_edge_torch/generative/examples/t5/t5_attention.py +3 -3
- ai_edge_torch/generative/examples/test_models/toy_model.py +1 -1
- ai_edge_torch/generative/examples/test_models/toy_model_with_external_kv_cache.py +1 -1
- ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py +1 -1
- ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py +2 -2
- ai_edge_torch/generative/layers/attention.py +12 -5
- ai_edge_torch/generative/layers/attention_utils.py +30 -0
- ai_edge_torch/generative/layers/builder.py +5 -0
- ai_edge_torch/generative/layers/feed_forward.py +15 -3
- ai_edge_torch/generative/layers/model_config.py +35 -13
- ai_edge_torch/generative/layers/scaled_dot_product_attention.py +25 -9
- ai_edge_torch/generative/test/test_model_conversion.py +29 -1
- ai_edge_torch/generative/utilities/loader.py +29 -7
- ai_edge_torch/generative/utilities/t5_loader.py +8 -8
- ai_edge_torch/version.py +1 -1
- {ai_edge_torch_nightly-0.3.0.dev20240812.dist-info → ai_edge_torch_nightly-0.3.0.dev20240814.dist-info}/METADATA +1 -1
- {ai_edge_torch_nightly-0.3.0.dev20240812.dist-info → ai_edge_torch_nightly-0.3.0.dev20240814.dist-info}/RECORD +27 -25
- {ai_edge_torch_nightly-0.3.0.dev20240812.dist-info → ai_edge_torch_nightly-0.3.0.dev20240814.dist-info}/LICENSE +0 -0
- {ai_edge_torch_nightly-0.3.0.dev20240812.dist-info → ai_edge_torch_nightly-0.3.0.dev20240814.dist-info}/WHEEL +0 -0
- {ai_edge_torch_nightly-0.3.0.dev20240812.dist-info → ai_edge_torch_nightly-0.3.0.dev20240814.dist-info}/top_level.txt +0 -0
ai_edge_torch/generative/examples/experimental/gemma/gemma.py
CHANGED
@@ -40,7 +40,7 @@ TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
     attn_value_proj="model.layers.{}.self_attn.v_proj",
     attn_output_proj="model.layers.{}.self_attn.o_proj",
     pre_attn_norm="model.layers.{}.input_layernorm",
-
+    post_attn_norm="model.layers.{}.post_attention_layernorm",
     embedding="model.embed_tokens",
     final_norm="model.norm",
     lm_head=None,
@@ -150,7 +150,7 @@ def get_model_config_2b(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       attn_config=attn_config,
       ff_config=ff_config,
       pre_attention_norm_config=norm_config,
-
+      post_attention_norm_config=norm_config,
       final_norm_config=norm_config,
       parallel_residual=False,
       lm_head_use_bias=False,
ai_edge_torch/generative/examples/experimental/tiny_llama/tiny_llama.py
CHANGED
@@ -41,7 +41,7 @@ TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
     attn_value_proj="model.layers.{}.self_attn.v_proj",
     attn_output_proj="model.layers.{}.self_attn.o_proj",
     pre_attn_norm="model.layers.{}.input_layernorm",
-
+    post_attn_norm="model.layers.{}.post_attention_layernorm",
     embedding="model.embed_tokens",
     final_norm="model.norm",
     lm_head="lm_head",
@@ -142,7 +142,7 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       attn_config=attn_config,
       ff_config=ff_config,
       pre_attention_norm_config=norm_config,
-
+      post_attention_norm_config=norm_config,
       final_norm_config=norm_config,
       enable_hlfb=True,
   )
ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py
ADDED
@@ -0,0 +1,67 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os
+from pathlib import Path
+
+import ai_edge_torch
+from ai_edge_torch.generative.examples.gemma import gemma2
+from ai_edge_torch.generative.quantize import quant_recipes
+import torch
+
+
+def convert_gemma_to_tflite(
+    checkpoint_path: str,
+    prefill_seq_len: int = 512,
+    kv_cache_max_len: int = 1024,
+    quantize: bool = True,
+):
+  """Converting a Gemma 2 2B model to multi-signature
+  tflite model.
+
+  Args:
+      checkpoint_path (str): The filepath to the model checkpoint, or directory holding the checkpoint.
+      prefill_seq_len (int, optional): The maximum size of prefill input tensor.
+        Defaults to 512.
+      kv_cache_max_len (int, optional): The maximum size of KV cache buffer,
+        including both prefill and decode. Defaults to 1024.
+      quantize (bool, optional): Whether the model should be quanized.
+        Defaults to True.
+  """
+  pytorch_model = gemma2.build_2b_model(
+      checkpoint_path, kv_cache_max_len=kv_cache_max_len
+  )
+  # Tensors used to trace the model graph during conversion.
+  prefill_tokens = torch.full((1, prefill_seq_len), 0, dtype=torch.long)
+  prefill_input_pos = torch.arange(0, prefill_seq_len)
+  decode_token = torch.tensor([[0]], dtype=torch.long)
+  decode_input_pos = torch.tensor([0], dtype=torch.int64)
+
+  quant_config = quant_recipes.full_int8_dynamic_recipe() if quantize else None
+  edge_model = (
+      ai_edge_torch.signature(
+          'prefill', pytorch_model, (prefill_tokens, prefill_input_pos)
+      )
+      .signature('decode', pytorch_model, (decode_token, decode_input_pos))
+      .convert(quant_config=quant_config)
+  )
+  edge_model.export(
+      f'/tmp/gemma2_seq{prefill_seq_len}_kv{kv_cache_max_len}.tflite'
+  )
+
+
+if __name__ == '__main__':
+  checkpoint_path = os.path.join(Path.home(), 'Downloads/llm_data/gemma2-2b')
+  convert_gemma_to_tflite(checkpoint_path)
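For reference, a minimal usage sketch of the new conversion entry point added above. It assumes the nightly wheel is installed and that a Gemma 2 2B checkpoint has already been downloaded; the checkpoint directory below is a hypothetical placeholder, not something shipped with the package.

# Hedged sketch: module and function names come from the diff above;
# the checkpoint path is a placeholder chosen for illustration only.
from ai_edge_torch.generative.examples.gemma import convert_gemma2_to_tflite

convert_gemma2_to_tflite.convert_gemma_to_tflite(
    checkpoint_path="/data/checkpoints/gemma2-2b",  # hypothetical local path
    prefill_seq_len=512,
    kv_cache_max_len=1024,
    quantize=True,
)
# Per the script above, the result is written to
# /tmp/gemma2_seq512_kv1024.tflite with 'prefill' and 'decode' signatures.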
ai_edge_torch/generative/examples/gemma/gemma.py
CHANGED
@@ -35,7 +35,7 @@ TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
     attn_value_proj="model.layers.{}.self_attn.v_proj",
     attn_output_proj="model.layers.{}.self_attn.o_proj",
     pre_attn_norm="model.layers.{}.input_layernorm",
-
+    post_attn_norm="model.layers.{}.post_attention_layernorm",
     embedding="model.embed_tokens",
     final_norm="model.norm",
     lm_head=None,
@@ -138,7 +138,7 @@ def get_model_config_2b(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       attn_config=attn_config,
       ff_config=ff_config,
       pre_attention_norm_config=norm_config,
-
+      post_attention_norm_config=norm_config,
       final_norm_config=norm_config,
       parallel_residual=False,
       lm_head_use_bias=False,
@@ -160,6 +160,7 @@ def build_2b_model(checkpoint_path, **kwargs) -> nn.Module:
   # since embedding and lm-head use the same weight, we need to set strict
   # to False.
   loader.load(model, strict=False)
+  model.eval()
   return model
 
 
ai_edge_torch/generative/examples/gemma/gemma2.py
ADDED
@@ -0,0 +1,250 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+# Example of building the Gemma2 2B model.
+
+import os
+from pathlib import Path
+from typing import Optional, Tuple
+
+from ai_edge_torch.generative.layers.attention import TransformerBlock
+import ai_edge_torch.generative.layers.attention_utils as attn_utils
+import ai_edge_torch.generative.layers.builder as builder
+import ai_edge_torch.generative.layers.model_config as cfg
+import ai_edge_torch.generative.utilities.loader as loading_utils
+import numpy as np
+import torch
+import torch.nn as nn
+
+TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
+    ff_up_proj="model.layers.{}.mlp.up_proj",
+    ff_down_proj="model.layers.{}.mlp.down_proj",
+    ff_gate_proj="model.layers.{}.mlp.gate_proj",
+    attn_fused_qkv_proj="model.layers.{}.self_attn.qkv_proj",
+    attn_output_proj="model.layers.{}.self_attn.o_proj",
+    pre_attn_norm="model.layers.{}.input_layernorm",
+    post_attn_norm="model.layers.{}.post_attention_layernorm",
+    pre_ff_norm="model.layers.{}.pre_feedforward_layernorm",
+    post_ff_norm="model.layers.{}.post_feedforward_layernorm",
+    embedding="embedder",
+    final_norm="model.norm",
+    lm_head=None,
+)
+
+
+class Gemma2Block(TransformerBlock):
+
+  def forward(
+      self,
+      x: torch.Tensor,
+      rope: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
+      mask: Optional[torch.Tensor] = None,
+      input_pos: Optional[torch.Tensor] = None,
+  ) -> torch.Tensor:
+    """Forward function of the Gemma2Block.
+
+    Exactly the same as TransformerBlock but we call the post-attention norm
+    immediately after attention and not after the residual pointwise addition.
+
+    Args:
+      x (torch.Tensor): the input tensor.
+      rope (Tuple[torch.Tensor, torch.Tensor]): the input rope tensor.
+      mask (torch.Tensor): the optional mask tensor.
+      input_pos (torch.Tensor): the optional input position tensor.
+
+    Returns:
+      output activation from this transformer block.
+    """
+
+    x_norm = self.pre_atten_norm(x)
+    attn_out = self.atten_func(x_norm, rope, mask, input_pos)
+    attn_out_norm = self.post_atten_norm(attn_out)
+    x = x + attn_out_norm
+    output = x + self.ff(x)
+    return output
+
+
+class Gemma2(nn.Module):
+
+  def __init__(self, config: cfg.ModelConfig):
+    super().__init__()
+
+    self.config = config
+    # Construct model layers.
+    self.tok_embedding = nn.Embedding(
+        config.vocab_size, config.embedding_dim, padding_idx=0
+    )
+    self.lm_head = nn.Linear(
+        config.embedding_dim,
+        config.vocab_size,
+        bias=config.lm_head_use_bias,
+    )
+    # Gemma re-uses the embedding as the head projection layer.
+    self.lm_head.weight.data = self.tok_embedding.weight.data
+    self.transformer_blocks = nn.ModuleList(
+        Gemma2Block(config) for _ in range(config.num_layers)
+    )
+    self.final_norm = builder.build_norm(
+        config.embedding_dim,
+        config.final_norm_config,
+    )
+    self.rope_cache = attn_utils.build_rope_cache(
+        size=config.kv_cache_max,
+        dim=int(
+            config.attn_config.rotary_percentage * config.attn_config.head_dim
+        ),
+        base=10_000,
+        condense_ratio=1,
+        dtype=torch.float32,
+        device=torch.device("cpu"),
+    )
+    self.mask_cache = attn_utils.build_causal_mask_cache(
+        size=config.kv_cache_max,
+        dtype=torch.float32,
+        device=torch.device("cpu"),
+    )
+
+    self.sliding_window_mask_cache = attn_utils.build_sliding_window_mask_cache(
+        size=config.kv_cache_max,
+        window_size=self.config.attn_config.sliding_window_size,
+        dtype=torch.float32,
+        device=torch.device("cpu"),
+    )
+
+    self.config = config
+
+  def get_attention_mask(
+      self, idx: int, input_pos: torch.Tensor
+  ) -> torch.Tensor:
+    if self.config.attn_config.attn_types:
+      if (
+          self.config.attn_config.attn_types[idx]
+          == cfg.AttentionType.LOCAL_SLIDING
+      ):
+        return self.sliding_window_mask_cache.index_select(2, input_pos)
+
+    return self.mask_cache.index_select(2, input_pos)
+
+  @torch.inference_mode
+  def forward(self, idx: torch.Tensor, input_pos: torch.Tensor) -> torch.Tensor:
+    B, T = idx.size()
+    assert self.config.max_seq_len >= T, (
+        f"Cannot forward sequence of length {T}, max seq length is only"
+        f" {self.config.max_seq_len}"
+    )
+
+    cos, sin = self.rope_cache
+    cos = cos.index_select(0, input_pos)
+    sin = sin.index_select(0, input_pos)
+
+    # token embeddings of shape (b, t, n_embd)
+    x = self.tok_embedding(idx)
+    x = x * (self.config.embedding_dim**0.5)
+
+    for i, block in enumerate(self.transformer_blocks):
+      mask = self.get_attention_mask(i, input_pos)
+      x = block(x, (cos, sin), mask, input_pos)
+
+    x = self.final_norm(x)
+    res = self.lm_head(x)  # (b, t, vocab_size)
+    if self.config.final_logit_softcap is not None:
+      res = res / self.config.final_logit_softcap
+      res = torch.tanh(res)
+      res = res * self.config.final_logit_softcap
+    return res
+
+
+def get_model_config_2b(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
+  attn_config = cfg.AttentionConfig(
+      num_heads=8,
+      head_dim=256,
+      num_query_groups=4,
+      rotary_percentage=1.0,
+      qkv_transpose_before_split=True,
+      logit_softcap=50.0,
+      sliding_window_size=4096,
+      attn_types=[cfg.AttentionType.GLOBAL, cfg.AttentionType.LOCAL_SLIDING]
+      * 13,
+  )
+
+  norm_config = cfg.NormalizationConfig(
+      type=cfg.NormalizationType.RMS_NORM,
+      epsilon=1e-6,
+      zero_centered=True,
+  )
+  ff_config = cfg.FeedForwardConfig(
+      type=cfg.FeedForwardType.GATED,
+      activation=cfg.ActivationConfig(cfg.ActivationType.GELU_TANH),
+      intermediate_size=9216,
+      pre_ff_norm_config=norm_config,
+      post_ff_norm_config=norm_config,
+  )
+  config = cfg.ModelConfig(
+      vocab_size=256000,
+      num_layers=26,
+      max_seq_len=8192,
+      embedding_dim=2304,
+      kv_cache_max_len=kv_cache_max_len,
+      attn_config=attn_config,
+      ff_config=ff_config,
+      pre_attention_norm_config=norm_config,
+      post_attention_norm_config=norm_config,
+      final_norm_config=norm_config,
+      parallel_residual=False,
+      lm_head_use_bias=False,
+      enable_hlfb=False,
+      final_logit_softcap=30.0,
+  )
+  return config
+
+
+def get_fake_model_config_2b_for_test() -> cfg.ModelConfig:
+  config = get_model_config_2b()
+  config.num_layers = 2
+  return config
+
+
+def build_2b_model(checkpoint_path, **kwargs) -> nn.Module:
+  config = get_model_config_2b(**kwargs)
+  model = Gemma2(config)
+  loader = loading_utils.ModelLoader(checkpoint_path, TENSOR_NAMES)
+  # since embedding and lm-head use the same weight, we need to set strict
+  # to False.
+  loader.load(model, strict=False)
+  model.eval()
+  return model
+
+
+def define_and_run_2b() -> None:
+  current_dir = Path(__file__).parent.resolve()
+  gemma2_goldens = torch.load(current_dir / "gemma2it_2b_golden.pt")
+  print("Running GEMMA 2")
+  kv_cache_max_len = 1024
+  checkpoint_path = os.path.join(Path.home(), "Downloads/llm_data/gemma2-2b")
+  model = build_2b_model(checkpoint_path, kv_cache_max_len=kv_cache_max_len)
+  toks = torch.from_numpy(
+      np.array([2, 651, 9456, 576, 573, 3520, 3858, 603, 235248])
+  )
+  tokens = torch.full((1, kv_cache_max_len), 0, dtype=torch.long, device="cpu")
+  tokens[0, :9] = toks
+  input_pos = torch.arange(0, kv_cache_max_len)
+  out = model.forward(tokens, input_pos)
+  out_final = out[0, 8, :]
+  assert torch.allclose(gemma2_goldens, out_final, atol=1e-04)
+  print(out)
+
+
+if __name__ == "__main__":
+  torch.set_printoptions(sci_mode=True)
+  define_and_run_2b()
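The Gemma2Block docstring above notes that the post-attention norm is applied before the residual addition rather than after it. A toy standalone comparison of the two placements (plain PyTorch; nn.LayerNorm stands in for the RMSNorm the package actually configures):

import torch
from torch import nn

norm = nn.LayerNorm(4)  # stand-in for the configured post-attention norm
x = torch.randn(1, 4)
attn_out = torch.randn(1, 4)

# TransformerBlock (layers/attention.py): add the residual first, then normalize.
standard = norm(x + attn_out)

# Gemma2Block (gemma2.py above): normalize the attention output, then add.
gemma2_style = x + norm(attn_out)

print(torch.allclose(standard, gemma2_style))  # generally False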
ai_edge_torch/generative/examples/stable_diffusion/clip.py
CHANGED
@@ -35,7 +35,7 @@ TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
     pre_attn_norm=(
         "cond_stage_model.transformer.text_model.encoder.layers.{}.layer_norm1"
     ),
-
+    post_attn_norm=(
         "cond_stage_model.transformer.text_model.encoder.layers.{}.layer_norm2"
     ),
     embedding=(
@@ -120,7 +120,7 @@ def get_model_config() -> cfg.ModelConfig:
       attn_config=attn_config,
       ff_config=ff_config,
       pre_attention_norm_config=norm_config,
-
+      post_attention_norm_config=norm_config,
       final_norm_config=norm_config,
       enable_hlfb=True,
   )
ai_edge_torch/generative/examples/t5/t5.py
CHANGED
@@ -38,7 +38,7 @@ ENCDEC_TENSOR_NAMES = {
         "{prefix}.block.0.layer.0.SelfAttention.relative_attention_bias"
     ),
     "pre_attn_norm": "{prefix}.block.{}.layer.0.layer_norm",
-    "
+    "post_attn_norm": "{prefix}.block.{}.layer.1.layer_norm",
     "final_norm": "{prefix}.final_layer_norm",
 }
 
@@ -396,7 +396,7 @@ def get_model_config_t5() -> cfg.ModelConfig:
       relative_attention=True,
       ff_config=ff_config,
       pre_attention_norm_config=norm_config,
-
+      post_attention_norm_config=norm_config,
       final_norm_config=norm_config,
       parallel_residual=False,
       lm_head_use_bias=False,
@@ -419,7 +419,7 @@ def build_t5_model(checkpoint_path: str) -> nn.Module:
       "cross_attn_value_proj": "{prefix}.block.{}.layer.1.EncDecAttention.v",
       "cross_attn_output_proj": "{prefix}.block.{}.layer.1.EncDecAttention.o",
       # In the decoder, the FF is layer 2 in the Transformer block
-      "
+      "post_attn_norm": "{prefix}.block.{}.layer.2.layer_norm",
       # In the decoder, the cross attention is layer 1 in the Transformer block
       "pre_cross_attn_norm": "{prefix}.block.{}.layer.1.layer_norm",
   }
@@ -475,7 +475,7 @@ def build_t5_decoder_model(
       "cross_attn_value_proj": "{prefix}.block.{}.layer.1.EncDecAttention.v",
       "cross_attn_output_proj": "{prefix}.block.{}.layer.1.EncDecAttention.o",
       # In the decoder, the FF is layer 2 in the Transformer block
-      "
+      "post_attn_norm": "{prefix}.block.{}.layer.2.layer_norm",
       # In the decoder, the cross attention is layer 1 in the Transformer block
       "pre_cross_attn_norm": "{prefix}.block.{}.layer.1.layer_norm",
   }
ai_edge_torch/generative/examples/t5/t5_attention.py
CHANGED
@@ -68,8 +68,8 @@ class EncoderDecoderBlock(nn.Module):
     else:
       self.cross_atten_func = None
 
-    self.
-        config.embedding_dim, config.
+    self.post_atten_norm = builder.build_norm(
+        config.embedding_dim, config.post_attention_norm_config
     )
     self.ff = builder.build_ff(config.embedding_dim, config.ff_config)
     self.config = config
@@ -118,7 +118,7 @@ class EncoderDecoderBlock(nn.Module):
     )
     attn_out = hidden_states + attn_out
 
-    forwarded = self.
+    forwarded = self.post_atten_norm(attn_out)
     forwarded = self.ff(forwarded)
     hidden_states = attn_out + forwarded
 
ai_edge_torch/generative/examples/test_models/toy_model.py
CHANGED
@@ -93,7 +93,7 @@ def get_model_config() -> cfg.ModelConfig:
       attn_config=attn_config,
       ff_config=ff_config,
       pre_attention_norm_config=norm_config,
-
+      post_attention_norm_config=norm_config,
       final_norm_config=norm_config,
   )
   return config
ai_edge_torch/generative/examples/test_models/toy_model_with_external_kv_cache.py
CHANGED
@@ -107,7 +107,7 @@ def get_model_config() -> cfg.ModelConfig:
       attn_config=attn_config,
       ff_config=ff_config,
       pre_attention_norm_config=norm_config,
-
+      post_attention_norm_config=norm_config,
      final_norm_config=norm_config,
      enable_hlfb=True,
   )
ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py
CHANGED
@@ -94,7 +94,7 @@ def get_model_config() -> cfg.ModelConfig:
       attn_config=attn_config,
       ff_config=ff_config,
       pre_attention_norm_config=norm_config,
-
+      post_attention_norm_config=norm_config,
       final_norm_config=norm_config,
       enable_hlfb=True,
   )
ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py
CHANGED
@@ -35,7 +35,7 @@ TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
     attn_value_proj="model.layers.{}.self_attn.v_proj",
     attn_output_proj="model.layers.{}.self_attn.o_proj",
     pre_attn_norm="model.layers.{}.input_layernorm",
-
+    post_attn_norm="model.layers.{}.post_attention_layernorm",
     embedding="model.embed_tokens",
     final_norm="model.norm",
     lm_head="lm_head",
@@ -130,7 +130,7 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       attn_config=attn_config,
       ff_config=ff_config,
       pre_attention_norm_config=norm_config,
-
+      post_attention_norm_config=norm_config,
       final_norm_config=norm_config,
       enable_hlfb=True,
   )
ai_edge_torch/generative/layers/attention.py
CHANGED
@@ -74,8 +74,8 @@ class TransformerBlock(nn.Module):
         config.kv_cache_max,
         config.enable_hlfb,
     )
-    self.
-        config.embedding_dim, config.
+    self.post_atten_norm = builder.build_norm(
+        config.embedding_dim, config.post_attention_norm_config
     )
     self.ff = builder.build_ff(config.embedding_dim, config.ff_config)
     self.config = config
@@ -108,7 +108,7 @@ class TransformerBlock(nn.Module):
     x_norm = self.pre_atten_norm(x)
     attn_out = self.atten_func(x_norm, rope, mask, input_pos)
     x = x + attn_out
-    x_norm = self.
+    x_norm = self.post_atten_norm(x)
     output = x + self.ff(x_norm)
 
     return output
@@ -228,8 +228,15 @@ class CausalSelfAttention(nn.Module):
     # TODO(haoliang): Handle when execeeding max sequence length.
     k, v = self.kv_cache.update_cache(input_pos, k, v)
 
-    y = self.sdpa_func(
-
+    y = self.sdpa_func(
+        q,
+        k,
+        v,
+        self.config.head_dim,
+        mask=mask,
+        softcap=self.config.logit_softcap,
+    )
+    y = y.reshape(B, T, -1)
 
     # Compute the output projection.
     y = self.output_projection(y)
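The attention layer now forwards softcap=self.config.logit_softcap to the SDPA helper; the capping itself is implemented in the scaled_dot_product_attention hunk further below as softcap * tanh(scores / softcap). A standalone sketch of just that step (plain PyTorch, independent of the package):

import torch

softcap = 50.0  # value used by the Gemma 2 attention config in this diff
scores = torch.tensor([-200.0, -20.0, 0.0, 20.0, 200.0])

# Soft-capping squashes raw attention scores into (-softcap, softcap)
# while staying roughly linear for small magnitudes.
capped = softcap * torch.tanh(scores / softcap)
print(capped)  # extremes saturate near ±50, values near 0 are ~unchanged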
ai_edge_torch/generative/layers/attention_utils.py
CHANGED
@@ -74,12 +74,42 @@ def build_causal_mask_cache(
   Returns:
       torch.Tensor: Causal attention mask.
   """
+
   if device is None:
     device = torch.device('cpu')
   mask = torch.full((size, size), float('-inf'), dtype=dtype, device=device)
   return torch.triu(mask, diagonal=1).unsqueeze(0).unsqueeze(0)
 
 
+def build_sliding_window_mask_cache(
+    size: int,
+    window_size: int,
+    dtype: torch.dtype = torch.float32,
+    device: torch.device = None,
+) -> torch.Tensor:
+  """Build a cache for a sliding window mask.
+
+  Args:
+      size (int): The size of the built mask cache.
+      window_size (int): The window size that is "seen" by a token.
+      dtype (torch.dtype, optional): Output tensor's data type. Defaults to
+        torch.float32.
+      device (torch.device, optional): Output tensor's data type. Defaults to
+        None in which case "cpu" is used.
+
+  Returns:
+      torch.Tensor: Causal attention mask.
+  """
+
+  mask = build_causal_mask_cache(size, dtype, device)
+  all_ones = torch.ones_like(mask)
+  window_size = min(size, window_size)
+  sliding_mask = torch.triu(all_ones, -1 * window_size + 1) * torch.tril(
+      all_ones, window_size - 1
+  )
+  return torch.where(sliding_mask == 1, mask, -2.3819763e38)
+
+
 def relative_position_bucket(
     relative_position: torch.Tensor,
     bidirectional: bool,
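To see what the new sliding-window mask produces, here is a standalone sketch for a tiny cache size (plain PyTorch, duplicating the logic above rather than importing the package):

import torch

size, window_size = 6, 3

# Causal mask: 0 on and below the diagonal, -inf strictly above it.
causal = torch.triu(
    torch.full((size, size), float("-inf")), diagonal=1
).unsqueeze(0).unsqueeze(0)

all_ones = torch.ones_like(causal)
window = min(size, window_size)
band = torch.triu(all_ones, -window + 1) * torch.tril(all_ones, window - 1)

# Inside the band: keep the causal mask; outside: a large negative constant.
sliding = torch.where(band == 1, causal, torch.tensor(-2.3819763e38))
print(sliding[0, 0])
# Row i allows columns max(0, i - window + 1) .. i; older positions are masked.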
ai_edge_torch/generative/layers/builder.py
CHANGED
@@ -89,11 +89,16 @@ def build_ff(dim: int, config: cfg.FeedForwardConfig):
 
   activation = get_activation(config.activation)
 
+  pre_ff_norm = build_norm(dim, config.pre_ff_norm_config)
+  post_ff_norm = build_norm(dim, config.post_ff_norm_config)
+
   return ff_module(
       dim=dim,
       hidden_dim=config.intermediate_size,
       activation=activation,
       use_bias=config.use_bias,
+      pre_ff_norm=pre_ff_norm,
+      post_ff_norm=post_ff_norm,
   )
 
 
ai_edge_torch/generative/layers/feed_forward.py
CHANGED
@@ -14,7 +14,7 @@
 # ==============================================================================
 # Common building blocks for FeedForward layers.
 
-from typing import Callable
+from typing import Callable, Optional
 
 import torch
 from torch import nn
@@ -30,6 +30,8 @@ class SequentialFeedForward(nn.Module):
       hidden_dim: int,
       activation: Callable[[torch.Tensor], torch.Tensor],
       use_bias=False,
+      pre_ff_norm: Optional[Callable[[torch.Tensor], torch.Tensor]] = None,
+      post_ff_norm: Optional[Callable[[torch.Tensor], torch.Tensor]] = None,
   ):
     """Init function for feedforward layer.
 
@@ -41,6 +43,8 @@ class SequentialFeedForward(nn.Module):
     self.act = activation
     self.w1 = nn.Linear(dim, hidden_dim, bias=use_bias)
     self.w2 = nn.Linear(hidden_dim, dim, bias=use_bias)
+    self.pre_ff_norm = pre_ff_norm if pre_ff_norm else lambda x: x
+    self.post_ff_norm = post_ff_norm if post_ff_norm else lambda x: x
 
   def forward(self, x):
     """Forward pass for Feedforward layer.
@@ -51,7 +55,9 @@ class SequentialFeedForward(nn.Module):
     Returns:
       torch.Tensor: output tensor after feedforward.
     """
-
+    x_norm = self.pre_ff_norm(x)
+    out = self.w2(self.act(self.w1(x_norm)))
+    return self.post_ff_norm(out)
 
 
 class GatedFeedForward(nn.Module):
@@ -66,6 +72,8 @@ class GatedFeedForward(nn.Module):
       hidden_dim: int,
       activation: Callable[[torch.Tensor], torch.Tensor],
       use_bias=False,
+      pre_ff_norm: Optional[Callable[[torch.Tensor], torch.Tensor]] = None,
+      post_ff_norm: Optional[Callable[[torch.Tensor], torch.Tensor]] = None,
   ):
     """Init function for feedforward layer.
 
@@ -78,6 +86,8 @@ class GatedFeedForward(nn.Module):
     self.w1 = nn.Linear(dim, hidden_dim, bias=use_bias)
     self.w2 = nn.Linear(hidden_dim, dim, bias=use_bias)
     self.w3 = nn.Linear(dim, hidden_dim, bias=use_bias)
+    self.pre_ff_norm = pre_ff_norm if pre_ff_norm else lambda x: x
+    self.post_ff_norm = post_ff_norm if post_ff_norm else lambda x: x
 
   def forward(self, x):
     """Forward pass for Feedforward layer.
@@ -88,4 +98,6 @@ class GatedFeedForward(nn.Module):
     Returns:
       torch.Tensor: output tensor after feedforward.
     """
-
+    x_norm = self.pre_ff_norm(x)
+    out = self.w2(self.act(self.w1(x_norm)) * self.w3(x_norm))
+    return self.post_ff_norm(out)
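After this change the feed-forward path is: optional pre-norm, gated MLP, optional post-norm. A compact standalone sketch of the same pattern (plain PyTorch; names mirror the diff but nothing is imported from the package, and nn.LayerNorm stands in for the configured norm):

import torch
from torch import nn


class GatedFFSketch(nn.Module):
  """Gated feed forward with optional pre/post normalization, as in the diff."""

  def __init__(self, dim, hidden_dim, pre_ff_norm=None, post_ff_norm=None):
    super().__init__()
    self.w1 = nn.Linear(dim, hidden_dim, bias=False)
    self.w2 = nn.Linear(hidden_dim, dim, bias=False)
    self.w3 = nn.Linear(dim, hidden_dim, bias=False)
    self.act = nn.GELU(approximate="tanh")
    # Identity when no norm is configured, matching the `lambda x: x` default.
    self.pre_ff_norm = pre_ff_norm if pre_ff_norm else lambda x: x
    self.post_ff_norm = post_ff_norm if post_ff_norm else lambda x: x

  def forward(self, x):
    x_norm = self.pre_ff_norm(x)
    out = self.w2(self.act(self.w1(x_norm)) * self.w3(x_norm))
    return self.post_ff_norm(out)


ff = GatedFFSketch(8, 32, pre_ff_norm=nn.LayerNorm(8), post_ff_norm=nn.LayerNorm(8))
print(ff(torch.randn(2, 8)).shape)  # torch.Size([2, 8])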
ai_edge_torch/generative/layers/model_config.py
CHANGED
@@ -16,7 +16,7 @@
 from dataclasses import dataclass
 from dataclasses import field
 import enum
-from typing import Optional
+from typing import Optional, Sequence
 
 
 @enum.unique
@@ -53,6 +53,11 @@ class FeedForwardType(enum.Enum):
   GATED = enum.auto()
 
 
+class AttentionType(enum.Enum):
+  GLOBAL = enum.auto()
+  LOCAL_SLIDING = enum.auto()
+
+
 @dataclass
 class AttentionConfig:
   """Attention model's parameters."""
@@ -78,6 +83,12 @@ class AttentionConfig:
   enable_kv_cache: bool = True
   relative_attention_num_buckets: int = 0
   relative_attention_max_distance: int = 0
+  # Softcap on the output logits.
+  logit_softcap: Optional[float] = None
+  # The types of attention used in the layers of the model.
+  attn_types: Optional[Sequence[AttentionType]] = None
+  # The size of the sliding window used for local attention.
+  sliding_window_size: Optional[int] = None
 
 
 @dataclass
@@ -88,16 +99,6 @@ class ActivationConfig:
   dim_out: Optional[int] = None
 
 
-@dataclass
-class FeedForwardConfig:
-  """FeedForward module's parameters."""
-
-  type: FeedForwardType
-  activation: ActivationConfig
-  intermediate_size: int
-  use_bias: bool = False
-
-
 @dataclass
 class NormalizationConfig:
   """Normalizater parameters."""
@@ -109,6 +110,24 @@ class NormalizationConfig:
   group_num: Optional[float] = None
 
 
+@dataclass
+class FeedForwardConfig:
+  """FeedForward module's parameters."""
+
+  type: FeedForwardType
+  activation: ActivationConfig
+  intermediate_size: int
+  use_bias: bool = False
+  # The normalization applied to feed forward's input.
+  pre_ff_norm_config: NormalizationConfig = field(
+      default_factory=NormalizationConfig
+  )
+  # The normalization applied to feed forward's output.
+  post_ff_norm_config: NormalizationConfig = field(
+      default_factory=NormalizationConfig
+  )
+
+
 @dataclass
 class ModelConfig:
   """Base configurations for building a transformer architecture."""
@@ -124,8 +143,8 @@ class ModelConfig:
   pre_attention_norm_config: NormalizationConfig = field(
       default_factory=NormalizationConfig
   )
-  # The normalization applied to
-
+  # The normalization applied to attentions's output.
+  post_attention_norm_config: NormalizationConfig = field(
       default_factory=NormalizationConfig
   )
   # The normalization applied before LM head.
@@ -151,6 +170,9 @@ class ModelConfig:
   # Default batch size of the exported model. Default value is 1.
   batch_size: int = 1
 
+  # Softcap on the model output logits.
+  final_logit_softcap: Optional[float] = None
+
   @property
   def kv_cache_max(self) -> int:
     if self.kv_cache_max_len > 0:
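FeedForwardConfig is moved below NormalizationConfig so its two new norm members can use field(default_factory=NormalizationConfig). A minimal standalone illustration of why dataclasses need default_factory for such fields (plain Python, independent of the package; the class names here are invented for the example):

from dataclasses import dataclass, field


@dataclass
class Norm:
  epsilon: float = 1e-6


@dataclass
class FF:
  # A plain `pre: Norm = Norm()` default would raise
  # "mutable default ... use default_factory"; default_factory also
  # gives every FF instance its own Norm object.
  pre: Norm = field(default_factory=Norm)


a, b = FF(), FF()
a.pre.epsilon = 1e-5
print(b.pre.epsilon)  # 1e-06: b's Norm is unaffected by mutating a's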
ai_edge_torch/generative/layers/scaled_dot_product_attention.py
CHANGED
@@ -29,6 +29,7 @@ def scaled_dot_product_attention(
     head_size: int,
     mask: Optional[torch.Tensor] = None,
     scale: Optional[float] = None,
+    softcap: Optional[float] = None,
 ):
   """Scaled dot product attention.
 
@@ -53,15 +54,26 @@ def scaled_dot_product_attention(
   # Handle the GQA case, where q.shape[1] % k.shape[1] == 0.
   k = k.repeat_interleave(q.shape[1] // k.shape[1], dim=1)
   v = v.repeat_interleave(q.shape[1] // v.shape[1], dim=1)
-
-
-
-
-
-
-
-
-
+  if softcap is None:
+    y = F.scaled_dot_product_attention(
+        q,
+        k,
+        v,
+        attn_mask=mask,
+        dropout_p=0.0,
+        is_causal=mask is None,
+        scale=scale,
+    )
+  else:
+    q.mul_(scale)
+    scores = q @ k.transpose(-1, -2)
+    scores = scores / softcap
+    scores = torch.tanh(scores)
+    scores = scores * softcap
+    scores = scores + mask
+    out = F.softmax(scores.float(), dim=-1).type_as(q)
+    y = torch.matmul(out, v)
+
   return y.transpose(1, 2)
 
 
@@ -72,6 +84,7 @@ def scaled_dot_product_attention_with_hlfb(
     head_size: int,
     mask: Optional[torch.Tensor] = None,
     scale: Optional[float] = None,
+    softcap: Optional[float] = None,
 ):
   """Scaled dot product attention with high-level function boundary enabled.
 
@@ -86,6 +99,9 @@ def scaled_dot_product_attention_with_hlfb(
     The output tensor of scaled_dot_product_attention.
   """
 
+  if softcap is not None:
+    raise NotImplementedError("SDPA with HLFB not available with softcap.")
+
   if scale is None:
     scale = 1.0 / math.sqrt(head_size)
 
ai_edge_torch/generative/test/test_model_conversion.py
CHANGED
@@ -16,7 +16,7 @@
 import copy
 
 import ai_edge_torch
-from ai_edge_torch.generative.examples.gemma import gemma
+from ai_edge_torch.generative.examples.gemma import gemma, gemma2
 from ai_edge_torch.generative.examples.phi2 import phi2
 from ai_edge_torch.generative.examples.test_models import toy_model_with_kv_cache  # NOQA
 from ai_edge_torch.generative.examples.tiny_llama import tiny_llama
@@ -202,6 +202,34 @@ class TestModelConversion(googletest.TestCase):
         )
     )
 
+  def test_gemma2(self):
+    self.skipTest("b/338288901")
+    config = gemma2.get_fake_model_config_2b_for_test()
+    model = gemma2.Gemma2(config)
+    model.eval()
+
+    idx = torch.from_numpy(np.array([[1, 2, 3, 4]]))
+    tokens = torch.full((1, 10), 0, dtype=torch.long, device="cpu")
+    tokens[0, :4] = idx
+    input_pos = torch.arange(0, 10)
+
+    edge_model = ai_edge_torch.convert(model, (tokens, input_pos))
+
+    # TODO: b/338288901 - re-enable test to check output tensors.
+    skip_output_check = True
+    if not skip_output_check:
+      # TODO(talumbau, haoliang): debug numerical diff.
+      self.assertTrue(
+          model_coverage.compare_tflite_torch(
+              edge_model,
+              model,
+              (tokens, input_pos),
+              num_valid_inputs=1,
+              atol=1e-2,
+              rtol=1e-5,
+          )
+      )
+
   def test_phi2(self):
     self.skipTest("b/338288901")
     config = phi2.get_fake_model_config_for_test()
ai_edge_torch/generative/utilities/loader.py
CHANGED
@@ -107,7 +107,9 @@ class ModelLoader:
     ff_gate_proj: str = None
 
     pre_attn_norm: str = None
+    post_attn_norm: str = None
     pre_ff_norm: str = None
+    post_ff_norm: str = None
     embedding: str = None
     embedding_position: str = None
     final_norm: str = None
@@ -258,6 +260,26 @@ class ModelLoader:
             f"{ff_gate_proj_name}.bias"
         )
 
+    if self._names.pre_ff_norm is not None:
+      pre_ff_norm_name = self._names.pre_ff_norm.format(idx)
+      converted_state[f"{prefix}.ff.pre_ff_norm.weight"] = state.pop(
+          f"{pre_ff_norm_name}.weight"
+      )
+      if f"{pre_ff_norm_name}.bias" in state:
+        converted_state[f"{prefix}.ff.pre_ff_norm.bias"] = state.pop(
+            f"{pre_ff_norm_name}.bias"
+        )
+
+    if self._names.post_ff_norm is not None:
+      post_ff_norm_name = self._names.post_ff_norm.format(idx)
+      converted_state[f"{prefix}.ff.post_ff_norm.weight"] = state.pop(
+          f"{post_ff_norm_name}.weight"
+      )
+      if f"{post_ff_norm_name}.bias" in state:
+        converted_state[f"{prefix}.ff.post_ff_norm.bias"] = state.pop(
+            f"{post_ff_norm_name}.bias"
+        )
+
   def _map_attention(
       self,
       idx: int,
@@ -325,14 +347,14 @@ class ModelLoader:
           f"{pre_attn_norm_name}.bias"
       )
 
-    if self._names.
-
-      converted_state[f"{prefix}.
-          f"{
+    if self._names.post_attn_norm is not None:
+      post_attn_norm_name = self._names.post_attn_norm.format(idx)
+      converted_state[f"{prefix}.post_atten_norm.weight"] = state.pop(
+          f"{post_attn_norm_name}.weight"
      )
-      if f"{
-        converted_state[f"{prefix}.
-            f"{
+      if f"{post_attn_norm_name}.bias" in state:
+        converted_state[f"{prefix}.post_atten_norm.bias"] = state.pop(
+            f"{post_attn_norm_name}.bias"
        )
 
   def _fuse_qkv(
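The new entries follow the loader's existing convention: each TensorNames field is a str.format template that gets instantiated per layer index, and the loader pops the resulting "<name>.weight" (and ".bias" if present) keys from the checkpoint state dict. A minimal illustration (plain Python, no package import; the template strings are the ones used by the Gemma examples above):

post_attn_norm = "model.layers.{}.post_attention_layernorm"
post_ff_norm = "model.layers.{}.post_feedforward_layernorm"

for idx in range(2):
  # These are the checkpoint keys the loader looks up for layer `idx`.
  print(post_attn_norm.format(idx) + ".weight")
  print(post_ff_norm.format(idx) + ".weight")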
ai_edge_torch/generative/utilities/t5_loader.py
CHANGED
@@ -113,7 +113,7 @@ class ModelLoader:
 
     pre_attn_norm: str = None
     pre_cross_attn_norm: str = None
-
+    post_attn_norm: str = None
     embedding: str = None
     final_norm: str = None
     lm_head: str = None
@@ -484,14 +484,14 @@ class ModelLoader:
          state.pop(f"{pre_cross_attn_norm_name}.bias")
      )
 
-    if names.
-
-      converted_state[f"{prefix}.
-          f"{
+    if names.post_attn_norm is not None:
+      post_attn_norm_name = names.post_attn_norm.format(idx)
+      converted_state[f"{prefix}.post_atten_norm.weight"] = state.pop(
+          f"{post_attn_norm_name}.weight"
      )
-      if f"{
-        converted_state[f"{prefix}.
-            f"{
+      if f"{post_attn_norm_name}.bias" in state:
+        converted_state[f"{prefix}.post_atten_norm.bias"] = state.pop(
+            f"{post_attn_norm_name}.bias"
        )
 
   def _fuse_qkv(
ai_edge_torch/version.py
CHANGED

{ai_edge_torch_nightly-0.3.0.dev20240812.dist-info → ai_edge_torch_nightly-0.3.0.dev20240814.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-torch-nightly
-Version: 0.3.0.dev20240812
+Version: 0.3.0.dev20240814
 Summary: Supporting PyTorch models with the Google AI Edge TFLite runtime.
 Home-page: https://github.com/google-ai-edge/ai-edge-torch
 Keywords: On-Device ML,AI,Google,TFLite,PyTorch,LLMs,GenAI
{ai_edge_torch_nightly-0.3.0.dev20240812.dist-info → ai_edge_torch_nightly-0.3.0.dev20240814.dist-info}/RECORD
CHANGED
@@ -2,7 +2,7 @@ ai_edge_torch/__init__.py,sha256=48qP37uHT90YPs4eIUQxCiWVwqGEX3idCUs6mQKvX1U,116
 ai_edge_torch/config.py,sha256=PCd9PVrbUNeVIUDFUCnW4goDWU4bjouK28yMYU6VOi0,877
 ai_edge_torch/conftest.py,sha256=r0GTrhMRhlmOGrrkvumHN8hkmyug6WvF60vWq8wRIBI,758
 ai_edge_torch/model.py,sha256=5DYNpFVwvI1w0JbAC1hn83NJVGS1WPX7n742419PMqs,4558
-ai_edge_torch/version.py,sha256=
+ai_edge_torch/version.py,sha256=BlH3JqkXwVHXFYAd5rF04dUvLCthvKVqnfgO3abgh14,706
 ai_edge_torch/_convert/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/_convert/conversion.py,sha256=kcv_QgNgeyDmrqwdzHicGNP68w6zF7GJg7YkMEIXp4Q,3759
 ai_edge_torch/_convert/conversion_utils.py,sha256=Sr8qXVcTwc-ZnZmK7yxVrIOOp1S_vNrwzC0zUvLTI2o,2160
@@ -42,22 +42,24 @@ ai_edge_torch/generative/examples/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQe
 ai_edge_torch/generative/examples/experimental/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/experimental/gemma/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/experimental/gemma/convert_to_tflite.py,sha256=lpiPFSh3SJd6WwuZ0QegSva3__iSz2tUD7L7QfkAe4I,3085
-ai_edge_torch/generative/examples/experimental/gemma/gemma.py,sha256=
+ai_edge_torch/generative/examples/experimental/gemma/gemma.py,sha256=8313wSsddvuxZ5ZYVdaITBV2FF1k22dcCujnq0UZvKs,6699
 ai_edge_torch/generative/examples/experimental/phi/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/experimental/phi/convert_to_tflite.py,sha256=DavrdGmqUgoThsGNRv3LXMW5tvJdYEvj66Hf1XRqkXU,3055
 ai_edge_torch/generative/examples/experimental/phi/phi2.py,sha256=u-VJX5mjzQKspXtAhNi53LCITtag-3nCaRTKdk5Z1sc,6231
 ai_edge_torch/generative/examples/experimental/tiny_llama/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/experimental/tiny_llama/convert_to_tflite.py,sha256=xPVvHQjLJHFiRv_-Fy2sDm0Aft7SG8SXiV6o3rF03cQ,3108
-ai_edge_torch/generative/examples/experimental/tiny_llama/tiny_llama.py,sha256=
+ai_edge_torch/generative/examples/experimental/tiny_llama/tiny_llama.py,sha256=zQYtyk3xYdiRAnzMKN58Q_wgTQFnDujxp6L4RFQjiD4,6383
 ai_edge_torch/generative/examples/gemma/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
+ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py,sha256=pseJExH35lSAK0ZtzSHB1sFtRtF_EuT2xcSpGU0gKVI,2524
 ai_edge_torch/generative/examples/gemma/convert_to_tflite.py,sha256=w589IJETATd6Z9_1XCIWbrlCV3E92X_5ac3VVCVFXG0,2522
-ai_edge_torch/generative/examples/gemma/gemma.py,sha256=
+ai_edge_torch/generative/examples/gemma/gemma.py,sha256=cCki-0cKvmGxK4Md6dRNdPDWZUyhkJUI854OCTFf3h0,6262
+ai_edge_torch/generative/examples/gemma/gemma2.py,sha256=j-zxJ-JNRnQ_kDzUESmsyy_a_4IxWZ510HmIImc0LDc,8240
 ai_edge_torch/generative/examples/phi2/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/phi2/convert_to_tflite.py,sha256=ON6zLO-nFS8eJ2yhyWzT5x2Somr-Ca-VjpjT7OGFU10,2506
 ai_edge_torch/generative/examples/phi2/phi2.py,sha256=C_kFYsPrEQ9GJCnc6h-jh8B5qQryvEpI6O6t4FBxg1I,5858
 ai_edge_torch/generative/examples/stable_diffusion/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/stable_diffusion/attention.py,sha256=kDWG6MlIGa89zC5KSRcJlw2c4ITuw8KcchtfmF55f4g,3545
-ai_edge_torch/generative/examples/stable_diffusion/clip.py,sha256=
+ai_edge_torch/generative/examples/stable_diffusion/clip.py,sha256=0WniBWQ6_NcQc5WycX3YRRX7Os9AGQSxfc1m2HKBqg8,4479
 ai_edge_torch/generative/examples/stable_diffusion/convert_to_tflite.py,sha256=7ra36nM5tQwSw-vi6QCFLx5IssZhT-6yVK4H3XsAc4w,5044
 ai_edge_torch/generative/examples/stable_diffusion/decoder.py,sha256=slieF2-QcDCwd4DRZ7snsZIphT97IXpp4plRRsRSwL8,13983
 ai_edge_torch/generative/examples/stable_diffusion/diffusion.py,sha256=7oUIJ6HO0vmlhFdkXpqGm9KTB-eM4Ob9VrHSDlIGFOg,30926
@@ -72,27 +74,27 @@ ai_edge_torch/generative/examples/stable_diffusion/samplers/k_lms.py,sha256=ZE6H
 ai_edge_torch/generative/examples/stable_diffusion/samplers/sampler.py,sha256=RxR5rw0wFFm_5CfAY-3-EIz83vhM9EKye8Bb5zBb0Ok,1341
 ai_edge_torch/generative/examples/t5/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/t5/convert_to_tflite.py,sha256=CZVuNEL8OHPkdsz70WOvNpTJ9LFkiDnlwgJiXfUZCVk,4548
-ai_edge_torch/generative/examples/t5/t5.py,sha256=
-ai_edge_torch/generative/examples/t5/t5_attention.py,sha256=
+ai_edge_torch/generative/examples/t5/t5.py,sha256=Zobw5BV-PC0nlU9Z6fzb2O07rMeU8vGIk-KtKp9D_H0,20871
+ai_edge_torch/generative/examples/t5/t5_attention.py,sha256=1lvbSlzyBwmd5Bs7-Up_v4iJQkCPIJx2RmMkLgy7l2Q,8508
 ai_edge_torch/generative/examples/test_models/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
-ai_edge_torch/generative/examples/test_models/toy_model.py,sha256=
-ai_edge_torch/generative/examples/test_models/toy_model_with_external_kv_cache.py,sha256=
-ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py,sha256=
+ai_edge_torch/generative/examples/test_models/toy_model.py,sha256=LfWO_gSr1f66V1pxAc6yh21mtaJs7TVeuO9748zXBnE,3963
+ai_edge_torch/generative/examples/test_models/toy_model_with_external_kv_cache.py,sha256=l9swUKTcDtnTibNSNExaMgLvDeJ4Er2tVh5ZW1EtRgk,5809
+ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py,sha256=mQkcpSe6HlRLMkIRCEHc9ZXL7jxEp9RWSGUQjjd-r2w,4841
 ai_edge_torch/generative/examples/tiny_llama/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py,sha256=CLRqO7ycMbpy7J3_Czp1sLx6hcdwGD9zVq04yRba0e8,2550
-ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py,sha256=
+ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py,sha256=mXXFYJfo8yegSOFOndCR0oYxFPchYb9vTJ4ThXGIFLU,5940
 ai_edge_torch/generative/fx_passes/__init__.py,sha256=fmNNXawJ722M4cTUuTx289rT0NHxBEsOy_k8baqCOms,1173
 ai_edge_torch/generative/fx_passes/remove_sdpa_zero_mask_pass.py,sha256=sXis0U4u-RoIp_NyrmWJNnqFqpqRuZOrhfsJIO6rMps,2028
 ai_edge_torch/generative/layers/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
-ai_edge_torch/generative/layers/attention.py,sha256=
-ai_edge_torch/generative/layers/attention_utils.py,sha256=
-ai_edge_torch/generative/layers/builder.py,sha256=
-ai_edge_torch/generative/layers/feed_forward.py,sha256=
+ai_edge_torch/generative/layers/attention.py,sha256=2UujQePRJ1LK02PN-hGcuMu0ooCJC6ETfPvzEYVFyho,12284
+ai_edge_torch/generative/layers/attention_utils.py,sha256=68GXGR2HSWBFViTxX7cHifzVG-kcLS2IL2tQJPIpupg,7344
+ai_edge_torch/generative/layers/builder.py,sha256=xb7rjADv3Jm4qfmlYtg6oLLe7ReDE9UjsEqiejPpDD8,4346
+ai_edge_torch/generative/layers/feed_forward.py,sha256=uto7xtwx6jPkk1GZ2x7pSTentQzRrPSKw4_PSE12ahA,3525
 ai_edge_torch/generative/layers/kv_cache.py,sha256=Ob8QeXWW5xt-6hcGA0uoC48eRQ8lfvKca8JbWtFx2CE,3082
-ai_edge_torch/generative/layers/model_config.py,sha256=
+ai_edge_torch/generative/layers/model_config.py,sha256=WpZ9djUBAZddyeSODHDaVMG37EQqfzGGrlMPi8AA-Hc,5752
 ai_edge_torch/generative/layers/normalization.py,sha256=u8lv0p-ktKcRqCDlOqZQa9WQcfDK9JM2IaUQFQdn7xs,1860
 ai_edge_torch/generative/layers/rotary_position_embedding.py,sha256=CZqOoibLcHvUgrgaIIWAlmk3XgE2inzx340MN-npLoU,1347
-ai_edge_torch/generative/layers/scaled_dot_product_attention.py,sha256=
+ai_edge_torch/generative/layers/scaled_dot_product_attention.py,sha256=x2bOmrTgOISXcb06IDP7X3xgftpPpxOjBXw_OxTMVns,3874
 ai_edge_torch/generative/layers/unet/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/layers/unet/blocks_2d.py,sha256=4a0wh0co8Avz1wvxS3XqsgrgL5G-X1GSARI5Rj3L-xg,26995
 ai_edge_torch/generative/layers/unet/builder.py,sha256=zAqWXdimmMrQRhmE_t9XkS68mh6PSrzwb-2NZZXrR5I,1901
@@ -109,12 +111,12 @@ ai_edge_torch/generative/quantize/ai_edge_quantizer_glue/translate_recipe.py,sha
 ai_edge_torch/generative/test/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/test/test_experimental_ekv.py,sha256=T5-O2RVLJTH7v9w1_uBfp-Y7o3sdGzYq2Tj2wLRNHyI,4357
 ai_edge_torch/generative/test/test_loader.py,sha256=1ZqAq0HY5uIioumsReOVIsbGBx0WkYcl18PvttdJKrk,3381
-ai_edge_torch/generative/test/test_model_conversion.py,sha256=
+ai_edge_torch/generative/test/test_model_conversion.py,sha256=52ciFy_Qol2Xuym6P6EqdL29oai35LSWGvsUwyEdFTo,8477
 ai_edge_torch/generative/test/test_quantize.py,sha256=3SmJm7Kq98gAneU6IGwwJrJYCVH1qwWR6oUxPfb6qiI,5346
 ai_edge_torch/generative/utilities/__init__.py,sha256=-_jxnnFnCgnTU4oTm4MnRsvL5lqhomBNdFBbqfmfHPo,720
-ai_edge_torch/generative/utilities/loader.py,sha256=
+ai_edge_torch/generative/utilities/loader.py,sha256=bAWZ7FM4v_pPnX_AmEdGxHkDH65QdL-MjIP3PxscZmI,12649
 ai_edge_torch/generative/utilities/stable_diffusion_loader.py,sha256=pKp3AMSbS3otCvgwJRF5M1l4JRNKk-aCKimXzIMSrds,35679
-ai_edge_torch/generative/utilities/t5_loader.py,sha256=
+ai_edge_torch/generative/utilities/t5_loader.py,sha256=_UXcc1QKT-S92hikfo-fTBFhnYLzROqcyRqKonVsqj4,16885
 ai_edge_torch/hlfb/__init__.py,sha256=sH4um75na-O8tzxN6chFyp6Y4xnexsE7kUQpZySv6dE,735
 ai_edge_torch/hlfb/mark_pattern/__init__.py,sha256=cjTprggj_cuktSCm7-A25e7Shop3k63ylp7sdZmtZ8o,4790
 ai_edge_torch/hlfb/mark_pattern/passes.py,sha256=pjkKcI1nHECPluAt87cFBrt1DP0f3ge7rHq1NhCkBIE,1936
@@ -134,8 +136,8 @@ ai_edge_torch/quantize/quant_config.py,sha256=U0KisSW-uZkoMJcy-ZP9W57p3tsa594fr9
 ai_edge_torch/testing/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/testing/model_coverage/__init__.py,sha256=5P8J6Zk5YYtDvTBucFvB9NGSRI7Gw_24WnrbhXgycEE,765
 ai_edge_torch/testing/model_coverage/model_coverage.py,sha256=UPB448aMDUyC0HNYVqio2rcJPnDN0tBQMP08J6vPYew,4718
-ai_edge_torch_nightly-0.3.0.
-ai_edge_torch_nightly-0.3.0.
-ai_edge_torch_nightly-0.3.0.
-ai_edge_torch_nightly-0.3.0.
-ai_edge_torch_nightly-0.3.0.
+ai_edge_torch_nightly-0.3.0.dev20240814.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ai_edge_torch_nightly-0.3.0.dev20240814.dist-info/METADATA,sha256=eYXq0PpFouGnXKu9vXIzyaXj8XsLDxlDn903GJFR3ak,1885
+ai_edge_torch_nightly-0.3.0.dev20240814.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+ai_edge_torch_nightly-0.3.0.dev20240814.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
+ai_edge_torch_nightly-0.3.0.dev20240814.dist-info/RECORD,,