ai-edge-torch-nightly 0.6.0.dev20250521__py3-none-any.whl → 0.6.0.dev20250523__py3-none-any.whl

This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
ai_edge_torch/generative/examples/qwen/convert_v3_to_tflite.py ADDED
@@ -0,0 +1,61 @@
+ # Copyright 2025 The AI Edge Torch Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ """Example of converting Qwen 3.0 models to multi-signature tflite model."""
+
+ from absl import app
+ from ai_edge_torch.generative.examples.qwen import qwen3
+ from ai_edge_torch.generative.utilities import converter
+ from ai_edge_torch.generative.utilities import export_config
+ from ai_edge_torch.generative.utilities import loader
+
+ flags = converter.define_conversion_flags('qwen')
+
+ _MODEL_SIZE = flags.DEFINE_enum(
+     'model_size',
+     '1.7b',
+     ['0.6b', '1.7b', '4b'],
+     'The size of the model to convert.',
+ )
+
+ _BUILDER = {
+     '0.6b': qwen3.build_0_6b_model,
+     '1.7b': qwen3.build_1_7b_model,
+     '4b': qwen3.build_4b_model,
+ }
+
+
+ def main(_):
+   checkpoint_path = flags.FLAGS.checkpoint_path
+   pytorch_model = _BUILDER[_MODEL_SIZE.value](
+       checkpoint_path,
+       custom_loader=loader.maybe_get_custom_loader(
+           checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+       ),
+       kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
+   )
+   converter.convert_to_tflite(
+       pytorch_model,
+       output_path=flags.FLAGS.output_path,
+       output_name_prefix=flags.FLAGS.output_name_prefix,
+       prefill_seq_len=flags.FLAGS.prefill_seq_lens,
+       quantize=flags.FLAGS.quantize,
+       lora_ranks=flags.FLAGS.lora_ranks,
+       export_config=export_config.get_from_flags(),
+   )
+
+
+ if __name__ == '__main__':
+   app.run(main)
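
Note: the script above is driven entirely by absl flags. For orientation, the same flow can be sketched directly in Python; this is an illustration, not part of the package, and the checkpoint path, output values, and cache size below are hypothetical.

    # Hypothetical, flag-free version of main() above.
    from ai_edge_torch.generative.examples.qwen import qwen3
    from ai_edge_torch.generative.utilities import converter

    pytorch_model = qwen3.build_1_7b_model(
        '/tmp/qwen3-1.7b',      # hypothetical checkpoint path
        kv_cache_max_len=1280,  # hypothetical cache size
    )
    converter.convert_to_tflite(
        pytorch_model,
        output_path='/tmp/',              # hypothetical
        output_name_prefix='qwen3_1_7b',  # hypothetical
        prefill_seq_len=[256, 1024],      # one prefill signature per length
    )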
ai_edge_torch/generative/examples/qwen/qwen3.py ADDED
@@ -0,0 +1,171 @@
+ # Copyright 2025 The AI Edge Torch Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ """Example of building Qwen 3.0 models."""
+
+ from typing import Callable, Dict
+ import ai_edge_torch.generative.layers.model_config as cfg
+ from ai_edge_torch.generative.utilities import loader as loading_utils
+ from ai_edge_torch.generative.utilities import model_builder
+ import torch
+ from torch import nn
+
+ TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
+     ff_up_proj="model.layers.{}.mlp.up_proj",
+     ff_down_proj="model.layers.{}.mlp.down_proj",
+     ff_gate_proj="model.layers.{}.mlp.gate_proj",
+     attn_query_proj="model.layers.{}.self_attn.q_proj",
+     attn_key_proj="model.layers.{}.self_attn.k_proj",
+     attn_value_proj="model.layers.{}.self_attn.v_proj",
+     attn_query_norm="model.layers.{}.self_attn.q_norm",
+     attn_key_norm="model.layers.{}.self_attn.k_norm",
+     attn_output_proj="model.layers.{}.self_attn.o_proj",
+     pre_attn_norm="model.layers.{}.input_layernorm",
+     post_attn_norm="model.layers.{}.post_attention_layernorm",
+     embedding="model.embed_tokens",
+     final_norm="model.norm",
+     lm_head="lm_head",
+ )
+
+
+ class Qwen3(model_builder.DecoderOnlyModel):
+   """A Qwen3 model built from the Edge Generative API layers."""
+
+   pass
+
+
+ def get_4b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
+   """Returns the model config for a Qwen 3.0 4B model.
+
+   Args:
+     kv_cache_max_len (int): The maximum sequence length of the KV cache. Default
+       is 1024.
+
+   Returns:
+     The model config for a Qwen 3.0 4B model.
+   """
+   norm_config = cfg.NormalizationConfig(
+       type=cfg.NormalizationType.RMS_NORM, epsilon=1e-06
+   )
+   attn_config = cfg.AttentionConfig(
+       num_heads=32,
+       head_dim=128,
+       num_query_groups=8,
+       query_norm_config=norm_config,
+       key_norm_config=norm_config,
+       rotary_base=1000000,
+       rotary_percentage=1.0,
+       qkv_use_bias=False,
+       qkv_transpose_before_split=True,
+       qkv_fused_interleaved=False,  # No interleaved qkv projection.
+   )
+   ff_config = cfg.FeedForwardConfig(
+       type=cfg.FeedForwardType.GATED,
+       activation=cfg.ActivationConfig(cfg.ActivationType.SILU),
+       intermediate_size=9728,
+   )
+   block_config = cfg.TransformerBlockConfig(
+       attn_config=attn_config,
+       ff_config=ff_config,
+       pre_attention_norm_config=norm_config,
+       post_attention_norm_config=norm_config,
+   )
+   config = cfg.ModelConfig(
+       vocab_size=151936,
+       num_layers=36,
+       max_seq_len=40960,
+       embedding_dim=2560,
+       kv_cache_max_len=kv_cache_max_len,
+       block_configs=block_config,
+       final_norm_config=norm_config,
+   )
+   return config
+
+
+ def get_1_7b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
+   """Returns the model config for a Qwen 3.0 1.7B model."""
+   config = get_4b_model_config(kv_cache_max_len)
+   # Qwen has only one block config.
+   block_config = config.block_config(0)
+   block_config.attn_config.num_heads = 16
+   block_config.attn_config.head_dim = 128
+   block_config.ff_config.intermediate_size = 6144
+   config.num_layers = 28
+   config.embedding_dim = 2048
+   return config
+
+
+ def get_0_6b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
+   """Returns the model config for a Qwen 3.0 0.6B model."""
+   config = get_4b_model_config(kv_cache_max_len)
+   # Qwen has only one block config.
+   block_config = config.block_config(0)
+   block_config.attn_config.num_heads = 16
+   block_config.attn_config.head_dim = 128
+   block_config.ff_config.intermediate_size = 3072
+   config.num_layers = 28
+   config.embedding_dim = 1024
+   return config
+
+
+ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
+   config = get_4b_model_config(**kwargs)
+   config.vocab_size = 128
+   config.num_layers = 2
+   # Qwen has only one block config.
+   config.block_config(0).ff_config.intermediate_size = 64
+   return config
+
+
+ def build_4b_model(
+     checkpoint_path: str,
+     custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+     **kwargs
+ ) -> nn.Module:
+   return model_builder.build_decoder_only_model(
+       checkpoint_path=checkpoint_path,
+       config=get_4b_model_config(**kwargs),
+       tensor_names=TENSOR_NAMES,
+       model_class=Qwen3,
+       custom_loader=custom_loader,
+   )
+
+
+ def build_1_7b_model(
+     checkpoint_path: str,
+     custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+     **kwargs
+ ) -> nn.Module:
+   return model_builder.build_decoder_only_model(
+       checkpoint_path=checkpoint_path,
+       config=get_1_7b_model_config(**kwargs),
+       tensor_names=TENSOR_NAMES,
+       model_class=Qwen3,
+       custom_loader=custom_loader,
+   )
+
+
+ def build_0_6b_model(
+     checkpoint_path: str,
+     custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+     **kwargs
+ ) -> nn.Module:
+   return model_builder.build_decoder_only_model(
+       checkpoint_path=checkpoint_path,
+       config=get_0_6b_model_config(**kwargs),
+       tensor_names=TENSOR_NAMES,
+       model_class=Qwen3,
+       custom_loader=custom_loader,
+   )
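
Note: each size-specific config above overrides only a few fields of the shared 4B config. A quick illustration, using nothing beyond the functions defined in this file:

    from ai_edge_torch.generative.examples.qwen import qwen3

    # The 0.6B config is the 4B config with fewer layers, a smaller embedding,
    # and a smaller feedforward; heads and head_dim come from the overrides.
    config = qwen3.get_0_6b_model_config(kv_cache_max_len=256)
    attn = config.block_config(0).attn_config
    print(config.num_layers, config.embedding_dim)  # 28 1024
    print(attn.num_heads, attn.head_dim)            # 16 128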
ai_edge_torch/generative/examples/qwen/verify.py → ai_edge_torch/generative/examples/qwen/verify_qwen2.py RENAMED
@@ -48,6 +48,7 @@ _CHECKPOINT = {
  def main(_):
    verify_util.verify_qwen(
        model_size=_MODEL_SIZE.value,
+       model_version="v2",
        checkpoint_dir=_CHECKPOINT[_MODEL_SIZE.value],
        max_new_tokens=_MAX_NEW_TOKENS.value,
        prompts=_PROMPTS.value,
ai_edge_torch/generative/examples/qwen/verify_qwen3.py ADDED
@@ -0,0 +1,59 @@
+ # Copyright 2025 The AI Edge Torch Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ """Verifies the reauthored Qwen 3.0 0.6B, 1.7B, and 4B models."""
+
+
+ from absl import app
+ from absl import flags
+ from ai_edge_torch.generative.examples.qwen import verify_util
+
+
+ _MODEL_SIZE = flags.DEFINE_enum(
+     "model_size",
+     "0.6b",
+     ["0.6b", "1.7b", "4b"],
+     "The size of the model to verify.",
+ )
+ _PROMPTS = flags.DEFINE_multi_string(
+     "prompts",
+     "What is the meaning of life?",
+     "The input prompts to generate answers.",
+ )
+ _MAX_NEW_TOKENS = flags.DEFINE_integer(
+     "max_new_tokens",
+     30,
+     "The maximum size of the generated tokens.",
+ )
+
+ _CHECKPOINT = {
+     "0.6b": "Qwen/Qwen3-0.6B",
+     "1.7b": "Qwen/Qwen3-1.7B",
+     "4b": "Qwen/Qwen3-4B",
+ }
+
+
+ def main(_):
+   verify_util.verify_qwen(
+       model_size=_MODEL_SIZE.value,
+       model_version="v3",
+       checkpoint_dir=_CHECKPOINT[_MODEL_SIZE.value],
+       max_new_tokens=_MAX_NEW_TOKENS.value,
+       prompts=_PROMPTS.value,
+   )
+
+
+ if __name__ == "__main__":
+   app.run(main)
ai_edge_torch/generative/examples/qwen/verify_util.py CHANGED
@@ -17,24 +17,36 @@ import logging
  import os
  import pathlib

- from ai_edge_torch.generative.examples.qwen import qwen
+ from ai_edge_torch.generative.examples.qwen import qwen, qwen3
  from ai_edge_torch.generative.utilities import loader
  from ai_edge_torch.generative.utilities import transformers_verifier
  from ai_edge_torch.generative.utilities import verifier
  import transformers


- _BUILDER = {
+ _BUILDER_V2 = {
      "0.5b": qwen.build_0_5b_model,
      "1.5b": qwen.build_1_5b_model,
      "3b": qwen.build_3b_model,
  }

+ _BUILDER_V3 = {
+     "0.6b": qwen3.build_0_6b_model,
+     "1.7b": qwen3.build_1_7b_model,
+     "4b": qwen3.build_4b_model,
+ }
+
+ _BUILDER = {
+     "v2": _BUILDER_V2,
+     "v3": _BUILDER_V3,
+ }
+
  DEFAULT_PROMPTS = ["What is the meaning of life?"]


  def verify_qwen(
      model_size: str,
+     model_version: str,
      checkpoint_dir: str,
      weight_filename: str = "model.safetensors",
      max_new_tokens: int = 30,
@@ -64,7 +76,7 @@ def verify_qwen(
    reauthored_checkpoint = os.path.join(checkpoint_dir, weight_filename)

    logging.info("Building the reauthored model from: %s", reauthored_checkpoint)
-   reauthored_model = _BUILDER[model_size](
+   reauthored_model = _BUILDER[model_version][model_size](
        checkpoint_path=reauthored_checkpoint,
        custom_loader=custom_loader,
    )
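
Note: callers must now select both dimensions of the dispatch table. A sketch of the v3 call path, mirroring verify_qwen3.py above:

    from ai_edge_torch.generative.examples.qwen import verify_util

    # Builder lookup is now _BUILDER[model_version][model_size].
    verify_util.verify_qwen(
        model_size="0.6b",
        model_version="v3",
        checkpoint_dir="Qwen/Qwen3-0.6B",
        max_new_tokens=30,
        prompts=["What is the meaning of life?"],
    )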
ai_edge_torch/generative/layers/attention.py CHANGED
@@ -15,6 +15,7 @@

  """Common building blocks for Attention layer."""

+ import abc
  from typing import Optional, Tuple, Union

  from ai_edge_torch.generative.layers import builder
@@ -111,7 +112,42 @@ class TransformerBlock(nn.Module):
      return output if kv is None else (output, kv)


- class CausalSelfAttention(nn.Module):
+ class CausalSelfAttentionBase(nn.Module):
+   """Base class for causal self attention layer."""
+
+   def __init__(
+       self, dim: int, config: cfg.AttentionConfig, enable_hlfb: bool
+   ) -> None:
+     super().__init__()
+     self.dim = dim
+     self.config = config
+     self.enable_hlfb = enable_hlfb
+
+     self.query_norm = builder.build_norm(
+         self.config.head_dim, self.config.query_norm_config
+     )
+     self.key_norm = builder.build_norm(
+         self.config.head_dim, self.config.key_norm_config
+     )
+     self.value_norm = builder.build_norm(
+         self.config.head_dim, self.config.value_norm_config
+     )
+
+   @abc.abstractmethod
+   def forward(
+       self,
+       x: torch.Tensor,
+       rope: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
+       mask: Optional[torch.Tensor] = None,
+       input_pos: Optional[torch.Tensor] = None,
+       kv_cache: Optional[kv_utils.KVCacheEntry] = None,
+       lora: Optional[lora_utils.LoRAEntry] = None,
+   ) -> Union[torch.Tensor, Tuple[torch.Tensor, kv_utils.KVCacheEntry]]:
+     raise NotImplementedError()
+
+
+ class CausalSelfAttention(CausalSelfAttentionBase):
+   """Causal self attention layer implementation."""

    def __init__(
        self,
@@ -126,7 +162,7 @@ class CausalSelfAttention(nn.Module):
        config (cfg.AttentionConfig): attention specific configurations.
        enable_hlfb (bool): whether hlfb is enabled or not.
      """
-     super().__init__()
+     super().__init__(dim, config, enable_hlfb)
      self.kv_cache = None
      qkv_shape = (
          config.num_heads + 2 * config.num_query_groups
@@ -137,12 +173,6 @@ class CausalSelfAttention(nn.Module):
      self.output_projection = nn.Linear(
          output_shape, dim, bias=config.output_proj_use_bias
      )
-     self.query_norm = builder.build_norm(
-         config.head_dim, config.query_norm_config
-     )
-     self.key_norm = builder.build_norm(config.head_dim, config.key_norm_config)
-     self.config = config
-     self.enable_hlfb = enable_hlfb

    def forward(
        self,
@@ -204,6 +234,7 @@ class CausalSelfAttention(nn.Module):

      q = self.query_norm(q)
      k = self.key_norm(k)
+     v = self.value_norm(v)

      q = q.reshape(B, T, -1, self.config.head_dim)
      k = k.reshape(B, T, -1, self.config.head_dim)
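
Note: the new base class builds query/key/value norms sized to head_dim, matching Qwen3-style per-head normalization. Below is a standalone sketch of what such an RMS norm computes; it is illustrative only, and the package's real implementation lives in normalization.py:

    import torch

    def rms_norm(x: torch.Tensor, weight: torch.Tensor, eps: float = 1e-6):
      # Normalize over the last axis (head_dim), then apply a learned scale.
      return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + eps) * weight

    head_dim = 128
    q = torch.randn(1, 4, head_dim)  # toy (batch, tokens, head_dim) tensor
    q_normed = rms_norm(q, torch.ones(head_dim))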
ai_edge_torch/generative/layers/builder.py CHANGED
@@ -15,9 +15,9 @@
  # Builder class for individual components.
  from typing import Callable

+ from ai_edge_torch.generative.layers import normalization
  import ai_edge_torch.generative.layers.feed_forward as feed_forward
  import ai_edge_torch.generative.layers.model_config as cfg
- import ai_edge_torch.generative.layers.normalization as normalization
  import torch
  from torch import nn
  import torch.nn.functional as F
@@ -74,6 +74,8 @@ def build_norm(dim: int, config: cfg.NormalizationConfig):
        dim,
        eps=config.epsilon,
        zero_centered_gamma=config.zero_centered,
+       with_scale=config.with_scale,
+       scale_shift=config.scale_shift,
        enable_hlfb=config.enable_hlfb,
    )
  elif config.type == cfg.NormalizationType.LAYER_NORM:
@@ -107,20 +109,13 @@ def build_ff(dim: int, config: cfg.FeedForwardConfig):
    else:
      raise ValueError("Unsupported feedforward type.")

-   activation = get_activation(config.activation)
-
    pre_ff_norm = build_norm(dim, config.pre_ff_norm_config)
    post_ff_norm = build_norm(dim, config.post_ff_norm_config)

    return ff_module(
        dim=dim,
-       hidden_dim=config.intermediate_size,
-       activation=activation,
-       use_bias=config.use_bias,
-       use_glu=(
-           config.activation.type == cfg.ActivationType.GE_GLU
-           or config.activation.type == cfg.ActivationType.SILU_GLU
-       ),
+       activation=get_activation(config.activation),
+       config=config,
        pre_ff_norm=pre_ff_norm,
        post_ff_norm=post_ff_norm,
    )
ai_edge_torch/generative/layers/feed_forward.py CHANGED
@@ -14,45 +14,69 @@
  # ==============================================================================
  # Common building blocks for FeedForward layers.

- from typing import Callable, Optional
+ import abc
+ from typing import Callable

+ import ai_edge_torch.generative.layers.model_config as cfg
  import torch
  from torch import nn


- class SequentialFeedForward(nn.Module):
+ class FeedForwardBase(nn.Module):
+   """Base class for feedforward layer."""
+
+   def __init__(
+       self,
+       dim: int,
+       activation: Callable[[torch.Tensor], torch.Tensor],
+       config: cfg.FeedForwardConfig,
+       pre_ff_norm: Callable[[torch.Tensor], torch.Tensor] = lambda x: x,
+       post_ff_norm: Callable[[torch.Tensor], torch.Tensor] = lambda x: x,
+   ):
+     super().__init__()
+     self.dim = dim
+     self.act = activation
+     self.config = config
+     self.hidden_dim = config.intermediate_size
+     self.use_bias = config.use_bias
+     self.use_glu = (
+         config.activation.type == cfg.ActivationType.GE_GLU
+         or config.activation.type == cfg.ActivationType.SILU_GLU
+     )
+     self.pre_ff_norm = pre_ff_norm
+     self.post_ff_norm = post_ff_norm
+
+   @abc.abstractmethod
+   def forward(self, x: torch.Tensor) -> torch.Tensor:
+     raise NotImplementedError()
+
+
+ class SequentialFeedForward(FeedForwardBase):
    """Vanilla sequential Feedforward with customizable activation."""

    def __init__(
        self,
        dim: int,
-       hidden_dim: int,
        activation: Callable[[torch.Tensor], torch.Tensor],
-       use_bias=False,
-       use_glu=False,
-       pre_ff_norm: Optional[Callable[[torch.Tensor], torch.Tensor]] = None,
-       post_ff_norm: Optional[Callable[[torch.Tensor], torch.Tensor]] = None,
+       config: cfg.FeedForwardConfig,
+       pre_ff_norm: Callable[[torch.Tensor], torch.Tensor] = lambda x: x,
+       post_ff_norm: Callable[[torch.Tensor], torch.Tensor] = lambda x: x,
    ):
      """Init function for feedforward layer.

      Args:
        dim (int): embedding size.
-       hidden_dim (int): hidden dim size of the feedforward layer.
        activation (Callable): activation function used in this block.
-       use_bias (Boolean): whether to use bias. Default is false.
-       use_glu (Boolean): whether to use glu in activation. Default is false.
-       pre_ff_norm (Callable): pre feedforward norm. Default is None.
-       post_ff_norm (Callable): post feedforward norm. Default is None.
+       config (cfg.FeedForwardConfig): feedforward layer configuration.
+       pre_ff_norm (Callable): pre feedforward norm. Default is identity.
+       post_ff_norm (Callable): post feedforward norm. Default is identity.
      """
-     super().__init__()
-     self.act = activation
-     if use_glu:
-       self.w1 = nn.Linear(dim, hidden_dim * 2, bias=use_bias)
+     super().__init__(dim, activation, config, pre_ff_norm, post_ff_norm)
+     if self.use_glu:
+       self.w1 = nn.Linear(dim, self.hidden_dim * 2, bias=self.use_bias)
      else:
-       self.w1 = nn.Linear(dim, hidden_dim, bias=use_bias)
-     self.w2 = nn.Linear(hidden_dim, dim, bias=use_bias)
-     self.pre_ff_norm = pre_ff_norm if pre_ff_norm else lambda x: x
-     self.post_ff_norm = post_ff_norm if post_ff_norm else lambda x: x
+       self.w1 = nn.Linear(dim, self.hidden_dim, bias=self.use_bias)
+     self.w2 = nn.Linear(self.hidden_dim, dim, bias=self.use_bias)

    def forward(self, x):
      """Forward pass for Feedforward layer.
@@ -68,7 +92,7 @@ class SequentialFeedForward(nn.Module):
      return self.post_ff_norm(out)


- class GatedFeedForward(nn.Module):
+ class GatedFeedForward(FeedForwardBase):
    """Gated Feedforward with customizable activation.

    https://arxiv.org/pdf/2002.05202v1.pdf
@@ -77,34 +101,48 @@ class GatedFeedForward(nn.Module):
    def __init__(
        self,
        dim: int,
-       hidden_dim: int,
        activation: Callable[[torch.Tensor], torch.Tensor],
-       use_bias=False,
-       use_glu=False,
-       pre_ff_norm: Optional[Callable[[torch.Tensor], torch.Tensor]] = None,
-       post_ff_norm: Optional[Callable[[torch.Tensor], torch.Tensor]] = None,
+       config: cfg.FeedForwardConfig,
+       pre_ff_norm: Callable[[torch.Tensor], torch.Tensor] = lambda x: x,
+       post_ff_norm: Callable[[torch.Tensor], torch.Tensor] = lambda x: x,
    ):
      """Init function for feedforward layer.

      Args:
        dim (int): embedding size.
-       hidden_dim (int): hidden dim size of the feedforward layer.
        activation (Callable): activation function used in this block.
-       use_bias (Boolean): whether to use bias. Default is false.
-       use_glu (Boolean): whether to use glu in activation. Default is false.
-       pre_ff_norm (Callable): pre feedforward norm. Default is None.
-       post_ff_norm (Callable): post feedforward norm. Default is None.
+       pre_ff_norm (Callable): pre feedforward norm. Default is identity.
+       post_ff_norm (Callable): post feedforward norm. Default is identity.
+       config (cfg.FeedForwardConfig): feedforward layer configuration.
      """
-     super().__init__()
-     self.act = activation
-     if use_glu:
-       self.w1 = nn.Linear(dim, hidden_dim * 2, bias=use_bias)
+     super().__init__(dim, activation, config, pre_ff_norm, post_ff_norm)
+
+     if self.use_glu:
+       assert (
+           self.config.use_separate_gating
+       ), 'use_separate_gating must be True for GE_GLU | SILU_GLU activation.'
+
+     if self.config.use_separate_gating:
+       if self.use_glu:
+         self.w1 = nn.Linear(dim, self.hidden_dim * 2, bias=self.use_bias)
+       else:
+         self.w1 = nn.Linear(dim, self.hidden_dim, bias=self.use_bias)
+       self.w3 = nn.Linear(dim, self.hidden_dim, bias=self.use_bias)
      else:
-       self.w1 = nn.Linear(dim, hidden_dim, bias=use_bias)
-     self.w2 = nn.Linear(hidden_dim, dim, bias=use_bias)
-     self.w3 = nn.Linear(dim, hidden_dim, bias=use_bias)
-     self.pre_ff_norm = pre_ff_norm if pre_ff_norm else lambda x: x
-     self.post_ff_norm = post_ff_norm if post_ff_norm else lambda x: x
+       self.w_gating = nn.Parameter(
+           torch.ones((2, dim, self.hidden_dim), dtype=torch.float32),
+           requires_grad=False,
+       )
+       self.gating_bias = (
+           nn.Parameter(
+               torch.zeros((2, self.hidden_dim), dtype=torch.float32),
+               requires_grad=False,
+           )
+           if self.use_bias
+           else torch.zeros((2, self.hidden_dim), dtype=torch.float32)
+       )
+
+     self.w2 = nn.Linear(self.hidden_dim, dim, bias=self.use_bias)

    def forward(self, x):
      """Forward pass for Feedforward layer.
@@ -116,5 +154,12 @@ class GatedFeedForward(nn.Module):
      torch.Tensor: output tensor after feedforward.
    """
    x_norm = self.pre_ff_norm(x)
-   out = self.w2(self.act(self.w1(x_norm)) * self.w3(x_norm))
+   if self.config.use_separate_gating:
+     out = self.w2(self.act(self.w1(x_norm)) * self.w3(x_norm))
+   else:
+     out = self.w2(
+         self.act(torch.matmul(x_norm, self.w_gating[0]) + self.gating_bias[0])
+         * (torch.matmul(x_norm, self.w_gating[1]) + self.gating_bias[1])
+     )
+
    return self.post_ff_norm(out)
14
14
  # ==============================================================================
15
15
 
16
16
  from ai_edge_torch.generative.layers import feed_forward
17
+ from ai_edge_torch.generative.layers import model_config as cfg
17
18
  import torch
18
19
  import torch.nn.functional as F
19
20
  from absl.testing import absltest as googletest
@@ -22,28 +23,32 @@ from absl.testing import absltest as googletest
22
23
  class FeedForwardTest(googletest.TestCase):
23
24
 
24
25
  def test_sequential_feed_forward(self):
26
+ ff_config = cfg.FeedForwardConfig(
27
+ type=cfg.FeedForwardType.SEQUENTIAL,
28
+ activation=cfg.ActivationConfig(cfg.ActivationType.SILU),
29
+ intermediate_size=10,
30
+ use_bias=True,
31
+ )
25
32
  ff = feed_forward.SequentialFeedForward(
26
33
  dim=10,
27
- hidden_dim=10,
28
34
  activation=F.silu,
29
- use_bias=True,
30
- use_glu=False,
31
- pre_ff_norm=torch.nn.Identity(),
32
- post_ff_norm=torch.nn.Identity(),
35
+ config=ff_config,
33
36
  )
34
37
  x = torch.ones((1, 10))
35
38
  out = ff(x)
36
39
  self.assertEqual(out.shape, (1, 10))
37
40
 
38
41
  def test_gated_feed_forward(self):
42
+ ff_config = cfg.FeedForwardConfig(
43
+ type=cfg.FeedForwardType.GATED,
44
+ activation=cfg.ActivationConfig(cfg.ActivationType.SILU),
45
+ intermediate_size=10,
46
+ use_bias=True,
47
+ )
39
48
  ff = feed_forward.GatedFeedForward(
40
49
  dim=10,
41
- hidden_dim=10,
42
50
  activation=F.silu,
43
- use_bias=True,
44
- use_glu=False,
45
- pre_ff_norm=torch.nn.Identity(),
46
- post_ff_norm=torch.nn.Identity(),
51
+ config=ff_config,
47
52
  )
48
53
  x = torch.ones((1, 10))
49
54
  out = ff(x)
ai_edge_torch/generative/layers/model_config.py CHANGED
@@ -69,10 +69,32 @@ class NormalizationConfig:
    enable_hlfb: bool = True
    epsilon: float = 1e-5
    zero_centered: bool = False
+   # Whether to use a scale parameter in the normalization.
+   with_scale: bool = False
+   # The shift to apply to the scale parameter.
+   scale_shift: float = 0.0
    # Number of groups used in group normalization.
    group_num: Optional[float] = None


+ # Experimental feature and may be subject to change.
+ class KVCacheUpdateStrategy(enum.Enum):
+   """Different alignment strategies of the KV cache.
+
+   Due to restrictions from different devices, we may need to apply different
+   alignment strategies to the KV cache during the Attention layer's cache update.
+
+   Available options:
+     INPLACE: Update the existing cache in place using indexes.
+     PREPEND_LEFT: Prepend the new kv to the left of the existing cache. When
+       this cache update is applied, the newer kvs will always be prepended at
+       the beginning of the cache.
+   """
+
+   INPLACE = enum.auto()
+   PREPEND_LEFT = enum.auto()
+
+
  @dataclasses.dataclass
  class AttentionConfig:
    """Attention model's parameters."""
@@ -108,6 +130,12 @@ class AttentionConfig:
    key_norm_config: NormalizationConfig = dataclasses.field(
        default_factory=NormalizationConfig
    )
+   # The normalization applied to value projection's output.
+   value_norm_config: NormalizationConfig = dataclasses.field(
+       default_factory=NormalizationConfig
+   )
+   # Whether the KV cache is shared with the previous attention block.
+   kv_shared: bool = False
    relative_attention_num_buckets: int = 0
    relative_attention_max_distance: int = 0
    # Softcap on the output logits.
@@ -118,6 +146,8 @@ class AttentionConfig:
    sliding_window_size: Optional[int] = None
    # The default causal mask value used by attention layer.
    causal_mask_value: float = float("-inf")
+   # The update strategy of the KV cache. Defaults to INPLACE.
+   kvcache_update_strategy: KVCacheUpdateStrategy = KVCacheUpdateStrategy.INPLACE


  @dataclasses.dataclass
@@ -135,6 +165,9 @@ class FeedForwardConfig:
    type: FeedForwardType
    activation: ActivationConfig
    intermediate_size: int
+   # Whether to use two separate gating parameters or a single one in
+   # GatedFeedForward.
+   use_separate_gating: bool = True
    use_bias: bool = False
    # The normalization applied to feed forward's input.
    pre_ff_norm_config: NormalizationConfig = dataclasses.field(
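
Note: a config exercising the new fields might look like the sketch below. All values are illustrative and do not correspond to any shipped model.

    import ai_edge_torch.generative.layers.model_config as cfg

    # Fused gating path in GatedFeedForward (see feed_forward.py above).
    ff_config = cfg.FeedForwardConfig(
        type=cfg.FeedForwardType.GATED,
        activation=cfg.ActivationConfig(cfg.ActivationType.SILU),
        intermediate_size=4096,     # illustrative
        use_separate_gating=False,  # single (2, dim, hidden) w_gating tensor
    )
    attn_config = cfg.AttentionConfig(
        num_heads=16,  # illustrative
        head_dim=128,
        num_query_groups=8,
        kvcache_update_strategy=cfg.KVCacheUpdateStrategy.PREPEND_LEFT,
    )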
ai_edge_torch/generative/utilities/transformers_verifier.py CHANGED
@@ -15,8 +15,6 @@

  """Utilities for the models predefined in HuggingFace transformers."""

- from typing import cast
-
  from ai_edge_torch.generative.utilities import verifier
  import torch
  import transformers
@@ -39,4 +37,8 @@ class TransformersModelWrapper(verifier.ModelWrapper):
      self, inputs: torch.Tensor, max_new_tokens: int
  ) -> torch.IntTensor:
    gen_config = transformers.GenerationConfig(max_new_tokens=max_new_tokens)
-   return self.model.generate(inputs=inputs, generation_config=gen_config)
+   # Do not override GenerationConfig with model defaults. Always keep greedy
+   # sampling.
+   return self.model.generate(
+       inputs=inputs, generation_config=gen_config, use_model_defaults=False
+   )
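
Note: recent transformers releases merge the checkpoint's own generation_config.json into a user-supplied GenerationConfig unless use_model_defaults=False is passed, so a checkpoint that ships sampling defaults (temperature, top_p) could otherwise replace the greedy decoding that verification depends on.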
ai_edge_torch/version.py CHANGED
@@ -13,4 +13,4 @@
  # limitations under the License.
  # ==============================================================================

- __version__ = "0.6.0.dev20250521"
+ __version__ = "0.6.0.dev20250523"
ai_edge_torch_nightly-0.6.0.dev20250521.dist-info/METADATA → ai_edge_torch_nightly-0.6.0.dev20250523.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: ai-edge-torch-nightly
- Version: 0.6.0.dev20250521
+ Version: 0.6.0.dev20250523
  Summary: Supporting PyTorch models with the Google AI Edge TFLite runtime.
  Home-page: https://github.com/google-ai-edge/ai-edge-torch
  Keywords: On-Device ML,AI,Google,TFLite,PyTorch,LLMs,GenAI
ai_edge_torch_nightly-0.6.0.dev20250521.dist-info/RECORD → ai_edge_torch_nightly-0.6.0.dev20250523.dist-info/RECORD RENAMED
@@ -2,7 +2,7 @@ ai_edge_torch/__init__.py,sha256=lemyLCNoGYRnJsmDuGZu7qOqLbLqG6CGDFtu3ue1syU,129
  ai_edge_torch/_config.py,sha256=AiqhbcheF7j_ozIGDLC89k1we95aVgFDa-tR6h7UI0s,2529
  ai_edge_torch/conftest.py,sha256=r0GTrhMRhlmOGrrkvumHN8hkmyug6WvF60vWq8wRIBI,758
  ai_edge_torch/model.py,sha256=wxjSFq_rBSxSqbUE8E8EJTCkgvgaRLjq_ZuAM-IZpCU,5606
- ai_edge_torch/version.py,sha256=lmyCstaeVZjTAbBP4s9Z02tpX00ynyLPsymBY2tCe4A,706
+ ai_edge_torch/version.py,sha256=GtHv2onfhAfdaCEqSWpqO8k8_lxn7A37AJJnPbucqbI,706
  ai_edge_torch/_convert/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
  ai_edge_torch/_convert/conversion.py,sha256=iQk3R-pLq4c1nfLqPB4xTRj78gghxPGzJCJtILLdg5o,6123
  ai_edge_torch/_convert/conversion_utils.py,sha256=Sr8qXVcTwc-ZnZmK7yxVrIOOp1S_vNrwzC0zUvLTI2o,2160
@@ -118,9 +118,12 @@ ai_edge_torch/generative/examples/phi/verify_phi4.py,sha256=2MlgQrfRkhE7Dya8MIix
  ai_edge_torch/generative/examples/phi/verify_util.py,sha256=kRREOMSikn_BRbTDkQiXBllPZwmWHa9KUk-kK5lCkbU,2945
  ai_edge_torch/generative/examples/qwen/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
  ai_edge_torch/generative/examples/qwen/convert_to_tflite.py,sha256=TnzyARHQgmWeOdYsV9WpRj5vhKGBH0kAbp3tMj8ZCYw,1998
+ ai_edge_torch/generative/examples/qwen/convert_v3_to_tflite.py,sha256=GVV8CVj3rdgt_ZTOlpLSa6AD1pMMpMnZEuowzN2AIGM,2004
  ai_edge_torch/generative/examples/qwen/qwen.py,sha256=EcIHVeBcJLc290TiPkPfE7jdG_VXZYKlVGf0XQXzqo8,4554
- ai_edge_torch/generative/examples/qwen/verify.py,sha256=mP1SIAX2B1vFO02vRkAZC0UCyvBBxeWxK_456gG5a1s,1633
- ai_edge_torch/generative/examples/qwen/verify_util.py,sha256=jEmqYnOkOcQhOmHJrHsX0vdLq7JSahROvEBrG6n7tqg,2919
+ ai_edge_torch/generative/examples/qwen/qwen3.py,sha256=g6aVHjnlPo4YhLjSdXxONaDcKT3fZOh8cewlvf3cfoQ,5554
+ ai_edge_torch/generative/examples/qwen/verify_qwen2.py,sha256=ry-c2QesH-0KnrSQygfjUFs6d4kOFvJz2ts_8mP156I,1659
+ ai_edge_torch/generative/examples/qwen/verify_qwen3.py,sha256=hmE0gdyzgcDpEDcWiwOzKQcxt4XeAe9DPRspy_I-lc8,1628
+ ai_edge_torch/generative/examples/qwen/verify_util.py,sha256=vPROwLRABTChMGo5yWJkZURXP6TKWgh5FJj1Z3Zs6HU,3153
  ai_edge_torch/generative/examples/qwen_vl/__init__.py,sha256=JaAnrFoXTl3RJX97XspklkTyqOHVyAgRJsZtzNDd10c,671
  ai_edge_torch/generative/examples/qwen_vl/convert_to_tflite.py,sha256=BM-ed7KrmPwzI3MvDs2R7P-kJgE1SK_cNVqIfXhtJjs,2411
  ai_edge_torch/generative/examples/qwen_vl/decoder.py,sha256=plOi-3LltxReW_HVxhxwee_rYCQq-gsOwbGZtRsM8N8,4443
@@ -166,18 +169,18 @@ ai_edge_torch/generative/examples/tiny_llama/verify_util.py,sha256=_zYGqP4HO_Stc
  ai_edge_torch/generative/fx_passes/__init__.py,sha256=PFSMsA1vfBfrV9ssBCkYJNl8Hx_bLdWjN01iyjPM5jE,1094
  ai_edge_torch/generative/fx_passes/remove_sdpa_zero_mask_pass.py,sha256=myGjal5A8yIBoqgArd2k40rZmCgD1Ya369KR7182bhI,2129
  ai_edge_torch/generative/layers/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
- ai_edge_torch/generative/layers/attention.py,sha256=uK1ih2kxPZherwi-pGSm8B--NNWnQ8npEAfgcjMIkEY,12964
+ ai_edge_torch/generative/layers/attention.py,sha256=RaXENRRQo1MsLdt3U8h3kYTCmd6imHQ-aCXtmPXCh_o,13911
  ai_edge_torch/generative/layers/attention_test.py,sha256=9v8v96TLyFPdqxEylU1JOAeRFAp2s0YoDHZN83SFJJA,4764
  ai_edge_torch/generative/layers/attention_utils.py,sha256=zBVwlBUTs-nStIKCZG0ks5ra7tsqc9ShfakFJKH5rds,7344
  ai_edge_torch/generative/layers/attention_utils_test.py,sha256=22gQ1gcRPkwqFG3_p82GZfRKVE3udEssSy58wNOqv0w,2431
- ai_edge_torch/generative/layers/builder.py,sha256=LXGuSHIx6QZAzLFm7aJvlzoMPgQwbXLFchGEKYwOOUA,5090
+ ai_edge_torch/generative/layers/builder.py,sha256=2bUgkyowDkDznkF8XaHyZs4nowHr1QEHYLM7pMaFmIk,4921
  ai_edge_torch/generative/layers/einsum.py,sha256=EsZSWNVWUs0-1plp4TBnhP4ZhaRDBa2VlDO6hWpUAqU,1288
  ai_edge_torch/generative/layers/einsum_test.py,sha256=ltIE773bvvNLv_9aLQxFwe1MgQ762sez0c5E2tejxuA,1079
- ai_edge_torch/generative/layers/feed_forward.py,sha256=hdICat-8gW7-vxDAevJQ8NQ-mynllPiqLdXQMF6JMnc,4189
- ai_edge_torch/generative/layers/feed_forward_test.py,sha256=8ZGy79BBpsyS6yKKDEKrDt249G5Mz-8VKWW7_WHx0u4,1655
+ ai_edge_torch/generative/layers/feed_forward.py,sha256=_GmtHxwL068l9gh_F_WFcFk7La-Tl5SfoQ9v2hMabZM,5541
+ ai_edge_torch/generative/layers/feed_forward_test.py,sha256=Y5l1eC9NgfYixHcfIfE1W4FGh7oC-9UGGyHdKS9tQKc,1880
  ai_edge_torch/generative/layers/kv_cache.py,sha256=b-7shzDaKexmvQF7P3SiAmIz4ZofjYWv3m5u71GojsA,10460
  ai_edge_torch/generative/layers/lora.py,sha256=hsvWLLOnW7HQ0AysOZu30x_cetMquDd1tjfyLz8HCSU,17892
- ai_edge_torch/generative/layers/model_config.py,sha256=H1MpjP1Ij1r4DEcE4cQ_6A8h0QvUjCkuGATXMkIMIWg,8570
+ ai_edge_torch/generative/layers/model_config.py,sha256=0FH3UJPVnEhgBO4eUlNaHuQBDo_OKH17ChG5-Ybj2T4,9895
  ai_edge_torch/generative/layers/normalization.py,sha256=ijwCpi22NLX-Sygwy5sK9l9WjGvbPIhZvVwoBAonWAo,7014
  ai_edge_torch/generative/layers/normalization_test.py,sha256=zwurZly-TgFxdgVVdpzu9vCpcLbd5RYt_gKg9Lfg1jI,2248
  ai_edge_torch/generative/layers/rotary_position_embedding.py,sha256=975zR202MdIrILJ7blceAcxrNqX1ZCN0ECKG1gz-bV8,2655
@@ -212,7 +215,7 @@ ai_edge_torch/generative/utilities/model_builder.py,sha256=tBfOcsI_NcneggHqkCSyd
  ai_edge_torch/generative/utilities/moonshine_loader.py,sha256=_RpFabSqtGH5PHiP3_1f6QfO14qMADUxr_HGRlVDFB0,4891
  ai_edge_torch/generative/utilities/stable_diffusion_loader.py,sha256=dqPD9qRXEWtU3ombslOC-BE2l_dMwHoCNu7NsIJhsso,36158
  ai_edge_torch/generative/utilities/t5_loader.py,sha256=tEsfy8-ymzbbjOIc-oesXF3yGyyWtJgFXn2s7VOavt8,16961
- ai_edge_torch/generative/utilities/transformers_verifier.py,sha256=8sp9m_FMcXn7nqOrochtu2jIANkJKhnhIBUmH0ZTDR4,1549
+ ai_edge_torch/generative/utilities/transformers_verifier.py,sha256=l54bmmhj613eB2oCoONIAKEHhf8TQOhC9Gwjp6lxHAE,1659
  ai_edge_torch/generative/utilities/types.py,sha256=gZI9hIPB3XAo4oecKIIoVDfiyibLaSNFhecPFx4VDTM,2913
  ai_edge_torch/generative/utilities/verifier.py,sha256=ETO2ShU5KXG7MLP8eVOWuzuRLCUtapafYHcZ6TZHIkw,13061
  ai_edge_torch/hlfb/__init__.py,sha256=sH4um75na-O8tzxN6chFyp6Y4xnexsE7kUQpZySv6dE,735
@@ -264,8 +267,8 @@ ai_edge_torch/testing/__init__.py,sha256=_yGgvnBZWb7T3IN3mc4x1sS4vM96HZwM8pwIcPG
  ai_edge_torch/testing/export.py,sha256=k5mGDGzwc23Z4zaIVDs8CNh-oOt64gsf9MS9NjhbPy4,3293
  ai_edge_torch/testing/model_coverage/__init__.py,sha256=5P8J6Zk5YYtDvTBucFvB9NGSRI7Gw_24WnrbhXgycEE,765
  ai_edge_torch/testing/model_coverage/model_coverage.py,sha256=UPB448aMDUyC0HNYVqio2rcJPnDN0tBQMP08J6vPYew,4718
- ai_edge_torch_nightly-0.6.0.dev20250521.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
- ai_edge_torch_nightly-0.6.0.dev20250521.dist-info/METADATA,sha256=_UC8q7Xe3xMUCwKKbF4CJ5hewK9PLIJ26ksKCAeWjik,2074
- ai_edge_torch_nightly-0.6.0.dev20250521.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
- ai_edge_torch_nightly-0.6.0.dev20250521.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
- ai_edge_torch_nightly-0.6.0.dev20250521.dist-info/RECORD,,
+ ai_edge_torch_nightly-0.6.0.dev20250523.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ ai_edge_torch_nightly-0.6.0.dev20250523.dist-info/METADATA,sha256=rVs5qa-WVOxoGTyFSWL9oeK9t6or0QJjsk4Cyr7IYpM,2074
+ ai_edge_torch_nightly-0.6.0.dev20250523.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ ai_edge_torch_nightly-0.6.0.dev20250523.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
+ ai_edge_torch_nightly-0.6.0.dev20250523.dist-info/RECORD,,