ai-edge-torch-nightly 0.4.0.dev20250314__py3-none-any.whl → 0.4.0.dev20250316__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to the supported registries. It is provided for informational purposes only.
ai_edge_torch/generative/examples/gemma/gemma2.py CHANGED
@@ -28,7 +28,7 @@ import ai_edge_torch.generative.utilities.loader as loading_utils
  import torch
  from torch import nn

- TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
+ TENSOR_NAMES_FUSED_QKV = loading_utils.ModelLoader.TensorNames(
      ff_up_proj="model.layers.{}.mlp.up_proj",
      ff_down_proj="model.layers.{}.mlp.down_proj",
      ff_gate_proj="model.layers.{}.mlp.gate_proj",
@@ -43,7 +43,7 @@ TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
      lm_head=None,
  )

- ALT_TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
+ TENSOR_NAMES_SEP_QKV = loading_utils.ModelLoader.TensorNames(
      ff_up_proj="model.layers.{}.mlp.up_proj",
      ff_down_proj="model.layers.{}.mlp.down_proj",
      ff_gate_proj="model.layers.{}.mlp.gate_proj",
@@ -59,6 +59,11 @@ ALT_TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
      final_norm="model.norm",
  )

+ TENSOR_NAMES_DICT = {
+     "safetensors": TENSOR_NAMES_SEP_QKV,
+     "kaggle": TENSOR_NAMES_FUSED_QKV,
+ }
+

  class Gemma2Block(attention.TransformerBlock):

@@ -300,18 +305,13 @@ def get_fake_model_config(kv_cache_max_len: int = 128) -> cfg.ModelConfig:


  def build_2b_model(checkpoint_path: str, **kwargs) -> nn.Module:
-   try:
-     return model_builder.build_decoder_only_model(
-         checkpoint_path=checkpoint_path,
-         config=get_model_config_2b(**kwargs),
-         tensor_names=TENSOR_NAMES,
-         model_class=Gemma2,
-     )
-   except KeyError as ke:
-     # Also attempt to load with an alternative naming scheme.
-     return model_builder.build_decoder_only_model(
-         checkpoint_path=checkpoint_path,
-         config=get_model_config_2b(**kwargs),
-         tensor_names=ALT_TENSOR_NAMES,
-         model_class=Gemma2,
-     )
+   for tensor_names in TENSOR_NAMES_DICT.values():
+     try:
+       return model_builder.build_decoder_only_model(
+           checkpoint_path=checkpoint_path,
+           config=get_model_config_2b(**kwargs),
+           tensor_names=tensor_names,
+           model_class=Gemma2,
+       )
+     except KeyError as ke:
+       continue
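The rewritten loader iterates over TENSOR_NAMES_DICT and falls back to the next naming scheme whenever a mapping raises KeyError. As written, if every mapping fails the loop falls through and the function implicitly returns None; the TODO(b/403644647) in the gemma3 decoder below tracks this. A minimal sketch of a stricter variant, using a hypothetical helper that is not part of the package:

```python
from typing import Any, Callable, Mapping


def build_with_fallback(builders_by_source: Mapping[str, Callable[[], Any]]) -> Any:
  """Tries each checkpoint-source builder in turn; raises if all of them fail."""
  errors = {}
  for source, build in builders_by_source.items():
    try:
      return build()
    except KeyError as ke:  # A tensor name in this mapping is absent.
      errors[source] = ke
  raise ValueError(f"No tensor-name mapping matched the checkpoint: {errors}")
```

Called with, say, a dict mapping "safetensors" and "kaggle" to lambdas around model_builder.build_decoder_only_model, this keeps the fallback behavior while surfacing a clear error instead of None when no scheme matches.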
ai_edge_torch/generative/examples/gemma3/decoder.py CHANGED
@@ -29,7 +29,7 @@ import torch
  from torch import nn


- TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
+ TENSOR_NAMES_SEP_QKV = loading_utils.ModelLoader.TensorNames(
      ff_up_proj="model.layers.{}.mlp.up_proj",
      ff_down_proj="model.layers.{}.mlp.down_proj",
      ff_gate_proj="model.layers.{}.mlp.gate_proj",
@@ -48,9 +48,8 @@ TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
      lm_head=None,
  )

- # Please don't use tensor mapping for converting checkpoints hosted on Kaggle
- # or HuggingFace. Will be removed in the future.
- TENSOR_NAMES_TO_BE_REMOVED = loading_utils.ModelLoader.TensorNames(
+
+ TENSOR_NAMES_FUSED_QKV = loading_utils.ModelLoader.TensorNames(
      ff_up_proj="model.layers.{}.mlp.up_proj",
      ff_down_proj="model.layers.{}.mlp.down_proj",
      ff_gate_proj="model.layers.{}.mlp.gate_proj",
@@ -67,6 +66,11 @@ TENSOR_NAMES_TO_BE_REMOVED = loading_utils.ModelLoader.TensorNames(
      lm_head=None,
  )

+ TENSOR_NAMES_DICT = {
+     "safetensors": TENSOR_NAMES_SEP_QKV,
+     "kaggle": TENSOR_NAMES_FUSED_QKV,
+ }
+

  class DecoderBlock(attention.TransformerBlock):

@@ -428,9 +432,15 @@ def get_fake_decoder_config_1b(kv_cache_max_len: int = 128) -> cfg.ModelConfig:


  def build_model_1b(checkpoint_path: str, **kwargs) -> nn.Module:
-   return model_builder.build_decoder_only_model(
-       checkpoint_path=checkpoint_path,
-       config=get_decoder_config_1b(**kwargs),
-       tensor_names=TENSOR_NAMES,
-       model_class=Decoder,
-   )
+   # TODO(b/403644647): Better error handling for loading checkpoints with
+   # different tensor names.
+   for tensor_names in TENSOR_NAMES_DICT.values():
+     try:
+       return model_builder.build_decoder_only_model(
+           checkpoint_path=checkpoint_path,
+           config=get_decoder_config_1b(**kwargs),
+           tensor_names=tensor_names,
+           model_class=Decoder,
+       )
+     except KeyError as ke:
+       continue
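The two mappings differ mainly in the attention weights: TENSOR_NAMES_SEP_QKV expects separate q_proj/k_proj/v_proj tensors (safetensors-style checkpoints), while TENSOR_NAMES_FUSED_QKV expects a single fused qkv_proj tensor (Kaggle-style). A toy sketch of the relationship, assuming a plain concatenation layout (real checkpoints may order heads differently):

```python
import torch

embed_dim, qkv_out = 8, 12  # toy sizes; real Gemma3 dims are much larger

q = torch.randn(qkv_out, embed_dim)  # q_proj.weight
k = torch.randn(qkv_out, embed_dim)  # k_proj.weight
v = torch.randn(qkv_out, embed_dim)  # v_proj.weight

# A fused checkpoint stores the three projections as one qkv_proj tensor.
qkv = torch.cat([q, k, v], dim=0)

x = torch.randn(1, embed_dim)
fused = x @ qkv.T
separate = torch.cat([x @ q.T, x @ k.T, x @ v.T], dim=-1)
assert torch.allclose(fused, separate)  # same math, different storage layout
```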
ai_edge_torch/generative/examples/gemma3/gemma3.py CHANGED
@@ -20,7 +20,7 @@ from typing import List, Optional, Tuple
  import xmlrpc

  from ai_edge_torch.generative.examples.gemma3 import decoder
- from ai_edge_torch.generative.examples.gemma3.cpu_only import image_encoder
+ from ai_edge_torch.generative.examples.gemma3 import image_encoder
  from ai_edge_torch.generative.layers import builder
  from ai_edge_torch.generative.layers import kv_cache as kv_utils
  import ai_edge_torch.generative.layers.model_config as cfg
ai_edge_torch/version.py CHANGED
@@ -13,4 +13,4 @@
  # limitations under the License.
  # ==============================================================================

- __version__ = "0.4.0.dev20250314"
+ __version__ = "0.4.0.dev20250316"
ai_edge_torch_nightly-0.4.0.dev20250314.dist-info/METADATA → ai_edge_torch_nightly-0.4.0.dev20250316.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: ai-edge-torch-nightly
- Version: 0.4.0.dev20250314
+ Version: 0.4.0.dev20250316
  Summary: Supporting PyTorch models with the Google AI Edge TFLite runtime.
  Home-page: https://github.com/google-ai-edge/ai-edge-torch
  Keywords: On-Device ML,AI,Google,TFLite,PyTorch,LLMs,GenAI
ai_edge_torch_nightly-0.4.0.dev20250314.dist-info/RECORD → ai_edge_torch_nightly-0.4.0.dev20250316.dist-info/RECORD RENAMED
@@ -2,7 +2,7 @@ ai_edge_torch/__init__.py,sha256=8sPR_5uXJA4NEE0nIwNdSl-ADOJEoR8hAgYvBQDY70Y,120
  ai_edge_torch/_config.py,sha256=AiqhbcheF7j_ozIGDLC89k1we95aVgFDa-tR6h7UI0s,2529
  ai_edge_torch/conftest.py,sha256=r0GTrhMRhlmOGrrkvumHN8hkmyug6WvF60vWq8wRIBI,758
  ai_edge_torch/model.py,sha256=N-pNpTxzhaFGhWhnSGd70lBzb9VlEhTOq5mddU7bvvI,5542
- ai_edge_torch/version.py,sha256=PjlstuIJ-GlyKyFBMrwc7RQFRecNIkHpz5aIzvYNRKo,706
+ ai_edge_torch/version.py,sha256=jME-032g08KjA0-4jHbpsL3FKCJ7nOx1hgJCxDO5ePE,706
  ai_edge_torch/_convert/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
  ai_edge_torch/_convert/conversion.py,sha256=gpXQnifODU-mWxkUZw_3ov1lEYBw1SPVIcqj5k7pTGo,5550
  ai_edge_torch/_convert/conversion_utils.py,sha256=Sr8qXVcTwc-ZnZmK7yxVrIOOp1S_vNrwzC0zUvLTI2o,2160
@@ -57,19 +57,15 @@ ai_edge_torch/generative/examples/gemma/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIX
  ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py,sha256=8HJi0cutxPstafVNs2LfBKdUzufVucje1Vrfjw_RS_g,2527
  ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py,sha256=MX8fZhJJPZ5IoMiNHX0tLkRpHYqVuh4qhW0rkeIfmYw,2529
  ai_edge_torch/generative/examples/gemma/gemma1.py,sha256=w8oWYibZzvEvCDyp39EYyAWmjgJljhzdYPyFCfAWxZA,3497
- ai_edge_torch/generative/examples/gemma/gemma2.py,sha256=CK1lHw-YQPAr26KMdrYA6icQHvKH59yHAQ4eC4X636o,11539
+ ai_edge_torch/generative/examples/gemma/gemma2.py,sha256=lR-T25GkjCfd_sN8mAKY_0XNA0MEkMgsj4ZBQnnytHo,11465
  ai_edge_torch/generative/examples/gemma/verify_gemma1.py,sha256=ip-Gmk4CI5f0GWSdAIdrectxQWJ0t328KCsA4nfHuGg,1736
  ai_edge_torch/generative/examples/gemma/verify_gemma2.py,sha256=IoBhEMwH07-tFm5-U6F2hpCsI8xynglhq1x9tIOdaPQ,1322
  ai_edge_torch/generative/examples/gemma/verify_util.py,sha256=tR8RflXocDZqvuStyw9aFlzuiTllEC8rNnjrxms6_Is,5727
  ai_edge_torch/generative/examples/gemma3/__init__.py,sha256=JaAnrFoXTl3RJX97XspklkTyqOHVyAgRJsZtzNDd10c,671
  ai_edge_torch/generative/examples/gemma3/convert_gemma3_to_tflite.py,sha256=xAjMqhNrSv2srrBvrwCsnbLzdQXVpkZEOYImb3Mvw3w,3910
- ai_edge_torch/generative/examples/gemma3/decoder.py,sha256=_7s_JrzwW4rX07f41VDuRLDZDJDshc3vqhXVY92K8q8,15423
- ai_edge_torch/generative/examples/gemma3/gemma3.py,sha256=n2EQVp5SrnMeb0csHrz46_gdNiHTpsApaRmcAc8xyj8,6482
- ai_edge_torch/generative/examples/gemma3/cpu_only/__init__.py,sha256=P11xO0F1MUbLMs8ySz6tu6qGDOOyK43q-HV_pqdsCUY,670
- ai_edge_torch/generative/examples/gemma3/cpu_only/convert_gemma3_to_tflite.py,sha256=4Ym4f8pvHu7dUSkTXfSToNuX8X3fhV5kKuhgEzOcyuw,3012
- ai_edge_torch/generative/examples/gemma3/cpu_only/decoder.py,sha256=fB2oYR08u7GcrWYjNbeADRZM5z1vTbE03mHXi497RRw,16140
- ai_edge_torch/generative/examples/gemma3/cpu_only/gemma3.py,sha256=NeMqW67uQEQl09R7nE3vSpT84KXmAHEg9oy4-7TVC5k,8104
- ai_edge_torch/generative/examples/gemma3/cpu_only/image_encoder.py,sha256=uRoLoBWzFtQz5wFZfPCxbkvZsgPAqSkUUsV3977GbYc,5184
+ ai_edge_torch/generative/examples/gemma3/decoder.py,sha256=4Vf1zA94qLyNzj9iLU0jrd3kzFFZXft4uiItoIBjKyM,15632
+ ai_edge_torch/generative/examples/gemma3/gemma3.py,sha256=NQzqZ55cmC8tGlZ1SKkDeD0Su8mZ79KiazCS8X08xUY,6473
+ ai_edge_torch/generative/examples/gemma3/image_encoder.py,sha256=uRoLoBWzFtQz5wFZfPCxbkvZsgPAqSkUUsV3977GbYc,5184
  ai_edge_torch/generative/examples/llama/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
  ai_edge_torch/generative/examples/llama/convert_to_tflite.py,sha256=tMSsqg7LU3LR-PHtKvlWtLCqlk71mfcO9hANU4vnvDM,2734
  ai_edge_torch/generative/examples/llama/llama.py,sha256=UKvMO85_5z1vEY5MVu6QBW_vpQYA8LWHbJI4Yx6BrCc,6592
@@ -243,8 +239,8 @@ ai_edge_torch/testing/__init__.py,sha256=_yGgvnBZWb7T3IN3mc4x1sS4vM96HZwM8pwIcPG
  ai_edge_torch/testing/export.py,sha256=dguMa-aEi-WDPnmGBUs2IPdEmt2IVmHOELH19uiJ1uU,3014
  ai_edge_torch/testing/model_coverage/__init__.py,sha256=5P8J6Zk5YYtDvTBucFvB9NGSRI7Gw_24WnrbhXgycEE,765
  ai_edge_torch/testing/model_coverage/model_coverage.py,sha256=UPB448aMDUyC0HNYVqio2rcJPnDN0tBQMP08J6vPYew,4718
- ai_edge_torch_nightly-0.4.0.dev20250314.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
- ai_edge_torch_nightly-0.4.0.dev20250314.dist-info/METADATA,sha256=n_c6T76WR-J-SCOmKKKzzuPoyM4i_2W2TO6ub8AuDw0,1966
- ai_edge_torch_nightly-0.4.0.dev20250314.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
- ai_edge_torch_nightly-0.4.0.dev20250314.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
- ai_edge_torch_nightly-0.4.0.dev20250314.dist-info/RECORD,,
+ ai_edge_torch_nightly-0.4.0.dev20250316.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ ai_edge_torch_nightly-0.4.0.dev20250316.dist-info/METADATA,sha256=Ua-f14kHLaTqaczlZePB7-9RspufHu5AMI4tbEnQCPc,1966
+ ai_edge_torch_nightly-0.4.0.dev20250316.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ ai_edge_torch_nightly-0.4.0.dev20250316.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
+ ai_edge_torch_nightly-0.4.0.dev20250316.dist-info/RECORD,,
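Each RECORD entry has the form `path,sha256=<digest>,<size>`, where the digest is the urlsafe-base64 encoding of the file's raw SHA-256 hash with trailing `=` padding stripped (the standard wheel RECORD format from PEP 376/427). A short sketch for verifying an entry, such as the new version.py digest above:

```python
import base64
import hashlib


def record_digest(path: str) -> str:
  """Computes a wheel-RECORD-style digest for the file at `path`."""
  with open(path, "rb") as f:
    digest = hashlib.sha256(f.read()).digest()
  return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode()


# e.g. record_digest("ai_edge_torch/version.py") run against the 0.4.0.dev20250316
# wheel contents should reproduce "sha256=jME-032g08KjA0-4jHbpsL3FKCJ7nOx1hgJCxDO5ePE".
```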
ai_edge_torch/generative/examples/gemma3/cpu_only/__init__.py DELETED
@@ -1,14 +0,0 @@
- # Copyright 2025 The AI Edge Torch Authors.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ==============================================================================
ai_edge_torch/generative/examples/gemma3/cpu_only/convert_gemma3_to_tflite.py DELETED
@@ -1,96 +0,0 @@
- # Copyright 2024 The AI Edge Torch Authors.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ==============================================================================
-
- """Example of converting a Gemma3 model to multi-signature tflite model."""
-
- import os
- import pathlib
-
- from absl import app
- from absl import flags
- from ai_edge_torch.generative.examples.gemma3 import gemma3
- from ai_edge_torch.generative.utilities import converter
- from ai_edge_torch.generative.utilities.model_builder import ExportConfig
-
- _MODEL_SIZE = flags.DEFINE_string(
-     'model_size',
-     '1b',
-     'The size of the model to convert.',
- )
-
- _CHECKPOINT_PATH = flags.DEFINE_string(
-     'checkpoint_path',
-     os.path.join(pathlib.Path.home(), 'Downloads/llm_data/gemma3-1b'),
-     'The path to the model checkpoint, or directory holding the checkpoint.',
- )
- _OUTPUT_PATH = flags.DEFINE_string(
-     'output_path',
-     '/tmp/',
-     'The path to export the tflite model.',
- )
- _OUTPUT_NAME_PREFIX = flags.DEFINE_string(
-     'output_name_prefix',
-     'gemma3',
-     'The prefix of the output tflite model name.',
- )
- _PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
-     'prefill_seq_lens',
-     (8, 64, 128, 256, 512, 1024),
-     'List of the maximum sizes of prefill input tensors.',
- )
- _KV_CACHE_MAX_LEN = flags.DEFINE_integer(
-     'kv_cache_max_len',
-     1280,
-     'The maximum size of KV cache buffer, including both prefill and decode.',
- )
- _QUANTIZE = flags.DEFINE_bool(
-     'quantize',
-     True,
-     'Whether the model should be quantized.',
- )
- _LORA_RANKS = flags.DEFINE_multi_integer(
-     'lora_ranks',
-     None,
-     'If set, the model will be converted with the provided list of LoRA ranks.',
- )
-
-
- def main(_):
-   if _MODEL_SIZE.value == '1b':
-     pytorch_model = gemma3.build_model_1b(
-         _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
-     )
-     config = pytorch_model.config
-   elif _MODEL_SIZE.value == '4b':
-     pytorch_model = gemma3.build_model_4b(
-         _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
-     )
-     config = pytorch_model.config.decoder_config
-   else:
-     raise ValueError(f'Unsupported model size: {_MODEL_SIZE.value}')
-   converter.convert_to_tflite(
-       pytorch_model,
-       output_path=_OUTPUT_PATH.value,
-       output_name_prefix=_OUTPUT_NAME_PREFIX.value,
-       prefill_seq_len=_PREFILL_SEQ_LENS.value,
-       quantize=_QUANTIZE.value,
-       config=config,
-       lora_ranks=_LORA_RANKS.value,
-       export_config=ExportConfig(),
-   )
-
-
- if __name__ == '__main__':
-   app.run(main)
ai_edge_torch/generative/examples/gemma3/cpu_only/decoder.py DELETED
@@ -1,463 +0,0 @@
- # Copyright 2024 The AI Edge Torch Authors.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ==============================================================================
-
- """Example of building a Decoder for Gemma3 model."""
-
- from typing import List, Optional, Tuple
-
- from ai_edge_torch.generative.layers import attention
- from ai_edge_torch.generative.layers import builder
- from ai_edge_torch.generative.layers import kv_cache as kv_utils
- import ai_edge_torch.generative.layers.attention_utils as attn_utils
- import ai_edge_torch.generative.layers.model_config as cfg
- import ai_edge_torch.generative.layers.rotary_position_embedding as rotary_pos_emb
- from ai_edge_torch.generative.utilities import model_builder
- import ai_edge_torch.generative.utilities.loader as loading_utils
- import torch
- from torch import nn
-
-
- TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
-     ff_up_proj="model.layers.{}.mlp.up_proj",
-     ff_down_proj="model.layers.{}.mlp.down_proj",
-     ff_gate_proj="model.layers.{}.mlp.gate_proj",
-     attn_query_proj="model.layers.{}.self_attn.q_proj",
-     attn_key_proj="model.layers.{}.self_attn.k_proj",
-     attn_value_proj="model.layers.{}.self_attn.v_proj",
-     attn_output_proj="model.layers.{}.self_attn.o_proj",
-     attn_query_norm="model.layers.{}.self_attn.q_norm",
-     attn_key_norm="model.layers.{}.self_attn.k_norm",
-     pre_attn_norm="model.layers.{}.input_layernorm",
-     post_attn_norm="model.layers.{}.post_attention_layernorm",
-     pre_ff_norm="model.layers.{}.pre_feedforward_layernorm",
-     post_ff_norm="model.layers.{}.post_feedforward_layernorm",
-     embedding="model.embed_tokens",
-     final_norm="model.norm",
-     lm_head=None,
- )
-
- # Please don't use tensor mapping for converting checkpoints hosted on Kaggle
- # or HuggingFace. Will be removed in the future.
- TENSOR_NAMES_TO_BE_REMOVED = loading_utils.ModelLoader.TensorNames(
-     ff_up_proj="model.layers.{}.mlp.up_proj",
-     ff_down_proj="model.layers.{}.mlp.down_proj",
-     ff_gate_proj="model.layers.{}.mlp.gate_proj",
-     attn_fused_qkv_proj="model.layers.{}.self_attn.qkv_proj",
-     attn_output_proj="model.layers.{}.self_attn.o_proj",
-     attn_query_norm="model.layers.{}.self_attn.query_norm",
-     attn_key_norm="model.layers.{}.self_attn.key_norm",
-     pre_attn_norm="model.layers.{}.input_layernorm",
-     post_attn_norm="model.layers.{}.post_attention_layernorm",
-     pre_ff_norm="model.layers.{}.pre_feedforward_layernorm",
-     post_ff_norm="model.layers.{}.post_feedforward_layernorm",
-     embedding="embedder",
-     final_norm="model.norm",
-     lm_head=None,
- )
-
-
- class DecoderBlock(attention.TransformerBlock):
-
-
-   def forward(
-       self,
-       x: torch.Tensor,
-       rope: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
-       mask: Optional[torch.Tensor] = None,
-       input_pos: Optional[torch.Tensor] = None,
-       kv_cache: kv_utils.KVCacheEntry = None,
-   ) -> Tuple[torch.Tensor, Optional[kv_utils.KVCacheEntry]]:
-     """Forward function of the Gemma3Block.
-
-     Exactly the same as TransformerBlock but we call the post-attention norm
-     immediately after attention and not after the residual pointwise addition.
-
-     Args:
-       x (torch.Tensor): the input tensor.
-       rope (Tuple[torch.Tensor, torch.Tensor]): the input rope tensor.
-       mask (torch.Tensor): the optional mask tensor.
-       input_pos (torch.Tensor): the optional input position tensor.
-       kv_cache (KVCacheEntry): the optional kv cache entry.
-
-     Returns:
-       output activation from this transformer block, and updated kv cache (if
-       passed in).
-     """
-
-     x_norm = self.pre_atten_norm(x)
-     attn_out, kv = self.atten_func(x_norm, rope, mask, input_pos, kv_cache)
-     attn_out_norm = self.post_atten_norm(attn_out)
-     x = x + attn_out_norm
-     output = x + self.ff(x)
-     return output, kv
-
-
- class Decoder(nn.Module):
-   """A Gemma3 decoder model built from the Edge Generative API layers."""
-
-   def __init__(self, config: cfg.ModelConfig):
-     super().__init__()
-
-     # Construct model layers.
-     self.tok_embedding = nn.Embedding(
-         config.vocab_size, config.embedding_dim, padding_idx=0
-     )
-     self.lm_head = nn.Linear(
-         config.embedding_dim,
-         config.vocab_size,
-         bias=config.lm_head_use_bias,
-     )
-     # Gemma3 re-uses the embedding as the head projection layer.
-     self.lm_head.weight.data = self.tok_embedding.weight.data
-     self.transformer_blocks = nn.ModuleList(
-         DecoderBlock(config.block_config(idx), config)
-         for idx in range(config.num_layers)
-     )
-     self.final_norm = builder.build_norm(
-         config.embedding_dim,
-         config.final_norm_config,
-     )
-     self.mask_cache = attn_utils.build_causal_mask_cache(
-         size=config.kv_cache_max,
-     )
-     # Gemma3 has same hyper parameters for each layer except for attention
-     # types. Use the first layer.
-     attn_config = config.block_config(0).attn_config
-     self.sliding_window_mask_cache = attn_utils.build_sliding_window_mask_cache(
-         size=config.kv_cache_max,
-         window_size=attn_config.sliding_window_size,
-     )
-     self.config = config
-
-   def get_attention_mask(
-       self,
-       attn_type: cfg.AttentionType,
-       input_pos: torch.Tensor,
-   ) -> torch.Tensor:
-     if attn_type == cfg.AttentionType.LOCAL_SLIDING:
-       return self.sliding_window_mask_cache.index_select(2, input_pos)
-     return self.mask_cache.index_select(2, input_pos)
-
-   def compose_mask(
-       self, mask: torch.Tensor, pixel_mask: torch.Tensor,
-       attn_type: cfg.AttentionType,
-   ) -> torch.Tensor:
-     mask = mask == 0
-     if attn_type == cfg.AttentionType.LOCAL_SLIDING:
-       mask = torch.logical_and(mask, pixel_mask)
-     else:
-       mask = torch.logical_or(mask, pixel_mask)
-     mask = torch.where(mask, 0, float("-inf"))
-     return mask
-
-   def build_pixel_mask(self, image_indices: torch.Tensor):
-     pixel_mask = image_indices >= 0
-     max_seq_len = self.config.kv_cache_max
-     if pixel_mask.size(1) < max_seq_len:
-       pixel_mask = torch.cat(
-           [
-               pixel_mask,
-               torch.zeros(
-                   (pixel_mask.size(0), max_seq_len - pixel_mask.size(1))
-               ),
-           ],
-           dim=1,
-       )
-     pixel_mask = torch.logical_and(
-         pixel_mask.unsqueeze(1), pixel_mask.unsqueeze(-1)
-     )
-     return pixel_mask.unsqueeze(1)
-
-   @torch.inference_mode
-   def forward(
-       self,
-       tokens: torch.Tensor,
-       input_pos: torch.Tensor,
-       kv_cache: kv_utils.KVCache,
-       input_embeds: Optional[torch.Tensor] = None,
-       mask: Optional[torch.Tensor] = None,
-       image_indices: Optional[torch.Tensor] = None,
-       export_config: Optional[model_builder.ExportConfig] = None,
-   ) -> dict[torch.Tensor, kv_utils.KVCache]:
-
-     pixel_mask = None
-     if input_embeds is None:
-       # token embeddings of shape (b, t, n_embd)
-       input_embeds = self.tok_embedding(tokens)
-       if self.config.embedding_scale is not None:
-         input_embeds = input_embeds * self.config.embedding_scale
-     if image_indices is not None:
-       pixel_mask = self.build_pixel_mask(image_indices)
-     # RoPE parameters are the same for all blocks. Use the first layer.
-     attn_config = self.config.block_config(0).attn_config
-     n_elem = int(attn_config.rotary_percentage * attn_config.head_dim)
-     # Different rotary base for global and local attention
-     # based on attention pattern
-     rope = [rotary_pos_emb.build_rope(
-         input_pos, attn_config.head_dim,
-         self.config.block_config(i).attn_config.rotary_base
-     ) for i in range(self.config.num_layers)]
-     mask = [self.get_attention_mask(
-         self.config.block_config(i).attn_config.attn_type, input_pos
-     ) for i in range(self.config.num_layers)]
-
-     return self._forward_with_embeds(
-         input_embeds, rope, mask, input_pos, kv_cache,
-         pixel_mask, export_config
-     )
-
-   def _forward_with_embeds(
-       self,
-       input_embeds: torch.Tensor,
-       rope: List[Tuple[torch.Tensor, torch.Tensor]],
-       mask: List[torch.Tensor],
-       input_pos: torch.Tensor,
-       kv_cache: kv_utils.KVCache,
-       pixel_mask: Optional[torch.Tensor] = None,
-       export_config: Optional[model_builder.ExportConfig] = None,
-   ) -> dict[torch.Tensor, kv_utils.KVCache]:
-     """Forwards the model with input embeddings."""
-     assert len(self.transformer_blocks) == len(kv_cache.caches), (
-         "The number of transformer blocks and the number of KV cache entries"
-         " must be the same."
-     )
-
-     x = input_embeds
-
-     if pixel_mask is not None:
-       pixel_mask = pixel_mask.index_select(2, input_pos)
-       mask = [
-           self.compose_mask(
-               mask[i],
-               pixel_mask,
-               self.config.block_config(i).attn_config.attn_type,
-           )
-           for i in range(self.config.num_layers)
-       ]
-     updated_kv_entries = []
-     for i, block in enumerate(self.transformer_blocks):
-       kv_entry = kv_cache.caches[i] if kv_cache else None
-       x, kv_entry = block(x, rope[i], mask[i], input_pos, kv_entry)
-       if kv_entry:
-         updated_kv_entries.append(kv_entry)
-     updated_kv_cache = kv_utils.KVCache(tuple(updated_kv_entries))
-     if export_config is not None:
-       if (
-           torch.numel(input_pos) > 1
-           and not export_config.output_logits_on_prefill
-       ):
-         return {"kv_cache": updated_kv_cache}
-
-     x = self.final_norm(x)
-     res = self.lm_head(x)  # (b, t, vocab_size)
-
-     return {"logits": res, "kv_cache": updated_kv_cache}
-
-
- def get_decoder_config_4b(kv_cache_max_len: int = 2048) -> cfg.ModelConfig:
-   """Returns the model config for a Gemma3 4B model.
-
-   Args:
-     kv_cache_max_len (int): The maximum sequence length of the KV cache. Default
-       is 2048.
-
-   Returns:
-     The model config for a Gemma 4B model.
-   """
-   norm_config = cfg.NormalizationConfig(
-       type=cfg.NormalizationType.RMS_NORM,
-       epsilon=1e-6,
-       zero_centered=True,
-   )
-   ff_config = cfg.FeedForwardConfig(
-       type=cfg.FeedForwardType.GATED,
-       activation=cfg.ActivationConfig(cfg.ActivationType.GELU_TANH),
-       intermediate_size=4*2560,
-       pre_ff_norm_config=norm_config,
-       post_ff_norm_config=norm_config,
-   )
-
-   def get_block_config(idx: int) -> cfg.TransformerBlockConfig:
-     attn_config = cfg.AttentionConfig(
-         num_heads=8,
-         head_dim=256,
-         num_query_groups=4,
-         rotary_base=1_000_000 if (idx + 1) % 6 == 0 else 10_000,
-         rotary_percentage=1.0,
-         qkv_transpose_before_split=True,
-         query_norm_config=norm_config,
-         key_norm_config=norm_config,
-         logit_softcap=None,
-         sliding_window_size=1024,
-         attn_type=(
-             cfg.AttentionType.GLOBAL
-             if (idx + 1) % 6 == 0
-             else cfg.AttentionType.LOCAL_SLIDING
-         ),
-     )
-     return cfg.TransformerBlockConfig(
-         attn_config=attn_config,
-         ff_config=ff_config,
-         pre_attention_norm_config=norm_config,
-         post_attention_norm_config=norm_config,
-     )
-
-   num_layers = 34
-   embedding_dim = 2560
-   config = cfg.ModelConfig(
-       vocab_size=262_144,
-       num_layers=num_layers,
-       max_seq_len=32_768,
-       embedding_dim=embedding_dim,
-       embedding_scale=embedding_dim**0.5,
-       kv_cache_max_len=kv_cache_max_len,
-       block_configs=[get_block_config(i) for i in range(num_layers)],
-       final_norm_config=norm_config,
-       lm_head_use_bias=False,
-       enable_hlfb=True,
-       final_logit_softcap=None,
-   )
-   return config
-
- def get_decoder_config_1b(kv_cache_max_len: int = 2048) -> cfg.ModelConfig:
-   """Returns the model config for a Gemma3 1B model.
-
-   Args:
-     kv_cache_max_len (int): The maximum sequence length of the KV cache. Default
-       is 2048.
-
-   Returns:
-     The model config for a Gemma 1B model.
-   """
-   norm_config = cfg.NormalizationConfig(
-       type=cfg.NormalizationType.RMS_NORM,
-       epsilon=1e-6,
-       zero_centered=True,
-   )
-   ff_config = cfg.FeedForwardConfig(
-       type=cfg.FeedForwardType.GATED,
-       activation=cfg.ActivationConfig(cfg.ActivationType.GELU_TANH),
-       intermediate_size=6*1152,
-       pre_ff_norm_config=norm_config,
-       post_ff_norm_config=norm_config,
-   )
-
-   def get_block_config(idx: int) -> cfg.TransformerBlockConfig:
-     attn_config = cfg.AttentionConfig(
-         num_heads=4,
-         head_dim=256,
-         num_query_groups=1,
-         rotary_base=1_000_000 if (idx + 1) % 6 == 0 else 10_000,
-         rotary_percentage=1.0,
-         qkv_transpose_before_split=True,
-         query_norm_config=norm_config,
-         key_norm_config=norm_config,
-         logit_softcap=None,
-         sliding_window_size=512,
-         attn_type=(
-             cfg.AttentionType.GLOBAL
-             if (idx + 1) % 6 == 0
-             else cfg.AttentionType.LOCAL_SLIDING
-         ),
-     )
-     return cfg.TransformerBlockConfig(
-         attn_config=attn_config,
-         ff_config=ff_config,
-         pre_attention_norm_config=norm_config,
-         post_attention_norm_config=norm_config,
-     )
-
-   num_layers = 26
-   embedding_dim = 1152
-   config = cfg.ModelConfig(
-       vocab_size=262_144,
-       num_layers=num_layers,
-       max_seq_len=32_768,
-       embedding_dim=embedding_dim,
-       embedding_scale=embedding_dim**0.5,
-       kv_cache_max_len=kv_cache_max_len,
-       block_configs=[get_block_config(i) for i in range(num_layers)],
-       final_norm_config=norm_config,
-       lm_head_use_bias=False,
-       enable_hlfb=True,
-       final_logit_softcap=None,
-   )
-   return config
-
-
- def get_fake_decoder_config_4b(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
-   """Returns a fake model config for a Gemma3 4B model.
-
-   Args:
-     kv_cache_max_len (int): The maximum sequence length of the KV cache. Default
-       is 128.
-
-   Returns:
-     A fake model config for a Gemma 4B model.
-   """
-   config = get_decoder_config_4b(kv_cache_max_len)
-   config.vocab_size = 128
-   config.num_layers = 2
-   config.max_seq_len = 2 * kv_cache_max_len
-   config.embedding_dim = 128
-   config.embedding_scale = config.embedding_dim**0.5
-   config.block_configs = config.block_configs[: config.num_layers]
-   for block_config in config.block_configs:
-     block_config.attn_config.num_heads = 4
-     block_config.attn_config.head_dim = 64
-     block_config.attn_config.sliding_window_size = 64
-     block_config.ff_config.intermediate_size = 128
-   return config
-
- def get_fake_decoder_config_1b(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
-   """Returns a fake model config for a Gemma3 1B model.
-
-   Args:
-     kv_cache_max_len (int): The maximum sequence length of the KV cache. Default
-       is 128.
-
-   Returns:
-     A fake model config for a Gemma 1B model.
-   """
-   config = get_decoder_config_1b(kv_cache_max_len)
-   config.vocab_size = 128
-   config.num_layers = 2
-   config.max_seq_len = 2 * kv_cache_max_len
-   config.embedding_dim = 128
-   config.embedding_scale = config.embedding_dim**0.5
-   config.block_configs = config.block_configs[: config.num_layers]
-   for block_config in config.block_configs:
-     block_config.attn_config.num_heads = 4
-     block_config.attn_config.head_dim = 64
-     block_config.attn_config.sliding_window_size = 64
-     block_config.ff_config.intermediate_size = 128
-   return config
-
-
- def build_model_4b(checkpoint_path: str, **kwargs) -> nn.Module:
-   return model_builder.build_decoder_only_model(
-       checkpoint_path=checkpoint_path,
-       config=get_decoder_config_4b(**kwargs),
-       tensor_names=TENSOR_NAMES,
-       model_class=Decoder,
-   )
-
- def build_model_1b(checkpoint_path: str, **kwargs) -> nn.Module:
-   return model_builder.build_decoder_only_model(
-       checkpoint_path=checkpoint_path,
-       config=get_decoder_config_1b(**kwargs),
-       tensor_names=TENSOR_NAMES,
-       model_class=Decoder,
-   )
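One detail worth highlighting from the deleted configs (the same rule appears in the relocated decoder.py): both the 1B and 4B models interleave attention types with a fixed pattern, in which every sixth layer uses global attention with rotary base 1,000,000 and all other layers use local sliding-window attention with base 10,000. A quick illustration derived from the `(idx + 1) % 6 == 0` check in get_block_config above:

```python
def layer_pattern(num_layers: int) -> list[str]:
  """Lists the attention type per layer, following the config rule above."""
  return [
      "GLOBAL(base=1e6)" if (idx + 1) % 6 == 0 else "LOCAL_SLIDING(base=1e4)"
      for idx in range(num_layers)
  ]


# For the 1B config (26 layers), layers 6, 12, 18, and 24 come out global.
print(layer_pattern(26))
```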
ai_edge_torch/generative/examples/gemma3/cpu_only/gemma3.py DELETED
@@ -1,212 +0,0 @@
- # Copyright 2024 The AI Edge Torch Authors.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ==============================================================================
-
- """Example of building a Gemma3 model."""
-
- from dataclasses import dataclass
- from typing import List, Optional, Tuple
- import xmlrpc
-
- from ai_edge_torch.generative.examples.gemma3 import decoder
- from ai_edge_torch.generative.examples.gemma3 import image_encoder
- from ai_edge_torch.generative.layers import builder
- from ai_edge_torch.generative.layers import kv_cache as kv_utils
- import ai_edge_torch.generative.layers.model_config as cfg
- from ai_edge_torch.generative.utilities import model_builder
- import ai_edge_torch.generative.utilities.loader as loading_utils
- import torch
- from torch import nn
-
-
- PROJECTION_TENSOR_NAME = "multi_modal_projector.linear"
-
- @dataclass
- class Gemma3MMConfig:
-   """Gemma3 model configurations."""
-
-   image_encoder_config: cfg.ModelConfig
-   decoder_config: cfg.ModelConfig
-   mm_norm_config: cfg.NormalizationConfig
-   mm_extra_tokens: int
-   image_token_id: int
-   image_projection_scale: float
-   image_projection_use_bias: bool = False
-
- class Gemma3MM(nn.Module):
-   """A Gemma3 multimodal model built from the Edge Generative API layers."""
-
-   def __init__(self, config: Gemma3MMConfig):
-     super().__init__()
-
-     self.image_encoder = image_encoder.SiglipVisionEncoderWithExit(
-         config.image_encoder_config)
-     self.decoder = decoder.Decoder(config.decoder_config)
-     self.mm_norm = builder.build_norm(
-         config.image_encoder_config.embedding_dim,
-         config.mm_norm_config,
-     )
-     self.extra_embedding = nn.Embedding(
-         config.mm_extra_tokens, config.image_encoder_config.embedding_dim)
-     self.image_projection = nn.Linear(
-         config.image_encoder_config.embedding_dim,
-         config.decoder_config.embedding_dim,
-         bias=config.image_projection_use_bias,
-     )
-     image_embedding_config = config.image_encoder_config.image_embedding
-     self.num_patches = (
-         image_embedding_config.image_size // image_embedding_config.patch_size
-     ) ** 2
-     self.config = config
-
-   @torch.inference_mode
-   def forward(
-       self,
-       tokens: torch.Tensor,
-       input_pos: torch.Tensor,
-       kv_cache: kv_utils.KVCache,
-       image_indices: Optional[torch.Tensor] = None,
-       image_feat_indices: Optional[torch.Tensor] = None,
-       pixel_values: torch.Tensor = None,
-       export_config: Optional[model_builder.ExportConfig] = None,
-   ) -> dict[torch.Tensor, kv_utils.KVCache]:
-     _, seq_len = tokens.size()
-     assert self.config.decoder_config.max_seq_len >= seq_len, (
-         f"Cannot forward sequence of length {seq_len}, max seq length is only"
-         f" {self.config.decoder_config.max_seq_len}"
-     )
-     if pixel_values is None:
-       return self.decoder(tokens=tokens,
-                           input_pos=input_pos,
-                           kv_cache=kv_cache,
-                           input_embeds=None,
-                           export_config=export_config,
-       )
-     vocab_size = self.config.decoder_config.vocab_size
-     input_embeds = self.decoder.tok_embedding(torch.clip(tokens, 0,
-                                                          vocab_size - 1))
-     if self.decoder.config.embedding_scale is not None:
-       input_embeds = input_embeds * self.decoder.config.embedding_scale
-
-     # TODO: Identify embedding path for hard tokens if required.
-     # extra_embeds = self.extra_embedding(
-     #     torch.clip(tokens - vocab_size, 0, self.config.mm_extra_tokens - 1)
-     # )
-     # extra_embeds = self.image_projection(extra_embeds)
-     # input_embeds = torch.where(tokens < self.config.decoder_config.vocab_size,
-     #                            input_embeds, extra_embeds)
-     # alternate method of implementation
-     # rows, cols = torch.where(tokens >= self.config.vocab_size)
-     # ext_embeds = self.ext_embedding(
-     #     tokens[rows, cols] - self.config.vocab_size
-     # )
-     # ext_embeds = self.mm_projection(extra_embeds)
-     # input_embeds[rows, cols, :] = extra_embeds
-
-     # Shape of pixel_values: (b, n, c, h, w)
-     batch_size, num_media, c, h, w = pixel_values.size()
-     pixel_values = pixel_values.view(-1, c, h, w)
-     image_encoded = self.image_encoder(pixel_values=pixel_values)
-     image_encoded = self.mm_norm(image_encoded)
-     image_encoded = self.image_projection(image_encoded)
-     _, num_patches, num_channels = image_encoded.size()
-     image_encoded = image_encoded.view(
-         batch_size, num_media, num_patches, num_channels
-     )
-
-     # Interleave the image soft embeddings with the text embeddings
-     for b in range(tokens.shape[0]):
-       unbatched_image_encoded = image_encoded[b]
-       image_features = unbatched_image_encoded[
-           image_indices[b], image_feat_indices[b]
-       ]
-       index_to_copy = torch.where(image_indices[b] >= 0)[0]
-       input_embeds[b] = torch.index_copy(input_embeds[b], 0, index_to_copy,
-                                          image_features[index_to_copy])
-     return self.decoder(
-         tokens=None,
-         input_pos=input_pos,
-         kv_cache=kv_cache,
-         input_embeds=input_embeds,
-         image_indices=image_indices,
-         export_config=export_config,
-     )
-
-
- def get_model_config_4b(**kwargs) -> Gemma3MMConfig:
-   return Gemma3MMConfig(
-       image_encoder_config=image_encoder.get_image_encoder_config(),
-       decoder_config=decoder.get_decoder_config_4b(),
-       image_token_id=257152,  # TODO: confirm
-       image_projection_scale=2048**0.5,
-       image_projection_use_bias=False,
-       mm_norm_config=cfg.NormalizationConfig(
-           type=cfg.NormalizationType.LAYER_NORM,
-           epsilon=1e-6,
-           enable_hlfb=True,
-       ),
-       mm_extra_tokens=128,
-   )
-
-
- def get_fake_model_config(**kwargs) -> Gemma3MMConfig:
-   return Gemma3MMConfig(
-       image_encoder_config=image_encoder.get_fake_image_encoder_config(),
-       decoder_config=decoder.get_fake_decoder_config_4b(**kwargs),
-       image_token_id=127,
-       image_projection_scale=128**0.5,
-       image_projection_use_bias=False,
-       mm_norm_config=cfg.NormalizationConfig(
-           type=cfg.NormalizationType.LAYER_NORM,
-           epsilon=1e-6,
-           enable_hlfb=True,
-       ),
-       mm_extra_tokens=32,
-   )
-
-
- def build_model_4b(checkpoint_path: str, **kwargs) -> Gemma3MM:
-
-   decoder_tensor_names = decoder.TENSOR_NAMES
-
-   config = get_model_config_4b(**kwargs)
-   model = Gemma3MM(config)
-   # # TODO: Load the parameters of image encoder from checkpoint mapping Tensor names properly.
-   # loader = loading_utils.ModelLoader(
-   #     checkpoint_path, image_encoder.TENSOR_NAMES
-   # )
-   # loader.load(model.image_encoder, strict=False)
-   # # Load the parameters of decoder.
-   loader = loading_utils.ModelLoader(checkpoint_path, decoder_tensor_names)
-   loader.load(model.decoder, strict=False)
-
-   # # Load the parameters of image projection.
-   # loader = loading_utils.ModelLoader(checkpoint_path, None)
-   # state = loader.get_state()
-   # converted_state = dict()
-   # converted_state["weight"] = state.pop(f"{PROJECTION_TENSOR_NAME}.weight")
-   # if config.image_projection_use_bias:
-   #   converted_state["bias"] = state.pop(f"{PROJECTION_TENSOR_NAME}.bias")
-   # model.image_projection.load_state_dict(converted_state)
-   model.eval()
-   return model
-
- def build_model_1b(checkpoint_path: str, **kwargs) -> decoder.Decoder:
-   if checkpoint_path:
-     model = decoder.build_model_1b(checkpoint_path, **kwargs)
-   else:
-     config = decoder.get_decoder_config_1b(**kwargs)
-     model = decoder.Decoder(config)
-   model.eval()
-   return model
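The interleaving loop in Gemma3MM.forward overwrites text-token embeddings with projected image features at the positions whose image_indices entry is non-negative, using torch.index_copy. A toy sketch of that step in isolation, with illustrative shapes only:

```python
import torch

seq_len, embed_dim = 6, 4
input_embeds = torch.zeros(seq_len, embed_dim)        # text embeddings
image_features = torch.ones(seq_len, embed_dim)       # projected image features
image_indices = torch.tensor([-1, -1, 0, 0, -1, -1])  # positions 2-3 are image tokens

# Positions with image_indices >= 0 receive image features; the rest keep text.
index_to_copy = torch.where(image_indices >= 0)[0]
input_embeds = torch.index_copy(
    input_embeds, 0, index_to_copy, image_features[index_to_copy]
)
print(input_embeds)  # rows 2 and 3 are now image features
```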