ai-edge-torch-nightly 0.5.0.dev20250515__py3-none-any.whl → 0.5.0.dev20250517__py3-none-any.whl
This diff shows the content changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.
- ai_edge_torch/__init__.py +1 -0
- ai_edge_torch/_convert/conversion.py +24 -0
- ai_edge_torch/_convert/converter.py +57 -3
- ai_edge_torch/_convert/fx_passes/__init__.py +1 -0
- ai_edge_torch/_convert/fx_passes/eliminate_dead_code_pass.py +40 -0
- ai_edge_torch/_convert/test/test_convert.py +25 -0
- ai_edge_torch/generative/examples/amd_llama_135m/amd_llama_135m.py +10 -6
- ai_edge_torch/generative/examples/amd_llama_135m/convert_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/deepseek/convert_to_tflite.py +7 -2
- ai_edge_torch/generative/examples/deepseek/deepseek.py +9 -5
- ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/gemma/gemma1.py +10 -6
- ai_edge_torch/generative/examples/gemma/gemma2.py +8 -7
- ai_edge_torch/generative/examples/gemma3/convert_gemma3_to_tflite.py +5 -14
- ai_edge_torch/generative/examples/gemma3/decoder.py +10 -10
- ai_edge_torch/generative/examples/gemma3/gemma3.py +1 -3
- ai_edge_torch/generative/examples/gemma3/image_encoder.py +1 -4
- ai_edge_torch/generative/examples/hammer/convert_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/hammer/hammer.py +15 -6
- ai_edge_torch/generative/examples/llama/convert_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/llama/llama.py +26 -10
- ai_edge_torch/generative/examples/moonshine/convert_moonshine_to_tflite.py +0 -1
- ai_edge_torch/generative/examples/openelm/convert_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/openelm/openelm.py +9 -3
- ai_edge_torch/generative/examples/paligemma/convert_to_tflite.py +6 -1
- ai_edge_torch/generative/examples/paligemma/decoder.py +1 -4
- ai_edge_torch/generative/examples/paligemma/decoder2.py +1 -4
- ai_edge_torch/generative/examples/paligemma/image_encoder.py +3 -5
- ai_edge_torch/generative/examples/paligemma/paligemma.py +12 -5
- ai_edge_torch/generative/examples/paligemma/verify.py +27 -5
- ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/phi/convert_phi4_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/phi/convert_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/phi/phi2.py +9 -5
- ai_edge_torch/generative/examples/phi/phi3.py +8 -6
- ai_edge_torch/generative/examples/phi/phi4.py +8 -6
- ai_edge_torch/generative/examples/qwen/convert_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/qwen/qwen.py +21 -7
- ai_edge_torch/generative/examples/qwen_vl/convert_to_tflite.py +6 -1
- ai_edge_torch/generative/examples/qwen_vl/decoder.py +1 -3
- ai_edge_torch/generative/examples/qwen_vl/image_encoder.py +13 -7
- ai_edge_torch/generative/examples/qwen_vl/qwen_vl.py +12 -4
- ai_edge_torch/generative/examples/qwen_vl/verify.py +26 -5
- ai_edge_torch/generative/examples/smollm/convert_to_tflite.py +7 -2
- ai_edge_torch/generative/examples/smollm/convert_v2_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/smollm/smollm.py +15 -6
- ai_edge_torch/generative/examples/smollm/verify.py +2 -2
- ai_edge_torch/generative/examples/stable_diffusion/clip.py +8 -5
- ai_edge_torch/generative/examples/t5/t5.py +1 -3
- ai_edge_torch/generative/examples/test_models/toy_model.py +4 -1
- ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py +3 -2
- ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py +9 -5
- ai_edge_torch/generative/layers/model_config.py +2 -2
- ai_edge_torch/generative/utilities/converter.py +18 -5
- ai_edge_torch/generative/utilities/loader.py +19 -0
- ai_edge_torch/odml_torch/lowerings/utils.py +13 -0
- ai_edge_torch/version.py +1 -1
- {ai_edge_torch_nightly-0.5.0.dev20250515.dist-info → ai_edge_torch_nightly-0.5.0.dev20250517.dist-info}/METADATA +1 -1
- {ai_edge_torch_nightly-0.5.0.dev20250515.dist-info → ai_edge_torch_nightly-0.5.0.dev20250517.dist-info}/RECORD +64 -63
- {ai_edge_torch_nightly-0.5.0.dev20250515.dist-info → ai_edge_torch_nightly-0.5.0.dev20250517.dist-info}/LICENSE +0 -0
- {ai_edge_torch_nightly-0.5.0.dev20250515.dist-info → ai_edge_torch_nightly-0.5.0.dev20250517.dist-info}/WHEEL +0 -0
- {ai_edge_torch_nightly-0.5.0.dev20250515.dist-info → ai_edge_torch_nightly-0.5.0.dev20250517.dist-info}/top_level.txt +0 -0
ai_edge_torch/__init__.py
CHANGED
@@ -14,6 +14,7 @@
 # ==============================================================================
 from ai_edge_torch._config import config
 from ai_edge_torch._convert.converter import convert
+from ai_edge_torch._convert.converter import experimental_add_compilation_backend
 from ai_edge_torch._convert.converter import signature
 from ai_edge_torch._convert.to_channel_last_io import to_channel_last_io
 from ai_edge_torch.model import Model
ai_edge_torch/_convert/conversion.py
CHANGED
@@ -26,6 +26,9 @@ from ai_edge_torch.generative import fx_passes as generative_fx_passes
 from ai_edge_torch.quantize import quant_config as qcfg
 import torch
 
+from ai_edge_litert.aot import aot_compile as aot_compile_lib
+from ai_edge_litert.aot.core import types as litert_types
+
 
 def _run_convert_passes(
     exported_program: torch.export.ExportedProgram,
@@ -35,6 +38,7 @@ def _run_convert_passes(
   )
 
   passes = [
+      fx_passes.EliminateDeadCodePass(),
       fx_passes.OptimizeLayoutTransposesPass(),
       fx_passes.CanonicalizePass(),
       fx_passes.BuildAtenCompositePass(),
@@ -153,3 +157,23 @@ def convert_signatures(
   )
 
   return model.TfLiteModel(tflite_model)
+
+
+def aot_compile(
+    compilation_configs: list[litert_types.CompilationConfig],
+    cpu_model: model.TfLiteModel,
+) -> litert_types.CompilationResult:
+  """Compiles the given CPU model.
+
+  Args:
+    compilation_configs: The list of compilation configs to use.
+    cpu_model: The CPU model to compile.
+
+  Returns:
+    The compilation result.
+  """
+  litert_model = litert_types.Model.create_from_bytes(cpu_model.tflite_model())
+  return aot_compile_lib.aot_compile(
+      litert_model,
+      config=compilation_configs,
+  )
ai_edge_torch/_convert/converter.py
CHANGED
@@ -23,6 +23,9 @@ from ai_edge_torch._convert import signature as signature_module
 from ai_edge_torch.quantize import quant_config as qcfg
 import torch
 
+from ai_edge_litert.aot.core import types as litert_types
+from ai_edge_litert.aot.vendors import import_vendor as vendor_lib
+
 
 class Converter:
   """A converter for converting PyTorch models to edge models.
@@ -32,6 +35,7 @@ class Converter:
 
   def __init__(self):
     self._signatures: list[signature_module.Signature] = []
+    self._compilation_configs: list[litert_types.CompilationConfig] = []
 
   def signature(
       self,
@@ -96,6 +100,31 @@ class Converter:
     )
     return self
 
+  def experimental_add_compilation_backend(
+      self,
+      target: litert_types.Target | None = None,
+      **kwargs: litert_types.Config,
+  ) -> Converter:
+    """Adds an AOT compilation target to the converter.
+
+    NOTE: This API is experimental and subject to change.
+
+    Args:
+      target: The target to compile for. If not specified, will compile to all
+        registered AOT targets in ai_edge_litert. See ai_edge_litert.aot.vendors
+        for more details. Adding a same target multiple times will be a no-op.
+      **kwargs: Additional arguments to pass to the backend compiler.
+
+    Returns:
+      The converter object itself.
+    """
+    if target is None:
+      target = vendor_lib.AllRegisteredTarget()
+    if isinstance(target, litert_types.Target):
+      target = litert_types.CompilationConfig(target=target, **kwargs)
+    self._compilation_configs.append(target)
+    return self
+
   def convert(
       self,
       module: torch.nn.Module = None,
@@ -107,7 +136,7 @@
       dynamic_shapes: Optional[Union[dict[str, Any], Tuple[Any, ...]]] = None,
       _ai_edge_converter_flags: Optional[dict[str, Any]] = None,
       _saved_model_dir: Optional[str] = None,
-  ) -> model.TfLiteModel:
+  ) -> model.TfLiteModel | litert_types.CompilationResult:
     """Finalizes the conversion and produces an edge model.
 
     This could be called with no arguments as follows:
@@ -144,7 +173,9 @@
         specified, a random temporary directory would be used.
 
     Returns:
-      The converted edge model.
+      The converted edge model. If compilation configs are provided, returns the
+      compilation result that contains the compiled edge models for different
+      targets.
 
     Raises:
       ValueError: If the arguments are not provided as expected. See the example
@@ -169,13 +200,16 @@
           "sample_args or sample_kwargs must be provided if a module is"
           " specified."
       )
-    return conversion.convert_signatures(
+    converted_model = conversion.convert_signatures(
        self._signatures,
        strict_export=strict_export,
        quant_config=quant_config,
        _tfl_converter_flags=_ai_edge_converter_flags,
        _saved_model_dir=_saved_model_dir,
    )
+    if self._compilation_configs:
+      return conversion.aot_compile(self._compilation_configs, converted_model)
+    return converted_model
 
 
 def signature(
@@ -211,6 +245,26 @@ def signature(
   )
 
 
+def experimental_add_compilation_backend(
+    target: litert_types.Target | None = None,
+    **kwargs: litert_types.Config,
+) -> Converter:
+  """Adds an AOT compilation target to the converter.
+
+  NOTE: This API is experimental and subject to change.
+
+  Args:
+    target: The target to compile for. If not specified, will compile to all
+      registered AOT targets in ai_edge_litert. See ai_edge_litert.aot.vendors
+      for more details. Adding a same target multiple times will be a no-op.
+    **kwargs: Additional arguments to pass to the backend compiler.
+
+  Returns:
+    The converter object itself.
+  """
+  return Converter().experimental_add_compilation_backend(target, **kwargs)
+
+
 def convert(
     module: torch.nn.Module = None,
     sample_args=None,
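Taken together, the conversion.py and converter.py hunks above expose an experimental AOT-compilation path from the top-level API: add one or more backend targets, then call convert(), which now returns a CompilationResult rather than a TfLiteModel whenever compilation configs are present. A minimal usage sketch, modeled on the test added to test_convert.py further down (the Add module and shapes are placeholders, and the fallback backend is just one possible target):

import ai_edge_torch
import torch
from torch import nn
from ai_edge_litert.aot.vendors import fallback_backend

class Add(nn.Module):

  def forward(self, a, b):
    return a + b

# With no compilation backend added, convert() returns a plain TfLiteModel;
# adding a backend switches the return type to a CompilationResult.
result = ai_edge_torch.experimental_add_compilation_backend(
    fallback_backend.FallbackTarget()
).convert(Add().eval(), (torch.randn(5, 10), torch.randn(5, 10)))
for backend, compiled_model in result.models_with_backend:
  print(backend.id())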
ai_edge_torch/_convert/fx_passes/__init__.py
CHANGED
@@ -17,6 +17,7 @@ from typing import Sequence, Union
 
 from ai_edge_torch._convert.fx_passes.build_aten_composite_pass import BuildAtenCompositePass
 from ai_edge_torch._convert.fx_passes.cast_inputs_bf16_to_f32_pass import CastInputsBf16ToF32Pass
+from ai_edge_torch._convert.fx_passes.eliminate_dead_code_pass import EliminateDeadCodePass
 from ai_edge_torch._convert.fx_passes.inject_mlir_debuginfo_pass import InjectMlirDebuginfoPass
 from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass import OptimizeLayoutTransposesPass
 from ai_edge_torch._convert.fx_passes.remove_non_user_outputs_pass import RemoveNonUserOutputsPass
ai_edge_torch/_convert/fx_passes/eliminate_dead_code_pass.py
ADDED
@@ -0,0 +1,40 @@
+# Copyright 2025 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Pass to eliminate dead code for ai-edge-torch conversion."""
+
+
+from ai_edge_torch import fx_infra
+import torch
+
+
+class EliminateDeadCodePass(fx_infra.PassBase):
+  """Eliminates dead code with dedicated rules for ai-edge-torch conversion."""
+
+  def call(self, graph_module: torch.fx.GraphModule):
+    def is_impure_node(node: torch.fx.Node):
+      # Starting from torch 2.7.0, random torch ops with
+      # _nondeterministic_seeded set are no longer considered pure. However,
+      # for conversion, unused random ops/tensors should still be removed.
+      if getattr(node.target, "_nondeterministic_seeded", False):
+        return False
+      return node.is_impure()
+
+    try:
+      graph_module.graph.eliminate_dead_code(is_impure_node)
+    except TypeError:
+      # eliminate_dead_code has no is_impure_node input in old torch versions.
+      pass
+
+    return fx_infra.PassResult(graph_module, True)
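This pass exists because, as its inline comment notes, torch 2.7.0 started treating seeded random ops (those with _nondeterministic_seeded set) as impure, so an unused torch.rand result would survive ordinary dead-code elimination and leak into conversion. A toy repro of the underlying torch.fx behavior, as a sketch assuming torch >= 2.7 semantics (not code from the package):

import torch

class Toy(torch.nn.Module):

  def forward(self, x):
    _ = torch.rand(4)  # produced but never used
    return x + 1

ep = torch.export.export(Toy().eval(), (torch.randn(4),))
gm = ep.graph_module  # aten-level graph; aten.rand carries _nondeterministic_seeded

def is_impure_node(node: torch.fx.Node):
  # Same rule as the pass above: treat seeded random ops as pure so DCE
  # is allowed to drop them when their results are unused.
  if getattr(node.target, "_nondeterministic_seeded", False):
    return False
  return node.is_impure()

try:
  gm.graph.eliminate_dead_code(is_impure_node)
except TypeError:
  pass  # older torch: eliminate_dead_code() accepts no callback
gm.recompile()
print(gm.code)  # the unused rand node should be gone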
ai_edge_torch/_convert/test/test_convert.py
CHANGED
@@ -29,6 +29,8 @@ from torch.ao.quantization import quantize_pt2e
 import torchvision
 
 from absl.testing import absltest as googletest
+from ai_edge_litert.aot.core import types as litert_types
+from ai_edge_litert.aot.vendors import fallback_backend
 from ai_edge_litert import interpreter as tfl_interpreter  # pylint: disable=g-direct-tensorflow-import
 
 
@@ -574,6 +576,29 @@ class TestConvert(googletest.TestCase):
       self.fail(f"Conversion failed with bloat16 inputs: {err}")
     # pylint: enable=broad-except
 
+  def test_compile_model(self):
+    """Tests AOT compilation of a simple Add module."""
+
+    class Add(nn.Module):
+
+      def forward(self, a, b):
+        return a + b
+
+    args = (
+        torch.randn((5, 10)),
+        torch.randn((5, 10)),
+    )
+    torch_module = Add().eval()
+    compilation_result = ai_edge_torch.experimental_add_compilation_backend(
+        fallback_backend.FallbackTarget()
+    ).convert(torch_module, args)
+    assert isinstance(compilation_result, litert_types.CompilationResult)
+    self.assertLen(compilation_result.models_with_backend, 1)
+    self.assertEqual(
+        compilation_result.models_with_backend[0][0].id(),
+        fallback_backend.FallbackBackend.id(),
+    )
+
 
 if __name__ == "__main__":
   googletest.main()
ai_edge_torch/generative/examples/amd_llama_135m/amd_llama_135m.py
CHANGED
@@ -15,8 +15,10 @@
 
 """Example of building AMD-Llama-135m."""
 
+from typing import Callable, Dict
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
+import torch
 from torch import nn
 
 TENSOR_NAMES = model_builder.TENSOR_NAMES_WITH_SEPARATE_LM_HEAD
@@ -49,9 +51,7 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       activation=cfg.ActivationConfig(cfg.ActivationType.SILU),
       intermediate_size=2048,
   )
-  norm_config = cfg.NormalizationConfig(
-      type=cfg.NormalizationType.RMS_NORM, enable_hlfb=True
-  )
+  norm_config = cfg.NormalizationConfig(type=cfg.NormalizationType.RMS_NORM)
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
       ff_config=ff_config,
@@ -67,7 +67,6 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       block_configs=block_config,
       final_norm_config=norm_config,
       lm_head_share_weight_with_embedding=False,
-      enable_hlfb=True,
   )
   return config
 
@@ -80,10 +79,15 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
   return config
 
 
-def build_model(checkpoint_path: str, **kwargs) -> nn.Module:
+def build_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] | None = None,
+    **kwargs
+) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
-      model_class=AmdLlama
+      model_class=AmdLlama,
+      custom_loader=custom_loader,
   )
ai_edge_torch/generative/examples/amd_llama_135m/convert_to_tflite.py
CHANGED
@@ -19,13 +19,19 @@ from absl import app
 from ai_edge_torch.generative.examples.amd_llama_135m import amd_llama_135m
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
+from ai_edge_torch.generative.utilities import loader
 
 flags = converter.define_conversion_flags("amd-llama-135m")
 
 
 def main(_):
+  checkpoint_path = flags.FLAGS.checkpoint_path
   pytorch_model = amd_llama_135m.build_model(
-      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
+      checkpoint_path,
+      custom_loader=loader.maybe_get_custom_loader(
+          checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+      ),
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
   )
   converter.convert_to_tflite(
       pytorch_model,
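The same plumbing is repeated in each conversion script below: the checkpoint path and the custom_checkpoint_loader flag are passed through loader.maybe_get_custom_loader(), and the resulting callable (or None) is forwarded to the model builder. For illustration, a hand-rolled loader satisfying the Callable[[str], Dict[str, torch.Tensor]] contract the builders now accept might look like the following sketch (the safetensors format is an assumption for the example, not something this diff prescribes):

from typing import Dict

import torch

def my_custom_loader(path: str) -> Dict[str, torch.Tensor]:
  # Resolve the checkpoint from any source (remote storage, a custom
  # format, ...) and return an ordinary state dict of named tensors.
  from safetensors.torch import load_file  # assumed dependency
  return load_file(path)

pytorch_model = amd_llama_135m.build_model(
    "/path/to/checkpoint", custom_loader=my_custom_loader
)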
ai_edge_torch/generative/examples/deepseek/convert_to_tflite.py
CHANGED
@@ -17,15 +17,20 @@
 
 from absl import app
 from ai_edge_torch.generative.examples.deepseek import deepseek
-from ai_edge_torch.generative.layers import kv_cache
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
+from ai_edge_torch.generative.utilities import loader
 
 flags = converter.define_conversion_flags('deepseek')
 
 def main(_):
+  checkpoint_path = flags.FLAGS.checkpoint_path
   pytorch_model = deepseek.build_model(
-      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
+      checkpoint_path,
+      custom_loader=loader.maybe_get_custom_loader(
+          checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+      ),
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
   )
   converter.convert_to_tflite(
       pytorch_model,
ai_edge_torch/generative/examples/deepseek/deepseek.py
CHANGED
@@ -15,8 +15,10 @@
 
 """Example of building DeepSeek R1 distilled models."""
 
+from typing import Callable, Dict
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
+import torch
 from torch import nn
 
 TENSOR_NAMES = model_builder.TENSOR_NAMES_WITH_SEPARATE_LM_HEAD
@@ -51,9 +53,7 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       intermediate_size=8960,
   )
   norm_config = cfg.NormalizationConfig(
-      type=cfg.NormalizationType.RMS_NORM,
-      epsilon=1e-06,
-      enable_hlfb=True,
+      type=cfg.NormalizationType.RMS_NORM, epsilon=1e-06
   )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
@@ -70,7 +70,6 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       block_configs=block_config,
       final_norm_config=norm_config,
       lm_head_share_weight_with_embedding=False,
-      enable_hlfb=True,
   )
   return config
 
@@ -84,10 +83,15 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
   return config
 
 
-def build_model(checkpoint_path: str, **kwargs) -> nn.Module:
+def build_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs
+) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
       model_class=DeepSeekDistillQwen,
+      custom_loader=custom_loader,
   )
ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py
CHANGED
@@ -19,13 +19,19 @@ from absl import app
 from ai_edge_torch.generative.examples.gemma import gemma1
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
+from ai_edge_torch.generative.utilities import loader
 
 flags = converter.define_conversion_flags("gemma-2b")
 
 
 def main(_):
+  checkpoint_path = flags.FLAGS.checkpoint_path
   pytorch_model = gemma1.build_2b_model(
-      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
+      checkpoint_path,
+      custom_loader=loader.maybe_get_custom_loader(
+          checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+      ),
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
   )
   converter.convert_to_tflite(
       pytorch_model,
ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py
CHANGED
@@ -19,6 +19,7 @@ from absl import app
 from ai_edge_torch.generative.examples.gemma import gemma2
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
+from ai_edge_torch.generative.utilities import loader
 
 flags = converter.define_conversion_flags(
     "gemma2-2b", default_mask_as_input=True, default_transpose_kv_cache=True
@@ -26,8 +27,13 @@ flags = converter.define_conversion_flags(
 
 
 def main(_):
+  checkpoint_path = flags.FLAGS.checkpoint_path
   pytorch_model = gemma2.build_2b_model(
-      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
+      checkpoint_path,
+      custom_loader=loader.maybe_get_custom_loader(
+          checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+      ),
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
   )
   converter.convert_to_tflite(
       pytorch_model,
ai_edge_torch/generative/examples/gemma/gemma1.py
CHANGED
@@ -15,9 +15,12 @@
 
 """Example of building a Gemma1 model."""
 
+from typing import Callable, Dict
+
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
 import ai_edge_torch.generative.utilities.loader as loading_utils
+import torch
 from torch import nn
 
 TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
@@ -62,10 +65,7 @@ def get_model_config_2b(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       intermediate_size=16384,
   )
   norm_config = cfg.NormalizationConfig(
-      type=cfg.NormalizationType.RMS_NORM,
-      epsilon=1e-6,
-      zero_centered=True,
-      enable_hlfb=True,
+      type=cfg.NormalizationType.RMS_NORM, epsilon=1e-6, zero_centered=True
   )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
@@ -84,7 +84,6 @@ def get_model_config_2b(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       block_configs=block_config,
       final_norm_config=norm_config,
       lm_head_use_bias=False,
-      enable_hlfb=True,
   )
   return config
 
@@ -99,10 +98,15 @@ def get_fake_model_config(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
   return config
 
 
-def build_2b_model(checkpoint_path: str, **kwargs) -> nn.Module:
+def build_2b_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs
+) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_model_config_2b(**kwargs),
       tensor_names=TENSOR_NAMES,
       model_class=Gemma1,
+      custom_loader=custom_loader,
   )
ai_edge_torch/generative/examples/gemma/gemma2.py
CHANGED
@@ -15,7 +15,7 @@
 
 """Example of building a Gemma2 model."""
 
-from typing import List, Optional, Tuple
+from typing import Callable, Dict, List, Optional, Tuple
 
 from ai_edge_torch.generative.layers import attention
 from ai_edge_torch.generative.layers import builder
@@ -233,10 +233,7 @@ def get_model_config_2b(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
     The model config for a Gemma 2B model.
   """
   norm_config = cfg.NormalizationConfig(
-      type=cfg.NormalizationType.RMS_NORM,
-      epsilon=1e-6,
-      zero_centered=True,
-      enable_hlfb=True,
+      type=cfg.NormalizationType.RMS_NORM, epsilon=1e-6, zero_centered=True
   )
   ff_config = cfg.FeedForwardConfig(
       type=cfg.FeedForwardType.GATED,
@@ -284,7 +281,6 @@ def get_model_config_2b(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       block_configs=[get_block_config(i) for i in range(num_layers)],
       final_norm_config=norm_config,
       lm_head_use_bias=False,
-      enable_hlfb=True,
       final_logit_softcap=30.0,
   )
   return config
@@ -306,7 +302,11 @@ def get_fake_model_config(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
   return config
 
 
-def build_2b_model(
+def build_2b_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs,
+) -> nn.Module:
   for tensor_names in TENSOR_NAMES_DICT.values():
     try:
       return model_builder.build_decoder_only_model(
@@ -314,6 +314,7 @@ def build_2b_model(checkpoint_path: str, **kwargs) -> nn.Module:
          config=get_model_config_2b(**kwargs),
          tensor_names=tensor_names,
          model_class=Gemma2,
+          custom_loader=custom_loader,
      )
    except KeyError as _:
      continue
ai_edge_torch/generative/examples/gemma3/convert_gemma3_to_tflite.py
CHANGED
@@ -25,13 +25,6 @@ flags = converter.define_conversion_flags(
     'gemma3-1b', default_mask_as_input=True, default_transpose_kv_cache=True
 )
 
-_CUSTOM_CHECKPOINT_LOADER = flags.DEFINE_bool(
-    'custom_checkpoint_loader',
-    False,
-    'If true, the conversion script will use a custom checkpoint loader which'
-    ' will read a checkpoint from a remote source.',
-)
-
 _MODEL_SIZE = flags.DEFINE_string(
     'model_size',
     '1b',
@@ -40,16 +33,14 @@ _MODEL_SIZE = flags.DEFINE_string(
 
 
 def main(_):
-
-  if flags.FLAGS.custom_checkpoint_loader:
-    # If loading from a remote source, try to get a custom loader first.
-    custom_loader = loader.get_custom_loader(flags.FLAGS.checkpoint_path)
-
+  checkpoint_path = flags.FLAGS.checkpoint_path
   if _MODEL_SIZE.value == '1b':
     pytorch_model = gemma3.build_model_1b(
-        flags.FLAGS.checkpoint_path,
+        checkpoint_path,
+        custom_loader=loader.maybe_get_custom_loader(
+            checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+        ),
         kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
-        custom_loader=custom_loader,
     )
   else:
     raise ValueError(f'Unsupported model size: {_MODEL_SIZE.value}')
ai_edge_torch/generative/examples/gemma3/decoder.py
CHANGED
@@ -149,8 +149,12 @@ class Decoder(nn.Module):
           cache_len=attention_mask.shape[-1],
           sliding_window_size=sliding_window_size,
       )
-      #
-      combined_mask = torch.min(attention_mask, sliding_mask)
+      # Expand sliding_mask to match attention_mask's dimensions
+      # (e.g., [B, 1, seq_len, cache_len]).
+      # Assuming the head dimension is dim 1 for attention_mask.
+      expanded_sliding_mask = sliding_mask.unsqueeze(1)
+      # Combine masks using logical AND (min ensures -inf propagates).
+      combined_mask = torch.min(attention_mask, expanded_sliding_mask)
       return combined_mask
     return attention_mask
 
@@ -161,9 +165,9 @@
       sliding_window_size: int,
   ) -> torch.Tensor:
     """Creates mask for sliding window attention (PyTorch)."""
-    cache_positions = torch.tensor(
-        [i for i in range(cache_len)], dtype=torch.int32
-    )
+    # Use torch.arange to create a tensor with a range of integers in a
+    # Dynamo-friendly way.
+    cache_positions = torch.arange(cache_len, dtype=torch.int32)
     cache_positions = cache_positions.view(1, 1, -1)  # [1, 1, cache_len]
     segment_pos_expanded = segment_pos.clone().unsqueeze(-1)  # [B, seq_len, 1]
 
@@ -329,10 +333,7 @@ def get_decoder_config_1b(kv_cache_max_len: int = 2048) -> cfg.ModelConfig:
     The model config for a Gemma 1B model.
   """
   norm_config = cfg.NormalizationConfig(
-      type=cfg.NormalizationType.RMS_NORM,
-      epsilon=1e-6,
-      zero_centered=True,
-      enable_hlfb=True,
+      type=cfg.NormalizationType.RMS_NORM, epsilon=1e-6, zero_centered=True,
   )
   ff_config = cfg.FeedForwardConfig(
       type=cfg.FeedForwardType.GATED,
@@ -379,7 +380,6 @@ def get_decoder_config_1b(kv_cache_max_len: int = 2048) -> cfg.ModelConfig:
      block_configs=[get_block_config(i) for i in range(num_layers)],
      final_norm_config=norm_config,
      lm_head_use_bias=False,
-      enable_hlfb=True,
      final_logit_softcap=None,
  )
  return config
ai_edge_torch/generative/examples/gemma3/gemma3.py
CHANGED
@@ -158,9 +158,7 @@ def get_fake_model_config(**kwargs) -> Gemma3MMConfig:
       image_projection_scale=128**0.5,
       image_projection_use_bias=False,
       mm_norm_config=cfg.NormalizationConfig(
-          type=cfg.NormalizationType.LAYER_NORM,
-          epsilon=1e-6,
-          enable_hlfb=True,
+          type=cfg.NormalizationType.LAYER_NORM, epsilon=1e-6
       ),
       mm_extra_tokens=32,
   )
ai_edge_torch/generative/examples/gemma3/image_encoder.py
CHANGED
@@ -98,9 +98,7 @@ def get_image_encoder_config() -> cfg.ModelConfig:
       output_proj_use_bias=True,
   )
   norm_config = cfg.NormalizationConfig(
-      type=cfg.NormalizationType.LAYER_NORM,
-      epsilon=1e-6,
-      enable_hlfb=True,
+      type=cfg.NormalizationType.LAYER_NORM, epsilon=1e-6
   )
   ff_config = cfg.FeedForwardConfig(
       type=cfg.FeedForwardType.SEQUENTIAL,
@@ -123,7 +121,6 @@ def get_image_encoder_config() -> cfg.ModelConfig:
       image_embedding=image_embedding_config,
       block_configs=block_config,
       final_norm_config=norm_config,
-      enable_hlfb=True,
       num_mm_tokens_per_image=256,
   )
   return config
ai_edge_torch/generative/examples/hammer/convert_to_tflite.py
CHANGED
@@ -19,6 +19,7 @@ from absl import app
 from ai_edge_torch.generative.examples.hammer import hammer
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
+from ai_edge_torch.generative.utilities import loader
 
 flags = converter.define_conversion_flags('hammer')
 
@@ -36,8 +37,13 @@ _BUILDER = {
 
 
 def main(_):
+  checkpoint_path = flags.FLAGS.checkpoint_path
   pytorch_model = _BUILDER[_MODEL_SIZE.value](
-      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
+      checkpoint_path,
+      custom_loader=loader.maybe_get_custom_loader(
+          checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+      ),
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
   )
   converter.convert_to_tflite(
       pytorch_model,