ai-edge-torch-nightly 0.5.0.dev20250515__py3-none-any.whl → 0.5.0.dev20250516__py3-none-any.whl
This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- ai_edge_torch/__init__.py +1 -0
- ai_edge_torch/_convert/conversion.py +23 -0
- ai_edge_torch/_convert/converter.py +57 -3
- ai_edge_torch/_convert/test/test_convert.py +25 -0
- ai_edge_torch/generative/examples/amd_llama_135m/amd_llama_135m.py +9 -2
- ai_edge_torch/generative/examples/amd_llama_135m/convert_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/deepseek/convert_to_tflite.py +7 -2
- ai_edge_torch/generative/examples/deepseek/deepseek.py +8 -1
- ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/gemma/gemma1.py +9 -1
- ai_edge_torch/generative/examples/gemma/gemma2.py +7 -2
- ai_edge_torch/generative/examples/gemma3/convert_gemma3_to_tflite.py +5 -14
- ai_edge_torch/generative/examples/hammer/convert_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/hammer/hammer.py +14 -2
- ai_edge_torch/generative/examples/llama/convert_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/llama/llama.py +25 -6
- ai_edge_torch/generative/examples/moonshine/convert_moonshine_to_tflite.py +0 -1
- ai_edge_torch/generative/examples/openelm/convert_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/openelm/openelm.py +8 -1
- ai_edge_torch/generative/examples/paligemma/convert_to_tflite.py +6 -1
- ai_edge_torch/generative/examples/paligemma/decoder.py +1 -0
- ai_edge_torch/generative/examples/paligemma/decoder2.py +1 -0
- ai_edge_torch/generative/examples/paligemma/image_encoder.py +2 -1
- ai_edge_torch/generative/examples/paligemma/paligemma.py +12 -5
- ai_edge_torch/generative/examples/paligemma/verify.py +27 -5
- ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/phi/convert_phi4_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/phi/convert_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/phi/phi2.py +8 -1
- ai_edge_torch/generative/examples/phi/phi3.py +7 -2
- ai_edge_torch/generative/examples/phi/phi4.py +7 -2
- ai_edge_torch/generative/examples/qwen/convert_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/qwen/qwen.py +20 -3
- ai_edge_torch/generative/examples/qwen_vl/convert_to_tflite.py +6 -1
- ai_edge_torch/generative/examples/qwen_vl/decoder.py +1 -2
- ai_edge_torch/generative/examples/qwen_vl/image_encoder.py +12 -4
- ai_edge_torch/generative/examples/qwen_vl/qwen_vl.py +12 -4
- ai_edge_torch/generative/examples/qwen_vl/verify.py +26 -5
- ai_edge_torch/generative/examples/smollm/convert_to_tflite.py +7 -2
- ai_edge_torch/generative/examples/smollm/convert_v2_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/smollm/smollm.py +14 -2
- ai_edge_torch/generative/examples/smollm/verify.py +2 -2
- ai_edge_torch/generative/examples/stable_diffusion/clip.py +2 -1
- ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py +7 -1
- ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py +8 -1
- ai_edge_torch/generative/utilities/converter.py +16 -4
- ai_edge_torch/generative/utilities/loader.py +19 -0
- ai_edge_torch/version.py +1 -1
- {ai_edge_torch_nightly-0.5.0.dev20250515.dist-info → ai_edge_torch_nightly-0.5.0.dev20250516.dist-info}/METADATA +1 -1
- {ai_edge_torch_nightly-0.5.0.dev20250515.dist-info → ai_edge_torch_nightly-0.5.0.dev20250516.dist-info}/RECORD +54 -54
- {ai_edge_torch_nightly-0.5.0.dev20250515.dist-info → ai_edge_torch_nightly-0.5.0.dev20250516.dist-info}/LICENSE +0 -0
- {ai_edge_torch_nightly-0.5.0.dev20250515.dist-info → ai_edge_torch_nightly-0.5.0.dev20250516.dist-info}/WHEEL +0 -0
- {ai_edge_torch_nightly-0.5.0.dev20250515.dist-info → ai_edge_torch_nightly-0.5.0.dev20250516.dist-info}/top_level.txt +0 -0
ai_edge_torch/__init__.py
CHANGED
@@ -14,6 +14,7 @@
 # ==============================================================================
 from ai_edge_torch._config import config
 from ai_edge_torch._convert.converter import convert
+from ai_edge_torch._convert.converter import experimental_add_compilation_backend
 from ai_edge_torch._convert.converter import signature
 from ai_edge_torch._convert.to_channel_last_io import to_channel_last_io
 from ai_edge_torch.model import Model
ai_edge_torch/_convert/conversion.py
CHANGED
@@ -26,6 +26,9 @@ from ai_edge_torch.generative import fx_passes as generative_fx_passes
 from ai_edge_torch.quantize import quant_config as qcfg
 import torch
 
+from ai_edge_litert.aot import aot_compile as aot_compile_lib
+from ai_edge_litert.aot.core import types as litert_types
+
 
 def _run_convert_passes(
     exported_program: torch.export.ExportedProgram,
@@ -153,3 +156,23 @@ def convert_signatures(
   )
 
   return model.TfLiteModel(tflite_model)
+
+
+def aot_compile(
+    compilation_configs: list[litert_types.CompilationConfig],
+    cpu_model: model.TfLiteModel,
+) -> litert_types.CompilationResult:
+  """Compiles the given CPU model.
+
+  Args:
+    compilation_configs: The list of compilation configs to use.
+    cpu_model: The CPU model to compile.
+
+  Returns:
+    The compilation result.
+  """
+  litert_model = litert_types.Model.create_from_bytes(cpu_model.tflite_model())
+  return aot_compile_lib.aot_compile(
+      litert_model,
+      config=compilation_configs,
+  )
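A minimal sketch of the flow this new helper enables, mirroring what Converter.convert (in the next file) does once compilation configs are present; here cpu_model stands in for an already-converted model.TfLiteModel:

from ai_edge_litert.aot.core import types as litert_types
from ai_edge_litert.aot.vendors import import_vendor as vendor_lib
from ai_edge_torch._convert import conversion

# Wrap the target in a CompilationConfig and compile the CPU flatbuffer
# for every registered vendor target.
config = litert_types.CompilationConfig(target=vendor_lib.AllRegisteredTarget())
compilation_result = conversion.aot_compile([config], cpu_model)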
ai_edge_torch/_convert/converter.py
CHANGED
@@ -23,6 +23,9 @@ from ai_edge_torch._convert import signature as signature_module
 from ai_edge_torch.quantize import quant_config as qcfg
 import torch
 
+from ai_edge_litert.aot.core import types as litert_types
+from ai_edge_litert.aot.vendors import import_vendor as vendor_lib
+
 
 class Converter:
   """A converter for converting PyTorch models to edge models.
@@ -32,6 +35,7 @@ class Converter:
 
   def __init__(self):
     self._signatures: list[signature_module.Signature] = []
+    self._compilation_configs: list[litert_types.CompilationConfig] = []
 
   def signature(
       self,
@@ -96,6 +100,31 @@ class Converter:
     )
     return self
 
+  def experimental_add_compilation_backend(
+      self,
+      target: litert_types.Target | None = None,
+      **kwargs: litert_types.Config,
+  ) -> Converter:
+    """Adds an AOT compilation target to the converter.
+
+    NOTE: This API is experimental and subject to change.
+
+    Args:
+      target: The target to compile for. If not specified, will compile to all
+        registered AOT targets in ai_edge_litert. See ai_edge_litert.aot.vendors
+        for more details. Adding the same target multiple times is a no-op.
+      **kwargs: Additional arguments to pass to the backend compiler.
+
+    Returns:
+      The converter object itself.
+    """
+    if target is None:
+      target = vendor_lib.AllRegisteredTarget()
+    if isinstance(target, litert_types.Target):
+      target = litert_types.CompilationConfig(target=target, **kwargs)
+    self._compilation_configs.append(target)
+    return self
+
   def convert(
       self,
       module: torch.nn.Module = None,
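With this method in place, AOT compilation chains onto the existing fluent API. A sketch against the always-available fallback target used by the new test further down; module and sample_args are placeholders for your own model and inputs:

import ai_edge_torch
from ai_edge_litert.aot.vendors import fallback_backend

# Each added target accumulates on the converter; convert() then returns a
# CompilationResult instead of a plain TfLiteModel.
result = (
    ai_edge_torch.signature("forward", module, sample_args)
    .experimental_add_compilation_backend(fallback_backend.FallbackTarget())
    .convert()
)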
@@ -107,7 +136,7 @@ class Converter:
       dynamic_shapes: Optional[Union[dict[str, Any], Tuple[Any, ...]]] = None,
       _ai_edge_converter_flags: Optional[dict[str, Any]] = None,
       _saved_model_dir: Optional[str] = None,
-  ) -> model.TfLiteModel:
+  ) -> model.TfLiteModel | litert_types.CompilationResult:
     """Finalizes the conversion and produces an edge model.
 
     This could be called with no arguments as follows:
@@ -144,7 +173,9 @@ class Converter:
         specified, a random temporary directory would be used.
 
     Returns:
-      The converted edge model.
+      The converted edge model. If compilation configs are provided, returns the
+      compilation result that contains the compiled edge models for different
+      targets.
 
     Raises:
       ValueError: If the arguments are not provided as expected. See the example
@@ -169,13 +200,16 @@ class Converter:
           "sample_args or sample_kwargs must be provided if a module is"
           " specified."
       )
-    return conversion.convert_signatures(
+    converted_model = conversion.convert_signatures(
         self._signatures,
         strict_export=strict_export,
         quant_config=quant_config,
         _tfl_converter_flags=_ai_edge_converter_flags,
         _saved_model_dir=_saved_model_dir,
     )
+    if self._compilation_configs:
+      return conversion.aot_compile(self._compilation_configs, converted_model)
+    return converted_model
 
 
 def signature(
@@ -211,6 +245,26 @@ def signature(
   )
 
 
+def experimental_add_compilation_backend(
+    target: litert_types.Target | None = None,
+    **kwargs: litert_types.Config,
+) -> Converter:
+  """Adds an AOT compilation target to the converter.
+
+  NOTE: This API is experimental and subject to change.
+
+  Args:
+    target: The target to compile for. If not specified, will compile to all
+      registered AOT targets in ai_edge_litert. See ai_edge_litert.aot.vendors
+      for more details. Adding the same target multiple times is a no-op.
+    **kwargs: Additional arguments to pass to the backend compiler.
+
+  Returns:
+    The converter object itself.
+  """
+  return Converter().experimental_add_compilation_backend(target, **kwargs)
+
+
 def convert(
     module: torch.nn.Module = None,
     sample_args=None,
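The module-level wrapper simply starts a fresh Converter, so the one-liner used by the new test in test_convert.py also works; torch_module and sample_args are placeholders:

import ai_edge_torch

# No target argument means "compile for all registered AOT targets".
result = ai_edge_torch.experimental_add_compilation_backend().convert(
    torch_module, sample_args
)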
ai_edge_torch/_convert/test/test_convert.py
CHANGED
@@ -29,6 +29,8 @@ from torch.ao.quantization import quantize_pt2e
 import torchvision
 
 from absl.testing import absltest as googletest
+from ai_edge_litert.aot.core import types as litert_types
+from ai_edge_litert.aot.vendors import fallback_backend
 from ai_edge_litert import interpreter as tfl_interpreter  # pylint: disable=g-direct-tensorflow-import
 
 
@@ -574,6 +576,29 @@ class TestConvert(googletest.TestCase):
       self.fail(f"Conversion failed with bloat16 inputs: {err}")
     # pylint: enable=broad-except
 
+  def test_compile_model(self):
+    """Tests AOT compilation of a simple Add module."""
+
+    class Add(nn.Module):
+
+      def forward(self, a, b):
+        return a + b
+
+    args = (
+        torch.randn((5, 10)),
+        torch.randn((5, 10)),
+    )
+    torch_module = Add().eval()
+    compilation_result = ai_edge_torch.experimental_add_compilation_backend(
+        fallback_backend.FallbackTarget()
+    ).convert(torch_module, args)
+    assert isinstance(compilation_result, litert_types.CompilationResult)
+    self.assertLen(compilation_result.models_with_backend, 1)
+    self.assertEqual(
+        compilation_result.models_with_backend[0][0].id(),
+        fallback_backend.FallbackBackend.id(),
+    )
+
 
 if __name__ == "__main__":
   googletest.main()
ai_edge_torch/generative/examples/amd_llama_135m/amd_llama_135m.py
CHANGED
@@ -15,8 +15,10 @@
 
 """Example of building AMD-Llama-135m."""
 
+from typing import Callable, Dict
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
+import torch
 from torch import nn
 
 TENSOR_NAMES = model_builder.TENSOR_NAMES_WITH_SEPARATE_LM_HEAD
@@ -80,10 +82,15 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
   return config
 
 
-def build_model(checkpoint_path: str, **kwargs) -> nn.Module:
+def build_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] | None = None,
+    **kwargs
+) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
-      model_class=AmdLlama
+      model_class=AmdLlama,
+      custom_loader=custom_loader,
   )
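The new custom_loader hook is a plain Callable[[str], Dict[str, torch.Tensor]]: it receives the checkpoint path and returns a state dict. A hypothetical loader; torch.load is illustrative and any transport that yields a tensor dict works:

from typing import Dict

import torch
from ai_edge_torch.generative.examples.amd_llama_135m import amd_llama_135m

def my_checkpoint_loader(path: str) -> Dict[str, torch.Tensor]:
  # Substitute your own transport, e.g. a read from a remote blob store.
  return torch.load(path, map_location="cpu")

model = amd_llama_135m.build_model(
    "/path/to/checkpoint",  # placeholder path
    custom_loader=my_checkpoint_loader,
)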
ai_edge_torch/generative/examples/amd_llama_135m/convert_to_tflite.py
CHANGED
@@ -19,13 +19,19 @@ from absl import app
 from ai_edge_torch.generative.examples.amd_llama_135m import amd_llama_135m
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
+from ai_edge_torch.generative.utilities import loader
 
 flags = converter.define_conversion_flags("amd-llama-135m")
 
 
 def main(_):
+  checkpoint_path = flags.FLAGS.checkpoint_path
   pytorch_model = amd_llama_135m.build_model(
-      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
+      checkpoint_path,
+      custom_loader=loader.maybe_get_custom_loader(
+          checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+      ),
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
   )
   converter.convert_to_tflite(
       pytorch_model,
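Every conversion script in this release follows the same pattern: loader.maybe_get_custom_loader resolves the custom_checkpoint_loader flag, presumably returning a loader callable when the flag is set and None otherwise (loader.py's own +19-line diff is not shown on this page). Called directly, the pattern looks like:

from ai_edge_torch.generative.utilities import loader

custom_loader = loader.maybe_get_custom_loader(
    "/path/to/checkpoint",  # placeholder path
    True,  # the value of the custom_checkpoint_loader flag
)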
ai_edge_torch/generative/examples/deepseek/convert_to_tflite.py
CHANGED
@@ -17,15 +17,20 @@
 
 from absl import app
 from ai_edge_torch.generative.examples.deepseek import deepseek
-from ai_edge_torch.generative.layers import kv_cache
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
+from ai_edge_torch.generative.utilities import loader
 
 flags = converter.define_conversion_flags('deepseek')
 
 def main(_):
+  checkpoint_path = flags.FLAGS.checkpoint_path
   pytorch_model = deepseek.build_model(
-      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
+      checkpoint_path,
+      custom_loader=loader.maybe_get_custom_loader(
+          checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+      ),
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
   )
   converter.convert_to_tflite(
       pytorch_model,
ai_edge_torch/generative/examples/deepseek/deepseek.py
CHANGED
@@ -15,8 +15,10 @@
 
 """Example of building DeepSeek R1 distilled models."""
 
+from typing import Callable, Dict
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
+import torch
 from torch import nn
 
 TENSOR_NAMES = model_builder.TENSOR_NAMES_WITH_SEPARATE_LM_HEAD
@@ -84,10 +86,15 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
   return config
 
 
-def build_model(checkpoint_path: str, **kwargs) -> nn.Module:
+def build_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs
+) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
       model_class=DeepSeekDistillQwen,
+      custom_loader=custom_loader,
   )
ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py
CHANGED
@@ -19,13 +19,19 @@ from absl import app
 from ai_edge_torch.generative.examples.gemma import gemma1
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
+from ai_edge_torch.generative.utilities import loader
 
 flags = converter.define_conversion_flags("gemma-2b")
 
 
 def main(_):
+  checkpoint_path = flags.FLAGS.checkpoint_path
   pytorch_model = gemma1.build_2b_model(
-      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
+      checkpoint_path,
+      custom_loader=loader.maybe_get_custom_loader(
+          checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+      ),
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
   )
   converter.convert_to_tflite(
       pytorch_model,
ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py
CHANGED
@@ -19,6 +19,7 @@ from absl import app
 from ai_edge_torch.generative.examples.gemma import gemma2
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
+from ai_edge_torch.generative.utilities import loader
 
 flags = converter.define_conversion_flags(
     "gemma2-2b", default_mask_as_input=True, default_transpose_kv_cache=True
@@ -26,8 +27,13 @@ flags = converter.define_conversion_flags(
 
 
 def main(_):
+  checkpoint_path = flags.FLAGS.checkpoint_path
   pytorch_model = gemma2.build_2b_model(
-      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
+      checkpoint_path,
+      custom_loader=loader.maybe_get_custom_loader(
+          checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+      ),
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
   )
   converter.convert_to_tflite(
       pytorch_model,
ai_edge_torch/generative/examples/gemma/gemma1.py
CHANGED
@@ -15,9 +15,12 @@
 
 """Example of building a Gemma1 model."""
 
+from typing import Callable, Dict
+
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
 import ai_edge_torch.generative.utilities.loader as loading_utils
+import torch
 from torch import nn
 
 TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
@@ -99,10 +102,15 @@ def get_fake_model_config(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
   return config
 
 
-def build_2b_model(checkpoint_path: str, **kwargs) -> nn.Module:
+def build_2b_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs
+) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_model_config_2b(**kwargs),
       tensor_names=TENSOR_NAMES,
       model_class=Gemma1,
+      custom_loader=custom_loader,
   )
ai_edge_torch/generative/examples/gemma/gemma2.py
CHANGED
@@ -15,7 +15,7 @@
 
 """Example of building a Gemma2 model."""
 
-from typing import List, Optional, Tuple
+from typing import Callable, Dict, List, Optional, Tuple
 
 from ai_edge_torch.generative.layers import attention
 from ai_edge_torch.generative.layers import builder
@@ -306,7 +306,11 @@ def get_fake_model_config(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
   return config
 
 
-def build_2b_model(checkpoint_path: str, **kwargs) -> nn.Module:
+def build_2b_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs,
+) -> nn.Module:
   for tensor_names in TENSOR_NAMES_DICT.values():
     try:
       return model_builder.build_decoder_only_model(
@@ -314,6 +318,7 @@ def build_2b_model(checkpoint_path: str, **kwargs) -> nn.Module:
           config=get_model_config_2b(**kwargs),
           tensor_names=tensor_names,
          model_class=Gemma2,
+          custom_loader=custom_loader,
       )
     except KeyError as _:
       continue
ai_edge_torch/generative/examples/gemma3/convert_gemma3_to_tflite.py
CHANGED
@@ -25,13 +25,6 @@ flags = converter.define_conversion_flags(
     'gemma3-1b', default_mask_as_input=True, default_transpose_kv_cache=True
 )
 
-_CUSTOM_CHECKPOINT_LOADER = flags.DEFINE_bool(
-    'custom_checkpoint_loader',
-    False,
-    'If true, the conversion script will use a custom checkpoint loader which'
-    ' will read a checkpoint from a remote source.',
-)
-
 _MODEL_SIZE = flags.DEFINE_string(
     'model_size',
     '1b',
@@ -40,16 +33,14 @@ _MODEL_SIZE = flags.DEFINE_string(
 
 
 def main(_):
-
-  if flags.FLAGS.custom_checkpoint_loader:
-    # If loading from a remote source, try to get a custom loader first.
-    custom_loader = loader.get_custom_loader(flags.FLAGS.checkpoint_path)
-
+  checkpoint_path = flags.FLAGS.checkpoint_path
   if _MODEL_SIZE.value == '1b':
     pytorch_model = gemma3.build_model_1b(
-        flags.FLAGS.checkpoint_path,
+        checkpoint_path,
+        custom_loader=loader.maybe_get_custom_loader(
+            checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+        ),
         kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
-        custom_loader=custom_loader,
     )
   else:
     raise ValueError(f'Unsupported model size: {_MODEL_SIZE.value}')
ai_edge_torch/generative/examples/hammer/convert_to_tflite.py
CHANGED
@@ -19,6 +19,7 @@ from absl import app
 from ai_edge_torch.generative.examples.hammer import hammer
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
+from ai_edge_torch.generative.utilities import loader
 
 flags = converter.define_conversion_flags('hammer')
 
@@ -36,8 +37,13 @@ _BUILDER = {
 
 
 def main(_):
+  checkpoint_path = flags.FLAGS.checkpoint_path
   pytorch_model = _BUILDER[_MODEL_SIZE.value](
-      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
+      checkpoint_path,
+      custom_loader=loader.maybe_get_custom_loader(
+          checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+      ),
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
   )
   converter.convert_to_tflite(
      pytorch_model,
ai_edge_torch/generative/examples/hammer/hammer.py
CHANGED
@@ -15,8 +15,10 @@
 
 """Example of building Hammer 2.1 models."""
 
+from typing import Callable, Dict
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
+import torch
 from torch import nn
 
 TENSOR_NAMES = model_builder.TENSOR_NAMES
@@ -89,19 +91,29 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
   return config
 
 
-def build_1_5b_model(checkpoint_path: str, **kwargs) -> nn.Module:
+def build_1_5b_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs
+) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_1_5b_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
       model_class=Hammer,
+      custom_loader=custom_loader,
   )
 
 
-def build_0_5b_model(checkpoint_path: str, **kwargs) -> nn.Module:
+def build_0_5b_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs
+) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_0_5b_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
       model_class=Hammer,
+      custom_loader=custom_loader,
   )
ai_edge_torch/generative/examples/llama/convert_to_tflite.py
CHANGED
@@ -19,6 +19,7 @@ from absl import app
 from ai_edge_torch.generative.examples.llama import llama
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
+from ai_edge_torch.generative.utilities import loader
 
 
 flags = converter.define_conversion_flags('llama')
@@ -37,8 +38,13 @@ _BUILDER = {
 
 
 def main(_):
+  checkpoint_path = flags.FLAGS.checkpoint_path
   pytorch_model = _BUILDER[_MODEL_SIZE.value](
-      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
+      checkpoint_path,
+      custom_loader=loader.maybe_get_custom_loader(
+          checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+      ),
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
   )
   converter.convert_to_tflite(
       pytorch_model,
ai_edge_torch/generative/examples/llama/llama.py
CHANGED
@@ -17,7 +17,7 @@
 
 from functools import partial
 import math
-from typing import Tuple
+from typing import Callable, Dict, Tuple
 
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
@@ -180,19 +180,38 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
 
 
 def _build_model(
-    checkpoint_path: str, config: cfg.ModelConfig
+    checkpoint_path: str,
+    config: cfg.ModelConfig,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
 ) -> torch.nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=config,
       tensor_names=TENSOR_NAMES,
       model_class=Llama,
+      custom_loader=custom_loader,
   )
 
 
-def build_1b_model(checkpoint_path: str, **kwargs) -> torch.nn.Module:
-  return _build_model(checkpoint_path, get_1b_model_config(**kwargs))
+def build_1b_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs
+) -> torch.nn.Module:
+  return _build_model(
+      checkpoint_path,
+      get_1b_model_config(**kwargs),
+      custom_loader=custom_loader,
+  )
 
 
-def build_3b_model(checkpoint_path: str, **kwargs) -> torch.nn.Module:
-  return _build_model(checkpoint_path, get_3b_model_config(**kwargs))
+def build_3b_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs
+) -> torch.nn.Module:
+  return _build_model(
+      checkpoint_path,
+      get_3b_model_config(**kwargs),
+      custom_loader=custom_loader,
+  )
ai_edge_torch/generative/examples/openelm/convert_to_tflite.py
CHANGED
@@ -19,13 +19,19 @@ from absl import app
 from ai_edge_torch.generative.examples.openelm import openelm
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
+from ai_edge_torch.generative.utilities import loader
 
 flags = converter.define_conversion_flags("openelm")
 
 
 def main(_):
+  checkpoint_path = flags.FLAGS.checkpoint_path
   pytorch_model = openelm.build_model(
-      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
+      checkpoint_path,
+      custom_loader=loader.maybe_get_custom_loader(
+          checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+      ),
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
   )
   converter.convert_to_tflite(
       pytorch_model,
ai_edge_torch/generative/examples/openelm/openelm.py
CHANGED
@@ -15,9 +15,11 @@
 
 """Example of building an OpenELM model."""
 
+from typing import Callable, Dict
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
 import ai_edge_torch.generative.utilities.loader as loading_utils
+import torch
 from torch import nn
 
 TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
@@ -118,10 +120,15 @@ def get_fake_model_config(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
   return config
 
 
-def build_model(checkpoint_path: str, **kwargs) -> nn.Module:
+def build_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs
+) -> nn.Module:
   return model_builder.build_decoder_only_model(
      checkpoint_path=checkpoint_path,
      config=get_model_config(**kwargs),
      tensor_names=TENSOR_NAMES,
      model_class=OpenELM,
+      custom_loader=custom_loader,
   )
ai_edge_torch/generative/examples/paligemma/convert_to_tflite.py
CHANGED
@@ -19,6 +19,7 @@ from absl import app
 from ai_edge_torch.generative.examples.paligemma import paligemma
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
+from ai_edge_torch.generative.utilities import loader
 import torch
 
 flags = converter.define_conversion_flags('paligemma2-3b-224')
@@ -32,9 +33,13 @@ _VERSION = flags.DEFINE_enum(
 
 
 def main(_):
+  checkpoint_path = flags.FLAGS.checkpoint_path
   pytorch_model = paligemma.build_model(
-      flags.FLAGS.checkpoint_path,
+      checkpoint_path,
       version=int(_VERSION.value),
+      custom_loader=loader.maybe_get_custom_loader(
+          checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+      ),
      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
   )
 
ai_edge_torch/generative/examples/paligemma/decoder.py
CHANGED
@@ -113,6 +113,7 @@ def get_decoder_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       type=cfg.NormalizationType.RMS_NORM,
       epsilon=1e-6,
       zero_centered=True,
+      enable_hlfb=True,
   )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,