PyPI - ai-edge-torch-nightly - Versions diffs - 0.3.0.dev20241206__py3-none-any.whl → 0.3.0.dev20241214__py3-none-any.whl - Mend

ai-edge-torch-nightly 0.3.0.dev20241206__py3-none-any.whl → 0.3.0.dev20241214__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

ai_edge_torch/__init__.py CHANGED Viewed

@@ -13,13 +13,13 @@
 # limitations under the License.
 # ==============================================================================
+from ai_edge_torch._config import config
 from ai_edge_torch._convert.converter import convert
 from ai_edge_torch._convert.converter import signature
 from ai_edge_torch._convert.to_channel_last_io import to_channel_last_io
 from ai_edge_torch.model import Model
 from ai_edge_torch.version import __version__
 def load(path: str) -> Model:
   """Imports an ai_edge_torch model from disk.

ai_edge_torch/_config.py ADDED Viewed

@@ -0,0 +1,52 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Provides a configuration for the ai-edge-torch."""
+import functools
+import logging
+import os
+__all__ = ["config"]
+class _Config:
+  """ai-edge-torch global configs."""
+  @property
+  @functools.cache  # pylint: disable=method-cache-max-size-none
+  def use_torch_xla(self) -> bool:
+    """True if using torch_xla to lower torch ops to StableHLO.
+    To use torch_xla as the lowering backend, set environment variable
+    `USE_TORCH_XLA` to "true".
+    """
+    var = os.environ.get("USE_TORCH_XLA", "false")
+    var = var.lower().strip()
+    if var in ("y", "yes", "t", "true", "on", "1"):
+      return True
+    elif var in ("n", "no", "f", "false", "off", "0"):
+      return False
+    else:
+      logging.warning("Invalid USE_TORCH_XLA value is ignored: %s.", var)
+      return False
+  @property
+  def in_oss(self) -> bool:
+    """True if the code is not running in google internal environment."""
+    return True
+config = _Config()

ai_edge_torch/_convert/test/test_convert.py CHANGED Viewed

@@ -19,7 +19,6 @@ import os
 from typing import Tuple
 import ai_edge_torch
-from ai_edge_torch import config
 from ai_edge_torch._convert import conversion_utils
 from ai_edge_torch.quantize import pt2e_quantizer
 from ai_edge_torch.testing import model_coverage
@@ -292,7 +291,7 @@ class TestConvert(googletest.TestCase):
     self.assertTrue(result)
   @googletest.skipIf(
-      not config.Config.use_torch_xla,
+      not ai_edge_torch.config.use_torch_xla,
       reason="Shape polymorphism is not yet support with odml_torch.",
   )
   def test_convert_model_with_dynamic_batch(self):

ai_edge_torch/debug/test/test_culprit.py CHANGED Viewed

@@ -15,14 +15,14 @@
 import ast
-import io
-import sys
-from ai_edge_torch.debug import find_culprits
+import ai_edge_torch.debug
 import torch
 from absl.testing import absltest as googletest
+find_culprits = ai_edge_torch.debug.find_culprits
 _test_culprit_lib = torch.library.Library("test_culprit", "DEF")
 _test_culprit_lib.define("non_lowerable_op(Tensor x) -> Tensor")
@@ -52,6 +52,11 @@ class BadModel(torch.nn.Module):
 class TestCulprit(googletest.TestCase):
+  def setUp(self):
+    super().setUp()
+    torch.manual_seed(0)
+    torch._dynamo.reset()
   def test_find_culprits(self):
     model = BadModel().eval()
     args = (torch.rand(10),)

ai_edge_torch/generative/examples/amd_llama_135m/amd_llama_135m.py CHANGED Viewed

@@ -17,10 +17,16 @@
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
+from torch import nn
 TENSOR_NAMES = model_builder.TENSOR_NAMES_WITH_SEPARATE_LM_HEAD
+class AmdLlama(model_builder.DecoderOnlyModel):
+  """An AMD-Llama model built from the Edge Generative API layers."""
+  pass
 def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
   """Returns the model config for an AMD-Llama-135m model.
@@ -72,11 +78,10 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
   return config
-def build_model(
-    checkpoint_path: str, **kwargs
-) -> model_builder.DecoderOnlyModel:
+def build_model(checkpoint_path: str, **kwargs) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
+      model_class=AmdLlama
   )

ai_edge_torch/generative/examples/amd_llama_135m/convert_to_tflite.py CHANGED Viewed

@@ -22,6 +22,7 @@ from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.amd_llama_135m import amd_llama_135m
 from ai_edge_torch.generative.utilities import converter
+from ai_edge_torch.generative.utilities.model_builder import ExportConfig
 _CHECKPOINT_PATH = flags.DEFINE_string(
     'checkpoint_path',
@@ -61,6 +62,7 @@ def main(_):
       tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
       prefill_seq_len=_PREFILL_SEQ_LEN.value,
       quantize=_QUANTIZE.value,
+      export_config=ExportConfig(),
   )

ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py CHANGED Viewed

@@ -22,6 +22,7 @@ from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.gemma import gemma1
 from ai_edge_torch.generative.utilities import converter
+from ai_edge_torch.generative.utilities.model_builder import ExportConfig
 _CHECKPOINT_PATH = flags.DEFINE_string(
     'checkpoint_path',
@@ -61,6 +62,7 @@ def main(_):
       tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
       prefill_seq_len=_PREFILL_SEQ_LENS.value,
       quantize=_QUANTIZE.value,
+      export_config=ExportConfig(),
   )

ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py CHANGED Viewed

@@ -22,6 +22,7 @@ from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.gemma import gemma2
 from ai_edge_torch.generative.utilities import converter
+from ai_edge_torch.generative.utilities.model_builder import ExportConfig
 _CHECKPOINT_PATH = flags.DEFINE_string(
     'checkpoint_path',
@@ -61,6 +62,7 @@ def main(_):
       tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
       prefill_seq_len=_PREFILL_SEQ_LENS.value,
       quantize=_QUANTIZE.value,
+      export_config=ExportConfig(),
   )

ai_edge_torch/generative/examples/gemma/gemma1.py CHANGED Viewed

@@ -18,6 +18,7 @@
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
 import ai_edge_torch.generative.utilities.loader as loading_utils
+from torch import nn
 TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
     ff_up_proj="model.layers.{}.mlp.up_proj",
@@ -33,6 +34,11 @@ TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
 )
+class Gemma1(model_builder.DecoderOnlyModel):
+  """A Gemma1 model built from the Edge Generative API layers."""
+  pass
 def get_model_config_2b(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
   """Returns the model config for a Gemma 2B model.
@@ -91,11 +97,10 @@ def get_fake_model_config(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
   return config
-def build_2b_model(
-    checkpoint_path: str, **kwargs
-) -> model_builder.DecoderOnlyModel:
+def build_2b_model(checkpoint_path: str, **kwargs) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_model_config_2b(**kwargs),
       tensor_names=TENSOR_NAMES,
+      model_class=Gemma1,
   )

ai_edge_torch/generative/examples/gemma/gemma2.py CHANGED Viewed

@@ -22,6 +22,7 @@ from ai_edge_torch.generative.layers import builder
 from ai_edge_torch.generative.layers import kv_cache as kv_utils
 import ai_edge_torch.generative.layers.attention_utils as attn_utils
 import ai_edge_torch.generative.layers.model_config as cfg
+from ai_edge_torch.generative.utilities import model_builder
 import ai_edge_torch.generative.utilities.loader as loading_utils
 import torch
 from torch import nn
@@ -132,6 +133,7 @@ class Gemma2(nn.Module):
       tokens: torch.Tensor,
       input_pos: torch.Tensor,
       kv_cache: kv_utils.KVCache,
+      export_config: Optional[model_builder.ExportConfig] = None,
   ) -> dict[torch.Tensor, kv_utils.KVCache]:
     _, seq_len = tokens.size()
     assert self.config.max_seq_len >= seq_len, (
@@ -162,6 +164,13 @@ class Gemma2(nn.Module):
         updated_kv_entires.append(kv_entry)
     updated_kv_cache = kv_utils.KVCache(tuple(updated_kv_entires))
+    if export_config is not None:
+      if (
+          torch.numel(input_pos) > 1
+          and not export_config.output_logits_on_prefill
+      ):
+        return {"kv_cache": updated_kv_cache}
     x = self.final_norm(x)
     res = self.lm_head(x)  # (b, t, vocab_size)
     if self.config.final_logit_softcap is not None:
@@ -250,11 +259,9 @@ def get_fake_model_config(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
 def build_2b_model(checkpoint_path: str, **kwargs) -> nn.Module:
-  config = get_model_config_2b(**kwargs)
-  model = Gemma2(config)
-  loader = loading_utils.ModelLoader(checkpoint_path, TENSOR_NAMES)
-  # Since embedding and lm-head use the same weight, we need to set strict
-  # to False.
-  loader.load(model, strict=False)
-  model.eval()
-  return model
+  return model_builder.build_decoder_only_model(
+      checkpoint_path=checkpoint_path,
+      config=get_model_config_2b(**kwargs),
+      tensor_names=TENSOR_NAMES,
+      model_class=Gemma2,
+  )

ai_edge_torch/generative/examples/llama/convert_to_tflite.py CHANGED Viewed

@@ -22,6 +22,7 @@ from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.llama import llama
 from ai_edge_torch.generative.utilities import converter
+from ai_edge_torch.generative.utilities.model_builder import ExportConfig
 _MODEL_SIZE = flags.DEFINE_enum(
     'model_size',
@@ -72,6 +73,7 @@ def main(_):
       tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
       prefill_seq_len=_PREFILL_SEQ_LENS.value,
       quantize=_QUANTIZE.value,
+      export_config=ExportConfig(),
   )

ai_edge_torch/generative/examples/llama/llama.py CHANGED Viewed

@@ -20,7 +20,6 @@ from typing import Tuple
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
-import ai_edge_torch.generative.utilities.loader as loading_utils
 import torch
 TENSOR_NAMES = model_builder.TENSOR_NAMES
@@ -177,23 +176,18 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
 def _build_model(
     checkpoint_path: str, config: cfg.ModelConfig
-) -> model_builder.DecoderOnlyModel:
-  model = Llama(config)
-  loader = loading_utils.ModelLoader(checkpoint_path, TENSOR_NAMES)
-  # Since embedding and lm-head use the same weight, we need to set strict
-  # to False.
-  loader.load(model, strict=False)
-  model.eval()
-  return model
-def build_1b_model(
-    checkpoint_path: str, **kwargs
-) -> model_builder.DecoderOnlyModel:
+) -> torch.nn.Module:
+  return model_builder.build_decoder_only_model(
+      checkpoint_path=checkpoint_path,
+      config=config,
+      tensor_names=TENSOR_NAMES,
+      model_class=Llama,
+  )
+def build_1b_model(checkpoint_path: str, **kwargs) -> torch.nn.Module:
   return _build_model(checkpoint_path, get_1b_model_config(**kwargs))
-def build_3b_model(
-    checkpoint_path: str, **kwargs
-) -> model_builder.DecoderOnlyModel:
+def build_3b_model(checkpoint_path: str, **kwargs) -> torch.nn.Module:
   return _build_model(checkpoint_path, get_3b_model_config(**kwargs))

ai_edge_torch/generative/examples/openelm/convert_to_tflite.py CHANGED Viewed

@@ -22,6 +22,7 @@ from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.openelm import openelm
 from ai_edge_torch.generative.utilities import converter
+from ai_edge_torch.generative.utilities.model_builder import ExportConfig
 _CHECKPOINT_PATH = flags.DEFINE_string(
     'checkpoint_path',
@@ -64,6 +65,7 @@ def main(_):
       tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
       prefill_seq_len=_PREFILL_SEQ_LENS.value,
       quantize=_QUANTIZE.value,
+      export_config=ExportConfig(),
   )

ai_edge_torch/generative/examples/openelm/openelm.py CHANGED Viewed

@@ -18,6 +18,7 @@
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
 import ai_edge_torch.generative.utilities.loader as loading_utils
+from torch import nn
 TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
     ff_up_proj="transformer.layers.{}.ffn.proj_1",
@@ -34,6 +35,11 @@ TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
 )
+class OpenELM(model_builder.DecoderOnlyModel):
+  """An OpenELM model built from the Edge Generative API layers."""
+  pass
 def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
   """Returns the model config for an OpenELM model.
@@ -112,11 +118,10 @@ def get_fake_model_config(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
   return config
-def build_model(
-    checkpoint_path: str, **kwargs
-) -> model_builder.DecoderOnlyModel:
+def build_model(checkpoint_path: str, **kwargs) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
+      model_class=OpenELM,
   )

ai_edge_torch/generative/examples/paligemma/convert_to_tflite.py CHANGED Viewed

@@ -26,6 +26,7 @@ from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.paligemma import paligemma
 from ai_edge_torch.generative.utilities import converter
+from ai_edge_torch.generative.utilities.model_builder import ExportConfig
 import torch
 _CHECKPOINT_PATH = flags.DEFINE_string(
@@ -73,6 +74,7 @@ def main(_):
       pixel_values_size=torch.Size(_PIXEL_VALUES_SIZE.value),
       quantize=_QUANTIZE.value,
       config=pytorch_model.config.decoder_config,
+      export_config=ExportConfig(),
   )

ai_edge_torch/generative/examples/paligemma/decoder.py CHANGED Viewed

@@ -15,6 +15,8 @@
 """Example of building a decoder of PaliGemma 3B model which is Gemma1."""
+from typing import Optional
 from ai_edge_torch.generative.layers import kv_cache as kv_utils
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
@@ -51,6 +53,7 @@ class Decoder(model_builder.DecoderOnlyModel):
       input_pos: torch.Tensor,
       kv_cache: kv_utils.KVCache,
       input_embeds: torch.Tensor = None,
+      export_config: Optional[model_builder.ExportConfig] = None,
   ) -> dict[torch.Tensor, kv_utils.KVCache]:
     if input_embeds is None:
       return super().forward(tokens, input_pos, kv_cache)
@@ -130,12 +133,10 @@ def get_fake_decoder_config(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
   return config
-def build_decoder(
-    checkpoint_path: str, **kwargs
-) -> model_builder.DecoderOnlyModel:
-  decoder = Decoder(get_decoder_config(**kwargs))
-  loader = loading_utils.ModelLoader(checkpoint_path, TENSOR_NAMES)
-  # Loose the strictness because only decoder is being loaded.
-  loader.load(decoder, strict=False)
-  decoder.eval()
-  return decoder
+def build_decoder(checkpoint_path: str, **kwargs) -> torch.nn.Module:
+  return model_builder.build_decoder_only_model(
+      checkpoint_path=checkpoint_path,
+      config=get_decoder_config(**kwargs),
+      tensor_names=TENSOR_NAMES,
+      model_class=Decoder,
+  )

ai_edge_torch/generative/examples/paligemma/paligemma.py CHANGED Viewed

@@ -16,11 +16,13 @@
 """Example of building a full-stack of PaliGemma model."""
 from dataclasses import dataclass
+from typing import Optional
 from ai_edge_torch.generative.examples.paligemma import decoder
 from ai_edge_torch.generative.examples.paligemma import image_encoder
 import ai_edge_torch.generative.layers.kv_cache as kv_utils
 import ai_edge_torch.generative.layers.model_config as cfg
+from ai_edge_torch.generative.utilities import model_builder
 import ai_edge_torch.generative.utilities.loader as loading_utils
 import torch
 from torch import nn
@@ -67,9 +69,16 @@ class PaliGemma(nn.Module):
       input_pos: torch.Tensor,
       kv_cache: kv_utils.KVCache,
       pixel_values: torch.Tensor = None,
+      export_config: Optional[model_builder.ExportConfig] = None,
   ) -> dict[torch.Tensor, kv_utils.KVCache]:
     if pixel_values is None:
-      return self.decoder(tokens, input_pos, kv_cache)
+      return self.decoder(
+          tokens=tokens,
+          input_pos=input_pos,
+          kv_cache=kv_cache,
+          input_embeds=None,
+          export_config=export_config
+      )
     input_embeds = self.decoder.tok_embedding(tokens)
@@ -100,6 +109,7 @@ class PaliGemma(nn.Module):
         input_pos=input_pos,
         kv_cache=kv_cache,
         input_embeds=input_embeds,
+        export_config=export_config,
     )

ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py CHANGED Viewed

@@ -22,6 +22,7 @@ from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.phi import phi3
 from ai_edge_torch.generative.utilities import converter
+from ai_edge_torch.generative.utilities.model_builder import ExportConfig
 _CHECKPOINT_PATH = flags.DEFINE_string(
     'checkpoint_path',
@@ -61,6 +62,7 @@ def main(_):
       tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
       prefill_seq_len=_PREFILL_SEQ_LENS.value,
       quantize=_QUANTIZE.value,
+      export_config=ExportConfig(),
   )

ai_edge_torch/generative/examples/phi/convert_to_tflite.py CHANGED Viewed

@@ -22,6 +22,7 @@ from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.phi import phi2
 from ai_edge_torch.generative.utilities import converter
+from ai_edge_torch.generative.utilities.model_builder import ExportConfig
 _CHECKPOINT_PATH = flags.DEFINE_string(
     'checkpoint_path',
@@ -61,6 +62,7 @@ def main(_):
       tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
       prefill_seq_len=_PREFILL_SEQ_LENS.value,
       quantize=_QUANTIZE.value,
+      export_config=ExportConfig(),
   )

ai_edge_torch/generative/examples/phi/phi2.py CHANGED Viewed

@@ -18,6 +18,7 @@
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
 import ai_edge_torch.generative.utilities.loader as loading_utils
+from torch import nn
 TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
     ff_up_proj="model.layers.{}.mlp.fc1",
@@ -33,6 +34,11 @@ TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
 )
+class Phi2(model_builder.DecoderOnlyModel):
+  """A Phi-2 model built from the Edge Generative API layers."""
+  pass
 def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
   """Returns the model config for a Phi-2 model.
@@ -92,11 +98,10 @@ def get_fake_model_config(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
   return config
-def build_model(
-    checkpoint_path: str, **kwargs
-) -> model_builder.DecoderOnlyModel:
+def build_model(checkpoint_path: str, **kwargs) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
+      model_class=Phi2,
   )

ai_edge_torch/generative/examples/phi/phi3.py CHANGED Viewed

@@ -207,13 +207,11 @@ def get_fake_model_config(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
   return config
-def build_model(
-    checkpoint_path: str, **kwargs
-) -> model_builder.DecoderOnlyModel:
+def build_model(checkpoint_path: str, **kwargs) -> torch.nn.Module:
   """Instantiates the model instance and load checkpoint if provided."""
-  config = get_model_config(**kwargs)
-  model = Phi3_5Mini(config)
-  loader = loading_utils.ModelLoader(checkpoint_path, TENSOR_NAMES)
-  loader.load(model)
-  model.eval()
-  return model
+  return model_builder.build_decoder_only_model(
+      checkpoint_path=checkpoint_path,
+      config=get_model_config(**kwargs),
+      tensor_names=TENSOR_NAMES,
+      model_class=Phi3_5Mini,
+  )

ai_edge_torch/generative/examples/qwen/convert_to_tflite.py CHANGED Viewed

@@ -22,6 +22,7 @@ from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.qwen import qwen
 from ai_edge_torch.generative.utilities import converter
+from ai_edge_torch.generative.utilities.model_builder import ExportConfig
 _MODEL_SIZE = flags.DEFINE_enum(
     'model_size',
@@ -76,6 +77,7 @@ def main(_):
       tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
       prefill_seq_len=_PREFILL_SEQ_LENS.value,
       quantize=_QUANTIZE.value,
+      export_config=ExportConfig(),
   )

ai_edge_torch/generative/examples/qwen/qwen.py CHANGED Viewed

@@ -17,10 +17,16 @@
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
+from torch import nn
 TENSOR_NAMES = model_builder.TENSOR_NAMES
+class Qwen(model_builder.DecoderOnlyModel):
+  """A Qwen model built from the Edge Generative API layers."""
+  pass
 def get_3b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
   """Returns the model config for a Qwen 2.5 3B model.
@@ -101,31 +107,28 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
   return config
-def build_3b_model(
-    checkpoint_path: str, **kwargs
-) -> model_builder.DecoderOnlyModel:
+def build_3b_model(checkpoint_path: str, **kwargs) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_3b_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
+      model_class=Qwen,
   )
-def build_1_5b_model(
-    checkpoint_path: str, **kwargs
-) -> model_builder.DecoderOnlyModel:
+def build_1_5b_model(checkpoint_path: str, **kwargs) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_1_5b_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
+      model_class=Qwen,
   )
-def build_0_5b_model(
-    checkpoint_path: str, **kwargs
-) -> model_builder.DecoderOnlyModel:
+def build_0_5b_model(checkpoint_path: str, **kwargs) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_0_5b_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
+      model_class=Qwen,
   )

ai_edge_torch/generative/examples/smollm/convert_to_tflite.py CHANGED Viewed

@@ -22,6 +22,7 @@ from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.smollm import smollm
 from ai_edge_torch.generative.utilities import converter
+from ai_edge_torch.generative.utilities.model_builder import ExportConfig
 _CHECKPOINT_PATH = flags.DEFINE_string(
     'checkpoint_path',
@@ -54,6 +55,7 @@ def main(_):
   pytorch_model = smollm.build_model(
       _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
   )
   quant_suffix = 'q8' if _QUANTIZE.value else 'f32'
   output_filename = f'smollm_{quant_suffix}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
   converter.convert_to_tflite(
@@ -61,6 +63,7 @@ def main(_):
       tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
       prefill_seq_len=_PREFILL_SEQ_LENS.value,
       quantize=_QUANTIZE.value,
+      export_config=ExportConfig(),
   )

ai-edge-torch-nightly 0.3.0.dev20241206__py3-none-any.whl → 0.3.0.dev20241214__py3-none-any.whl

ai-edge-torch-nightly 0.3.0.dev20241206__py3-none-any.whl → 0.3.0.dev20241214__py3-none-any.whl