ai-edge-torch-nightly 0.6.0.dev20250602__py3-none-any.whl → 0.6.0.dev20250604__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_partitioners/min_cut.py +3 -1
- ai_edge_torch/generative/examples/amd_llama_135m/amd_llama_135m.py +7 -15
- ai_edge_torch/generative/examples/amd_llama_135m/convert_to_tflite.py +2 -1
- ai_edge_torch/generative/examples/deepseek/convert_to_tflite.py +3 -1
- ai_edge_torch/generative/examples/deepseek/deepseek.py +7 -15
- ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py +2 -1
- ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py +2 -1
- ai_edge_torch/generative/examples/gemma/gemma1.py +8 -16
- ai_edge_torch/generative/examples/gemma/gemma2.py +24 -24
- ai_edge_torch/generative/examples/gemma/verify_gemma2.py +6 -0
- ai_edge_torch/generative/examples/gemma/verify_util.py +3 -0
- ai_edge_torch/generative/examples/gemma3/convert_gemma3_to_tflite.py +2 -1
- ai_edge_torch/generative/examples/gemma3/decoder.py +34 -35
- ai_edge_torch/generative/examples/gemma3/gemma3.py +10 -8
- ai_edge_torch/generative/examples/hammer/convert_to_tflite.py +2 -1
- ai_edge_torch/generative/examples/hammer/hammer.py +23 -16
- ai_edge_torch/generative/examples/llama/convert_to_tflite.py +2 -2
- ai_edge_torch/generative/examples/llama/llama.py +13 -26
- ai_edge_torch/generative/examples/openelm/convert_to_tflite.py +2 -1
- ai_edge_torch/generative/examples/openelm/openelm.py +8 -16
- ai_edge_torch/generative/examples/paligemma/convert_to_tflite.py +2 -1
- ai_edge_torch/generative/examples/paligemma/decoder.py +12 -17
- ai_edge_torch/generative/examples/paligemma/decoder2.py +12 -17
- ai_edge_torch/generative/examples/paligemma/paligemma.py +14 -9
- ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py +2 -1
- ai_edge_torch/generative/examples/phi/convert_phi4_to_tflite.py +2 -1
- ai_edge_torch/generative/examples/phi/convert_to_tflite.py +2 -1
- ai_edge_torch/generative/examples/phi/phi2.py +8 -16
- ai_edge_torch/generative/examples/phi/phi3.py +8 -16
- ai_edge_torch/generative/examples/phi/phi4.py +8 -16
- ai_edge_torch/generative/examples/phi/verify_util.py +1 -3
- ai_edge_torch/generative/examples/qwen/convert_to_tflite.py +2 -1
- ai_edge_torch/generative/examples/qwen/convert_v3_to_tflite.py +2 -1
- ai_edge_torch/generative/examples/qwen/qwen.py +29 -34
- ai_edge_torch/generative/examples/qwen/qwen3.py +29 -35
- ai_edge_torch/generative/examples/qwen_vl/convert_to_tflite.py +2 -1
- ai_edge_torch/generative/examples/qwen_vl/decoder.py +11 -16
- ai_edge_torch/generative/examples/qwen_vl/qwen_vl.py +8 -12
- ai_edge_torch/generative/examples/smollm/convert_to_tflite.py +2 -2
- ai_edge_torch/generative/examples/smollm/convert_v2_to_tflite.py +2 -1
- ai_edge_torch/generative/examples/smollm/smollm.py +15 -30
- ai_edge_torch/generative/examples/t5/t5.py +23 -23
- ai_edge_torch/generative/examples/t5/t5_attention.py +2 -2
- ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py +2 -1
- ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py +7 -15
- ai_edge_torch/generative/layers/kv_cache.py +13 -1
- ai_edge_torch/generative/layers/model_config.py +0 -14
- ai_edge_torch/generative/test/test_kv_cache.py +14 -24
- ai_edge_torch/generative/test/test_lora.py +4 -21
- ai_edge_torch/generative/test/test_model_conversion.py +8 -4
- ai_edge_torch/generative/test/test_model_conversion_large.py +27 -19
- ai_edge_torch/generative/utilities/converter.py +15 -6
- ai_edge_torch/generative/utilities/model_builder.py +16 -6
- ai_edge_torch/generative/utilities/verifier.py +16 -6
- ai_edge_torch/version.py +1 -1
- {ai_edge_torch_nightly-0.6.0.dev20250602.dist-info → ai_edge_torch_nightly-0.6.0.dev20250604.dist-info}/METADATA +1 -1
- {ai_edge_torch_nightly-0.6.0.dev20250602.dist-info → ai_edge_torch_nightly-0.6.0.dev20250604.dist-info}/RECORD +60 -60
- {ai_edge_torch_nightly-0.6.0.dev20250602.dist-info → ai_edge_torch_nightly-0.6.0.dev20250604.dist-info}/LICENSE +0 -0
- {ai_edge_torch_nightly-0.6.0.dev20250602.dist-info → ai_edge_torch_nightly-0.6.0.dev20250604.dist-info}/WHEEL +0 -0
- {ai_edge_torch_nightly-0.6.0.dev20250602.dist-info → ai_edge_torch_nightly-0.6.0.dev20250604.dist-info}/top_level.txt +0 -0
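Taken together, the hunks below show one refactor applied across the generative examples: kv_cache_max_len is removed from ModelConfig and from the per-model get_*_model_config()/build_*_model() helpers, builders instead accept a mask_cache_size, and the KV cache length moves to converter.convert_to_tflite(). A minimal before/after sketch of the conversion flow, using SmolLM as the example; the paths, the 1024 value, and the prefill lengths are illustrative only, and other convert_to_tflite arguments (quantize, lora_ranks, export_config) are omitted:

from ai_edge_torch.generative.examples.smollm import smollm
from ai_edge_torch.generative.utilities import converter

# Before (0.6.0.dev20250602): the KV cache length was baked into the model
# config at build time, e.g. smollm.build_model(path, kv_cache_max_len=1024).
# After (0.6.0.dev20250604): the builder sizes only the attention-mask cache,
# and the KV cache length is a conversion-time argument.
pytorch_model = smollm.build_model(
    "/tmp/smollm-135m",          # hypothetical checkpoint path
    mask_cache_size=1024,
)
converter.convert_to_tflite(
    pytorch_model,
    output_path="/tmp/out",      # hypothetical output directory
    output_name_prefix="smollm",
    prefill_seq_len=[128, 256],
    kv_cache_max_len=1024,       # moved here from the model config
)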
ai_edge_torch/generative/examples/phi/phi4.py

@@ -89,16 +89,8 @@ class Phi4Mini(model_builder.DecoderOnlyModel):
   pass
 
 
-def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
-  """Returns the model config for a Phi-4 model.
-
-  Args:
-    kv_cache_max_len (int): The maximum sequence length of the KV cache. Default
-      is 1024.
-
-  Returns:
-    The model config for a Phi-4 model.
-  """
+def get_model_config() -> cfg.ModelConfig:
+  """Returns the model config for a Phi-4 model."""
   attn_config = cfg.AttentionConfig(
       num_heads=24,
       head_dim=128,
@@ -135,7 +127,6 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       vocab_size=200064,
       num_layers=32,
       max_seq_len=max_seq_len,
-      kv_cache_max_len=kv_cache_max_len,
       embedding_dim=3072,
       block_configs=block_config,
       final_norm_config=norm_config,
@@ -144,11 +135,11 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
   return config
 
 
-def get_fake_model_config(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
-  config = get_model_config(kv_cache_max_len)
+def get_fake_model_config() -> cfg.ModelConfig:
+  config = get_model_config()
   config.vocab_size = 128
   config.num_layers = 2
-  config.max_seq_len = 2 * kv_cache_max_len
+  config.max_seq_len = 256
   # Phi-4 has only one block config.
   config.block_config(0).ff_config.intermediate_size = 128
   return config
@@ -157,13 +148,14 @@ def get_fake_model_config(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
 def build_model(
     checkpoint_path: str,
     custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
-    **kwargs
+    mask_cache_size: int = 0,
 ) -> torch.nn.Module:
   """Instantiates the model instance and load checkpoint if provided."""
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
-      config=get_model_config(**kwargs),
+      config=get_model_config(),
       tensor_names=TENSOR_NAMES,
       model_class=Phi4Mini,
       custom_loader=custom_loader,
+      mask_cache_size=mask_cache_size,
   )
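After this change the Phi-4 config getters take no arguments; a short usage sketch under the new signatures shown above (the checkpoint path is hypothetical):

from ai_edge_torch.generative.examples.phi import phi4

config = phi4.get_model_config()      # was get_model_config(kv_cache_max_len=1024)
fake = phi4.get_fake_model_config()   # the fake config now pins max_seq_len to 256
model = phi4.build_model("/tmp/phi-4-mini", mask_cache_size=1024)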
ai_edge_torch/generative/examples/phi/verify_util.py

@@ -15,7 +15,6 @@
 """Utils for verifying the Phi model."""
 
 import logging
-import os
 import pathlib
 from typing import Callable, Dict
 
@@ -39,7 +38,6 @@ _BUILDER = {
 def verify_phi(
     version: str,
     checkpoint_dir: str,
-    weight_filename: str = "model.safetensors",
     max_new_tokens: int = 30,
     prompts: list[str] | None = None,
     atol: float = 1e-04,
@@ -63,7 +61,7 @@
     )
     reauthored_checkpoint = pathlib.Path(cached_config_file).parent
   else:
-    reauthored_checkpoint = os.path.join(checkpoint_dir, weight_filename)
+    reauthored_checkpoint = checkpoint_dir
 
   logging.info("Building the reauthored model from: %s", reauthored_checkpoint)
   reauthored_model = _BUILDER[version](
ai_edge_torch/generative/examples/qwen/convert_to_tflite.py

@@ -44,13 +44,14 @@ def main(_):
       custom_loader=loader.maybe_get_custom_loader(
           checkpoint_path, flags.FLAGS.custom_checkpoint_loader
       ),
-      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
+      mask_cache_size=converter.get_mask_cache_size_from_flags(),
   )
   converter.convert_to_tflite(
       pytorch_model,
       output_path=flags.FLAGS.output_path,
       output_name_prefix=flags.FLAGS.output_name_prefix,
       prefill_seq_len=flags.FLAGS.prefill_seq_lens,
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
       quantize=flags.FLAGS.quantize,
       lora_ranks=flags.FLAGS.lora_ranks,
       export_config=export_config.get_from_flags(),
ai_edge_torch/generative/examples/qwen/convert_v3_to_tflite.py

@@ -44,13 +44,14 @@ def main(_):
       custom_loader=loader.maybe_get_custom_loader(
           checkpoint_path, flags.FLAGS.custom_checkpoint_loader
       ),
-      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
+      mask_cache_size=converter.get_mask_cache_size_from_flags(),
   )
   converter.convert_to_tflite(
       pytorch_model,
       output_path=flags.FLAGS.output_path,
       output_name_prefix=flags.FLAGS.output_name_prefix,
       prefill_seq_len=flags.FLAGS.prefill_seq_lens,
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
       quantize=flags.FLAGS.quantize,
       lora_ranks=flags.FLAGS.lora_ranks,
       export_config=export_config.get_from_flags(),
ai_edge_torch/generative/examples/qwen/qwen.py

@@ -29,16 +29,8 @@ class Qwen(model_builder.DecoderOnlyModel):
   pass
 
 
-def get_3b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
-  """Returns the model config for a Qwen 2.5 3B model.
-
-  Args:
-    kv_cache_max_len (int): The maximum sequence length of the KV cache. Default
-      is 1024.
-
-  Returns:
-    The model config for a SmolLM model.
-  """
+def get_3b_model_config() -> cfg.ModelConfig:
+  """Returns the model config for a Qwen 2.5 3B model."""
   attn_config = cfg.AttentionConfig(
       num_heads=16,
       head_dim=128,
@@ -66,16 +58,15 @@ def get_3b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       num_layers=36,
       max_seq_len=32768,
       embedding_dim=2048,
-      kv_cache_max_len=kv_cache_max_len,
       block_configs=block_config,
       final_norm_config=norm_config,
   )
   return config
 
 
-def get_1_5b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
+def get_1_5b_model_config() -> cfg.ModelConfig:
   """Returns the model config for a Qwen 2.5 1B model."""
-  config = get_3b_model_config(kv_cache_max_len)
+  config = get_3b_model_config()
   # Qwen has only one block config.
   block_config = config.block_config(0)
   block_config.attn_config.num_heads = 12
@@ -85,9 +76,9 @@ def get_1_5b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
   return config
 
 
-def get_0_5b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
+def get_0_5b_model_config() -> cfg.ModelConfig:
   """Returns the model config for a Qwen 2.5 0.5B model."""
-  config = get_3b_model_config(kv_cache_max_len)
+  config = get_3b_model_config()
   # Qwen has only one block config.
   block_config = config.block_config(0)
   block_config.attn_config.num_heads = 14
@@ -98,8 +89,8 @@ def get_0_5b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
   return config
 
 
-def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
-  config = get_3b_model_config(**kwargs)
+def get_fake_model_config() -> cfg.ModelConfig:
+  config = get_3b_model_config()
   config.vocab_size = 128
   config.num_layers = 2
   # Qwen has only one block config.
@@ -107,43 +98,47 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
   return config
 
 
-def build_3b_model(
+def _build_model(
     checkpoint_path: str,
+    config: cfg.ModelConfig,
     custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
-    **kwargs
+    mask_cache_size: int = 0,
 ) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
-      config=get_3b_model_config(**kwargs),
+      config=config,
       tensor_names=TENSOR_NAMES,
       model_class=Qwen,
       custom_loader=custom_loader,
+      mask_cache_size=mask_cache_size,
+  )
+
+
+def build_3b_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    mask_cache_size: int = 0,
+) -> nn.Module:
+  return _build_model(
+      checkpoint_path, get_3b_model_config(), custom_loader, mask_cache_size
   )
 
 
 def build_1_5b_model(
     checkpoint_path: str,
     custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
-    **kwargs
+    mask_cache_size: int = 0,
 ) -> nn.Module:
-  return model_builder.build_decoder_only_model(
-      checkpoint_path=checkpoint_path,
-      config=get_1_5b_model_config(**kwargs),
-      tensor_names=TENSOR_NAMES,
-      model_class=Qwen,
-      custom_loader=custom_loader,
+  return _build_model(
+      checkpoint_path, get_1_5b_model_config(), custom_loader, mask_cache_size
   )
 
 
 def build_0_5b_model(
     checkpoint_path: str,
     custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
-    **kwargs
+    mask_cache_size: int = 0,
 ) -> nn.Module:
-  return model_builder.build_decoder_only_model(
-      checkpoint_path=checkpoint_path,
-      config=get_0_5b_model_config(**kwargs),
-      tensor_names=TENSOR_NAMES,
-      model_class=Qwen,
-      custom_loader=custom_loader,
+  return _build_model(
+      checkpoint_path, get_0_5b_model_config(), custom_loader, mask_cache_size
  )
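The three public Qwen builders now delegate to a shared private _build_model() that takes the config explicitly, which keeps the mask_cache_size plumbing in one place. A hypothetical call under the new signature:

from ai_edge_torch.generative.examples.qwen import qwen

# kv_cache_max_len=... is no longer accepted here via **kwargs;
# the checkpoint path is hypothetical.
model = qwen.build_0_5b_model("/tmp/qwen2.5-0.5b", mask_cache_size=1024)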
ai_edge_torch/generative/examples/qwen/qwen3.py

@@ -42,20 +42,11 @@ TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
 
 class Qwen3(model_builder.DecoderOnlyModel):
   """A Qwen3 model built from the Edge Generative API layers."""
-
   pass
 
 
-def get_4b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
-  """Returns the model config for a Qwen 3.0 4B model.
-
-  Args:
-    kv_cache_max_len (int): The maximum sequence length of the KV cache. Default
-      is 1024.
-
-  Returns:
-    The model config for a SmolLM model.
-  """
+def get_4b_model_config() -> cfg.ModelConfig:
+  """Returns the model config for a Qwen 3.0 4B model."""
   norm_config = cfg.NormalizationConfig(
       type=cfg.NormalizationType.RMS_NORM, epsilon=1e-06
   )
@@ -87,16 +78,15 @@ def get_4b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       num_layers=36,
       max_seq_len=40960,
       embedding_dim=2560,
-      kv_cache_max_len=kv_cache_max_len,
       block_configs=block_config,
       final_norm_config=norm_config,
   )
   return config
 
 
-def get_1_7b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
+def get_1_7b_model_config() -> cfg.ModelConfig:
   """Returns the model config for a Qwen 3.0 1.7B model."""
-  config = get_4b_model_config(kv_cache_max_len)
+  config = get_4b_model_config()
   # Qwen has only one block config.
   block_config = config.block_config(0)
   block_config.attn_config.num_heads = 16
@@ -107,9 +97,9 @@ def get_1_7b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
   return config
 
 
-def get_0_6b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
+def get_0_6b_model_config() -> cfg.ModelConfig:
   """Returns the model config for a Qwen 3.0 0.6B model."""
-  config = get_4b_model_config(kv_cache_max_len)
+  config = get_4b_model_config()
   # Qwen has only one block config.
   block_config = config.block_config(0)
   block_config.attn_config.num_heads = 16
@@ -120,8 +110,8 @@ def get_0_6b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
   return config
 
 
-def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
-  config = get_4b_model_config(**kwargs)
+def get_fake_model_config() -> cfg.ModelConfig:
+  config = get_4b_model_config()
   config.vocab_size = 128
   config.num_layers = 2
   # Qwen has only one block config.
@@ -129,43 +119,47 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
   return config
 
 
-def build_4b_model(
+def _build_model(
     checkpoint_path: str,
+    config: cfg.ModelConfig,
     custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
-    **kwargs
+    mask_cache_size: int = 0,
 ) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
-      config=get_4b_model_config(**kwargs),
+      config=config,
       tensor_names=TENSOR_NAMES,
       model_class=Qwen3,
       custom_loader=custom_loader,
+      mask_cache_size=mask_cache_size,
+  )
+
+
+def build_4b_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    mask_cache_size: int = 0,
+) -> nn.Module:
+  return _build_model(
+      checkpoint_path, get_4b_model_config(), custom_loader, mask_cache_size
   )
 
 
 def build_1_7b_model(
     checkpoint_path: str,
     custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
-    **kwargs
+    mask_cache_size: int = 0,
 ) -> nn.Module:
-  return model_builder.build_decoder_only_model(
-      checkpoint_path=checkpoint_path,
-      config=get_1_7b_model_config(**kwargs),
-      tensor_names=TENSOR_NAMES,
-      model_class=Qwen3,
-      custom_loader=custom_loader,
+  return _build_model(
+      checkpoint_path, get_1_7b_model_config(), custom_loader, mask_cache_size
   )
 
 
 def build_0_6b_model(
     checkpoint_path: str,
     custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
-    **kwargs
+    mask_cache_size: int = 0,
 ) -> nn.Module:
-  return model_builder.build_decoder_only_model(
-      checkpoint_path=checkpoint_path,
-      config=get_0_6b_model_config(**kwargs),
-      tensor_names=TENSOR_NAMES,
-      model_class=Qwen3,
-      custom_loader=custom_loader,
+  return _build_model(
+      checkpoint_path, get_0_6b_model_config(), custom_loader, mask_cache_size
   )
ai_edge_torch/generative/examples/qwen_vl/convert_to_tflite.py

@@ -42,7 +42,7 @@ def main(_):
       custom_loader=loader.maybe_get_custom_loader(
           checkpoint_path, flags.FLAGS.custom_checkpoint_loader
       ),
-      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
+      mask_cache_size=converter.get_mask_cache_size_from_flags(),
       image_size=(_IMAGE_HEIGHT.value, _IMAGE_WIDTH.value),
   )
 
@@ -55,6 +55,7 @@ def main(_):
       output_path=flags.FLAGS.output_path,
       output_name_prefix=flags.FLAGS.output_name_prefix,
       prefill_seq_len=flags.FLAGS.prefill_seq_lens,
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
       pixel_values_size=(
           pytorch_model.image_encoder.get_pixel_values_size(grid_thw)
       ),
ai_edge_torch/generative/examples/qwen_vl/decoder.py

@@ -60,8 +60,9 @@ class Decoder(model_builder.DecoderOnlyModel):
     rope = self.config.build_rope(input_pos, n_elem, attn_config.rotary_base)
 
     if mask is None:
+      assert kv_cache is not None, "KV cache must be provided."
       mask = self.mask_cache.index_select(2, input_pos)
-      mask = mask[:, :, :, :self.config.kv_cache_max_len]
+      mask = mask[:, :, :, :kv_cache.get_max_seq_len()]
 
     return self._forward_with_embeds(
         input_embeds,
@@ -73,16 +74,8 @@ class Decoder(model_builder.DecoderOnlyModel):
   )
 
 
-def get_decoder_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
-  """Returns the model config for a Qwen 2.5 VL 3B model.
-
-  Args:
-    kv_cache_max_len (int): The maximum sequence length of the KV cache. Default
-      is 1024.
-
-  Returns:
-    The model config for a Qwen 2.5 VL 3B model.
-  """
+def get_decoder_config() -> cfg.ModelConfig:
+  """Returns the model config for a Qwen 2.5 VL 3B model."""
   attn_config = cfg.AttentionConfig(
       num_heads=16,
       head_dim=128,
@@ -110,15 +103,14 @@ def get_decoder_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       num_layers=36,
       max_seq_len=32768,
       embedding_dim=2048,
-      kv_cache_max_len=kv_cache_max_len,
       block_configs=block_config,
       final_norm_config=norm_config,
   )
   return config
 
 
-def get_fake_decoder_config(**kwargs) -> cfg.ModelConfig:
-  config = get_decoder_config(**kwargs)
+def get_fake_decoder_config() -> cfg.ModelConfig:
+  config = get_decoder_config()
   config.vocab_size = 128
   config.num_layers = 2
   # Decoder has only one block config.
@@ -126,10 +118,13 @@ def get_fake_decoder_config(**kwargs) -> cfg.ModelConfig:
   return config
 
 
-def build_decoder(checkpoint_path: str, **kwargs) -> torch.nn.Module:
+def build_decoder(
+    checkpoint_path: str, mask_cache_size: int = 0
+) -> torch.nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
-      config=get_decoder_config(**kwargs),
+      config=get_decoder_config(),
       tensor_names=TENSOR_NAMES,
       model_class=Decoder,
+      mask_cache_size=mask_cache_size,
   )
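The decoder no longer reads the mask width from its config; it trims the cached mask to the runtime KV cache length via the new KVCache.get_max_seq_len() accessor (added in kv_cache.py, +13 -1 above). A self-contained sketch of that slicing contract; the helper function below is illustrative, not the library API:

import torch

def slice_mask(
    mask_cache: torch.Tensor,  # [1, 1, mask_cache_size, mask_cache_size]
    input_pos: torch.Tensor,   # positions of the tokens being processed
    kv_max_seq_len: int,       # what kv_cache.get_max_seq_len() would return
) -> torch.Tensor:
  """Selects mask rows for the current positions, trimmed to the KV length."""
  mask = mask_cache.index_select(2, input_pos)
  return mask[:, :, :, :kv_max_seq_len]

# Mask cache built for mask_cache_size=1024; KV cache exported with max len 512.
mask_cache = torch.full((1, 1, 1024, 1024), float("-inf")).triu(1)
mask = slice_mask(mask_cache, torch.tensor([0, 1, 2]), 512)
print(mask.shape)  # torch.Size([1, 1, 3, 512])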
ai_edge_torch/generative/examples/qwen_vl/qwen_vl.py

@@ -41,13 +41,13 @@ class QwenVLConfig:
 class QwenVL(nn.Module):
   """Qwen VL model from the Edge Generative API."""
 
-  def __init__(self, config: QwenVLConfig):
+  def __init__(self, config: QwenVLConfig, mask_cache_size: int = 0):
     super().__init__()
 
     self.image_encoder = image_encoder.QwenVLImageEncoder(
         config.image_encoder_config
     )
-    self.decoder = decoder.Decoder(config.decoder_config)
+    self.decoder = decoder.Decoder(config.decoder_config, mask_cache_size)
     # The amount of adjustment in input_pos to calculate RoPE properly in
     # forward() calls after image is handled.
     self.rope_pos_adjust = 0
@@ -179,26 +179,21 @@ class QwenVL(nn.Module):
 
 
 def get_model_config(
-    kv_cache_max_len: int = 1024,
     image_size: Tuple[int, int] = (34 * 14, 46 * 14),
 ) -> QwenVLConfig:
-  """Returns the model config for a PaliGemma 3B-224 model.
-
-  Returns:
-    The model config for a PaliGemma 3B model.
-  """
+  """Returns the model config for a PaliGemma 3B-224 model."""
   return QwenVLConfig(
       image_encoder_config=image_encoder.get_image_encoder_config(image_size),
-      decoder_config=decoder.get_decoder_config(kv_cache_max_len),
+      decoder_config=decoder.get_decoder_config(),
       image_token_id=151655,
       mrope_section=[16, 24, 24],
   )
 
 
-def get_fake_model_config(**kwargs) -> QwenVLConfig:
+def get_fake_model_config() -> QwenVLConfig:
   return QwenVLConfig(
       image_encoder_config=image_encoder.get_fake_image_encoder_config(),
-      decoder_config=decoder.get_fake_decoder_config(**kwargs),
+      decoder_config=decoder.get_fake_decoder_config(),
       image_token_id=127,
       mrope_section=[16, 24, 24],
   )
@@ -207,10 +202,11 @@ def get_fake_model_config(**kwargs) -> QwenVLConfig:
 def build_model(
     checkpoint_path: str,
     custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    mask_cache_size: int = 0,
     **kwargs
 ) -> QwenVL:
   config = get_model_config(**kwargs)
-  model = QwenVL(config)
+  model = QwenVL(config, mask_cache_size)
   image_encoder.load_image_encoder(
       checkpoint_path, model.image_encoder, custom_loader
   )
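QwenVL now threads mask_cache_size through to its decoder instead of relying on a config field; a minimal construction sketch under the new signatures (the 512 value is illustrative):

from ai_edge_torch.generative.examples.qwen_vl import qwen_vl

config = qwen_vl.get_fake_model_config()   # no longer takes **kwargs
model = qwen_vl.QwenVL(config, mask_cache_size=512)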
ai_edge_torch/generative/examples/smollm/convert_to_tflite.py

@@ -16,7 +16,6 @@
 """Example of converting SmolLM model to multi-signature tflite model."""
 
 from absl import app
-from absl import flags
 from ai_edge_torch.generative.examples.smollm import smollm
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config as export_cfg
@@ -38,7 +37,7 @@ def main(_):
       custom_loader=loader.maybe_get_custom_loader(
           checkpoint_path, flags.FLAGS.custom_checkpoint_loader
       ),
-      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
+      mask_cache_size=converter.get_mask_cache_size_from_flags(),
   )
 
   export_config = export_cfg.get_from_flags()
@@ -49,6 +48,7 @@ def main(_):
       output_path=flags.FLAGS.output_path,
       output_name_prefix=flags.FLAGS.output_name_prefix,
       prefill_seq_len=flags.FLAGS.prefill_seq_lens,
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
       quantize=flags.FLAGS.quantize,
       lora_ranks=flags.FLAGS.lora_ranks,
       export_config=export_config,
ai_edge_torch/generative/examples/smollm/convert_v2_to_tflite.py

@@ -37,7 +37,7 @@ def main(_):
       custom_loader=loader.maybe_get_custom_loader(
           checkpoint_path, flags.FLAGS.custom_checkpoint_loader
       ),
-      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
+      mask_cache_size=converter.get_mask_cache_size_from_flags(),
   )
 
   export_config = export_cfg.get_from_flags()
@@ -48,6 +48,7 @@ def main(_):
       output_path=flags.FLAGS.output_path,
       output_name_prefix=flags.FLAGS.output_name_prefix,
       prefill_seq_len=flags.FLAGS.prefill_seq_lens,
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
       quantize=flags.FLAGS.quantize,
       lora_ranks=flags.FLAGS.lora_ranks,
       export_config=export_config,
ai_edge_torch/generative/examples/smollm/smollm.py

@@ -29,16 +29,8 @@ class SmolLM(model_builder.DecoderOnlyModel):
   pass
 
 
-def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
-  """Returns the model config for a SmolLM 135M model.
-
-  Args:
-    kv_cache_max_len (int): The maximum sequence length of the KV cache. Default
-      is 1024.
-
-  Returns:
-    The model config for a SmolLM model.
-  """
+def get_model_config() -> cfg.ModelConfig:
+  """Returns the model config for a SmolLM 135M model."""
   attn_config = cfg.AttentionConfig(
       num_heads=9,
       head_dim=64,
@@ -63,15 +55,14 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       num_layers=30,
       max_seq_len=2048,
       embedding_dim=576,
-      kv_cache_max_len=kv_cache_max_len,
       block_configs=block_config,
       final_norm_config=norm_config,
   )
   return config
 
 
-def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
-  config = get_model_config(**kwargs)
+def get_fake_model_config() -> cfg.ModelConfig:
+  config = get_model_config()
   config.vocab_size = 128
   config.num_layers = 2
   # SmolLM has only one block config.
@@ -82,14 +73,15 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
 def build_model(
     checkpoint_path: str,
     custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
-    **kwargs
+    mask_cache_size: int = 0,
 ) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
-      config=get_model_config(**kwargs),
+      config=get_model_config(),
       tensor_names=TENSOR_NAMES,
       model_class=SmolLM,
       custom_loader=custom_loader,
+      mask_cache_size=mask_cache_size,
   )
 
 
@@ -98,23 +90,15 @@ class SmolLM2(model_builder.DecoderOnlyModel):
   pass
 
 
-def get_model_config_v2(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
-  """Returns the model config for a SmolLM2 135M model.
-
-  Args:
-    kv_cache_max_len (int): The maximum sequence length of the KV cache. Default
-      is 1024.
-
-  Returns:
-    The model config for a SmolLM2 model.
-  """
-  config = get_model_config(kv_cache_max_len)
+def get_model_config_v2() -> cfg.ModelConfig:
+  """Returns the model config for a SmolLM2 135M model."""
+  config = get_model_config()
   config.block_config(0).attn_config.rotary_base = 100000
   return config
 
 
-def get_fake_model_config_v2(**kwargs) -> cfg.ModelConfig:
-  config = get_model_config_v2(**kwargs)
+def get_fake_model_config_v2() -> cfg.ModelConfig:
+  config = get_model_config_v2()
   config.vocab_size = 128
   config.num_layers = 2
   # SmolLM2 has only one block config.
@@ -125,12 +109,13 @@ def get_fake_model_config_v2(**kwargs) -> cfg.ModelConfig:
 def build_model_v2(
     checkpoint_path: str,
     custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
-    **kwargs
+    mask_cache_size: int = 0,
 ) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
-      config=get_model_config_v2(**kwargs),
+      config=get_model_config_v2(),
       tensor_names=TENSOR_NAMES,
       model_class=SmolLM2,
       custom_loader=custom_loader,
+      mask_cache_size=mask_cache_size,
   )