PyPI - ai-edge-torch-nightly - Versions diffs - 0.5.0.dev20250515__py3-none-any.whl → 0.5.0.dev20250517__py3-none-any.whl - Mend

ai-edge-torch-nightly 0.5.0.dev20250515__py3-none-any.whl → 0.5.0.dev20250517__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

ai_edge_torch/generative/examples/qwen/qwen.py CHANGED Viewed

@@ -15,8 +15,10 @@
 """Example of building Qwen 2.5 models."""
+from typing import Callable, Dict
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
+import torch
 from torch import nn
 TENSOR_NAMES = model_builder.TENSOR_NAMES
@@ -51,9 +53,7 @@ def get_3b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       intermediate_size=11008,
   )
   norm_config = cfg.NormalizationConfig(
-      type=cfg.NormalizationType.RMS_NORM,
-      epsilon=1e-06,
-      enable_hlfb=True,
+      type=cfg.NormalizationType.RMS_NORM, epsilon=1e-06
   )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
@@ -69,7 +69,6 @@ def get_3b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       kv_cache_max_len=kv_cache_max_len,
       block_configs=block_config,
       final_norm_config=norm_config,
-      enable_hlfb=True,
   )
   return config
@@ -108,28 +107,43 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
   return config
-def build_3b_model(checkpoint_path: str, **kwargs) -> nn.Module:
+def build_3b_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs
+) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_3b_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
       model_class=Qwen,
+      custom_loader=custom_loader,
   )
-def build_1_5b_model(checkpoint_path: str, **kwargs) -> nn.Module:
+def build_1_5b_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs
+) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_1_5b_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
       model_class=Qwen,
+      custom_loader=custom_loader,
   )
-def build_0_5b_model(checkpoint_path: str, **kwargs) -> nn.Module:
+def build_0_5b_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs
+) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_0_5b_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
       model_class=Qwen,
+      custom_loader=custom_loader,
   )

ai_edge_torch/generative/examples/qwen_vl/convert_to_tflite.py CHANGED Viewed

@@ -19,6 +19,7 @@ from absl import app
 from ai_edge_torch.generative.examples.qwen_vl import qwen_vl
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
+from ai_edge_torch.generative.utilities import loader
 flags = converter.define_conversion_flags('qwen_vl')
@@ -35,8 +36,12 @@ _IMAGE_WIDTH = flags.DEFINE_integer(
 def main(_):
+  checkpoint_path = flags.FLAGS.checkpoint_path
   pytorch_model = qwen_vl.build_model(
-      flags.FLAGS.checkpoint_path,
+      checkpoint_path,
+      custom_loader=loader.maybe_get_custom_loader(
+          checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+      ),
       kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
       image_size=(_IMAGE_HEIGHT.value, _IMAGE_WIDTH.value),
   )

ai_edge_torch/generative/examples/qwen_vl/decoder.py CHANGED Viewed

@@ -97,8 +97,7 @@ def get_decoder_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       intermediate_size=11008,
   )
   norm_config = cfg.NormalizationConfig(
-      type=cfg.NormalizationType.RMS_NORM,
-      epsilon=1e-06,
+      type=cfg.NormalizationType.RMS_NORM, epsilon=1e-06
   )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
@@ -114,7 +113,6 @@ def get_decoder_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       kv_cache_max_len=kv_cache_max_len,
       block_configs=block_config,
       final_norm_config=norm_config,
-      enable_hlfb=True,
   )
   return config

ai_edge_torch/generative/examples/qwen_vl/image_encoder.py CHANGED Viewed

@@ -16,7 +16,7 @@
 """Example of building an image encoder of Qwen 2.5 VL model."""
 import dataclasses
-from typing import List, Optional, Tuple
+from typing import Callable, Dict, List, Optional, Tuple
 from ai_edge_torch.generative.layers import attention
 from ai_edge_torch.generative.layers import attention_utils
@@ -332,8 +332,7 @@ def get_image_encoder_config(image_size: Tuple[int, int]) -> QwenVLImageConfig:
       use_bias=True,
   )
   norm_config = cfg.NormalizationConfig(
-      type=cfg.NormalizationType.RMS_NORM,
-      epsilon=1e-6,
+      type=cfg.NormalizationType.RMS_NORM, epsilon=1e-6
   )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
@@ -359,7 +358,6 @@ def get_image_encoder_config(image_size: Tuple[int, int]) -> QwenVLImageConfig:
       window_size=112,
       spatial_merge_size=2,
       full_atten_block_indexes=[7, 15, 23, 31],
-      enable_hlfb=True,
   )
   return config
@@ -385,13 +383,21 @@ def build_image_encoder(
   return encoder
-def load_image_encoder(checkpoint_path: str, encoder: QwenVLImageEncoder):
-  loader = loading_utils.ModelLoader(checkpoint_path, TENSOR_NAMES)
+def load_image_encoder(
+    checkpoint_path: str,
+    encoder: QwenVLImageEncoder,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+):
+  loader = loading_utils.ModelLoader(
+      checkpoint_path, TENSOR_NAMES, custom_loader
+  )
   # Loose the strictness because only image encoder is being loaded.
   loader.load(encoder, strict=False)
   # Load merger weights.
-  merger_loader = loading_utils.ModelLoader(checkpoint_path, None)
+  merger_loader = loading_utils.ModelLoader(
+      checkpoint_path, None, custom_loader
+  )
   state = merger_loader.get_state()
   w1_state = dict()
   w1_state["weight"] = state.pop(f"{MERGER_TENSOR_NAMES.ff_up_proj}.weight")

ai_edge_torch/generative/examples/qwen_vl/qwen_vl.py CHANGED Viewed

@@ -16,7 +16,7 @@
 """Example of building a full-stack of Qwen 2.5 VL model."""
 import dataclasses
-from typing import List, Optional, Tuple
+from typing import Callable, Dict, List, Optional, Tuple
 from ai_edge_torch.generative.examples.qwen_vl import decoder
 from ai_edge_torch.generative.examples.qwen_vl import image_encoder
@@ -204,12 +204,20 @@ def get_fake_model_config(**kwargs) -> QwenVLConfig:
   )
-def build_model(checkpoint_path: str, **kwargs) -> QwenVL:
+def build_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs
+) -> QwenVL:
   config = get_model_config(**kwargs)
   model = QwenVL(config)
-  image_encoder.load_image_encoder(checkpoint_path, model.image_encoder)
+  image_encoder.load_image_encoder(
+      checkpoint_path, model.image_encoder, custom_loader
+  )
   # Load the parameters of decoder.
-  loader = loading_utils.ModelLoader(checkpoint_path, decoder.TENSOR_NAMES)
+  loader = loading_utils.ModelLoader(
+      checkpoint_path, decoder.TENSOR_NAMES, custom_loader
+  )
   loader.load(model.decoder, strict=False)
   model.eval()
   return model

ai_edge_torch/generative/examples/qwen_vl/verify.py CHANGED Viewed

@@ -22,6 +22,7 @@ from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.qwen_vl import qwen_vl
 from ai_edge_torch.generative.layers import kv_cache
+from ai_edge_torch.generative.utilities import transformers_verifier
 from ai_edge_torch.generative.utilities import verifier
 from PIL import Image
 import requests
@@ -33,10 +34,15 @@ _IMAGE_URL = flags.DEFINE_string(
     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true",
     "The image URI to encode.",
 )
-_PROMPTS = flags.DEFINE_string(
-    "prompts",
+_PROMPTS_WITH_IMAGE = flags.DEFINE_string(
+    "prompts_with_image",
     "<|vision_start|><|image_pad|><|vision_end|>Describe the image.<|im_end|>",
-    "The input prompts to generate answers.",
+    "The input prompts to generate answers with an image.",
+)
+_PROMPTS_TEXT_ONLY = flags.DEFINE_multi_string(
+    "prompts_text_only",
+    "What is the meaning of life?",
+    "The input prompts to generate answers only with text.",
 )
 _MAX_NEW_TOKENS = flags.DEFINE_integer(
     "max_new_tokens",
@@ -68,13 +74,29 @@ def main(_):
   reauthored_checkpoint = pathlib.Path(cached_config_file).parent
   logging.info("Building the reauthored model from: %s", reauthored_checkpoint)
   reauthored_model = qwen_vl.build_model(str(reauthored_checkpoint))
+  wrapped_reauthored_model = ReauthoredQwenVLWrapper(reauthored_model)
   logging.info("Loading the processor from: %s", checkpoint)
   processor = transformers.AutoProcessor.from_pretrained(checkpoint)
+  logging.info("Verifying with text-only prompts...")
+  verifier.verify_reauthored_model(
+      original_model=transformers_verifier.TransformersModelWrapper(
+          original_model
+      ),
+      reauthored_model=wrapped_reauthored_model,
+      tokenizer=verifier.TokenizerWrapper(processor.tokenizer),
+      generate_prompts=_PROMPTS_TEXT_ONLY.value,
+      max_new_tokens=_MAX_NEW_TOKENS.value,
+      atol=1e-04,
+  )
+  logging.info("Verifying with image input...")
   logging.info("Loading the image from: %s", _IMAGE_URL.value)
   image = Image.open(requests.get(_IMAGE_URL.value, stream=True).raw)
-  inputs = processor(text=_PROMPTS.value, images=image, return_tensors="pt")
+  inputs = processor(
+      text=_PROMPTS_WITH_IMAGE.value, images=image, return_tensors="pt"
+  )
   logging.info("Verifying the reauthored model with model.forward()...")
   logging.info("Forwarding the original model...")
@@ -87,7 +109,6 @@ def main(_):
   logging.info("outputs_original: %s", outputs_original)
   logging.info("Forwarding the reauthored model...")
-  wrapped_reauthored_model = ReauthoredQwenVLWrapper(reauthored_model)
   grid_thw = inputs["image_grid_thw"].tolist()
   config = reauthored_model.config.image_encoder_config.image_embedding
   reauthored_model.image_encoder.set_image_size(

ai_edge_torch/generative/examples/smollm/convert_to_tflite.py CHANGED Viewed

@@ -15,12 +15,12 @@
 """Example of converting SmolLM model to multi-signature tflite model."""
-import os
 from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.smollm import smollm
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config as export_cfg
+from ai_edge_torch.generative.utilities import loader
 flags = converter.define_conversion_flags('smollm')
@@ -32,8 +32,13 @@ _DECODE_BATCH_SIZE = flags.DEFINE_integer(
 def main(_):
+  checkpoint_path = flags.FLAGS.checkpoint_path
   pytorch_model = smollm.build_model(
-      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
+      checkpoint_path,
+      custom_loader=loader.maybe_get_custom_loader(
+          checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+      ),
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
   )
   export_config = export_cfg.get_from_flags()

ai_edge_torch/generative/examples/smollm/convert_v2_to_tflite.py CHANGED Viewed

@@ -19,6 +19,7 @@ from absl import app
 from ai_edge_torch.generative.examples.smollm import smollm
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config as export_cfg
+from ai_edge_torch.generative.utilities import loader
 flags = converter.define_conversion_flags('smollm2')
@@ -30,8 +31,13 @@ _DECODE_BATCH_SIZE = flags.DEFINE_integer(
 def main(_):
+  checkpoint_path = flags.FLAGS.checkpoint_path
   pytorch_model = smollm.build_model_v2(
-      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
+      checkpoint_path,
+      custom_loader=loader.maybe_get_custom_loader(
+          checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+      ),
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
   )
   export_config = export_cfg.get_from_flags()

ai_edge_torch/generative/examples/smollm/smollm.py CHANGED Viewed

@@ -15,8 +15,10 @@
 """Example of building a SmolLM model."""
+from typing import Callable, Dict
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
+import torch
 from torch import nn
 TENSOR_NAMES = model_builder.TENSOR_NAMES
@@ -49,9 +51,7 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       activation=cfg.ActivationConfig(cfg.ActivationType.SILU),
       intermediate_size=1536,
   )
-  norm_config = cfg.NormalizationConfig(
-      type=cfg.NormalizationType.RMS_NORM, enable_hlfb=True
-  )
+  norm_config = cfg.NormalizationConfig(type=cfg.NormalizationType.RMS_NORM)
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
       ff_config=ff_config,
@@ -66,7 +66,6 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       kv_cache_max_len=kv_cache_max_len,
       block_configs=block_config,
       final_norm_config=norm_config,
-      enable_hlfb=True,
   )
   return config
@@ -80,12 +79,17 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
   return config
-def build_model(checkpoint_path: str, **kwargs) -> nn.Module:
+def build_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs
+) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
       model_class=SmolLM,
+      custom_loader=custom_loader,
   )
@@ -118,10 +122,15 @@ def get_fake_model_config_v2(**kwargs) -> cfg.ModelConfig:
   return config
-def build_model_v2(checkpoint_path: str, **kwargs) -> nn.Module:
+def build_model_v2(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs
+) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_model_config_v2(**kwargs),
       tensor_names=TENSOR_NAMES,
       model_class=SmolLM2,
+      custom_loader=custom_loader,
   )

ai_edge_torch/generative/examples/smollm/verify.py CHANGED Viewed

@@ -43,8 +43,8 @@ _MODEL_VERSION = flags.DEFINE_enum(
     "The version of SmolLm to verify.",
 )
 _CHECKPOINT = {
-    "v1": "HuggingFaceTB/SmolLM-135M",
-    "v2": "HuggingFaceTB/SmolLM2-135M",
+    "v1": "HuggingFaceTB/SmolLM-135M-Instruct",
+    "v2": "HuggingFaceTB/SmolLM2-135M-Instruct",
 }
 _BUILDER = {

ai_edge_torch/generative/examples/stable_diffusion/clip.py CHANGED Viewed

@@ -57,7 +57,8 @@ class CLIP(nn.Module):
     super().__init__()
     self.tok_embedding = nn.Embedding(config.vocab_size, config.embedding_dim)
     self.tok_embedding_position = nn.Parameter(
-        torch.zeros((config.max_seq_len, config.embedding_dim))
+        torch.zeros((config.max_seq_len, config.embedding_dim)),
+        requires_grad=False,
     )
     self.config = config
@@ -112,7 +113,9 @@ def get_model_config() -> cfg.ModelConfig:
       use_bias=True,
   )
-  norm_config = cfg.NormalizationConfig(type=cfg.NormalizationType.LAYER_NORM)
+  norm_config = cfg.NormalizationConfig(
+      type=cfg.NormalizationType.LAYER_NORM, enable_hlfb=False
+  )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
@@ -128,7 +131,6 @@ def get_model_config() -> cfg.ModelConfig:
       embedding_dim=embedding_dim,
       block_configs=block_config,
       final_norm_config=norm_config,
-      enable_hlfb=True,
   )
   return config
@@ -163,7 +165,9 @@ def get_fake_model_config() -> cfg.ModelConfig:
       use_bias=True,
   )
-  norm_config = cfg.NormalizationConfig(type=cfg.NormalizationType.LAYER_NORM)
+  norm_config = cfg.NormalizationConfig(
+      type=cfg.NormalizationType.LAYER_NORM, enable_hlfb=False
+  )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
@@ -179,7 +183,6 @@ def get_fake_model_config() -> cfg.ModelConfig:
       embedding_dim=embedding_dim,
       block_configs=block_config,
       final_norm_config=norm_config,
-      enable_hlfb=True,
   )
   return config

ai_edge_torch/generative/examples/t5/t5.py CHANGED Viewed

@@ -393,8 +393,7 @@ def get_model_config_t5() -> cfg.ModelConfig:
   )
   # T5 Confirmed as RMS Norm and eps = 1e-6 TJA.
   norm_config = cfg.NormalizationConfig(
-      type=cfg.NormalizationType.RMS_NORM,
-      epsilon=1e-6,
+      type=cfg.NormalizationType.RMS_NORM, epsilon=1e-6, enable_hlfb=False
   )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
@@ -411,7 +410,6 @@ def get_model_config_t5() -> cfg.ModelConfig:
       block_configs=block_config,
       final_norm_config=norm_config,
       lm_head_use_bias=False,
-      enable_hlfb=True,
   )
   return config

ai_edge_torch/generative/examples/test_models/toy_model.py CHANGED Viewed

@@ -138,7 +138,9 @@ def get_model_config() -> cfg.ModelConfig:
       activation=cfg.ActivationConfig(cfg.ActivationType.SILU),
       intermediate_size=256,
   )
-  norm_config = cfg.NormalizationConfig(type=cfg.NormalizationType.RMS_NORM)
+  norm_config = cfg.NormalizationConfig(
+      type=cfg.NormalizationType.RMS_NORM, enable_hlfb=False
+  )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
       ff_config=ff_config,
@@ -152,5 +154,6 @@ def get_model_config() -> cfg.ModelConfig:
       embedding_dim=128,
       block_configs=block_config,
       final_norm_config=norm_config,
+      enable_hlfb=False,
   )
   return config

ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py CHANGED Viewed

@@ -108,7 +108,9 @@ def get_model_config() -> cfg.ModelConfig:
       activation=cfg.ActivationConfig(cfg.ActivationType.SILU),
       intermediate_size=256,
   )
-  norm_config = cfg.NormalizationConfig(type=cfg.NormalizationType.RMS_NORM)
+  norm_config = cfg.NormalizationConfig(
+      type=cfg.NormalizationType.RMS_NORM, enable_hlfb=False
+  )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
       ff_config=ff_config,
@@ -122,7 +124,6 @@ def get_model_config() -> cfg.ModelConfig:
       embedding_dim=128,
       block_configs=block_config,
       final_norm_config=norm_config,
-      enable_hlfb=True,
   )
   return config

ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py CHANGED Viewed

@@ -19,13 +19,19 @@ from absl import app
 from ai_edge_torch.generative.examples.tiny_llama import tiny_llama
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
+from ai_edge_torch.generative.utilities import loader
 flags = converter.define_conversion_flags("tiny_llama")
 def main(_):
+  checkpoint_path = flags.FLAGS.checkpoint_path
   pytorch_model = tiny_llama.build_model(
-      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
+      checkpoint_path,
+      custom_loader=loader.maybe_get_custom_loader(
+          checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+      ),
+      kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
   )
   converter.convert_to_tflite(
       pytorch_model,

ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py CHANGED Viewed

@@ -15,8 +15,10 @@
 """Example of building a TinyLlama model."""
+from typing import Callable, Dict
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
+import torch
 from torch import nn
 TENSOR_NAMES = model_builder.TENSOR_NAMES_WITH_SEPARATE_LM_HEAD
@@ -49,9 +51,7 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       activation=cfg.ActivationConfig(cfg.ActivationType.SILU),
       intermediate_size=5632,
   )
-  norm_config = cfg.NormalizationConfig(
-      type=cfg.NormalizationType.RMS_NORM, enable_hlfb=True
-  )
+  norm_config = cfg.NormalizationConfig(type=cfg.NormalizationType.RMS_NORM)
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
       ff_config=ff_config,
@@ -67,7 +67,6 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       block_configs=block_config,
       final_norm_config=norm_config,
       lm_head_share_weight_with_embedding=False,
-      enable_hlfb=True,
   )
   return config
@@ -81,10 +80,15 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
   return config
-def build_model(checkpoint_path: str, **kwargs) -> nn.Module:
+def build_model(
+    checkpoint_path: str,
+    custom_loader: Callable[[str], Dict[str, torch.Tensor]] = None,
+    **kwargs
+) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
       model_class=TinyLlama,
+      custom_loader=custom_loader,
   )

ai_edge_torch/generative/layers/model_config.py CHANGED Viewed

@@ -66,7 +66,7 @@ class NormalizationConfig:
   """Normalizater parameters."""
   type: NormalizationType = NormalizationType.NONE
-  enable_hlfb: bool = False
+  enable_hlfb: bool = True
   epsilon: float = 1e-5
   zero_centered: bool = False
   # Number of groups used in group normalization.
@@ -218,7 +218,7 @@ class ModelConfig:
   lm_head_share_weight_with_embedding: bool = True
   # Whether to turn on high-level function boundary.
-  enable_hlfb: bool = False
+  enable_hlfb: bool = True
   # The maximum sequence length of the KV cache. Should not exceed max_seq_len.
   kv_cache_max_len: int = 0

ai_edge_torch/generative/utilities/converter.py CHANGED Viewed

@@ -100,7 +100,8 @@ def define_conversion_flags(
   flags.DEFINE_string(
       'quantize',
       'dynamic_int8',
-      'How the model should be quantized.',
+      'How the model should be quantized. Set to "none" to disable'
+      ' quantization. See `QuantizationName` for supported quantization types.',
   )
   flags.DEFINE_multi_integer(
       'lora_ranks',
@@ -119,6 +120,12 @@ def define_conversion_flags(
       default_transpose_kv_cache,
       'If true, the model will be converted with transposed KV cache.',
   )
+  flags.DEFINE_bool(
+      'custom_checkpoint_loader',
+      False,
+      'If true, the conversion script will use a custom checkpoint loader which'
+      ' will read a checkpoint from a remote source.',
+  )
   return flags
@@ -397,13 +404,19 @@ def _export_helper(
       )
       if prefill_pixel_values is not None:
-        sample_kwargs['tokens'] = prefill_tokens_list_with_pixel[i]
-        sample_kwargs['input_pos'] = prefill_input_pos_list_with_pixel[i]
-        sample_kwargs['pixel_values'] = prefill_pixel_values
+        sample_pixel_kwargs = {
+            'tokens': prefill_tokens_list_with_pixel[i],
+            'input_pos': prefill_input_pos_list_with_pixel[i],
+            'kv_cache': prefill_kv,
+            'pixel_values': prefill_pixel_values,
+        }
+        # mask should be built internally when pixel values are passed.
+        if lora is not None:
+          sample_pixel_kwargs['lora'] = lora
         converter.add_signature(
             prefill_signature_name + '_pixel',
             mod,
-            sample_kwargs=sample_kwargs,
+            sample_kwargs=sample_pixel_kwargs,
         )
     sample_kwargs = {

ai_edge_torch/generative/utilities/loader.py CHANGED Viewed

@@ -49,6 +49,25 @@ def get_custom_loader(
   raise ValueError(f"Unsupported checkpoint format: {checkpoint_path}")
+def maybe_get_custom_loader(
+    checkpoint_path: str,
+    use_custom_loader: bool = False,
+) -> Callable[[str], Dict[str, torch.Tensor]] | None:
+  """Returns a custom loader for the given checkpoint path.
+  If use_custom_loader is True, the function will return a custom loader.
+  Otherwise, it will return None.
+  Args:
+    checkpoint_path (string): The path to the checkpoint.
+    use_custom_loader (bool): Whether to use a custom loader.
+  Returns:
+    Callable[[str], Dict[str, torch.Tensor]] | None: The custom loader.
+  """
+  return get_custom_loader(checkpoint_path) if use_custom_loader else None
 def load_safetensors(full_path: str):
   """Loads safetensors into a single state dictionary.

ai-edge-torch-nightly 0.5.0.dev20250515__py3-none-any.whl → 0.5.0.dev20250517__py3-none-any.whl

ai-edge-torch-nightly 0.5.0.dev20250515__py3-none-any.whl → 0.5.0.dev20250517__py3-none-any.whl