ai-edge-torch-nightly 0.3.0.dev20240924__py3-none-any.whl → 0.3.0.dev20240928__py3-none-any.whl
- ai_edge_torch/generative/examples/gemma/gemma1.py +2 -6
- ai_edge_torch/generative/examples/gemma/gemma2.py +2 -10
- ai_edge_torch/generative/examples/gemma/verify_gemma1.py +3 -2
- ai_edge_torch/generative/examples/gemma/verify_gemma2.py +3 -2
- ai_edge_torch/generative/examples/gemma/verify_util.py +15 -25
- ai_edge_torch/generative/examples/llama/__init__.py +14 -0
- ai_edge_torch/generative/examples/llama/convert_3b_to_tflite.py +68 -0
- ai_edge_torch/generative/examples/llama/convert_to_tflite.py +68 -0
- ai_edge_torch/generative/examples/llama/llama.py +204 -0
- ai_edge_torch/generative/examples/llama/verify.py +73 -0
- ai_edge_torch/generative/examples/llama/verify_3b.py +73 -0
- ai_edge_torch/generative/examples/openelm/openelm.py +2 -6
- ai_edge_torch/generative/examples/openelm/verify.py +19 -11
- ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py +68 -0
- ai_edge_torch/generative/examples/phi/phi2.py +2 -6
- ai_edge_torch/generative/examples/phi/phi3.py +279 -0
- ai_edge_torch/generative/examples/phi/verify.py +13 -13
- ai_edge_torch/generative/examples/phi/verify_phi3.py +69 -0
- ai_edge_torch/generative/examples/smollm/smollm.py +1 -0
- ai_edge_torch/generative/examples/smollm/verify.py +19 -9
- ai_edge_torch/generative/examples/stable_diffusion/clip.py +54 -1
- ai_edge_torch/generative/examples/stable_diffusion/decoder.py +58 -0
- ai_edge_torch/generative/examples/stable_diffusion/diffusion.py +71 -1
- ai_edge_torch/generative/examples/t5/t5.py +0 -2
- ai_edge_torch/generative/examples/test_models/convert_toy_model.py +105 -0
- ai_edge_torch/generative/examples/test_models/toy_model.py +7 -41
- ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py +5 -61
- ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py +2 -6
- ai_edge_torch/generative/examples/tiny_llama/verify.py +20 -10
- ai_edge_torch/generative/layers/model_config.py +2 -0
- ai_edge_torch/generative/layers/normalization.py +2 -2
- ai_edge_torch/generative/layers/unet/blocks_2d.py +2 -2
- ai_edge_torch/generative/test/test_model_conversion_large.py +129 -0
- ai_edge_torch/generative/utilities/transformers_verifier.py +42 -0
- ai_edge_torch/generative/utilities/verifier.py +130 -114
- ai_edge_torch/version.py +1 -1
- {ai_edge_torch_nightly-0.3.0.dev20240924.dist-info → ai_edge_torch_nightly-0.3.0.dev20240928.dist-info}/METADATA +1 -1
- {ai_edge_torch_nightly-0.3.0.dev20240924.dist-info → ai_edge_torch_nightly-0.3.0.dev20240928.dist-info}/RECORD +41 -30
- {ai_edge_torch_nightly-0.3.0.dev20240924.dist-info → ai_edge_torch_nightly-0.3.0.dev20240928.dist-info}/LICENSE +0 -0
- {ai_edge_torch_nightly-0.3.0.dev20240924.dist-info → ai_edge_torch_nightly-0.3.0.dev20240928.dist-info}/WHEEL +0 -0
- {ai_edge_torch_nightly-0.3.0.dev20240924.dist-info → ai_edge_torch_nightly-0.3.0.dev20240928.dist-info}/top_level.txt +0 -0

ai_edge_torch/generative/examples/smollm/verify.py
@@ -15,43 +15,53 @@
 
 """Verifies the reauthored SmolLM-135M model."""
 
+import logging
 import pathlib
 
 from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.smollm import smollm
+from ai_edge_torch.generative.utilities import transformers_verifier
 from ai_edge_torch.generative.utilities import verifier
 import transformers
 
+
 _PROMPTS = flags.DEFINE_multi_string(
     "prompts",
     "What is the meaning of life?",
     "The input prompts to generate answers.",
 )
+_MAX_NEW_TOKENS = flags.DEFINE_integer(
+    "max_new_tokens",
+    30,
+    "The maximum size of the generated tokens.",
+)
 
 
 def main(_):
   checkpoint = "HuggingFaceTB/SmolLM-135M"
-
-
-
-  )
+  logging.info("Loading the original model from: %s", checkpoint)
+  original_model = transformers.AutoModelForCausalLM.from_pretrained(checkpoint)
+
   # Locate the cached dir.
   cached_config_file = transformers.utils.cached_file(
       checkpoint, transformers.utils.CONFIG_NAME
   )
   reauthored_checkpoint = pathlib.Path(cached_config_file).parent
-
+  logging.info("Building the reauthored model from: %s", reauthored_checkpoint)
   reauthored_model = smollm.build_model(reauthored_checkpoint)
 
-
+  logging.info("Loading the tokenizer from: %s", checkpoint)
   tokenizer = transformers.AutoTokenizer.from_pretrained(checkpoint)
 
   verifier.verify_reauthored_model(
-      original_model=
-
-
+      original_model=transformers_verifier.TransformersModelWrapper(
+          original_model
+      ),
+      reauthored_model=verifier.ReauthoredModelWrapper(reauthored_model),
+      tokenizer=verifier.TokenizerWrapper(tokenizer),
       generate_prompts=_PROMPTS.value,
+      max_new_tokens=_MAX_NEW_TOKENS.value,
      atol=1e-04,
   )
 

ai_edge_torch/generative/examples/stable_diffusion/clip.py
@@ -48,7 +48,7 @@ TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
 
 
 class CLIP(nn.Module):
-  """CLIP text encoder
+  """CLIP text encoder.
 
   For details, see https://arxiv.org/abs/2103.00020
   """
@@ -86,6 +86,7 @@ class CLIP(nn.Module):
 
 
 def get_model_config() -> cfg.ModelConfig:
+  """Get configs for the CLIP of Stable Diffusion v1.5."""
   max_seq_len = 77
   vocab_size = 49408
   num_layers = 12
@@ -97,6 +98,58 @@ def get_model_config() -> cfg.ModelConfig:
       num_heads=num_heads,
       head_dim=embedding_dim // num_heads,
       num_query_groups=num_query_groups,
+      rotary_base=0,
+      rotary_percentage=0.0,
+      qkv_use_bias=True,
+      qkv_transpose_before_split=True,
+      qkv_fused_interleaved=False,
+      output_proj_use_bias=True,
+      enable_kv_cache=False,
+  )
+
+  ff_config = cfg.FeedForwardConfig(
+      type=cfg.FeedForwardType.SEQUENTIAL,
+      activation=cfg.ActivationConfig(cfg.ActivationType.GELU_QUICK),
+      intermediate_size=embedding_dim * 4,
+      use_bias=True,
+  )
+
+  norm_config = cfg.NormalizationConfig(type=cfg.NormalizationType.LAYER_NORM)
+
+  block_config = cfg.TransformerBlockConfig(
+      attn_config=attn_config,
+      ff_config=ff_config,
+      pre_attention_norm_config=norm_config,
+      post_attention_norm_config=norm_config,
+  )
+
+  config = cfg.ModelConfig(
+      vocab_size=vocab_size,
+      num_layers=num_layers,
+      max_seq_len=max_seq_len,
+      embedding_dim=embedding_dim,
+      block_configs=block_config,
+      final_norm_config=norm_config,
+      enable_hlfb=True,
+  )
+
+  return config
+
+
+def get_fake_model_config() -> cfg.ModelConfig:
+  """Get fake configs for the CLIP of Stable Diffusion v1.5 for testing."""
+  max_seq_len = 6
+  vocab_size = 100
+  num_layers = 2
+  num_heads = 12
+  num_query_groups = 12
+  embedding_dim = 24
+
+  attn_config = cfg.AttentionConfig(
+      num_heads=num_heads,
+      head_dim=embedding_dim // num_heads,
+      num_query_groups=num_query_groups,
+      rotary_base=0,
       rotary_percentage=0.0,
       qkv_use_bias=True,
       qkv_transpose_before_split=True,
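
The new `get_fake_model_config()` pairs with the conversion tests added in this release (`test_model_conversion_large.py` in the file list above). Below is a minimal sketch of how such a test might exercise it; the assumption that `CLIP` is constructed directly from a `cfg.ModelConfig` and that its forward takes a batch of token ids is inferred from the class shown in this file, not stated in the diff.

```python
import torch
from ai_edge_torch.generative.examples.stable_diffusion import clip

config = clip.get_fake_model_config()  # tiny config: 2 layers, 24-dim embeddings
model = clip.CLIP(config)              # assumption: CLIP(config) as defined above
tokens = torch.zeros((1, config.max_seq_len), dtype=torch.long)
embeddings = model(tokens)             # assumption: forward(token_ids) -> embeddings
print(embeddings.shape)
```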

ai_edge_torch/generative/examples/stable_diffusion/decoder.py
@@ -295,6 +295,64 @@ def get_model_config() -> unet_cfg.AutoEncoderConfig:
           enable_kv_cache=False,
           qkv_transpose_before_split=True,
           qkv_fused_interleaved=False,
+          rotary_base=0,
+          rotary_percentage=0.0,
+      ),
+      enable_hlfb=False,
+  )
+
+  mid_block_config = unet_cfg.MidBlock2DConfig(
+      in_channels=block_out_channels[-1],
+      normalization_config=norm_config,
+      activation_config=layers_cfg.ActivationConfig(
+          layers_cfg.ActivationType.SILU
+      ),
+      num_layers=1,
+      attention_block_config=att_config,
+  )
+
+  config = unet_cfg.AutoEncoderConfig(
+      in_channels=in_channels,
+      latent_channels=latent_channels,
+      out_channels=out_channels,
+      activation_config=layers_cfg.ActivationConfig(
+          layers_cfg.ActivationType.SILU
+      ),
+      block_out_channels=block_out_channels,
+      scaling_factor=scaling_factor,
+      layers_per_block=layers_per_block,
+      normalization_config=norm_config,
+      mid_block_config=mid_block_config,
+  )
+  return config
+
+
+def get_fake_model_config() -> unet_cfg.AutoEncoderConfig:
+  """Get fake configs for the Decoder of Stable Diffusion v1.5 for testing."""
+  in_channels = 3
+  latent_channels = 4
+  out_channels = 3
+  block_out_channels = [2, 4]
+  scaling_factor = 0.18215
+  layers_per_block = 2
+
+  norm_config = layers_cfg.NormalizationConfig(
+      layers_cfg.NormalizationType.GROUP_NORM, group_num=2
+  )
+
+  att_config = unet_cfg.AttentionBlock2DConfig(
+      dim=block_out_channels[-1],
+      normalization_config=norm_config,
+      attention_config=layers_cfg.AttentionConfig(
+          num_heads=1,
+          head_dim=block_out_channels[-1],
+          num_query_groups=1,
+          qkv_use_bias=True,
+          output_proj_use_bias=True,
+          enable_kv_cache=False,
+          qkv_transpose_before_split=True,
+          qkv_fused_interleaved=False,
+          rotary_base=0,
           rotary_percentage=0.0,
       ),
       enable_hlfb=False,
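
As with CLIP, the Decoder gains a tiny test configuration (2 output-channel levels, 4 latent channels). A sketch of the assumed test usage follows; the `Decoder` class name, its constructor argument, and its forward signature are inferred from the surrounding module rather than shown in this hunk.

```python
import torch
from ai_edge_torch.generative.examples.stable_diffusion import decoder

config = decoder.get_fake_model_config()
model = decoder.Decoder(config)            # assumption: Decoder(config)
latents = torch.randn(1, config.latent_channels, 8, 8)
image = model(latents)                     # assumption: forward(latents) -> image tensor
print(image.shape)
```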

ai_edge_torch/generative/examples/stable_diffusion/diffusion.py
@@ -199,6 +199,7 @@ def build_attention_config(
     num_heads,
     dim,
     num_query_groups,
+    rotary_base=0,
     rotary_percentage=0.0,
     qkv_transpose_before_split=True,
     qkv_use_bias=False,
@@ -211,6 +212,7 @@ def build_attention_config(
       num_heads=num_heads,
       head_dim=dim // num_heads,
       num_query_groups=num_query_groups,
+      rotary_base=rotary_base,
       rotary_percentage=rotary_percentage,
       qkv_transpose_before_split=qkv_transpose_before_split,
       qkv_use_bias=qkv_use_bias,
@@ -603,7 +605,7 @@ def get_model_config(batch_size: int) -> unet_cfg.DiffusionModelConfig:
   # Transformer configs.
   transformer_num_attention_heads = 8
   transformer_batch_size = batch_size
-  transformer_cross_attention_dim = 768  # Embedding
+  transformer_cross_attention_dim = 768  # Embedding from CLIP model
   transformer_pre_conv_norm_config = layers_cfg.NormalizationConfig(
       layers_cfg.NormalizationType.GROUP_NORM, epsilon=1e-6, group_num=32
   )
@@ -645,3 +647,71 @@ def get_model_config(batch_size: int) -> unet_cfg.DiffusionModelConfig:
       final_norm_config=final_norm_config,
       final_activation_type=final_activation_type,
   )
+
+
+def get_fake_model_config(batch_size: int) -> unet_cfg.DiffusionModelConfig:
+  """Get fake configs for the Diffusion model of Stable Diffusion v1.5 for testing.
+
+  Args:
+    batch_size (int): the batch size of input.
+
+  Retruns:
+    The configuration of diffusion model of Stable Diffusion v1.5.
+  """
+  in_channels = 4
+  out_channels = 4
+  block_out_channels = [2, 4, 8, 8]
+  layers_per_block = 1
+  downsample_padding = 1
+
+  # Residual configs.
+  residual_norm_config = layers_cfg.NormalizationConfig(
+      layers_cfg.NormalizationType.GROUP_NORM, group_num=2
+  )
+  residual_activation_type = layers_cfg.ActivationType.SILU
+
+  # Transformer configs.
+  transformer_num_attention_heads = 1
+  transformer_batch_size = batch_size
+  transformer_cross_attention_dim = 4  # Embedding from CLIP model
+  transformer_pre_conv_norm_config = layers_cfg.NormalizationConfig(
+      layers_cfg.NormalizationType.GROUP_NORM, epsilon=1e-6, group_num=2
+  )
+  transformer_norm_config = layers_cfg.NormalizationConfig(
+      layers_cfg.NormalizationType.LAYER_NORM
+  )
+  transformer_ff_activation_type = layers_cfg.ActivationType.GE_GLU
+
+  # Time embedding configs.
+  time_embedding_dim = 2
+  time_embedding_blocks_dim = 4
+
+  # Mid block configs.
+  mid_block_layers = 1
+
+  # Finaly layer configs.
+  final_norm_config = layers_cfg.NormalizationConfig(
+      layers_cfg.NormalizationType.GROUP_NORM, group_num=2
+  )
+  final_activation_type = layers_cfg.ActivationType.SILU
+
+  return unet_cfg.DiffusionModelConfig(
+      in_channels=in_channels,
+      out_channels=out_channels,
+      block_out_channels=block_out_channels,
+      layers_per_block=layers_per_block,
+      downsample_padding=downsample_padding,
+      residual_norm_config=residual_norm_config,
+      residual_activation_type=residual_activation_type,
+      transformer_batch_size=transformer_batch_size,
+      transformer_num_attention_heads=transformer_num_attention_heads,
+      transformer_cross_attention_dim=transformer_cross_attention_dim,
+      transformer_pre_conv_norm_config=transformer_pre_conv_norm_config,
+      transformer_norm_config=transformer_norm_config,
+      transformer_ff_activation_type=transformer_ff_activation_type,
+      mid_block_layers=mid_block_layers,
+      time_embedding_dim=time_embedding_dim,
+      time_embedding_blocks_dim=time_embedding_blocks_dim,
+      final_norm_config=final_norm_config,
+      final_activation_type=final_activation_type,
+  )
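
The diffusion UNet follows the same two themes as the other files: `build_attention_config` now threads a `rotary_base` value through to `cfg.AttentionConfig`, and a fake config enables small, fast conversion tests. A sketch of the assumed test usage; the `Diffusion` class name and its single-config constructor are taken from the surrounding module, not from this diff.

```python
from ai_edge_torch.generative.examples.stable_diffusion import diffusion

config = diffusion.get_fake_model_config(batch_size=2)
model = diffusion.Diffusion(config)   # assumption: Diffusion(config)
num_params = sum(p.numel() for p in model.parameters())
print(f"fake diffusion model has {num_params} parameters")
```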

ai_edge_torch/generative/examples/test_models/convert_toy_model.py (new file)
@@ -0,0 +1,105 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+# A toy example which has a single-layer transformer block.
+from absl import app
+import ai_edge_torch
+from ai_edge_torch import lowertools
+from ai_edge_torch.generative.examples.test_models import toy_model
+from ai_edge_torch.generative.examples.test_models import toy_model_with_kv_cache
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
+import torch
+
+KV_CACHE_MAX_LEN = 100
+
+
+def convert_toy_model(_) -> None:
+  """Converts a toy model to tflite."""
+  model = toy_model.ToySingleLayerModel(toy_model.get_model_config())
+  idx = torch.unsqueeze(torch.arange(0, KV_CACHE_MAX_LEN), 0)
+  input_pos = torch.arange(0, KV_CACHE_MAX_LEN)
+  print('running an inference')
+  print(
+      model.forward(
+          idx,
+          input_pos,
+      )
+  )
+
+  # Convert model to tflite.
+  print('converting model to tflite')
+  edge_model = ai_edge_torch.convert(
+      model,
+      (
+          idx,
+          input_pos,
+      ),
+  )
+  edge_model.export('/tmp/toy_model.tflite')
+
+
+def _export_stablehlo_mlir(model, args):
+  ep = torch.export.export(model, args)
+  return lowertools.exported_program_to_mlir_text(ep)
+
+
+def convert_toy_model_with_kv_cache(_) -> None:
+  """Converts a toy model with kv cache to tflite."""
+  dump_mlir = False
+
+  config = toy_model_with_kv_cache.get_model_config()
+  model = toy_model_with_kv_cache.ToyModelWithKVCache(config)
+  model.eval()
+  print('running an inference')
+  kv = kv_utils.KVCache.from_model_config(config)
+
+  tokens, input_pos = toy_model_with_kv_cache.get_sample_prefill_inputs()
+  decode_token, decode_input_pos = (
+      toy_model_with_kv_cache.get_sample_decode_inputs()
+  )
+  print(model.forward(tokens, input_pos, kv))
+
+  if dump_mlir:
+    mlir_text = _export_stablehlo_mlir(model, (tokens, input_pos, kv))
+    with open('/tmp/toy_model_with_external_kv.stablehlo.mlir', 'w') as f:
+      f.write(mlir_text)
+
+  # Convert model to tflite with 2 signatures (prefill + decode).
+  print('converting toy model to tflite with 2 signatures (prefill + decode)')
+  edge_model = (
+      ai_edge_torch.signature(
+          'prefill',
+          model,
+          sample_kwargs={
+              'tokens': tokens,
+              'input_pos': input_pos,
+              'kv_cache': kv,
+          },
+      )
+      .signature(
+          'decode',
+          model,
+          sample_kwargs={
+              'tokens': decode_token,
+              'input_pos': decode_input_pos,
+              'kv_cache': kv,
+          },
+      )
+      .convert()
+  )
+  edge_model.export('/tmp/toy_external_kv_cache.tflite')
+
+
+if __name__ == '__main__':
+  app.run(convert_toy_model)
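
This new script consolidates the runner code removed from toy_model.py and toy_model_with_kv_cache.py (see the removals below). As written, `app.run(convert_toy_model)` only exports the plain toy model; a sketch of swapping the entry point to export the external-KV-cache variant instead (not part of the diff, both functions are defined in the file above):

```python
# Export the toy model with external KV cache instead of the plain toy model.
if __name__ == '__main__':
  app.run(convert_toy_model_with_kv_cache)
```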

ai_edge_torch/generative/examples/test_models/toy_model.py
@@ -15,13 +15,12 @@
 # A toy example which has a single-layer transformer block.
 from typing import Tuple
 
-import
+from ai_edge_torch.generative.layers import builder
 from ai_edge_torch.generative.layers.attention import TransformerBlock
 import ai_edge_torch.generative.layers.attention_utils as attn_utils
-import ai_edge_torch.generative.layers.builder as builder
 import ai_edge_torch.generative.layers.model_config as cfg
 import torch
-
+from torch import nn
 
 RoPECache = Tuple[torch.Tensor, torch.Tensor]
 KV_CACHE_MAX_LEN = 100
@@ -45,13 +44,10 @@ class ToySingleLayerModel(torch.nn.Module):
     self.rope_cache = attn_utils.build_rope_cache(
         size=config.max_seq_len,
         dim=int(attn_config.rotary_percentage * attn_config.head_dim),
-        base=
-        condense_ratio=1,
-        dtype=torch.float32,
-        device=torch.device('cpu'),
+        base=attn_config.rotary_base,
     )
     self.mask_cache = attn_utils.build_causal_mask_cache(
-        size=config.max_seq_len,
+        size=config.max_seq_len,
     )
     self.config = config
 
@@ -94,13 +90,10 @@ class ToySingleLayerModelWeightSharing(torch.nn.Module):
     self.rope_cache = attn_utils.build_rope_cache(
         size=config.max_seq_len,
         dim=int(attn_config.rotary_percentage * attn_config.head_dim),
-        base=
-        condense_ratio=1,
-        dtype=torch.float32,
-        device=torch.device('cpu'),
+        base=attn_config.rotary_base,
     )
     self.mask_cache = attn_utils.build_causal_mask_cache(
-        size=config.max_seq_len,
+        size=config.max_seq_len,
     )
     self.config = config
 
@@ -125,6 +118,7 @@ def get_model_config() -> cfg.ModelConfig:
       num_heads=32,
       head_dim=4,
       num_query_groups=4,
+      rotary_base=10000,
      rotary_percentage=1.0,
       enable_kv_cache=False,
   )
@@ -149,31 +143,3 @@ def get_model_config() -> cfg.ModelConfig:
       final_norm_config=norm_config,
   )
   return config
-
-
-def define_and_run() -> None:
-  model = ToySingleLayerModel(get_model_config())
-  idx = torch.unsqueeze(torch.arange(0, KV_CACHE_MAX_LEN), 0)
-  input_pos = torch.arange(0, KV_CACHE_MAX_LEN)
-  print('running an inference')
-  print(
-      model.forward(
-          idx,
-          input_pos,
-      )
-  )
-
-  # Convert model to tflite.
-  print('converting model to tflite')
-  edge_model = ai_edge_torch.convert(
-      model,
-      (
-          idx,
-          input_pos,
-      ),
-  )
-  edge_model.export('/tmp/toy_model.tflite')
-
-
-if __name__ == '__main__':
-  define_and_run()
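
Across these model files the recurring change is the same: the RoPE base frequency is no longer hard-coded at the `build_rope_cache` call site but carried on the attention config via the new `rotary_base` field (also added to `model_config.py`, per the file list). A minimal sketch of the resulting pattern, using only names visible in the diff; the concrete values are illustrative:

```python
import ai_edge_torch.generative.layers.attention_utils as attn_utils
import ai_edge_torch.generative.layers.model_config as cfg

attn_config = cfg.AttentionConfig(
    num_heads=32,
    head_dim=4,
    num_query_groups=4,
    rotary_base=10000,   # new field: the RoPE base frequency lives on the config
    rotary_percentage=1.0,
    enable_kv_cache=False,
)
# The rope cache is built from the config instead of a hard-coded base value.
rope_cache = attn_utils.build_rope_cache(
    size=100,
    dim=int(attn_config.rotary_percentage * attn_config.head_dim),
    base=attn_config.rotary_base,
)
```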

ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py
@@ -17,15 +17,14 @@
 
 from typing import Tuple
 
-import
-from ai_edge_torch import lowertools
+from absl import app
 from ai_edge_torch.generative.layers import attention
 from ai_edge_torch.generative.layers import builder
 from ai_edge_torch.generative.layers import kv_cache as kv_utils
 import ai_edge_torch.generative.layers.attention_utils as attn_utils
 import ai_edge_torch.generative.layers.model_config as cfg
 import torch
-
+from torch import nn
 
 RoPECache = Tuple[torch.Tensor, torch.Tensor]
 
@@ -52,13 +51,10 @@ class ToyModelWithKVCache(torch.nn.Module):
     self.rope_cache = attn_utils.build_rope_cache(
         size=config.max_seq_len,
         dim=int(attn_config.rotary_percentage * attn_config.head_dim),
-        base=
-        condense_ratio=1,
-        dtype=torch.float32,
-        device=torch.device('cpu'),
+        base=attn_config.rotary_base,
     )
     self.mask_cache = attn_utils.build_causal_mask_cache(
-        size=config.max_seq_len,
+        size=config.max_seq_len,
     )
     self.config = config
 
@@ -87,16 +83,12 @@ class ToyModelWithKVCache(torch.nn.Module):
     return {'logits': self.lm_head(x), 'kv_cache': updated_kv_cache}
 
 
-def _export_stablehlo_mlir(model, args):
-  ep = torch.export.export(model, args)
-  return lowertools.exported_program_to_mlir_text(ep)
-
-
 def get_model_config() -> cfg.ModelConfig:
   attn_config = cfg.AttentionConfig(
       num_heads=32,
       head_dim=4,
       num_query_groups=4,
+      rotary_base=10000,
       rotary_percentage=1.0,
   )
   ff_config = cfg.FeedForwardConfig(
@@ -133,51 +125,3 @@ def get_sample_decode_inputs() -> Tuple[torch.Tensor, torch.Tensor]:
   tokens = torch.tensor([[1]], dtype=torch.int)
   input_pos = torch.tensor([10])
   return tokens, input_pos
-
-
-def define_and_run() -> None:
-  dump_mlir = False
-
-  config = get_model_config()
-  model = ToyModelWithExternalKV(config)
-  model.eval()
-  print('running an inference')
-  kv = kv_utils.KVCache.from_model_config(config)
-
-  tokens, input_pos = get_sample_prefill_inputs()
-  decode_token, decode_input_pos = get_sample_decode_inputs()
-  print(model.forward(tokens, input_pos, kv))
-
-  if dump_mlir:
-    mlir_text = _export_stablehlo_mlir(model, (tokens, input_pos, kv))
-    with open('/tmp/toy_model_with_external_kv.stablehlo.mlir', 'w') as f:
-      f.write(mlir_text)
-
-  # Convert model to tflite with 2 signatures (prefill + decode).
-  print('converting toy model to tflite with 2 signatures (prefill + decode)')
-  edge_model = (
-      ai_edge_torch.signature(
-          'prefill',
-          model,
-          sample_kwargs={
-              'tokens': tokens,
-              'input_pos': input_pos,
-              'kv_cache': kv,
-          },
-      )
-      .signature(
-          'decode',
-          model,
-          sample_kwargs={
-              'tokens': decode_token,
-              'input_pos': decode_input_pos,
-              'kv_cache': kv,
-          },
-      )
-      .convert()
-  )
-  edge_model.export('/tmp/toy_external_kv_cache.tflite')
-
-
-if __name__ == '__main__':
-  define_and_run()
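
With `define_and_run()` and `_export_stablehlo_mlir()` removed from both toy-model modules, the conversion flow now lives entirely in the new convert_toy_model.py shown earlier. A sketch of invoking the relocated KV-cache conversion directly; the functions take and ignore a single positional argument, matching the absl `app.run` convention used in that file:

```python
from ai_edge_torch.generative.examples.test_models import convert_toy_model

# Equivalent to the removed define_and_run(), now driven from the new module.
convert_toy_model.convert_toy_model_with_kv_cache(None)
```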

ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py
@@ -67,15 +67,10 @@ class TinyLlama(nn.Module):
     self.rope_cache = attn_utils.build_rope_cache(
         size=config.kv_cache_max,
         dim=int(attn_config.rotary_percentage * attn_config.head_dim),
-        base=
-        condense_ratio=1,
-        dtype=torch.float32,
-        device=torch.device("cpu"),
+        base=attn_config.rotary_base,
     )
     self.mask_cache = attn_utils.build_causal_mask_cache(
         size=config.kv_cache_max,
-        dtype=torch.float32,
-        device=torch.device("cpu"),
     )
     self.config = config
 
@@ -132,6 +127,7 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       num_heads=32,
       head_dim=64,
       num_query_groups=4,
+      rotary_base=10000,
      rotary_percentage=1.0,
   )
   ff_config = cfg.FeedForwardConfig(

ai_edge_torch/generative/examples/tiny_llama/verify.py
@@ -15,45 +15,55 @@
 
 """Verifies the reauthored TinyLlama-1.1B model."""
 
+import logging
 import pathlib
 
 from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.tiny_llama import tiny_llama
+from ai_edge_torch.generative.utilities import transformers_verifier
 from ai_edge_torch.generative.utilities import verifier
 import transformers
 
+
 _PROMPTS = flags.DEFINE_multi_string(
     "prompts",
     "Show me the program to add 2 and 3.",
     "The input prompts to generate answers.",
 )
+_MAX_NEW_TOKENS = flags.DEFINE_integer(
+    "max_new_tokens",
+    30,
+    "The maximum size of the generated tokens.",
+)
 
 
 def main(_):
   checkpoint = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
-
-
-
-          checkpoint, trust_remote_code=True
-      ),
+  logging.info("Loading the original model from: %s", checkpoint)
+  original_model = transformers.AutoModelForCausalLM.from_pretrained(
+      checkpoint, trust_remote_code=True
   )
+
   # Locate the cached dir.
   cached_config_file = transformers.utils.cached_file(
       checkpoint, transformers.utils.CONFIG_NAME
   )
   reauthored_checkpoint = pathlib.Path(cached_config_file).parent
-
+  logging.info("Building the reauthored model from: %s", reauthored_checkpoint)
   reauthored_model = tiny_llama.build_model(reauthored_checkpoint)
 
-
+  logging.info("Loading the tokenizer from: %s", checkpoint)
   tokenizer = transformers.AutoTokenizer.from_pretrained(checkpoint)
 
   verifier.verify_reauthored_model(
-      original_model=
-
-
+      original_model=transformers_verifier.TransformersModelWrapper(
+          original_model
+      ),
+      reauthored_model=verifier.ReauthoredModelWrapper(reauthored_model),
+      tokenizer=verifier.TokenizerWrapper(tokenizer),
       generate_prompts=_PROMPTS.value,
+      max_new_tokens=_MAX_NEW_TOKENS.value,
       atol=1e-04,
   )
 
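
Both verification scripts (SmolLM and TinyLlama) now take a `--max_new_tokens` flag alongside `--prompts`. A sketch of launching the TinyLlama verifier from Python with these flags overridden; the module path comes from the file list above and the flag names from the diff, while the argv-based override is an assumption about how one might drive the absl entry point:

```python
import sys

from absl import app
from ai_edge_torch.generative.examples.tiny_llama import verify

if __name__ == "__main__":
  # Override the flags defined in verify.py before absl parses sys.argv.
  sys.argv += [
      "--prompts=Show me the program to add 2 and 3.",
      "--max_new_tokens=16",
  ]
  app.run(verify.main)
```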